In [1]:
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import style
import librosa
import IPython.display
import librosa.display
import os
import random
from matplotlib.pyplot import specgram
import glob
Tonnetz: networks or lattices of tones. The tonnetz tonal centroids capture the "central" tones and are useful for detecting harmonic change (and other tone-related variation) in musical audio.
Short-time Fourier transform (STFT): segments the signal into short frames and computes the Fourier transform of each frame.
Spectral contrast: the relative distribution of spectral energies, i.e. the difference between peaks and valleys in each frequency sub-band.
Chromagram: projects the spectrum onto the twelve pitch classes of the musical scale.
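As a rough sanity check, with librosa's defaults (and n_mfcc=40) these representations contribute 40 + 12 + 128 + 7 + 6 = 193 values per clip once averaged over time, which is the vector length assumed below. The sketch that follows illustrates this on a hypothetical local file sample.ogg:
In [ ]:
# Sanity check on per-frame feature sizes (assumes a hypothetical local file 'sample.ogg').
# With librosa defaults: 40 MFCCs + 12 chroma bins + 128 mel bands + 7 contrast bands + 6 tonnetz dims = 193.
y, sr = librosa.load('sample.ogg')
S = np.abs(librosa.stft(y))
print(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).shape[0])                    # 40
print(librosa.feature.chroma_stft(S=S, sr=sr).shape[0])                        # 12
print(librosa.feature.melspectrogram(y=y, sr=sr).shape[0])                     # 128
print(librosa.feature.spectral_contrast(S=S, sr=sr).shape[0])                  # 7
print(librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr).shape[0])  # 6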
In [2]:
def extract_feature(file_name):
    # Load the audio file and compute five feature sets, each averaged over time.
    X, sample_rate = librosa.load(file_name)
    X = np.nan_to_num(X)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz

def parse_audio_files(parent_dir, sub_dirs, classes, file_ext='*.ogg'):
    # Walk each class sub-directory, extract features per file, and stack them
    # into a (n_samples, 193) matrix plus an integer label per row.
    features, labels = np.empty((0, 193)), np.empty(0)
    for label, sub_dir in enumerate(sub_dirs):
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
            ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
            features = np.vstack([features, ext_features])
            labels = np.append(labels, classes.get(sub_dir))
    return np.array(features), np.array(labels, dtype=int)

def one_hot_encode(labels):
    # Convert integer class labels into a one-hot matrix.
    n_labels = len(labels)
    n_unique_labels = len(np.unique(labels))
    one_hot_encode = np.zeros((n_labels, n_unique_labels))
    one_hot_encode[np.arange(n_labels), labels] = 1
    return one_hot_encode
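As a small illustration of one_hot_encode (the labels here are made up), each integer label selects a column of the output matrix:
In [ ]:
# Illustrative only: three classes, four samples.
print(one_hot_encode(np.array([0, 2, 1, 2])))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]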
In [3]:
data_dir = '../data/esc-50'
sample_dir = os.path.join(data_dir, 'sample')
train_dir = os.path.join(data_dir, 'train')
test_dir = os.path.join(data_dir, 'test')
print('Sound Sample Classes')
print('--------------------')
for d in os.listdir(sample_dir):
    print(d)
In [4]:
samples_dict = dict()
for d in os.listdir(sample_dir):
sample_class_dir = os.path.join(sample_dir, d)
samples_dict[d] = [os.path.join(sample_class_dir, f) for f in os.listdir(sample_class_dir)]
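Since IPython.display and librosa.display are imported above but not otherwise used, here is an optional sketch for inspecting one of the clips; it assumes a 'rooster' sub-directory exists under sample/, as the class list above suggests:
In [ ]:
# Visualize and listen to one clip from the sample set (any samples_dict entry works).
y, sr = librosa.load(samples_dict['rooster'][0])
S = librosa.feature.melspectrogram(y=y, sr=sr)
librosa.display.specshow(librosa.power_to_db(S, ref=np.max), sr=sr, x_axis='time', y_axis='mel')
plt.title('Mel spectrogram of one rooster clip')
plt.colorbar(format='%+2.0f dB')
plt.show()
# Returned as the last expression so the audio player renders in the notebook.
IPython.display.Audio(data=y, rate=sr)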
In [6]:
mfccs, chroma, mel, contrast, tonnetz = extract_feature(samples_dict.get('insects')[0])
ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
print(len(ext_features))
In [7]:
features = np.empty((0, 193))
print(features.shape)
In [8]:
features = np.vstack([features, ext_features])
print(features)
In [9]:
sample_dir = os.path.join(data_dir, 'sample')
sub_dirs = ['laughing', 'coughing', 'insects', 'rooster']
classes = {'laughing': 0, 'coughing': 1, 'insects': 2, 'rooster': 3}
features, labels = parse_audio_files(sample_dir, sub_dirs, classes)
In [10]:
print(features.shape)
In [11]:
one_hot = one_hot_encode(labels)
In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features,
                                                    one_hot,
                                                    test_size=0.15,
                                                    random_state=42)
print(X_train.shape)
print(y_train.shape)
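Because there are only a handful of clips per class, it may be worth stratifying the split so every class appears in both sets; this is an optional variation, not part of the original run:
In [ ]:
# Optional: stratify on the integer labels so class proportions match in train/test.
X_train, X_test, y_train, y_test = train_test_split(features,
                                                    one_hot,
                                                    test_size=0.15,
                                                    random_state=42,
                                                    stratify=labels)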
In [56]:
n_hidden_units_one = 50
n_hidden_units_two = 50
n_classes = 4
n_dim = X_train.shape[1]
In [57]:
from keras.models import Sequential
from keras.layers import Dense
import numpy
In [58]:
model = Sequential()
model.add(Dense(n_hidden_units_one, input_dim=n_dim, kernel_initializer='uniform', activation='relu'))
model.add(Dense(n_hidden_units_two, kernel_initializer='uniform', activation='relu'))
model.add(Dense(n_classes, kernel_initializer='uniform', activation='softmax'))
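To double-check the layer shapes and parameter counts before training:
In [ ]:
model.summary()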
In [59]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
In [60]:
model.fit(X_train, y_train, epochs=10, batch_size=20)
Out[60]:
In [61]:
scores = model.evaluate(X_test, y_test)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
In [14]:
from sklearn.decomposition import PCA
from sklearn import preprocessing
import pandas as pd
features_df = pd.DataFrame(features)
data_scaled = pd.DataFrame(preprocessing.scale(features_df), columns=features_df.columns)
pca = PCA(n_components=5)
pca_results = pca.fit_transform(data_scaled)
print('Shape of the transformed feature vector:', pca_results.shape)
print('Original training sample:', list(features_df.loc[0].values))
print('Training sample after PCA:', list(pca_results[0]))
print('\n')
# Percentage of variance explained by each component
print('Explained variance ratio (first five components)')
print('------------------------------------------------')
for idx, r in enumerate(pca.explained_variance_ratio_):
    print('Principal Component', idx, ':', r)
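The cumulative sum gives a quick read on how much of the total variance the five retained components explain:
In [ ]:
# Cumulative explained variance of the five components kept above.
print(np.cumsum(pca.explained_variance_ratio_))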
In [18]:
from ggplot import *
df_pca = features_df.copy()
df_pca['x-pca'] = pca_results[:,0]
df_pca['y-pca'] = pca_results[:,1]
df_pca['label'] = labels
chart = ggplot(df_pca, aes(x='x-pca', y='y-pca', color='label')) \
    + geom_point(size=75, alpha=0.8) \
    + ggtitle("First and Second Principal Components colored by class label")
chart
Out[18]:
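The ggplot package used for these charts is no longer maintained; if it is unavailable, an equivalent scatter can be drawn with matplotlib (already imported as plt), and the same pattern applies to the t-SNE charts below:
In [ ]:
# Alternative to ggplot: the same PCA scatter with matplotlib.
plt.scatter(df_pca['x-pca'], df_pca['y-pca'], c=df_pca['label'], cmap='viridis', alpha=0.8)
plt.colorbar(label='class label')
plt.title('First and Second Principal Components colored by class label')
plt.show()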
In [22]:
from sklearn.manifold import TSNE
print("Computing t-SNE embedding")
tsne = TSNE(n_components=2, verbose=1, perplexity=50, n_iter=500)
tsne_results = tsne.fit_transform(features_df)
In [23]:
df_tsne = features_df.copy()
df_tsne['x-tsne'] = tsne_results[:,0]
df_tsne['y-tsne'] = tsne_results[:,1]
df_tsne['label'] = labels
chart = ggplot(df_tsne, aes(x='x-tsne', y='y-tsne', color='label')) \
    + geom_point(size=70, alpha=0.1) \
    + ggtitle("t-SNE dimensions colored by class label")
chart
Out[23]:
In [28]:
from sklearn.manifold import TSNE
print("Computing t-SNE embedding")
tsne = TSNE(n_components=2, verbose=1, perplexity=50, n_iter=1000)
tsne_results = tsne.fit_transform(pca_results)
In [29]:
df_tsne = features_df.copy()
df_tsne['x-tsne'] = tsne_results[:,0]
df_tsne['y-tsne'] = tsne_results[:,1]
df_tsne['label'] = labels
chart = ggplot(df_tsne, aes(x='x-tsne', y='y-tsne', color='label')) \
    + geom_point(size=70, alpha=0.1) \
    + ggtitle("t-SNE dimensions colored by class label")
chart
Out[29]: