In [7]:
% pylab inline
import IPython.display as ipd
import os
import pandas as pd
import librosa.display
import glob
plt.figure(figsize=(12, 4))
# audio files download: https://drive.google.com/drive/folders/0By0bAi7hOBAFUHVXd1JCN3MwTEU
# text data (need to register): https://datahack.analyticsvidhya.com/contest/practice-problem-urban-sound-classification/
Out[7]:
In [8]:
# Render an inline audio player widget for one sample training clip.
ipd.Audio('train/Train/410.wav')
Out[8]:
In [9]:
# Load one clip; librosa.load resamples to 22,050 Hz mono by default.
data, sampling_rate = librosa.load('train/Train/410.wav')
# Use print() calls (valid on both Python 2 and 3) instead of the original
# Python 2-only `print data` statements.
print(data)
print(sampling_rate)
In [10]:
# Plot the raw waveform (amplitude over time) of the loaded clip.
# NOTE(review): librosa.display.waveplot was renamed to waveshow in librosa
# 0.10 — confirm the installed librosa version still provides waveplot.
librosa.display.waveplot(data, sr=sampling_rate)
Out[10]:
In [12]:
# Metadata tables: each row maps an audio clip ID to its class label.
train, test = (pd.read_csv(p) for p in ('train/train.csv', 'test/test.csv'))
In [14]:
i = random.choice(train.index)
audio_name = train.ID[i]
path = os.path.join('train/Train', str(audio_name) + '.wav')
print('Class: ', train.Class[i])
x, sr = librosa.load(path)
plt.figure(figsize=(12, 4))
librosa.display.waveplot(x, sr=sr)
Out[14]:
In [15]:
i = random.choice(train.index)
audio_name = train.ID[i]
path = os.path.join('train/Train', str(audio_name) + '.wav')
print('Class: ', train.Class[i])
x, sr = librosa.load(path)
plt.figure(figsize=(12, 4))
librosa.display.waveplot(x, sr=sr)
Out[15]:
In [16]:
i = random.choice(train.index)
audio_name = train.ID[i]
path = os.path.join('train/Train', str(audio_name) + '.wav')
print('Class: ', train.Class[i])
x, sr = librosa.load(path)
plt.figure(figsize=(12, 4))
librosa.display.waveplot(x, sr=sr)
Out[16]:
In [18]:
i = random.choice(train.index)
audio_name = train.ID[i]
path = os.path.join('train/Train', str(audio_name) + '.wav')
print('Class: ', train.Class[i])
x, sr = librosa.load(path)
plt.figure(figsize=(12, 4))
librosa.display.waveplot(x, sr=sr)
Out[18]:
In [34]:
# Extract one feature vector per clip and pair it with its class label.
def train_parser(row):
    """Load the clip for `row` and return pd.Series([mfcc_vector, class_label]).

    Returning a pd.Series (rather than a plain list) makes
    DataFrame.apply(axis=1) produce a two-column DataFrame, which the next
    cell renames to ['feature', 'label'].
    """
    file_name = os.path.join('train/Train', str(row.ID) + '.wav')
    # Handle exceptions so one corrupted/missing file doesn't abort the pass.
    try:
        # kaiser_fast is a faster resampling method than librosa's default.
        X, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        # Mean over time of 40 MFCCs -> one fixed-length 40-dim vector per clip.
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    except Exception as e:
        # BUG FIX: the original printed the undefined name `file` (a NameError
        # under Python 3) and returned a bare (None, None) tuple, which was
        # inconsistent with the success-path return shape.
        print("Error encountered while parsing file: ", file_name)
        return pd.Series([None, None])
    return pd.Series([mfccs, row.Class])
In [35]:
# This part takes longer time
# NOTE(review): if train_parser returns a plain list, apply(axis=1) yields a
# Series of lists and the .columns assignment below fails on modern pandas;
# it works when the parser returns a pd.Series. Confirm the pandas version.
temp = train.apply(train_parser, axis=1)
temp.columns = ['feature', 'label']
In [26]:
import numpy as np
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical

seed = 410
# BUG FIX: the original built a RandomState that was never used anywhere, so
# the seed had no effect. Seed the global numpy RNG instead so downstream
# stochastic steps drawing from it are reproducible.
np.random.seed(seed)

# Stack the per-clip MFCC vectors into a (num_clips, 40) matrix.
X = np.array(temp.feature.tolist())
y = np.array(temp.label.tolist())

# Map string class names to integers, then one-hot encode for the softmax head.
lb = LabelEncoder()
y = to_categorical(lb.fit_transform(y))
In [28]:
# Check input dimension
# Expect (num_training_clips, 40): one 40-dim mean-MFCC vector per clip.
X.shape
Out[28]:
In [29]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.optimizers import Adam
from sklearn import metrics

num_labels = y.shape[1]
# (removed the unused `filter_size` variable — this is a dense MLP, not a conv net)

# Simple MLP classifier over the 40-dim mean-MFCC vectors.
model = Sequential()
# Only the first layer needs input_shape; (40,) means input arrays of shape (*, 40).
# 256 here means the layer outputs arrays of shape (*, 256).
model.add(Dense(256, input_shape=(40,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# Final layer: one unit per class; softmax yields a probability distribution.
model.add(Dense(num_labels))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
In [30]:
# 5 epochs, batch size 128. Per Keras docs, validation_split holds out the
# LAST 30% of rows (not a random sample) — shuffle beforehand if rows are ordered.
model.fit(X, y, batch_size=128, epochs=5, validation_split = 0.3)
Out[30]:
In [37]:
# evaluation
def test_parser(row):
# function to load files and extract features
file_name = os.path.join('test/Test', str(row.ID) + '.wav')
# handle exception to check if there isn't a file which is corrupted
try:
# kaiser_fast is a technique used for faster extraction
X, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
# Extract mfcc feature from data
mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
except Exception as e:
print("Error encountered while parsing file: ", file)
return None, None
feature = mfccs
label = row.Class
return [feature, label]
In [ ]:
# NOTE(review): if test_parser returns a plain list, apply(axis=1) yields a
# Series of lists and the .columns assignment below fails on modern pandas;
# it works when the parser returns a pd.Series. Confirm the pandas version.
test_temp = test.apply(test_parser, axis=1)
test_temp.columns = ['feature', 'label']
In [ ]:
test_X = np.array(test_temp.feature.tolist())
test_y = np.array(test_temp.label.tolist())
# BUG FIX: re-fitting a fresh LabelEncoder on the test labels could assign
# different integer codes than the training encoder did; reuse the already
# fitted `lb` so train/test class indices agree.
test_y = to_categorical(lb.transform(test_y))
# NOTE(review): predict_classes was removed in recent Keras/TF versions;
# the replacement is model.predict(test_X).argmax(axis=1).
pred = model.predict_classes(test_X)
In [ ]:
from sklearn.metrics import confusion_matrix, roc_auc_score

# BUG FIX: test_y is one-hot (n_samples, n_classes) while pred holds class
# indices — confusion_matrix needs both as indices, so collapse test_y first.
true_labels = test_y.argmax(axis=1)
cm = confusion_matrix(true_labels, pred)

# BUG FIX: this is a multi-class problem; the original read a single 2x2
# corner of the confusion matrix (describing only classes 0 and 1) and used
# Python 2 integer division, which truncates every ratio to 0 or 1.
# Derive per-class TP/FP/FN/TN vectors from the full matrix instead.
TP = np.diag(cm).astype(float)
FP = cm.sum(axis=0) - TP
FN = cm.sum(axis=1) - TP
TN = cm.sum() - (TP + FP + FN)

accuracy = TP.sum() / cm.sum()
# Macro-averaged over classes; a class that is never predicted yields nan here.
precision = np.mean(TP / (TP + FP))
specificity = np.mean(TN / (TN + FP))
recall = np.mean(TP / (TP + FN))
# roc_auc_score expects per-class scores; one-hot the hard predictions as a
# rough stand-in (prefer model.predict probabilities when available).
auc_score = roc_auc_score(test_y, np.eye(test_y.shape[1])[pred])
print("accuracy:", accuracy)
print("precision:", precision)
print("specificity:", specificity)
print("recall:", recall)