In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as ms
ms.use('seaborn-muted')
%matplotlib inline
import librosa
import librosa.display
import IPython.display
import os
import glob
import sys
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.externals import joblib
import keras
# Root directory holding one sub-directory of audio clips per genre.
GENRE_DIR = "/home/minato/deep_learning/buildMLSystem/data/songData/genres/"

# Genre names; a genre's position in this list is its integer class label.
GENRE_LIST = [
    "blues",      # 0
    "classical",  # 1
    "country",    # 2
    "disco",      # 3
    "hiphop",     # 4
    "jazz",       # 5
    "metal",      # 6
    "pop",        # 7
    "reggae",     # 8
    "rock",       # 9
]
In [2]:
def create_data_for_conv_lstm(genre_list=GENRE_LIST):
    """Run the MFCC extraction on every ``.wav`` file of the given genres.

    The working directory is switched to ``GENRE_DIR`` (and left there), so
    each genre name is resolved as a sub-directory relative to it and the
    paths handed to ``create_mfcc_for_conv_lstm`` are relative as well.

    Args:
        genre_list: Iterable of genre sub-directory names to process.
    """
    os.chdir(GENRE_DIR)
    wav_patterns = (os.path.join(genre_name, "*.wav") for genre_name in genre_list)
    for pattern in wav_patterns:
        for wav_path in glob.glob(pattern):
            create_mfcc_for_conv_lstm(wav_path)
In [3]:
def _fit_frame_count(mfcc, n_frames):
    """Zero-pad or truncate ``mfcc`` along axis 1 to exactly ``n_frames`` columns."""
    current = mfcc.shape[1]
    if current < n_frames:
        # Pad only at the end of axis 1. np.pad keeps the row count and dtype
        # of ``mfcc``, so the padding always matches the matrix it extends.
        return np.pad(mfcc, ((0, 0), (0, n_frames - current)), mode="constant")
    return mfcc[:, :n_frames]


def create_mfcc_for_conv_lstm(fn):
    """Compute windowed MFCC features for one audio file and save them.

    The file is loaded with ``librosa.load`` and turned into an MFCC matrix
    with 128 coefficients. The matrix is zero-padded or truncated along its
    second axis to exactly 1290 columns, transposed, and cut into 10 windows
    of 128 rows each. The windows are wrapped in one extra leading axis and
    written with ``np.save`` to ``<fn without extension>.mfcc_10frame.npy``.

    Args:
        fn: Path of the audio file to process.

    Returns:
        None. The result is written to disk and the output name is printed.
    """
    n_mfcc = 128
    total_frames = 1290
    n_windows = 10
    window_len = 128
    # Windows start every 129 rows (1290 / 10) but are only 128 rows long,
    # so the last row of each 129-row span is intentionally left out.
    window_stride = 129

    y, sr = librosa.load(fn)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfcc = _fit_frame_count(mfcc, total_frames)

    frames = mfcc.T
    windows = [
        frames[start:start + window_len, :]
        for start in range(0, n_windows * window_stride, window_stride)
    ]
    # Extra leading axis of length 1 around the stack of windows.
    dim1_10frame_data = np.array([windows])

    base_fn, ext = os.path.splitext(fn)
    data_fn = base_fn + ".mfcc_10frame"
    np.save(data_fn, dim1_10frame_data)  # np.save appends ".npy"
    print("Written", data_fn)
In [ ]:
def create_mfcc_all_data():
    """Collect every per-clip MFCC file into one feature and one label array.

    For each genre in ``GENRE_LIST`` all ``*.mfcc_10frame.npy`` files in the
    genre's sub-directory of ``GENRE_DIR`` are loaded. The label of a sample
    is the index of its genre in ``GENRE_LIST``. Both arrays are saved with
    ``np.save`` directly inside ``GENRE_DIR`` as ``x_mfcc_10frame_all_data``
    and ``y_mfcc_10frame_all_data``.

    Returns:
        None. Shapes and output paths are printed.
    """
    genre_list = GENRE_LIST
    base_dir = GENRE_DIR
    X = []
    y = []
    for label, genre in enumerate(genre_list):
        mfcc_files = os.path.join(base_dir, genre, "*.mfcc_10frame.npy")
        # glob.glob returns matches in arbitrary order; sorting makes the
        # sample order of the saved arrays reproducible across runs.
        for fn in sorted(glob.glob(mfcc_files)):
            X.append(np.load(fn))
            y.append(label)
    print("loaded all data")

    all_x_data = np.array(X)
    all_y_data = np.array(y)
    print("all data x shape is")
    print(all_x_data.shape)
    print("all data y shape is")
    print(all_y_data.shape)

    # Derived from the same root as the inputs instead of repeating the path.
    x_data_path = os.path.join(base_dir, "x_mfcc_10frame_all_data")
    y_data_path = os.path.join(base_dir, "y_mfcc_10frame_all_data")
    np.save(x_data_path, all_x_data)
    np.save(y_data_path, all_y_data)
    print("Written", x_data_path)
    print("Written", y_data_path)
In [ ]:
def createStdScaler():
    """Fit a ``StandardScaler`` on the aggregated MFCC data and persist it.

    Loads ``x_mfcc_10frame_all_data.npy`` from ``GENRE_DIR``, flattens every
    sample to one row, fits the scaler on the resulting 2-D array, and dumps
    it to ``./savedStanderdScaler/mfcc_10frame_ss.pkl`` (relative to the
    current working directory).

    Returns:
        None. The fitted scaler is only written to disk.
    """
    file_path = os.path.join(GENRE_DIR, "x_mfcc_10frame_all_data.npy")
    all_x_data = np.load(file_path)

    n = all_x_data.shape[0]
    reshaped_data = all_x_data.reshape(n, -1)  # one flat feature row per sample

    ss = StandardScaler()
    ss.fit(reshaped_data)

    out_path = './savedStanderdScaler/mfcc_10frame_ss.pkl'
    # The dump fails if the target directory is missing, so create it first.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # NOTE(review): `joblib` comes from this file's `sklearn.externals` import,
    # which newer scikit-learn releases no longer ship. Confirm the installed
    # version still provides it.
    joblib.dump(ss, out_path)
In [ ]:
#create_data_for_conv_lstm()
In [ ]:
#create_mfcc_all_data()
In [ ]:
# Run the feature extraction on a single clip; the result is written next to
# the input file as "blues.00000.mfcc_10frame.npy".
create_mfcc_for_conv_lstm("/home/minato/deep_learning/buildMLSystem/data/songData/genres/blues/blues.00000.wav")
In [ ]: