In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as ms
ms.use('seaborn-muted')
%matplotlib inline

import librosa
import librosa.display

import IPython.display

import os
import glob
import sys

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.externals import joblib

import keras

# Root directory that holds one sub-directory of audio files per genre.
GENRE_DIR = "/home/minato/deep_learning/buildMLSystem/data/songData/genres/"

# Genre names; the position of a name in this list is its integer class label.
GENRE_LIST = [
    "blues",      # 0
    "classical",  # 1
    "country",    # 2
    "disco",      # 3
    "hiphop",     # 4
    "jazz",       # 5
    "metal",      # 6
    "pop",        # 7
    "reggae",     # 8
    "rock",       # 9
]


Using TensorFlow backend.

In [2]:
def create_data_for_conv_lstm(genre_list=GENRE_LIST):
    """Create the 10-frame MFCC feature file for every WAV file of each genre.

    For each genre name, all ``*.wav`` files in the matching sub-directory of
    ``GENRE_DIR`` are passed to ``create_mfcc_for_conv_lstm``.

    The search pattern is anchored at ``GENRE_DIR`` rather than switching the
    process working directory, so calling this function does not change how
    relative paths used elsewhere in the notebook are resolved.

    Parameters
    ----------
    genre_list : list of str, optional
        Names of the genre sub-directories to process.  Defaults to
        ``GENRE_LIST``.
    """
    for genre in genre_list:
        wav_pattern = os.path.join(GENRE_DIR, genre, "*.wav")
        # glob returns matches in arbitrary order; sort for reproducible runs.
        for wav_path in sorted(glob.glob(wav_pattern)):
            create_mfcc_for_conv_lstm(wav_path)

In [3]:
def create_mfcc_for_conv_lstm(fn):
    """Compute MFCCs for one audio file and save them as ten fixed-size frames.

    The audio is loaded with ``librosa.load`` and converted to an MFCC matrix
    with 128 coefficients per time step.  The time axis is zero-padded or
    truncated to exactly 1290 steps, the matrix is transposed to time-major
    order, and ten windows of 128 steps are cut from it.  Window starts are
    129 steps apart, so the single step between two consecutive windows is
    not included in any frame.

    The resulting array of shape ``(1, 10, 128, 128)`` is written with
    ``np.save`` to ``<fn without extension>.mfcc_10frame``; ``np.save``
    appends the ``.npy`` suffix itself.

    Parameters
    ----------
    fn : str
        Path of the audio file to process.

    Returns
    -------
    None
        The features are written to disk and the target name is printed.
    """
    n_mfcc = 128        # MFCC coefficients per time step
    total_steps = 1290  # fixed length of the time axis before framing
    n_frames = 10       # number of windows cut from the time axis
    frame_len = 128     # time steps kept in each window
    frame_stride = 129  # distance between window starts (total_steps / n_frames)

    y, sr = librosa.load(fn)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

    n_steps = mfcc.shape[1]
    if n_steps < total_steps:
        # Pad only the time axis. np.pad keeps the matrix's own row count, so
        # the shapes always agree regardless of the number of coefficients.
        mfcc = np.pad(mfcc, ((0, 0), (0, total_steps - n_steps)), mode="constant")
    elif n_steps > total_steps:
        mfcc = mfcc[:, :total_steps]

    mfcc = mfcc.T  # time-major: (time step, coefficient)
    frames = [
        mfcc[start:start + frame_len, :]
        for start in range(0, n_frames * frame_stride, frame_stride)
    ]

    # The extra leading axis of length 1 is part of the saved layout.
    dim1_10frame_data = np.array([frames])
    base_fn, _ = os.path.splitext(fn)
    data_fn = base_fn + ".mfcc_10frame"
    np.save(data_fn, dim1_10frame_data)
    print("Written", data_fn)


  File "<ipython-input-3-20d83db193b5>", line 21
    1dim = []
       ^
SyntaxError: invalid syntax

In [ ]:
def create_mfcc_all_data():
    """Combine all per-song MFCC feature files into one feature and one label array.

    For every genre in ``GENRE_LIST``, each ``*.mfcc_10frame.npy`` file in the
    genre's sub-directory of ``GENRE_DIR`` is loaded and stacked unchanged;
    the genre's index in ``GENRE_LIST`` is recorded as its label.  Files are
    read in sorted name order so the sample order is reproducible.

    The stacked arrays are saved with ``np.save`` directly under ``GENRE_DIR``
    as ``x_mfcc_10frame_all_data`` and ``y_mfcc_10frame_all_data`` (``np.save``
    appends ``.npy``).  Their shapes are printed before saving.

    Returns
    -------
    None
    """
    genre_list = GENRE_LIST
    base_dir = GENRE_DIR
    X = []
    y = []
    for label, genre in enumerate(genre_list):
        mfcc_files = os.path.join(base_dir, genre, "*.mfcc_10frame.npy")
        # glob order is arbitrary; sorting keeps X/y ordering stable across runs.
        for fn in sorted(glob.glob(mfcc_files)):
            X.append(np.load(fn))
            y.append(label)

    print("loaded all data")
    all_x_data = np.array(X)
    all_y_data = np.array(y)
    print("all data x shape is")
    print(all_x_data.shape)
    print("all data y shape is")
    print(all_y_data.shape)

    # Derive the output locations from base_dir so they cannot drift from GENRE_DIR.
    x_data_path = os.path.join(base_dir, 'x_mfcc_10frame_all_data')
    y_data_path = os.path.join(base_dir, 'y_mfcc_10frame_all_data')

    np.save(x_data_path, all_x_data)
    np.save(y_data_path, all_y_data)
    print("Written", x_data_path)
    print("Written", y_data_path)

In [ ]:
def createStdScaler():
    """Fit a ``StandardScaler`` on the combined MFCC dataset and persist it.

    Loads ``x_mfcc_10frame_all_data.npy`` from ``GENRE_DIR``, flattens every
    sample to a single row, fits a ``StandardScaler`` on the resulting 2-D
    matrix and dumps the fitted scaler with ``joblib`` to
    ``./savedStanderdScaler/mfcc_10frame_ss.pkl``.  The target path is
    relative to the current working directory; its directory is created when
    it does not exist yet.

    Returns
    -------
    None
    """
    # NOTE(review): `sklearn.externals.joblib` is deprecated in scikit-learn;
    # confirm the installed version still ships it, or import `joblib` directly.
    file_path = os.path.join(GENRE_DIR, "x_mfcc_10frame_all_data.npy")
    all_x_data = np.load(file_path)
    n = all_x_data.shape[0]
    # One row per sample, all remaining axes flattened into features.
    reshaped_data = all_x_data.reshape(n, -1)

    ss = StandardScaler()
    ss.fit(reshaped_data)

    scaler_path = './savedStanderdScaler/mfcc_10frame_ss.pkl'
    # joblib.dump does not create missing directories.
    os.makedirs(os.path.dirname(scaler_path), exist_ok=True)
    joblib.dump(ss, scaler_path)

In [ ]:
#create_data_for_conv_lstm()

In [ ]:
#create_mfcc_all_data()

In [ ]:
# Run the feature extraction on a single track; the path is built from
# GENRE_DIR so it follows the configured data location.
create_mfcc_for_conv_lstm(os.path.join(GENRE_DIR, "blues", "blues.00000.wav"))

In [ ]: