MusicLearning


In [ ]:
import os
import sys
import glob

from configparser import ConfigParser
import numpy as np
%pylab inline

from log import initLog, writeLog, logAndCall
from timer import timer_start, timer_stop
import file_actions
import extract_features

In [ ]:
# Load the project configuration (config.ini, expected in the working directory).
cfile = os.path.join(os.getcwd(), "config.ini")
config = ConfigParser()
config.read(cfile)  # silently yields an empty config if the file is missing

In [ ]:
# Initialise the logging subsystem from the loaded config and record a restart.
initLog(config)
writeLog("info", "Program restarted")

# Make the working directory importable and log the interpreter version.
sys.path.append(os.path.abspath(os.getcwd()))
python_version = sys.version_info.major  # not read again in this notebook
writeLog("debug", "Python version: {}".format(sys.version))

Getting the data, extracting features


In [ ]:
# Batch-convert mp3 samples to wav (per the helper's name; the actual
# source/target folders are defined inside file_actions — TODO confirm).
file_actions.folder_mp3_to_wav()

In [ ]:
# Each sub-folder of ../data/samples is one class; its title-cased name
# becomes the human-readable label for that class index.
folders = os.listdir("../data/samples")
labels = list(map(str.title, folders))
writeLog("info", "Folders in ../data/samples: {}".format(folders))

In [ ]:
def recover_saved_data(data_dir="../tmp"):
    """Recover the features/labels previously saved by create_data_from_files().

    Parameters
    ----------
    data_dir : str
        Folder holding X.csv, Y.csv, flabels.txt and trackNames.txt
        (default "../tmp", matching where create_data_from_files writes them).

    Returns
    -------
    (X, Y, flabels, trackNames): feature matrix, int label vector,
    feature-name list, track-name list.

    Raises
    ------
    OSError: if any of the four files is missing or unreadable.
    """
    X = np.loadtxt(os.path.join(data_dir, "X.csv"))
    Y = np.loadtxt(os.path.join(data_dir, "Y.csv")).astype("int")
    with open(os.path.join(data_dir, "flabels.txt"), "r") as f:
        flabels = [l.strip() for l in f.readlines()]
    with open(os.path.join(data_dir, "trackNames.txt"), "r") as f:
        trackNames = [l.strip() for l in f.readlines()]
    return X, Y, flabels, trackNames

In [ ]:
def file_name_to_track_name(fn):
    """Turn a sample file path into a clean track name.

    Strips the directory and the extension, then removes any leading
    track-number prefix (digits followed by spaces, dashes or dots).
    """
    base = os.path.basename(fn)
    stem = os.path.splitext(base)[0]
    return stem.lstrip('0123456789').lstrip(' -.')

def create_data_from_files():
    """Extract features and labels from every .wav sample and cache them.

    Iterates over the module-level `folders` (one folder per class), extracts
    a feature vector per track, then saves X.csv / Y.csv / flabels.txt /
    trackNames.txt under ../tmp so later runs can use recover_saved_data().
    Returns (X, Y, flabels, trackNames).
    """
    feature_rows = []
    label_rows = []
    trackNames = []
    # Extract all the features, class by class
    for label_index, folder in enumerate(folders):
        for sample in glob.glob("../data/samples/{}/*.wav".format(folder)):
            # es = file_actions.extract_sound_light(sample, ratio=0.5, duration=10) # test light
            es = file_actions.extract_sound(sample)
            mono = file_actions.convert_to_mono(es[0])[0]
            d = {"label": label_index, "sound": mono, "params": es[1], "file": sample}
            # Window/step sizes derived from the sound parameters
            # (assumes es[1][2] is the frame rate and es[1][3] the frame count — TODO confirm).
            wins = int(0.050 * es[1][2])
            steps = int(es[1][3]/120 - wins)
            feature_rows.append(extract_features.extract_all_features(d, wins=wins, steps=steps))
            #feature_rows.append(extract_features.extract_all_features0(d))  # TEMP
            print(feature_rows[-1].shape)
            label_rows.append(d["label"])
            trackNames.append(file_name_to_track_name(d["file"]))
    # Stack everything into arrays
    X = np.array(feature_rows)
    Y = np.array(label_rows)
    flabels = extract_features.features_labels()
    #flabels = extract_features.features_labels0()  # TEMP
    # Cache the data under ../tmp for recover_saved_data(), then return it
    np.savetxt("../tmp/X.csv", X)
    np.savetxt("../tmp/Y.csv", Y)
    with open("../tmp/flabels.txt", "w") as f:
        f.write("\n".join(flabels))
    with open("../tmp/trackNames.txt", "w") as f:
        f.write("\n".join(trackNames))
    writeLog("info", "File extraction finished")
    return X, Y, flabels, trackNames

In [ ]:
# Set to True to reuse the data cached under ../tmp instead of re-extracting.
load_saved = False

if not load_saved:
    X, Y, flabels, trackNames = create_data_from_files()
else:
    try:
        X, Y, flabels, trackNames = recover_saved_data()
    except Exception:
        # Fall back to a full extraction if the cache is missing or unreadable.
        writeLog("warn", "Could not load the data, will extract from files.")
        X, Y, flabels, trackNames = create_data_from_files()

print(X.shape)
# print(X[:, :4])
# print(Y)
# print(flabels)

Prepare the data


In [ ]:
# Shuffle all the samples in unison.
# FIX: the original called `random.shuffle`, relying on `random` being leaked
# into the namespace by `%pylab inline`; use the explicitly imported numpy
# instead. TODO(review): consider seeding for reproducible splits.
perm = np.random.permutation(len(Y))
X = np.asarray(X)[perm]
Y = np.asarray(Y)[perm]
trackNames = np.asarray(trackNames)[perm]

In [ ]:
# Build scaled / normalized variants of the feature matrix.
from sklearn import preprocessing

X_s = preprocessing.scale(X)         # zero mean, unit variance per feature
X_n = preprocessing.normalize(X)     # unit norm per sample
X_sn = preprocessing.normalize(X_s)  # scaled, then normalized

In [ ]:
tr_ratio = 0.75
sep_ind = int(tr_ratio*len(Y))

# Training samples
X_tr = X[:sep_ind, :]
X_tr_s = X_s[:sep_ind, :]
X_tr_n = X_n[:sep_ind, :]
X_tr_sn = X_sn[:sep_ind, :]
Y_tr = Y[:sep_ind]
trackNames_tr = trackNames[:sep_ind]

# Test samples
X_te = X[sep_ind:, :]
X_te_s = X_s[sep_ind:, :]
X_te_n = X_n[sep_ind:, :]
X_te_sn = X_sn[sep_ind:, :]
Y_te = Y[sep_ind:]
trackNames_te = trackNames[sep_ind:]

In [ ]:
def plot_feature_per_label():
    """Plot every feature's values per class: raw (left) vs scaled (right).

    One subplot row per feature; within a subplot, one point series per
    label. Reads the module-level X, X_sn, Y, labels and flabels
    (and `plt`, which comes from %pylab).
    """
    n_features = X.shape[1]
    fig = plt.figure(figsize=(12, 2 * n_features))  # (width, height)

    for idx in range(n_features):
        for number, Xi, legend in ((1, X, '(not scaled)'), (2, X_sn, '(scaled)')):
            fig.add_subplot(n_features, 2, 2 * idx + number)

            for i in range(len(labels)):
                indexes = [ind for ind in range(len(Y)) if Y[ind] == i]
                xdata = range(len(indexes))
                ydata = [Xi[ind, idx] for ind in indexes]

                plt.plot(xdata, ydata, 'o')
            plt.title("{} {}".format(flabels[idx], legend), fontsize=16)

    plt.tight_layout()  # improve spacing between subplots

In [ ]:
# Visualise every feature split by class (slow when there are many features).
plot_feature_per_label()

In [ ]:
# NOTE(review): this cell repeats the call in the previous cell — likely a
# leftover re-run; consider deleting one of the two.
plot_feature_per_label()

Training


In [ ]:
def train_and_fit(classifier, X_tr, Y_tr, X_te):
    """Fit `classifier` on (X_tr, Y_tr) and return integer predictions for X_te."""
    classifier.fit(X_tr, Y_tr)
    predictions = classifier.predict(X_te)
    return predictions.astype('int')

In [ ]:
# Set up a stratified 6-fold cross-validation (the old comment said 3-fold,
# but the code builds 6 shuffled folds).
from sklearn import model_selection
folds = model_selection.StratifiedKFold(6, shuffle=True)

In [ ]:
def cross_validate(classifier, design_matrix, labels, cv_folds):
    """Run a cross-validation and return one integer prediction per sample.

    For every (train, test) split produced by cv_folds, the classifier is
    refitted on the training rows and its predictions are written into the
    held-out rows' slots, so each sample ends up predicted exactly once.
    """
    pred = np.zeros(labels.shape)
    for train_idx, test_idx in cv_folds.split(design_matrix, labels):
        # Fit on this fold's training rows...
        classifier.fit(design_matrix[train_idx, :], labels[train_idx])
        # ...and record predictions for its held-out rows.
        pred[test_idx] = classifier.predict(design_matrix[test_idx, :])
    return pred.astype('int')

Results visualization

Plotting confusion matrix


In [ ]:
from sklearn.metrics import confusion_matrix
import itertools

def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix'):
    """
    This function prints and plots the confusion matrix.

    cm        -- square array from sklearn.metrics.confusion_matrix
    classes   -- tick labels, in the same order as the matrix rows
    normalize -- if True, show each row as fractions of its true-label total
    title     -- figure title
    """
    # BUGFIX: normalize BEFORE drawing. The original normalized after
    # plt.imshow, so with normalize=True the heatmap colours showed raw
    # counts while the cell texts showed fractions.
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # print(cm)

    # Cells darker than half the max get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

Results analysis


In [ ]:
def analyse_results(Ypred, Y, trackNames=trackNames, details=1):
    """
    Analyse the results, several levels of details:
    0: Print the score
    1: Print the score and confusion matrix
    2: Print the score, confusion matrix, and prediction for each track

    NOTE(review): the trackNames default is bound to the module-level
    variable at definition time; re-run this cell after re-shuffling.
    """
    # print("Labels:    ", Y)
    # print("Prediction:", Ypred)

    # Accuracy: number of samples whose prediction matches the label.
    score = sum([1 if Ypred[i] == yi else 0 for i, yi in enumerate(Y)])
    ratio = score / len(Y)
    # BUGFIX: was "{:03f}" — the 0 is a zero-pad flag and the precision
    # defaulted to 6 decimals; the intent was three decimal places.
    writeLog("info", "Score: {:.3f}  ({}/{})".format(ratio, score, len(Y)))

    if details >= 1:
        cnf_matrix = confusion_matrix(Y, Ypred)
        np.set_printoptions(precision=2)

        # Plot non-normalized confusion matrix
        plt.figure(figsize=(8, 6))
        plot_confusion_matrix(cnf_matrix, classes=labels, title='Confusion matrix')
        plt.show()

    if details >= 2:
        # Per-track listing; misclassified tracks are highlighted in red (ANSI).
        for i, track in enumerate(trackNames):
            if Ypred[i] != Y[i]:
                res = "{} -> {} ({})".format(track, labels[Ypred[i]], labels[Y[i]])
                res = "\033[91m{}\033[0m".format(res)
            else:
                res = "{} -> {}".format(track, labels[Y[i]])
            print(res)

Predictions


In [ ]:
from sklearn import linear_model

clf_lr_s = linear_model.LogisticRegression(C=1e6) # high C means no regularization

# Hold-out alternative kept for reference:
# Ypred_lr_s = train_and_fit(clf_lr_s, X_tr_s, Y_tr, X_te_s)
# analyse_results(Ypred_lr_s, Y_te, trackNames_te, details=2)

# Cross-validated predictions on the scaled+normalized feature matrix.
Ypred_lr_s = cross_validate(clf_lr_s, X_sn, Y, folds)
analyse_results(Ypred_lr_s, Y, trackNames, details=1)

In [ ]:
from sklearn import neighbors

clf_k = neighbors.KNeighborsClassifier(n_neighbors=5, weights='distance')

# Ypred_k_s = train_and_fit(clf_k_s, X_tr_s, Y_tr, X_te_s)
# analyse_results(Ypred_k_s, Y_te, trackNames_te, details=2)

Ypred_k = cross_validate(clf_k, X_sn[:, 5:18], Y, folds)
analyse_results(Ypred_k, Y, trackNames, details=2)

In [ ]:


In [ ]:


In [ ]:
# Scratch cell: manually inspect the two feature extractors on a single file.
# NOTE(review): these imports duplicate the notebook's top import cell;
# harmless but redundant on a full run.
import file_actions
import extract_features
import numpy as np

s = "../data/music.wav"
es = file_actions.extract_sound(s)
esm = file_actions.convert_to_mono(es[0])[0]
d = {"label": 0, "sound": esm, "params": es[1], "file": s}
# Window/step derived from the sound params (assumes es[1][2] is the frame
# rate and es[1][3] the frame count — TODO confirm against file_actions).
wins = int(0.050 * es[1][2])
# NOTE(review): divisor is 180 here but 120 in create_data_from_files —
# confirm which one is intended.
steps = int(es[1][3]/180 - wins)

F0 = extract_features.extract_all_features0(d)
print("F0:")
print(F0.shape)
print(F0)

F = extract_features.extract_all_features(d, wins=wins, steps=steps)
print("F:")
print(F.shape)
print(F)

# Sanity check: two identical feature vectors should stack into a 2-D array.
G = []
G.append(F)
G.append(F)
G = np.array(G)

print("G.shape:", G.shape)
print("G.dtype:", G.dtype)
print("G:")
print(G)

# print(F.shape)
# print(F)

In [ ]: