In [1]:
import pickle
import pumpp
import numpy as np
import librosa
import os
from glob import glob

import tensorflow as tf
import keras as K
import pescador
import pandas as pd

from sklearn.model_selection import GroupShuffleSplit


Using TensorFlow backend.

In [2]:
# Random seed reused by the patch sampler, the data muxes, and both group splitters
SEED = 20170401

In [3]:
# Directory holding the per-track feature archives (<track>.npz) read by data_generator.
# NOTE(review): absolute, user-specific path — adjust before running elsewhere.
DATA_DIR = '/home/bmcfee/working/chords/pump'

In [4]:
# Reload the pump (the pickled object that provides the fields, layers and sampler used below).
# NOTE(review): pickle.load can execute arbitrary code from the file; only load a
# pump.pkl that you created yourself or otherwise trust.
with open('/home/bmcfee/working/chords/pump.pkl', 'rb') as fd:
    pump = pickle.load(fd)

In [5]:
# Calculate the number of frames per training patch and build the patch sampler

# First argument to pump.sampler (sample count; see the pumpp Sampler docs for exact semantics)
MAX_SAMPLES = 128

# Patch duration, in seconds
duration = 8.0

# Convert the duration to a frame count using the CQT op's sampling rate and hop length.
# np.atleast_1d keeps the [0] index valid whether time_to_frames returns a
# length-1 array or a scalar for a scalar input (this differs across librosa versions).
n_frames = np.atleast_1d(librosa.time_to_frames(duration,
                                                sr=pump['cqt'].sr,
                                                hop_length=pump['cqt'].hop_length))[0]

sampler = pump.sampler(MAX_SAMPLES, n_frames, random_state=SEED)

In [6]:
# Show the patch length in frames
n_frames


Out[6]:
86

In [7]:
def data_sampler(fname, sampler):
    """Stream samples drawn from a single feature archive.

    Parameters
    ----------
    fname : str
        Path handed to ``np.load``.
    sampler : callable
        Called with the loaded archive; must return an iterable of samples.

    Yields
    ------
    Every item produced by ``sampler(data)``, in order.
    """
    # The context manager closes the archive even when the consumer stops
    # early (generator closed / garbage-collected) or the sampler raises.
    # A trailing ``data.close()`` would only run after full exhaustion.
    with np.load(fname) as data:
        yield from sampler(data)

In [8]:
def data_sampler(fname, sampler):
    """Stream samples from one feature archive, loading it fully into memory first.

    Every array in the archive is copied into a plain ``dict`` and the file is
    closed before the first sample is produced, so no file handle stays open
    while the stream is being consumed.

    Parameters
    ----------
    fname : str
        Path handed to ``np.load``.
    sampler : callable
        Called with the in-memory ``dict`` of arrays; must return an iterable
        of samples.

    Yields
    ------
    Every item produced by ``sampler(data)``, in order.
    """
    # ``with`` guarantees the archive is closed even if reading an array fails
    with np.load(fname) as archive:
        data = dict(archive)

    yield from sampler(data)

In [9]:
def data_generator(tracks, sampler, k, batch_size=16, **kwargs):
    """Build a multiplexed stream over the feature archives of ``tracks``.

    Parameters
    ----------
    tracks : iterable of str
        Track identifiers; each maps to ``<DATA_DIR>/<track>.npz``.
    sampler : callable
        Forwarded to ``data_sampler`` for every track.
    k : int
        Second positional argument of ``pescador.Mux``.
    batch_size : int
        When 1, the mux is returned as is; otherwise it is wrapped in a
        ``pescador.BufferedStreamer`` of this size.
    **kwargs
        Forwarded unchanged to ``pescador.Mux``.

    Returns
    -------
    pescador.Mux or pescador.BufferedStreamer
    """
    # One streamer per track archive
    streamers = [
        pescador.Streamer(data_sampler,
                          os.path.join(DATA_DIR, os.path.extsep.join([track, 'npz'])),
                          sampler)
        for track in tracks
    ]

    # Send it all to a mux
    mux = pescador.Mux(streamers, k, **kwargs)

    if batch_size != 1:
        return pescador.BufferedStreamer(mux, batch_size)
    return mux

In [10]:
def data_generator(tracks, sampler, k, batch_size=16, augmentation=False, **kwargs):
    """Build a multiplexed stream over track archives, optionally with augmentations.

    Parameters
    ----------
    tracks : iterable of str
        Track identifiers; each maps to ``<DATA_DIR>/<track>.npz``.
    sampler : callable
        Forwarded to ``data_sampler`` for every archive.
    k : int
        Second positional argument of ``pescador.Mux``.
    batch_size : int
        When 1, the mux is returned as is; otherwise it is wrapped in a
        ``pescador.BufferedStreamer`` of this size.
    augmentation : bool
        When true, every file matching ``<DATA_DIR>/<track>.*.npz`` is added
        (in sorted order) right after the track's own archive.
    **kwargs
        Forwarded unchanged to ``pescador.Mux``.

    Returns
    -------
    pescador.Mux or pescador.BufferedStreamer
    """
    streamers = []
    for track in tracks:
        # The track's own archive always comes first
        paths = [os.path.join(DATA_DIR, os.path.extsep.join([track, 'npz']))]

        if augmentation:
            pattern = os.path.join(DATA_DIR, '{}.*.npz'.format(track))
            paths.extend(sorted(glob(pattern)))

        streamers.extend(pescador.Streamer(data_sampler, path, sampler)
                         for path in paths)

    # Send it all to a mux
    mux = pescador.Mux(streamers, k, **kwargs)

    if batch_size != 1:
        return pescador.BufferedStreamer(mux, batch_size)
    return mux

In [11]:
def wrap(gen):
    """Re-pack each batch as ``(first_element, [remaining elements])``.

    Parameters
    ----------
    gen : iterable
        Yields indexable, sliceable batches (e.g. tuples).

    Yields
    ------
    tuple
        ``(batch[0], list(batch[1:]))`` for every batch, i.e. the first
        element on its own and all the others collected in a list.
    """
    for item in gen:
        head = item[0]
        rest = item[1:]
        yield head, list(rest)

Construct the model


In [12]:
# Inspect the fields the pump exposes (name, shape, dtype)
pump.fields


Out[12]:
{'chord_struct/bass': Tensor(shape=(None, 1), dtype=<class 'int'>),
 'chord_struct/pitch': Tensor(shape=(None, 12), dtype=<class 'bool'>),
 'chord_struct/root': Tensor(shape=(None, 1), dtype=<class 'int'>),
 'chord_tag/chord': Tensor(shape=(None, 1), dtype=<class 'int'>),
 'cqt/mag': Tensor(shape=(None, 216, 1), dtype=<class 'numpy.float32'>)}

In [13]:
# Size of the chord-tag vocabulary; the model's chord_tag output layer uses this as its width
len(pump['chord_tag'].vocabulary())


Out[13]:
170

In [14]:
# NOTE: later cells read model.get_weights() by position, so the order in which
# layers are created here matters.

# Input layer for the 'cqt/mag' field (pump.fields reports shape (None, 216, 1) per example)
x = pump.layers()['cqt/mag']

# Batch-normalize the input
b = K.layers.BatchNormalization()(x)

# Single-filter 5x5 convolution; 'same' padding keeps the input's spatial shape
c0 = K.layers.Convolution2D(1, (5, 5), padding='same', activation='relu',
                            data_format='channels_last')(b)

# 36 filters, each spanning the whole of c0's axis 2; with 'valid' padding
# this collapses axis 2 to length 1
c1 = K.layers.Convolution2D(36, (1, int(c0.shape[2])), padding='valid', activation='relu',
                            data_format='channels_last')(c0)

# Drop the now-singleton axis 2 so the recurrent layer receives a sequence of 36-dim vectors
r1 = K.layers.Lambda(lambda x: K.backend.squeeze(x, axis=2))(c1)

# Bidirectional GRU (64 units per direction) that emits an output at every step
rs = K.layers.Bidirectional(K.layers.GRU(64,
                                         return_sequences=True))(r1)

# 1: pitch class predictor — one sigmoid unit per column of 'chord_struct/pitch'
pc_p = K.layers.TimeDistributed(K.layers.Dense(pump.fields['chord_struct/pitch'].shape[1], activation='sigmoid'),
                       name='chord_pitch')(rs)

# 2: root predictor — 13-way softmax
# NOTE(review): 13 presumably = 12 pitch classes + one "no root" class — confirm against the chord_struct encoder
root_p = K.layers.TimeDistributed(K.layers.Dense(13, activation='softmax'),
                         name='chord_root')(rs)

# 3: bass predictor — 13-way softmax (same class-count assumption as the root predictor)
bass_p = K.layers.TimeDistributed(K.layers.Dense(13, activation='softmax'),
                         name='chord_bass')(rs)

# 4: merge layer — concatenate the recurrent features with the three structured predictions
codec = K.layers.concatenate([rs, pc_p, root_p, bass_p])


# Chord-tag classifier: softmax over the chord_tag vocabulary, with L2 regularization on the bias only
p0 = K.layers.Dense(len(pump['chord_tag'].vocabulary()), activation='softmax',
                    bias_regularizer=K.regularizers.l2())

# Apply the classifier independently at every time step
tag = K.layers.TimeDistributed(p0, name='chord_tag')(codec)


# Outputs are ordered tag, pitch, root, bass; the target list fed during training
# ('chord_tag/chord', 'chord_struct/pitch', 'chord_struct/root', 'chord_struct/bass') follows the same order
model = K.models.Model(x, [tag, pc_p, root_p, bass_p])

Run a train-test split and train the model


In [15]:
# Series read from artist_index.json, indexed by track id. Its values are used
# as group labels, so tracks sharing a value never end up in different subsets.
index = pd.read_json('/home/bmcfee/working/chords/artist_index.json', typ='series')

# Fields streamed to the network: the input first, then one target per model
# output in output order (tag, pitch, root, bass). wrap() splits them apart.
FIELDS = ('cqt/mag',
          'chord_tag/chord',
          'chord_struct/pitch',
          'chord_struct/root',
          'chord_struct/bass')

# Checkpoint file written by Keras' ModelCheckpoint (not a pickle, despite the extension)
CKPT_PATH = '/home/bmcfee/working/chords/model_direct_ckpt.pkl'

# Quantity watched by checkpointing, learning-rate reduction and early stopping
MONITOR = 'val_chord_tag_loss'

# Both splitters use n_splits=1, so take their single split directly
# instead of looping over it.
splitter_tt = GroupShuffleSplit(n_splits=1, random_state=SEED)
train_, test = next(splitter_tt.split(index, groups=list(index)))
idx_train_ = index.iloc[train_]
idx_test = index.iloc[test]

# Carve a validation subset (25% of the groups) out of the training portion
splitter_tv = GroupShuffleSplit(n_splits=1, test_size=0.25, random_state=SEED)
train, val = next(splitter_tv.split(idx_train_, groups=list(idx_train_)))
idx_train = idx_train_.iloc[train]
idx_val = idx_train_.iloc[val]

# Training stream: includes augmented archives; k=1024 and the remaining
# keyword arguments are forwarded to pescador.Mux
gen_train = data_generator(idx_train.index, sampler, 1024, augmentation=True,
                           lam=8, batch_size=32, revive=True, random_state=SEED)

# Validation stream: no augmentation, k equal to the number of validation tracks
gen_val = data_generator(idx_val.index, sampler, len(idx_val), batch_size=32,
                         revive=True, random_state=SEED)

# One loss per named output; accuracy is only tracked for the chord tag
model.compile(K.optimizers.Adam(),
              loss={'chord_tag': 'sparse_categorical_crossentropy',
                    'chord_pitch': 'binary_crossentropy',
                    'chord_root': 'sparse_categorical_crossentropy',
                    'chord_bass': 'sparse_categorical_crossentropy'},
              metrics={'chord_tag': 'sparse_categorical_accuracy'})

# The trailing semicolon keeps the returned History object out of the cell output
model.fit_generator(wrap(gen_train.tuples(*FIELDS)),
                    steps_per_epoch=512,
                    epochs=100,
                    validation_data=wrap(gen_val.tuples(*FIELDS)),
                    validation_steps=1024,
                    callbacks=[K.callbacks.ModelCheckpoint(CKPT_PATH,
                                                           save_best_only=True,
                                                           verbose=1,
                                                           monitor=MONITOR),
                               K.callbacks.ReduceLROnPlateau(monitor=MONITOR, patience=5, verbose=1),
                               K.callbacks.EarlyStopping(monitor=MONITOR, patience=15, verbose=1)]);


Epoch 1/100
511/512 [============================>.] - ETA: 0s - loss: 4.9652 - chord_tag_loss: 2.3801 - chord_pitch_loss: 0.3395 - chord_root_loss: 1.1091 - chord_bass_loss: 1.1361 - chord_tag_sparse_categorical_accuracy: 0.4368Epoch 00000: val_chord_tag_loss improved from inf to 1.85343, saving model to /home/bmcfee/working/chords/model_direct_ckpt.pkl
512/512 [==============================] - 225s - loss: 4.9624 - chord_tag_loss: 2.3789 - chord_pitch_loss: 0.3393 - chord_root_loss: 1.1083 - chord_bass_loss: 1.1355 - chord_tag_sparse_categorical_accuracy: 0.4370 - val_loss: 3.7137 - val_chord_tag_loss: 1.8534 - val_chord_pitch_loss: 0.2562 - val_chord_root_loss: 0.8205 - val_chord_bass_loss: 0.7834 - val_chord_tag_sparse_categorical_accuracy: 0.5283
Epoch 2/100
511/512 [============================>.] - ETA: 0s - loss: 3.2349 - chord_tag_loss: 1.6308 - chord_pitch_loss: 0.2415 - chord_root_loss: 0.6855 - chord_bass_loss: 0.6769 - chord_tag_sparse_categorical_accuracy: 0.5629Epoch 00001: val_chord_tag_loss improved from 1.85343 to 1.83270, saving model to /home/bmcfee/working/chords/model_direct_ckpt.pkl
512/512 [==============================] - 213s - loss: 3.2353 - chord_tag_loss: 1.6311 - chord_pitch_loss: 0.2415 - chord_root_loss: 0.6856 - chord_bass_loss: 0.6770 - chord_tag_sparse_categorical_accuracy: 0.5628 - val_loss: 3.7162 - val_chord_tag_loss: 1.8327 - val_chord_pitch_loss: 0.2604 - val_chord_root_loss: 0.8328 - val_chord_bass_loss: 0.7902 - val_chord_tag_sparse_categorical_accuracy: 0.5258
Epoch 3/100
511/512 [============================>.] - ETA: 0s - loss: 3.0492 - chord_tag_loss: 1.5297 - chord_pitch_loss: 0.2325 - chord_root_loss: 0.6465 - chord_bass_loss: 0.6403 - chord_tag_sparse_categorical_accuracy: 0.5818Epoch 00002: val_chord_tag_loss improved from 1.83270 to 1.70164, saving model to /home/bmcfee/working/chords/model_direct_ckpt.pkl
512/512 [==============================] - 213s - loss: 3.0497 - chord_tag_loss: 1.5296 - chord_pitch_loss: 0.2325 - chord_root_loss: 0.6468 - chord_bass_loss: 0.6406 - chord_tag_sparse_categorical_accuracy: 0.5819 - val_loss: 3.4240 - val_chord_tag_loss: 1.7016 - val_chord_pitch_loss: 0.2479 - val_chord_root_loss: 0.7639 - val_chord_bass_loss: 0.7105 - val_chord_tag_sparse_categorical_accuracy: 0.5507
Epoch 4/100
511/512 [============================>.] - ETA: 0s - loss: 2.9660 - chord_tag_loss: 1.4693 - chord_pitch_loss: 0.2287 - chord_root_loss: 0.6408 - chord_bass_loss: 0.6272 - chord_tag_sparse_categorical_accuracy: 0.5944Epoch 00003: val_chord_tag_loss improved from 1.70164 to 1.61162, saving model to /home/bmcfee/working/chords/model_direct_ckpt.pkl
512/512 [==============================] - 214s - loss: 2.9656 - chord_tag_loss: 1.4690 - chord_pitch_loss: 0.2287 - chord_root_loss: 0.6406 - chord_bass_loss: 0.6272 - chord_tag_sparse_categorical_accuracy: 0.5945 - val_loss: 3.1739 - val_chord_tag_loss: 1.6116 - val_chord_pitch_loss: 0.2329 - val_chord_root_loss: 0.6836 - val_chord_bass_loss: 0.6457 - val_chord_tag_sparse_categorical_accuracy: 0.5701
Epoch 5/100
511/512 [============================>.] - ETA: 0s - loss: 2.9380 - chord_tag_loss: 1.4537 - chord_pitch_loss: 0.2283 - chord_root_loss: 0.6332 - chord_bass_loss: 0.6227 - chord_tag_sparse_categorical_accuracy: 0.5951Epoch 00004: val_chord_tag_loss improved from 1.61162 to 1.54873, saving model to /home/bmcfee/working/chords/model_direct_ckpt.pkl
512/512 [==============================] - 215s - loss: 2.9371 - chord_tag_loss: 1.4534 - chord_pitch_loss: 0.2283 - chord_root_loss: 0.6329 - chord_bass_loss: 0.6224 - chord_tag_sparse_categorical_accuracy: 0.5952 - val_loss: 3.0854 - val_chord_tag_loss: 1.5487 - val_chord_pitch_loss: 0.2291 - val_chord_root_loss: 0.6882 - val_chord_bass_loss: 0.6193 - val_chord_tag_sparse_categorical_accuracy: 0.5740
Epoch 6/100
511/512 [============================>.] - ETA: 0s - loss: 2.8902 - chord_tag_loss: 1.4278 - chord_pitch_loss: 0.2246 - chord_root_loss: 0.6254 - chord_bass_loss: 0.6124 - chord_tag_sparse_categorical_accuracy: 0.6021Epoch 00005: val_chord_tag_loss did not improve
512/512 [==============================] - 215s - loss: 2.8891 - chord_tag_loss: 1.4273 - chord_pitch_loss: 0.2245 - chord_root_loss: 0.6251 - chord_bass_loss: 0.6122 - chord_tag_sparse_categorical_accuracy: 0.6023 - val_loss: 3.2164 - val_chord_tag_loss: 1.6246 - val_chord_pitch_loss: 0.2440 - val_chord_root_loss: 0.7092 - val_chord_bass_loss: 0.6385 - val_chord_tag_sparse_categorical_accuracy: 0.5656
Epoch 7/100
511/512 [============================>.] - ETA: 0s - loss: 2.7405 - chord_tag_loss: 1.3712 - chord_pitch_loss: 0.2157 - chord_root_loss: 0.5830 - chord_bass_loss: 0.5706 - chord_tag_sparse_categorical_accuracy: 0.6143Epoch 00006: val_chord_tag_loss did not improve
512/512 [==============================] - 214s - loss: 2.7411 - chord_tag_loss: 1.3714 - chord_pitch_loss: 0.2158 - chord_root_loss: 0.5831 - chord_bass_loss: 0.5708 - chord_tag_sparse_categorical_accuracy: 0.6142 - val_loss: 3.2464 - val_chord_tag_loss: 1.6246 - val_chord_pitch_loss: 0.2395 - val_chord_root_loss: 0.7181 - val_chord_bass_loss: 0.6642 - val_chord_tag_sparse_categorical_accuracy: 0.5658
Epoch 8/100
511/512 [============================>.] - ETA: 0s - loss: 2.7601 - chord_tag_loss: 1.3692 - chord_pitch_loss: 0.2204 - chord_root_loss: 0.5897 - chord_bass_loss: 0.5807 - chord_tag_sparse_categorical_accuracy: 0.6102Epoch 00007: val_chord_tag_loss improved from 1.54873 to 1.50054, saving model to /home/bmcfee/working/chords/model_direct_ckpt.pkl
512/512 [==============================] - 215s - loss: 2.7601 - chord_tag_loss: 1.3692 - chord_pitch_loss: 0.2204 - chord_root_loss: 0.5897 - chord_bass_loss: 0.5807 - chord_tag_sparse_categorical_accuracy: 0.6102 - val_loss: 2.9737 - val_chord_tag_loss: 1.5005 - val_chord_pitch_loss: 0.2236 - val_chord_root_loss: 0.6556 - val_chord_bass_loss: 0.5939 - val_chord_tag_sparse_categorical_accuracy: 0.5928
Epoch 9/100
511/512 [============================>.] - ETA: 0s - loss: 2.7234 - chord_tag_loss: 1.3479 - chord_pitch_loss: 0.2172 - chord_root_loss: 0.5854 - chord_bass_loss: 0.5729 - chord_tag_sparse_categorical_accuracy: 0.6172Epoch 00008: val_chord_tag_loss did not improve
512/512 [==============================] - 214s - loss: 2.7248 - chord_tag_loss: 1.3485 - chord_pitch_loss: 0.2172 - chord_root_loss: 0.5858 - chord_bass_loss: 0.5732 - chord_tag_sparse_categorical_accuracy: 0.6170 - val_loss: 3.1828 - val_chord_tag_loss: 1.5684 - val_chord_pitch_loss: 0.2414 - val_chord_root_loss: 0.7215 - val_chord_bass_loss: 0.6515 - val_chord_tag_sparse_categorical_accuracy: 0.5776
Epoch 10/100
511/512 [============================>.] - ETA: 0s - loss: 2.6929 - chord_tag_loss: 1.3311 - chord_pitch_loss: 0.2151 - chord_root_loss: 0.5753 - chord_bass_loss: 0.5714 - chord_tag_sparse_categorical_accuracy: 0.6206Epoch 00009: val_chord_tag_loss did not improve
512/512 [==============================] - 214s - loss: 2.6925 - chord_tag_loss: 1.3311 - chord_pitch_loss: 0.2151 - chord_root_loss: 0.5751 - chord_bass_loss: 0.5711 - chord_tag_sparse_categorical_accuracy: 0.6206 - val_loss: 2.9951 - val_chord_tag_loss: 1.5273 - val_chord_pitch_loss: 0.2225 - val_chord_root_loss: 0.6503 - val_chord_bass_loss: 0.5950 - val_chord_tag_sparse_categorical_accuracy: 0.5838
Epoch 11/100
511/512 [============================>.] - ETA: 0s - loss: 2.7290 - chord_tag_loss: 1.3395 - chord_pitch_loss: 0.2175 - chord_root_loss: 0.5927 - chord_bass_loss: 0.5793 - chord_tag_sparse_categorical_accuracy: 0.6215Epoch 00010: val_chord_tag_loss did not improve
512/512 [==============================] - 212s - loss: 2.7285 - chord_tag_loss: 1.3392 - chord_pitch_loss: 0.2175 - chord_root_loss: 0.5925 - chord_bass_loss: 0.5792 - chord_tag_sparse_categorical_accuracy: 0.6216 - val_loss: 3.2584 - val_chord_tag_loss: 1.6024 - val_chord_pitch_loss: 0.2429 - val_chord_root_loss: 0.7284 - val_chord_bass_loss: 0.6846 - val_chord_tag_sparse_categorical_accuracy: 0.5772
Epoch 12/100
511/512 [============================>.] - ETA: 0s - loss: 2.7190 - chord_tag_loss: 1.3368 - chord_pitch_loss: 0.2171 - chord_root_loss: 0.5906 - chord_bass_loss: 0.5745 - chord_tag_sparse_categorical_accuracy: 0.6192Epoch 00011: val_chord_tag_loss did not improve
512/512 [==============================] - 213s - loss: 2.7187 - chord_tag_loss: 1.3365 - chord_pitch_loss: 0.2171 - chord_root_loss: 0.5907 - chord_bass_loss: 0.5743 - chord_tag_sparse_categorical_accuracy: 0.6192 - val_loss: 3.2680 - val_chord_tag_loss: 1.6287 - val_chord_pitch_loss: 0.2424 - val_chord_root_loss: 0.7285 - val_chord_bass_loss: 0.6682 - val_chord_tag_sparse_categorical_accuracy: 0.5718
Epoch 13/100
511/512 [============================>.] - ETA: 0s - loss: 2.6475 - chord_tag_loss: 1.3102 - chord_pitch_loss: 0.2145 - chord_root_loss: 0.5677 - chord_bass_loss: 0.5551 - chord_tag_sparse_categorical_accuracy: 0.6265Epoch 00012: val_chord_tag_loss did not improve
512/512 [==============================] - 213s - loss: 2.6473 - chord_tag_loss: 1.3100 - chord_pitch_loss: 0.2145 - chord_root_loss: 0.5678 - chord_bass_loss: 0.5550 - chord_tag_sparse_categorical_accuracy: 0.6265 - val_loss: 3.1385 - val_chord_tag_loss: 1.5753 - val_chord_pitch_loss: 0.2334 - val_chord_root_loss: 0.6936 - val_chord_bass_loss: 0.6362 - val_chord_tag_sparse_categorical_accuracy: 0.5822
Epoch 14/100
511/512 [============================>.] - ETA: 0s - loss: 2.5840 - chord_tag_loss: 1.2836 - chord_pitch_loss: 0.2092 - chord_root_loss: 0.5514 - chord_bass_loss: 0.5397 - chord_tag_sparse_categorical_accuracy: 0.6309Epoch 00013: val_chord_tag_loss did not improve

Epoch 00013: reducing learning rate to 0.00010000000474974513.
512/512 [==============================] - 213s - loss: 2.5838 - chord_tag_loss: 1.2836 - chord_pitch_loss: 0.2092 - chord_root_loss: 0.5513 - chord_bass_loss: 0.5396 - chord_tag_sparse_categorical_accuracy: 0.6308 - val_loss: 3.0148 - val_chord_tag_loss: 1.5140 - val_chord_pitch_loss: 0.2241 - val_chord_root_loss: 0.6666 - val_chord_bass_loss: 0.6101 - val_chord_tag_sparse_categorical_accuracy: 0.5967
Epoch 15/100
511/512 [============================>.] - ETA: 0s - loss: 2.6220 - chord_tag_loss: 1.3005 - chord_pitch_loss: 0.2122 - chord_root_loss: 0.5652 - chord_bass_loss: 0.5441 - chord_tag_sparse_categorical_accuracy: 0.6274Epoch 00014: val_chord_tag_loss improved from 1.50054 to 1.49005, saving model to /home/bmcfee/working/chords/model_direct_ckpt.pkl
512/512 [==============================] - 213s - loss: 2.6205 - chord_tag_loss: 1.2999 - chord_pitch_loss: 0.2120 - chord_root_loss: 0.5648 - chord_bass_loss: 0.5438 - chord_tag_sparse_categorical_accuracy: 0.6275 - val_loss: 2.9224 - val_chord_tag_loss: 1.4901 - val_chord_pitch_loss: 0.2207 - val_chord_root_loss: 0.6296 - val_chord_bass_loss: 0.5820 - val_chord_tag_sparse_categorical_accuracy: 0.5950
Epoch 16/100
511/512 [============================>.] - ETA: 0s - loss: 2.6673 - chord_tag_loss: 1.3034 - chord_pitch_loss: 0.2158 - chord_root_loss: 0.5839 - chord_bass_loss: 0.5643 - chord_tag_sparse_categorical_accuracy: 0.6307Epoch 00015: val_chord_tag_loss improved from 1.49005 to 1.46107, saving model to /home/bmcfee/working/chords/model_direct_ckpt.pkl
512/512 [==============================] - 207s - loss: 2.6682 - chord_tag_loss: 1.3038 - chord_pitch_loss: 0.2158 - chord_root_loss: 0.5840 - chord_bass_loss: 0.5645 - chord_tag_sparse_categorical_accuracy: 0.6306 - val_loss: 2.9601 - val_chord_tag_loss: 1.4611 - val_chord_pitch_loss: 0.2206 - val_chord_root_loss: 0.6650 - val_chord_bass_loss: 0.6134 - val_chord_tag_sparse_categorical_accuracy: 0.5960
Epoch 17/100
511/512 [============================>.] - ETA: 0s - loss: 2.6354 - chord_tag_loss: 1.2951 - chord_pitch_loss: 0.2131 - chord_root_loss: 0.5728 - chord_bass_loss: 0.5545 - chord_tag_sparse_categorical_accuracy: 0.6303Epoch 00016: val_chord_tag_loss improved from 1.46107 to 1.44011, saving model to /home/bmcfee/working/chords/model_direct_ckpt.pkl
512/512 [==============================] - 197s - loss: 2.6369 - chord_tag_loss: 1.2958 - chord_pitch_loss: 0.2132 - chord_root_loss: 0.5732 - chord_bass_loss: 0.5548 - chord_tag_sparse_categorical_accuracy: 0.6301 - val_loss: 2.9299 - val_chord_tag_loss: 1.4401 - val_chord_pitch_loss: 0.2228 - val_chord_root_loss: 0.6656 - val_chord_bass_loss: 0.6014 - val_chord_tag_sparse_categorical_accuracy: 0.6121
Epoch 18/100
511/512 [============================>.] - ETA: 0s - loss: 2.5763 - chord_tag_loss: 1.2639 - chord_pitch_loss: 0.2079 - chord_root_loss: 0.5614 - chord_bass_loss: 0.5431 - chord_tag_sparse_categorical_accuracy: 0.6401Epoch 00017: val_chord_tag_loss did not improve
512/512 [==============================] - 197s - loss: 2.5768 - chord_tag_loss: 1.2639 - chord_pitch_loss: 0.2079 - chord_root_loss: 0.5617 - chord_bass_loss: 0.5433 - chord_tag_sparse_categorical_accuracy: 0.6400 - val_loss: 2.9796 - val_chord_tag_loss: 1.4768 - val_chord_pitch_loss: 0.2289 - val_chord_root_loss: 0.6639 - val_chord_bass_loss: 0.6099 - val_chord_tag_sparse_categorical_accuracy: 0.6052
Epoch 19/100
511/512 [============================>.] - ETA: 0s - loss: 2.6024 - chord_tag_loss: 1.2822 - chord_pitch_loss: 0.2120 - chord_root_loss: 0.5655 - chord_bass_loss: 0.5427 - chord_tag_sparse_categorical_accuracy: 0.6353Epoch 00018: val_chord_tag_loss did not improve
512/512 [==============================] - 197s - loss: 2.6014 - chord_tag_loss: 1.2816 - chord_pitch_loss: 0.2119 - chord_root_loss: 0.5653 - chord_bass_loss: 0.5426 - chord_tag_sparse_categorical_accuracy: 0.6355 - val_loss: 3.0201 - val_chord_tag_loss: 1.4870 - val_chord_pitch_loss: 0.2288 - val_chord_root_loss: 0.6870 - val_chord_bass_loss: 0.6174 - val_chord_tag_sparse_categorical_accuracy: 0.5928
Epoch 20/100
511/512 [============================>.] - ETA: 0s - loss: 2.5867 - chord_tag_loss: 1.2713 - chord_pitch_loss: 0.2091 - chord_root_loss: 0.5626 - chord_bass_loss: 0.5438 - chord_tag_sparse_categorical_accuracy: 0.6351Epoch 00019: val_chord_tag_loss did not improve
512/512 [==============================] - 198s - loss: 2.5870 - chord_tag_loss: 1.2713 - chord_pitch_loss: 0.2092 - chord_root_loss: 0.5627 - chord_bass_loss: 0.5438 - chord_tag_sparse_categorical_accuracy: 0.6351 - val_loss: 3.0954 - val_chord_tag_loss: 1.5140 - val_chord_pitch_loss: 0.2369 - val_chord_root_loss: 0.6970 - val_chord_bass_loss: 0.6475 - val_chord_tag_sparse_categorical_accuracy: 0.5814
Epoch 21/100
511/512 [============================>.] - ETA: 0s - loss: 2.6141 - chord_tag_loss: 1.2704 - chord_pitch_loss: 0.2133 - chord_root_loss: 0.5731 - chord_bass_loss: 0.5574 - chord_tag_sparse_categorical_accuracy: 0.6400Epoch 00020: val_chord_tag_loss did not improve
512/512 [==============================] - 197s - loss: 2.6142 - chord_tag_loss: 1.2704 - chord_pitch_loss: 0.2133 - chord_root_loss: 0.5730 - chord_bass_loss: 0.5574 - chord_tag_sparse_categorical_accuracy: 0.6398 - val_loss: 3.2171 - val_chord_tag_loss: 1.6066 - val_chord_pitch_loss: 0.2427 - val_chord_root_loss: 0.7018 - val_chord_bass_loss: 0.6660 - val_chord_tag_sparse_categorical_accuracy: 0.5777
Epoch 22/100
511/512 [============================>.] - ETA: 0s - loss: 2.4931 - chord_tag_loss: 1.2343 - chord_pitch_loss: 0.2053 - chord_root_loss: 0.5354 - chord_bass_loss: 0.5181 - chord_tag_sparse_categorical_accuracy: 0.6440Epoch 00021: val_chord_tag_loss improved from 1.44011 to 1.41843, saving model to /home/bmcfee/working/chords/model_direct_ckpt.pkl
512/512 [==============================] - 197s - loss: 2.4941 - chord_tag_loss: 1.2346 - chord_pitch_loss: 0.2054 - chord_root_loss: 0.5357 - chord_bass_loss: 0.5184 - chord_tag_sparse_categorical_accuracy: 0.6439 - val_loss: 2.8755 - val_chord_tag_loss: 1.4184 - val_chord_pitch_loss: 0.2241 - val_chord_root_loss: 0.6450 - val_chord_bass_loss: 0.5880 - val_chord_tag_sparse_categorical_accuracy: 0.6071
Epoch 23/100
511/512 [============================>.] - ETA: 0s - loss: 2.5079 - chord_tag_loss: 1.2397 - chord_pitch_loss: 0.2073 - chord_root_loss: 0.5392 - chord_bass_loss: 0.5216 - chord_tag_sparse_categorical_accuracy: 0.6412Epoch 00022: val_chord_tag_loss did not improve
512/512 [==============================] - 197s - loss: 2.5081 - chord_tag_loss: 1.2397 - chord_pitch_loss: 0.2074 - chord_root_loss: 0.5393 - chord_bass_loss: 0.5217 - chord_tag_sparse_categorical_accuracy: 0.6412 - val_loss: 3.0877 - val_chord_tag_loss: 1.5041 - val_chord_pitch_loss: 0.2357 - val_chord_root_loss: 0.6936 - val_chord_bass_loss: 0.6543 - val_chord_tag_sparse_categorical_accuracy: 0.5986
Epoch 24/100
511/512 [============================>.] - ETA: 0s - loss: 2.4954 - chord_tag_loss: 1.2296 - chord_pitch_loss: 0.2064 - chord_root_loss: 0.5381 - chord_bass_loss: 0.5213 - chord_tag_sparse_categorical_accuracy: 0.6466Epoch 00023: val_chord_tag_loss did not improve
512/512 [==============================] - 199s - loss: 2.4955 - chord_tag_loss: 1.2297 - chord_pitch_loss: 0.2064 - chord_root_loss: 0.5379 - chord_bass_loss: 0.5215 - chord_tag_sparse_categorical_accuracy: 0.6466 - val_loss: 2.9884 - val_chord_tag_loss: 1.4511 - val_chord_pitch_loss: 0.2264 - val_chord_root_loss: 0.6761 - val_chord_bass_loss: 0.6347 - val_chord_tag_sparse_categorical_accuracy: 0.6081
Epoch 25/100
511/512 [============================>.] - ETA: 0s - loss: 2.5023 - chord_tag_loss: 1.2305 - chord_pitch_loss: 0.2043 - chord_root_loss: 0.5442 - chord_bass_loss: 0.5232 - chord_tag_sparse_categorical_accuracy: 0.6485Epoch 00024: val_chord_tag_loss did not improve
512/512 [==============================] - 198s - loss: 2.5025 - chord_tag_loss: 1.2305 - chord_pitch_loss: 0.2043 - chord_root_loss: 0.5443 - chord_bass_loss: 0.5233 - chord_tag_sparse_categorical_accuracy: 0.6486 - val_loss: 3.2939 - val_chord_tag_loss: 1.6295 - val_chord_pitch_loss: 0.2481 - val_chord_root_loss: 0.7460 - val_chord_bass_loss: 0.6704 - val_chord_tag_sparse_categorical_accuracy: 0.5778
Epoch 26/100
511/512 [============================>.] - ETA: 0s - loss: 2.5216 - chord_tag_loss: 1.2310 - chord_pitch_loss: 0.2070 - chord_root_loss: 0.5501 - chord_bass_loss: 0.5336 - chord_tag_sparse_categorical_accuracy: 0.6484Epoch 00025: val_chord_tag_loss did not improve
512/512 [==============================] - 198s - loss: 2.5225 - chord_tag_loss: 1.2315 - chord_pitch_loss: 0.2071 - chord_root_loss: 0.5502 - chord_bass_loss: 0.5337 - chord_tag_sparse_categorical_accuracy: 0.6483 - val_loss: 2.9331 - val_chord_tag_loss: 1.4635 - val_chord_pitch_loss: 0.2260 - val_chord_root_loss: 0.6538 - val_chord_bass_loss: 0.5897 - val_chord_tag_sparse_categorical_accuracy: 0.6116
Epoch 27/100
511/512 [============================>.] - ETA: 0s - loss: 2.5543 - chord_tag_loss: 1.2682 - chord_pitch_loss: 0.2092 - chord_root_loss: 0.5538 - chord_bass_loss: 0.5232 - chord_tag_sparse_categorical_accuracy: 0.6357Epoch 00026: val_chord_tag_loss did not improve
512/512 [==============================] - 199s - loss: 2.5552 - chord_tag_loss: 1.2685 - chord_pitch_loss: 0.2092 - chord_root_loss: 0.5541 - chord_bass_loss: 0.5234 - chord_tag_sparse_categorical_accuracy: 0.6356 - val_loss: 2.9419 - val_chord_tag_loss: 1.5011 - val_chord_pitch_loss: 0.2261 - val_chord_root_loss: 0.6336 - val_chord_bass_loss: 0.5811 - val_chord_tag_sparse_categorical_accuracy: 0.5934
Epoch 28/100
511/512 [============================>.] - ETA: 0s - loss: 2.4975 - chord_tag_loss: 1.2322 - chord_pitch_loss: 0.2057 - chord_root_loss: 0.5426 - chord_bass_loss: 0.5170 - chord_tag_sparse_categorical_accuracy: 0.6439Epoch 00027: val_chord_tag_loss did not improve

Epoch 00027: reducing learning rate to 1.0000000474974514e-05.
512/512 [==============================] - 199s - loss: 2.4986 - chord_tag_loss: 1.2327 - chord_pitch_loss: 0.2058 - chord_root_loss: 0.5430 - chord_bass_loss: 0.5171 - chord_tag_sparse_categorical_accuracy: 0.6438 - val_loss: 2.9304 - val_chord_tag_loss: 1.4866 - val_chord_pitch_loss: 0.2269 - val_chord_root_loss: 0.6370 - val_chord_bass_loss: 0.5798 - val_chord_tag_sparse_categorical_accuracy: 0.5994
Epoch 29/100
511/512 [============================>.] - ETA: 0s - loss: 2.5443 - chord_tag_loss: 1.2534 - chord_pitch_loss: 0.2089 - chord_root_loss: 0.5542 - chord_bass_loss: 0.5278 - chord_tag_sparse_categorical_accuracy: 0.6426Epoch 00028: val_chord_tag_loss did not improve
512/512 [==============================] - 199s - loss: 2.5438 - chord_tag_loss: 1.2534 - chord_pitch_loss: 0.2088 - chord_root_loss: 0.5539 - chord_bass_loss: 0.5276 - chord_tag_sparse_categorical_accuracy: 0.6427 - val_loss: 3.0050 - val_chord_tag_loss: 1.5060 - val_chord_pitch_loss: 0.2335 - val_chord_root_loss: 0.6616 - val_chord_bass_loss: 0.6039 - val_chord_tag_sparse_categorical_accuracy: 0.5852
Epoch 30/100
511/512 [============================>.] - ETA: 0s - loss: 2.5610 - chord_tag_loss: 1.2567 - chord_pitch_loss: 0.2092 - chord_root_loss: 0.5538 - chord_bass_loss: 0.5412 - chord_tag_sparse_categorical_accuracy: 0.6421Epoch 00029: val_chord_tag_loss did not improve
512/512 [==============================] - 198s - loss: 2.5600 - chord_tag_loss: 1.2565 - chord_pitch_loss: 0.2092 - chord_root_loss: 0.5535 - chord_bass_loss: 0.5408 - chord_tag_sparse_categorical_accuracy: 0.6422 - val_loss: 3.1209 - val_chord_tag_loss: 1.5436 - val_chord_pitch_loss: 0.2329 - val_chord_root_loss: 0.7018 - val_chord_bass_loss: 0.6426 - val_chord_tag_sparse_categorical_accuracy: 0.5942
Epoch 31/100
511/512 [============================>.] - ETA: 0s - loss: 2.5292 - chord_tag_loss: 1.2483 - chord_pitch_loss: 0.2074 - chord_root_loss: 0.5443 - chord_bass_loss: 0.5293 - chord_tag_sparse_categorical_accuracy: 0.6403Epoch 00030: val_chord_tag_loss did not improve
512/512 [==============================] - 197s - loss: 2.5290 - chord_tag_loss: 1.2483 - chord_pitch_loss: 0.2074 - chord_root_loss: 0.5442 - chord_bass_loss: 0.5291 - chord_tag_sparse_categorical_accuracy: 0.6403 - val_loss: 2.9323 - val_chord_tag_loss: 1.5144 - val_chord_pitch_loss: 0.2236 - val_chord_root_loss: 0.6305 - val_chord_bass_loss: 0.5637 - val_chord_tag_sparse_categorical_accuracy: 0.5859
Epoch 32/100
511/512 [============================>.] - ETA: 0s - loss: 2.5134 - chord_tag_loss: 1.2491 - chord_pitch_loss: 0.2070 - chord_root_loss: 0.5372 - chord_bass_loss: 0.5201 - chord_tag_sparse_categorical_accuracy: 0.6370Epoch 00031: val_chord_tag_loss did not improve
512/512 [==============================] - 198s - loss: 2.5136 - chord_tag_loss: 1.2491 - chord_pitch_loss: 0.2070 - chord_root_loss: 0.5373 - chord_bass_loss: 0.5202 - chord_tag_sparse_categorical_accuracy: 0.6370 - val_loss: 2.8527 - val_chord_tag_loss: 1.4214 - val_chord_pitch_loss: 0.2216 - val_chord_root_loss: 0.6339 - val_chord_bass_loss: 0.5757 - val_chord_tag_sparse_categorical_accuracy: 0.6152
Epoch 33/100
511/512 [============================>.] - ETA: 0s - loss: 2.5496 - chord_tag_loss: 1.2625 - chord_pitch_loss: 0.2081 - chord_root_loss: 0.5483 - chord_bass_loss: 0.5307 - chord_tag_sparse_categorical_accuracy: 0.6361Epoch 00032: val_chord_tag_loss did not improve

Epoch 00032: reducing learning rate to 1.0000000656873453e-06.
512/512 [==============================] - 198s - loss: 2.5507 - chord_tag_loss: 1.2631 - chord_pitch_loss: 0.2081 - chord_root_loss: 0.5485 - chord_bass_loss: 0.5311 - chord_tag_sparse_categorical_accuracy: 0.6360 - val_loss: 2.9613 - val_chord_tag_loss: 1.4493 - val_chord_pitch_loss: 0.2291 - val_chord_root_loss: 0.6694 - val_chord_bass_loss: 0.6135 - val_chord_tag_sparse_categorical_accuracy: 0.6072
Epoch 34/100
511/512 [============================>.] - ETA: 0s - loss: 2.5603 - chord_tag_loss: 1.2707 - chord_pitch_loss: 0.2110 - chord_root_loss: 0.5537 - chord_bass_loss: 0.5249 - chord_tag_sparse_categorical_accuracy: 0.6347Epoch 00033: val_chord_tag_loss did not improve
512/512 [==============================] - 199s - loss: 2.5596 - chord_tag_loss: 1.2704 - chord_pitch_loss: 0.2109 - chord_root_loss: 0.5534 - chord_bass_loss: 0.5248 - chord_tag_sparse_categorical_accuracy: 0.6348 - val_loss: 3.0588 - val_chord_tag_loss: 1.5128 - val_chord_pitch_loss: 0.2319 - val_chord_root_loss: 0.6878 - val_chord_bass_loss: 0.6263 - val_chord_tag_sparse_categorical_accuracy: 0.5939
Epoch 35/100
511/512 [============================>.] - ETA: 0s - loss: 2.5208 - chord_tag_loss: 1.2290 - chord_pitch_loss: 0.2069 - chord_root_loss: 0.5466 - chord_bass_loss: 0.5383 - chord_tag_sparse_categorical_accuracy: 0.6491Epoch 00034: val_chord_tag_loss did not improve
512/512 [==============================] - 200s - loss: 2.5215 - chord_tag_loss: 1.2292 - chord_pitch_loss: 0.2069 - chord_root_loss: 0.5467 - chord_bass_loss: 0.5387 - chord_tag_sparse_categorical_accuracy: 0.6491 - val_loss: 3.1751 - val_chord_tag_loss: 1.5570 - val_chord_pitch_loss: 0.2422 - val_chord_root_loss: 0.7220 - val_chord_bass_loss: 0.6539 - val_chord_tag_sparse_categorical_accuracy: 0.5773
Epoch 36/100
511/512 [============================>.] - ETA: 0s - loss: 2.5240 - chord_tag_loss: 1.2440 - chord_pitch_loss: 0.2048 - chord_root_loss: 0.5443 - chord_bass_loss: 0.5309 - chord_tag_sparse_categorical_accuracy: 0.6416Epoch 00035: val_chord_tag_loss did not improve
512/512 [==============================] - 202s - loss: 2.5236 - chord_tag_loss: 1.2438 - chord_pitch_loss: 0.2048 - chord_root_loss: 0.5442 - chord_bass_loss: 0.5308 - chord_tag_sparse_categorical_accuracy: 0.6416 - val_loss: 2.9236 - val_chord_tag_loss: 1.4556 - val_chord_pitch_loss: 0.2263 - val_chord_root_loss: 0.6517 - val_chord_bass_loss: 0.5900 - val_chord_tag_sparse_categorical_accuracy: 0.6047
Epoch 37/100
511/512 [============================>.] - ETA: 0s - loss: 2.6120 - chord_tag_loss: 1.2694 - chord_pitch_loss: 0.2109 - chord_root_loss: 0.5771 - chord_bass_loss: 0.5546 - chord_tag_sparse_categorical_accuracy: 0.6371Epoch 00036: val_chord_tag_loss did not improve
512/512 [==============================] - 203s - loss: 2.6119 - chord_tag_loss: 1.2692 - chord_pitch_loss: 0.2109 - chord_root_loss: 0.5771 - chord_bass_loss: 0.5547 - chord_tag_sparse_categorical_accuracy: 0.6370 - val_loss: 3.0402 - val_chord_tag_loss: 1.5089 - val_chord_pitch_loss: 0.2330 - val_chord_root_loss: 0.6794 - val_chord_bass_loss: 0.6188 - val_chord_tag_sparse_categorical_accuracy: 0.5827
Epoch 38/100
511/512 [============================>.] - ETA: 0s - loss: 2.5565 - chord_tag_loss: 1.2639 - chord_pitch_loss: 0.2097 - chord_root_loss: 0.5496 - chord_bass_loss: 0.5332 - chord_tag_sparse_categorical_accuracy: 0.6355Epoch 00037: val_chord_tag_loss did not improve

Epoch 00037: reducing learning rate to 1.0000001111620805e-07.
512/512 [==============================] - 203s - loss: 2.5563 - chord_tag_loss: 1.2640 - chord_pitch_loss: 0.2097 - chord_root_loss: 0.5495 - chord_bass_loss: 0.5331 - chord_tag_sparse_categorical_accuracy: 0.6355 - val_loss: 2.8546 - val_chord_tag_loss: 1.4197 - val_chord_pitch_loss: 0.2180 - val_chord_root_loss: 0.6449 - val_chord_bass_loss: 0.5721 - val_chord_tag_sparse_categorical_accuracy: 0.6127
Epoch 00037: early stopping

In [16]:
# Restore the checkpoint written during training (ModelCheckpoint with save_best_only=True,
# monitoring val_chord_tag_loss), i.e. the weights with the best validation chord-tag loss
model.load_weights('/home/bmcfee/working/chords/model_direct_ckpt.pkl')

Diagnostics


In [17]:
import jams.display
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib nbagg
import librosa.display

In [18]:
# Learning curves: per-epoch accuracy of the chord_tag output on the training
# and validation data, taken from the Keras history attached to the model.
history = pd.DataFrame.from_dict(model.history.history)

plt.figure()
for metric, legend_label in [
        ('chord_tag_sparse_categorical_accuracy', 'Training accuracy'),
        ('val_chord_tag_sparse_categorical_accuracy', 'Validation accuracy')]:
    plt.plot(history[metric], label=legend_label)
plt.legend(loc='best')


Out[18]:
<matplotlib.legend.Legend at 0x7f730605bd30>

In [19]:
# Pull out the weight array at position 4 of model.get_weights(); the plotting
# cell below indexes it as a 4-D array, l1[:, :, c, f].
# NOTE(review): presumably a convolution kernel — confirm index 4 against the
# model definition.
l1 = model.get_weights()[4]

In [20]:
# Draw every 2-D slice l1[:, :, c, f] in a grid: rows follow the last axis (f),
# columns follow the third axis (c). All panels share one colour range so they
# are directly comparable.
n_cols, n_rows = l1.shape[2:]
plt.figure(figsize=(2 * n_cols, 2 * n_rows))

# np.ndindex walks (f, c) in row-major order, matching subplot numbering.
for pi, (f, c) in enumerate(np.ndindex(n_rows, n_cols), start=1):
    plt.subplot(n_rows, n_cols, pi)
    librosa.display.specshow(l1[:, :, c, f].T, vmin=l1.min(), vmax=l1.max())

plt.tight_layout()



In [21]:
# Take the weight array at position 6, drop its singleton axes, and reorder it
# with librosa.util.axis_sort (default arguments) so the plot below is easier
# to read.
l2 = librosa.util.axis_sort(model.get_weights()[6].squeeze())

In [22]:
# Show the sorted l2 weights with a CQT note-name frequency axis.
# NOTE(review): bins_per_octave=36 is hard-coded; presumably it matches the
# resolution of the CQT feature extractor (pump.ops[0]) — confirm.
plt.figure(figsize=(8, 4))
librosa.display.specshow(
    l2,
    sr=pump.ops[0].sr,
    bins_per_octave=36,
    y_axis='cqt_note',
)
plt.tight_layout()



Validation viz


In [23]:
import pandas as pd
import jams

from tqdm import tqdm_notebook as tqdm

from IPython.display import Audio

import jams

import librosa

In [24]:
def score_model(pump, model, idx,
                features='/home/bmcfee/working/chords/pump',
                refs='/home/bmcfee/data/eric_chords/references_v2/'):
    '''Score the model's chord estimates against reference annotations.

    Parameters
    ----------
    pump
        Feature/label pipeline. `pump.ops[1].inverse` is used to decode the
        model output into an annotation.
        NOTE(review): presumably `ops[1]` is the `chord_tag` transformer —
        confirm against the pump definition.
    model
        Model whose `predict` is called on each track's `cqt/mag` features;
        only element `[0][0]` of the prediction is decoded.
    idx
        Object whose `.index` lists the track identifiers to evaluate.
    features : str
        Directory containing one `<track>.npz` feature file per track.
    refs : str
        Directory containing one `<track>.jams` reference file per track.

    Returns
    -------
    pandas.DataFrame
        One row per track, one column per metric returned by
        `jams.eval.chord`.
    '''
    results = {}
    for item in tqdm(idx.index):
        jam = jams.load('{}/{}.jams'.format(refs, item), validate=False)

        # Close the .npz archive as soon as the array has been read, so the
        # loop does not leak one open file handle per track.
        with np.load('{}/{}.npz'.format(features, item)) as data:
            datum = data['cqt/mag']

        ann = pump.ops[1].inverse(model.predict(datum)[0][0])
        results[item] = jams.eval.chord(jam.annotations['chord', 0], ann)

    return pd.DataFrame.from_dict(results, orient='index')

In [25]:
# Evaluate the model on every track listed in idx_test.index; the result has
# one row of chord metrics per track.
df = score_model(pump, model, idx_test)




In [26]:
# Keep only the seven headline chord metrics for the summaries below; the
# other columns returned by the evaluation (e.g. the *_inv variants) are left out.
dfr = df[['thirds', 'triads', 'tetrads', 'root', 'mirex', 'majmin', 'sevenths']]

In [27]:
# Summary statistics of each metric across the evaluated tracks.
dfr.describe()


Out[27]:
thirds triads tetrads root mirex majmin sevenths
count 223.000000 223.000000 223.000000 223.000000 223.000000 223.000000 223.000000
mean 0.772842 0.729613 0.561039 0.810312 0.781182 0.786274 0.616187
std 0.144209 0.186337 0.216865 0.119168 0.146628 0.150618 0.207255
min 0.044252 0.041066 0.020381 0.069485 0.048453 0.045054 0.030845
25% 0.685084 0.636888 0.388797 0.738709 0.717685 0.712429 0.477744
50% 0.806966 0.775079 0.603181 0.833583 0.814690 0.827560 0.668461
75% 0.888113 0.860181 0.728466 0.897009 0.883560 0.895237 0.756668
max 0.985837 0.985837 0.931349 0.985837 0.985837 0.985837 0.974973

In [28]:
# Distribution of each metric across tracks; the trailing ';' suppresses the
# textual repr of the value returned by boxplot().
plt.figure()
dfr.boxplot();



In [30]:
# Just the per-metric means (the 'mean' row of the describe() table).
dfr.describe().loc['mean']


Out[30]:
thirds      0.772842
triads      0.729613
tetrads     0.561039
root        0.810312
mirex       0.781182
majmin      0.786274
sevenths    0.616187
Name: mean, dtype: float64

In [40]:
# Alternative tracks to inspect instead of the worst-scoring one chosen below:
#F = idx_val.index[99]
#F = 'TRDAJDG149E3784BF8'

In [41]:
# Pick the track with the lowest MIREX score. idxmin() returns the index
# *label* (the track id formatted into the file names below), whereas
# Series.argmin() returns an integer position in current pandas.
F = df['mirex'].idxmin()

In [42]:
# Read every array of the selected track into a plain dict and close the .npz
# archive immediately, instead of keeping the file handle open for the rest of
# the session. Later cells only index `datum` by key, which a dict supports.
with np.load('/home/bmcfee/working/chords/pump/{}.npz'.format(F)) as npz:
    datum = dict(npz)

In [43]:
# Reference annotations for the same track.
# NOTE(review): score_model loads references with validate=False; here the
# jams.load default is used instead — confirm this difference is intended.
J = jams.load('/home/bmcfee/data/eric_chords/references_v2/{}.jams'.format(F))

In [44]:
# Decode the stored chord_tag target back into an annotation. Element [0] is
# taken before decoding — presumably the leading batch axis; confirm.
ann_true = pump['chord_tag'].inverse(datum['chord_tag/chord'][0])

In [45]:
# Run the model on this track's CQT magnitude and decode element [0][0] of the
# prediction with the chord_tag transformer to get the estimated annotation.
ann = pump['chord_tag'].inverse(model.predict(datum['cqt/mag'])[0][0])

In [46]:
# Score the estimate against the reference chord annotation for this track and
# display the metrics as a single labelled column.
worst_scores = jams.eval.chord(J.annotations['chord', 0], ann)
pd.DataFrame.from_records([worst_scores]).loc[0]


Out[46]:
thirds          0.044252
thirds_inv      0.044252
triads          0.044252
triads_inv      0.044252
tetrads         0.032892
tetrads_inv     0.032892
root            0.069485
mirex           0.048453
majmin          0.045054
majmin_inv      0.045054
sevenths        0.033489
sevenths_inv    0.033489
Name: 0, dtype: float64

In [47]:
# Embed an audio player for the original recording of the selected track.
Audio(filename='/home/bmcfee/data/eric_chords/audio/{}.mp3'.format(F))


Out[47]:

In [48]:
# Two stacked panels on a shared time axis: the input features on top, and the
# reference and estimated chord annotations overlaid underneath.
plt.figure(figsize=(10, 8))

# Top panel: slice [0, :, :, 0] of the CQT magnitude, transposed for display.
ax = plt.subplot(2, 1, 1)
cqt_mag = datum['cqt/mag'][0, :, :, 0].T
librosa.display.specshow(cqt_mag,
                         x_axis='time',
                         sr=pump['cqt'].sr,
                         hop_length=pump['cqt'].hop_length)

# Bottom panel: both annotations, semi-transparent so overlaps stay visible.
plt.subplot(2, 1, 2, sharex=ax)
for annotation, legend_label in ((ann_true, 'Reference'), (ann, 'Estimate')):
    jams.display.display(annotation, meta=False, label=legend_label, alpha=0.5)
plt.legend(loc='best')
plt.tight_layout()



In [49]:
# Decode the recording into a sample array `y` and its sample rate `sr`,
# using librosa.load's default settings.
y, sr = librosa.load('/home/bmcfee/data/eric_chords/audio/{}.mp3'.format(F))

In [50]:
# Sonify the estimated annotation for a whole number of seconds that covers
# the recording, trim it to the recording's length, and stack the two signals
# as rows (presumably played back as two channels) for side-by-side listening.
n_seconds = int(np.ceil(len(y) / sr))
y_est = jams.sonify.sonify(ann, sr=sr, duration=n_seconds)[:len(y)]
Audio(data=np.vstack([y, y_est]), rate=sr)


Out[50]:

In [ ]: