In [1]:
from keras import backend as K
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Activation, Conv1D, Dense, Dropout, Flatten, MaxPooling1D
from keras.wrappers.scikit_learn import KerasClassifier

from matplotlib import pyplot as plt
%matplotlib inline
import numpy as np
from math import ceil

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer
from sklearn.model_selection import train_test_split

# append parent folder to path for imports
import sys
import os
PACKAGE_PARENT = '..'
SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser('__file__'))))
sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))

from music_transcription.onset_detection.metrics import onset_metric
from music_transcription.onset_detection.read_data import get_wav_and_truth_files
from music_transcription.string_fret_detection.read_data import read_data_y
from music_transcription.string_fret_detection.cnn_string_detector import CnnStringDetector, CnnStringFeatureExtractor


Using Theano backend.

In [2]:
active_datasets = {1, 2, 3, 4}
active_datasets = {1}
# X_parts, y_parts, y_start_only_parts, ds_labels
wav_file_paths, truth_dataset_format_tuples = get_wav_and_truth_files(active_datasets)
wav_file_paths_train, wav_file_paths_test, truth_dataset_format_tuples_train, truth_dataset_format_tuples_test = train_test_split(
    wav_file_paths, truth_dataset_format_tuples, test_size=0.2, random_state=42
)

In [12]:
data_train, _, _ = read_data_y(wav_file_paths_train, truth_dataset_format_tuples_train, 44100, 1, 6, 0.05)
samples, onsets, pitches, strings = data_train

In [60]:
fe = CnnStringFeatureExtractor(6, 441)
X = fe.fit_transform(data_train[0])
X.shape


Fitting standard scaler
Standardizing samples
Out[60]:
(77699, 441, 1)

In [13]:
print(len(wav_file_paths_train), len(X), X[0].shape, X[7].shape)
print(np.concatenate(X).ravel().shape) # flatten


320 320 (110250,) (94585,)
(34255306,)

In [14]:
print(len(onsets), len(pitches), len(strings))


320 320 320

In [15]:
strings


Out[15]:
[[{1}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{2}],
 [{4}],
 [{5}],
 [{1}],
 [{5}],
 [{2, 3, 4}, {5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{1}],
 [{2, 3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{6}],
 [{6}],
 [{4}],
 [{6}],
 [{2, 3, 4, 5, 6}],
 [{6}],
 [{1}],
 [{2, 3, 4}, {5, 6}],
 [{2}],
 [{6}],
 [{2}],
 [{1}],
 [{2}],
 [{1}],
 [{4}],
 [{3}],
 [{1}],
 [{5}],
 [{3}],
 [{6}],
 [{4}],
 [{5}],
 [{1, 2, 3, 4, 5, 6}],
 [{5}],
 [{6}],
 [{3}],
 [{3}],
 [{2, 3, 4, 5, 6}],
 [{3}],
 [{3}],
 [{2}],
 [{4}],
 [{3}],
 [{2}],
 [{3}],
 [{1}],
 [{5}],
 [{3}],
 [{2}],
 [{6}],
 [{2, 3, 4, 5, 6}],
 [{6}],
 [{2, 3, 4, 5, 6}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{5}],
 [{4}],
 [{3}],
 [{5}],
 [{4}],
 [{6}],
 [{4}],
 [{4}],
 [{2, 3, 4, 5, 6}],
 [{4}],
 [{5}],
 [{2}],
 [{1}],
 [{2}],
 [{2}],
 [{2}],
 [{6}],
 [{2, 3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{6}],
 [{6}],
 [{4}],
 [{1, 2, 3, 4, 5, 6}],
 [{3}],
 [{4}],
 [{2}],
 [{1, 2, 3, 4, 5, 6}],
 [{6}],
 [{5}],
 [{3}],
 [{2}, {3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{5}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{1}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{6}],
 [{3}],
 [{3}],
 [{2}],
 [{1}],
 [{5}],
 [{5}],
 [{6}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{6}],
 [{6}],
 [{1}],
 [{2}],
 [{4}],
 [{1, 2, 3, 4, 5, 6}],
 [{2}],
 [{1}],
 [{1}],
 [{3}],
 [{2}],
 [{6}],
 [{6}],
 [{1, 2, 3, 4, 5, 6}],
 [{3}],
 [{1, 2, 3, 4, 5, 6}],
 [{4}],
 [{1}],
 [{4}],
 [{1}],
 [{1}],
 [{5}],
 [{2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{5}],
 [{1}],
 [{6}],
 [{2, 3, 4, 5, 6}],
 [{4}],
 [{4}],
 [{4}],
 [{4}],
 [{4}],
 [{3}],
 [{1}],
 [{2}],
 [{3}],
 [{6}],
 [{6}],
 [{6}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{2}],
 [{2, 3, 4, 5, 6}],
 [{3}],
 [{2, 3, 4, 5, 6}],
 [{4}],
 [{4}],
 [{2}],
 [{5}],
 [{1}],
 [{6}],
 [{1}],
 [{3}],
 [{1, 2, 3, 4, 5, 6}],
 [{5}],
 [{2, 3}, {4, 5, 6}],
 [{1}],
 [{5}],
 [{1}],
 [{1}],
 [{4}],
 [{4}],
 [{5}],
 [{3}],
 [{5}],
 [{1, 2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{6}],
 [{3}],
 [{2}],
 [{3}],
 [{2}],
 [{3}],
 [{3}],
 [{6}],
 [{2}],
 [{1, 2, 3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{4}],
 [{2, 3, 4, 5, 6}],
 [{5}],
 [{5}],
 [{4}],
 [{5}],
 [{6}],
 [{1, 2, 3, 4, 5, 6}],
 [{2}],
 [{1}],
 [{1}],
 [{2}],
 [{4}],
 [{2, 3, 4, 5, 6}],
 [{2, 3}, {4, 5, 6}],
 [{2}],
 [{1, 2, 3, 4, 5, 6}],
 [{4}],
 [{1, 2, 3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{6}],
 [{5}],
 [{3}],
 [{3}],
 [{1, 2, 3, 4, 5, 6}],
 [{2}],
 [{3}],
 [{6}],
 [{2}, {3, 4}, {5, 6}],
 [{2}],
 [{5}],
 [{4}],
 [{3}],
 [{2}],
 [{4}],
 [{1}],
 [{2}],
 [{2}],
 [{6}],
 [{4}],
 [{2}],
 [{2}],
 [{4}],
 [{5}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{3}],
 [{2}],
 [{2}, {3}, {4}, {5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{4}],
 [{3}],
 [{1}],
 [{6}],
 [{4}],
 [{4}],
 [{3}],
 [{1, 2, 3, 4, 5, 6}],
 [{3}],
 [{4}],
 [{1, 2, 3, 4, 5, 6}],
 [{6}],
 [{5}],
 [{1}],
 [{2, 3, 4, 5, 6}],
 [{4}],
 [{1}],
 [{4}],
 [{1, 2, 3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{2}],
 [{2}],
 [{1, 2, 3, 4, 5, 6}],
 [{3}],
 [{5}],
 [{3}],
 [{3}],
 [{2, 3, 4, 5, 6}],
 [{1}],
 [{5}],
 [{1, 2, 3, 4, 5, 6}],
 [{3}],
 [{5}],
 [{2}],
 [{5}],
 [{2}],
 [{4}],
 [{6}],
 [{6}],
 [{2}, {3, 4, 5, 6}],
 [{5}],
 [{5}],
 [{5}],
 [{5}],
 [{6}],
 [{5}],
 [{5}],
 [{6}],
 [{3}],
 [{2}],
 [{2, 3, 4, 5, 6}],
 [{2}],
 [{1}],
 [{5}],
 [{4}],
 [{3}],
 [{5}],
 [{4}],
 [{1}],
 [{1}],
 [{6}],
 [{3}],
 [{2}],
 [{3}],
 [{3}],
 [{1, 2, 3, 4, 5, 6}],
 [{5}],
 [{1, 2, 3, 4, 5, 6}],
 [{2}],
 [{1, 2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{6}],
 [{3}],
 [{6}],
 [{5}],
 [{2, 3, 4}, {5, 6}],
 [{2}],
 [{2}],
 [{2}, {3, 4, 5, 6}]]

In [16]:
onsets


Out[16]:
[[0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.237],
 [0.2],
 [0.2, 0.33],
 [0.217],
 [0.198],
 [0.232],
 [0.2],
 [0.198],
 [0.2],
 [0.2],
 [0.211],
 [0.2],
 [0.2],
 [0.2],
 [0.221],
 [0.2, 0.31],
 [0.2],
 [0.2],
 [0.212],
 [0.2],
 [0.2],
 [0.226],
 [0.21],
 [0.204],
 [0.215],
 [0.241],
 [0.2],
 [0.202],
 [0.2],
 [0.2],
 [0.208],
 [0.206],
 [0.208],
 [0.2],
 [0.2],
 [0.2],
 [0.19],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.226],
 [0.207],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.198],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.204],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.168],
 [0.2],
 [0.2],
 [0.2],
 [0.236],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.189],
 [0.199],
 [0.2],
 [0.202],
 [0.205],
 [0.2],
 [0.2],
 [0.2],
 [0.205],
 [0.167],
 [0.2],
 [0.2],
 [0.2],
 [0.2, 0.256],
 [0.186],
 [0.2],
 [0.215],
 [0.2],
 [0.233],
 [0.219],
 [0.196],
 [0.193],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.227],
 [0.2],
 [0.2],
 [0.2],
 [0.225],
 [0.197],
 [0.222],
 [0.2],
 [0.227],
 [0.2],
 [0.205],
 [0.17],
 [0.2],
 [0.224],
 [0.219],
 [0.2],
 [0.2],
 [0.205],
 [0.207],
 [0.198],
 [0.2],
 [0.2],
 [0.2],
 [0.215],
 [0.2],
 [0.2],
 [0.219],
 [0.2],
 [0.199],
 [0.19],
 [0.202],
 [0.225],
 [0.2],
 [0.2],
 [0.205],
 [0.207],
 [0.215],
 [0.2],
 [0.204],
 [0.2],
 [0.214],
 [0.205],
 [0.2],
 [0.2],
 [0.202],
 [0.213],
 [0.2],
 [0.2],
 [0.2],
 [0.198],
 [0.198],
 [0.2],
 [0.2],
 [0.2],
 [0.198],
 [0.214],
 [0.212],
 [0.226],
 [0.2],
 [0.223],
 [0.2],
 [0.227],
 [0.2],
 [0.174],
 [0.207],
 [0.186, 0.29],
 [0.2],
 [0.205],
 [0.227],
 [0.227],
 [0.21],
 [0.2],
 [0.2],
 [0.209],
 [0.2],
 [0.166],
 [0.2],
 [0.224],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.198],
 [0.2],
 [0.2],
 [0.2],
 [0.207],
 [0.2],
 [0.2],
 [0.19],
 [0.2],
 [0.2],
 [0.2],
 [0.232],
 [0.2],
 [0.2],
 [0.24],
 [0.179],
 [0.198, 0.299],
 [0.226],
 [0.2],
 [0.2],
 [0.183],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.205],
 [0.212],
 [0.2],
 [0.206],
 [0.197, 0.257, 0.362],
 [0.2],
 [0.2],
 [0.202],
 [0.206],
 [0.2],
 [0.2],
 [0.223],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.208],
 [0.2],
 [0.2],
 [0.236],
 [0.198],
 [0.2],
 [0.238],
 [0.2, 0.256, 0.316, 0.369],
 [0.2],
 [0.2],
 [0.2],
 [0.206],
 [0.204],
 [0.2],
 [0.218],
 [0.219],
 [0.2],
 [0.214],
 [0.2],
 [0.2],
 [0.197],
 [0.2],
 [0.2],
 [0.216],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.211],
 [0.2],
 [0.2],
 [0.2],
 [0.22],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.211],
 [0.195],
 [0.2],
 [0.2],
 [0.21],
 [0.2],
 [0.2],
 [0.2],
 [0.215],
 [0.2],
 [0.198, 0.253],
 [0.2],
 [0.2],
 [0.215],
 [0.212],
 [0.212],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.228],
 [0.192],
 [0.2],
 [0.2],
 [0.2],
 [0.212],
 [0.2],
 [0.212],
 [0.207],
 [0.217],
 [0.2],
 [0.2],
 [0.2],
 [0.204],
 [0.2],
 [0.2],
 [0.195],
 [0.213],
 [0.186],
 [0.238],
 [0.204],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.192, 0.31],
 [0.2],
 [0.221],
 [0.188, 0.24]]

In [17]:
label_binarizer = MultiLabelBinarizer(classes=range(1, 7))
label_binarizer.fit(None)  # fit needs to be called before transform
cur_y = label_binarizer.transform([[]])
cur_y


Out[17]:
array([[0, 0, 0, 0, 0, 0]])

In [18]:
np.zeros((1,6)).astype('int')


Out[18]:
array([[0, 0, 0, 0, 0, 0]])

In [19]:
label_binarizer.transform(strings[8])


Out[19]:
array([[0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 1, 1]])

In [40]:
### VERY SLOW ###
import time, datetime

start = time.clock()

y = np.array([]).reshape(-1, 6)
no_string = np.zeros((1,6)).astype('uint8')
for i in range(len(strings)):
    frame = 0
    cur_y = no_string
    for j in range(len(onsets[i])):
        frame_end_prev = max(0, int(onsets[i][j]*100)-3)  # TODO consider offset detection!
        for k in range(frame, frame_end_prev):
            y = np.vstack((y, cur_y))  # label prev strings
        frame = max(0, int(onsets[i][j]*100)-1)  # update frame: label even one frame before onset!
        for k in range(frame_end_prev, frame):
            y = np.vstack((y, no_string))  # label empty strings between
        cur_y = label_binarizer.transform([strings[i][j]]).astype('uint8')  # update current strings
        # TODO add pitch list?

    # write last note until the end
    total_frames = ceil(samples[i].shape[0] / 441)
    for k in range(frame, total_frames + 1):
        y = np.vstack((y, cur_y))  # label prev strings

end = time.clock()
value = end - start
timestamp = datetime.datetime.fromtimestamp(value)
print(value, '->', timestamp.strftime('%M:%S'))


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-40-49d591a42d33> in <module>()
     26 value = end - start
     27 timestamp = datetime.datetime.fromtimestamp(value)
---> 28 print(vlue, '->', timestamp.strftime('%H:%M:%S'))

NameError: name 'vlue' is not defined

In [52]:
import time, datetime

start = time.clock()

y = np.array([]).reshape(-1, 6)
no_string = np.zeros((1,6)).astype('uint8')
for i in range(len(strings)):
    frame = 0
    cur_y = no_string
    total_frames = ceil(samples[i].shape[0] / 441)
    yy = np.empty((total_frames, 6), 'uint8')
    for j in range(len(onsets[i])):
        frame_end_prev = max(0, int(onsets[i][j]*100)-3)  # TODO consider offset detection!
        for k in range(frame, frame_end_prev):
            yy[k] = cur_y  # label prev strings
        frame = max(0, int(onsets[i][j]*100)-1)  # update frame: label even one frame before onset!
        for k in range(frame_end_prev, frame):
            yy[k] = no_string  # label empty strings between
        cur_y = label_binarizer.transform([strings[i][j]]).astype('uint8')  # update current strings
        # TODO add pitch list?

    # write last note until the end
    for k in range(frame, total_frames):
        yy[k] = cur_y  # label prev strings
    
    # append yy to y
    y = np.vstack((y, yy))

end = time.clock()
value = end - start
timestamp = datetime.datetime.fromtimestamp(value)
print(value, '->', timestamp.strftime('%M:%S'))


0.3580416655619274 -> 00:00

In [54]:
start = time.clock()

y_list = []
no_string = np.zeros((1, 6)).astype('uint8')
for i in range(len(strings)):
    frame = 0
    cur_y = no_string
    total_frames = ceil(samples[i].shape[0] / 441)
    y = np.empty((total_frames, 6), 'uint8')
    for j in range(len(onsets[i])):
        frame_end_prev = max(0, int(onsets[i][j] * 100) - 3)  # TODO consider offset detection!
        for k in range(frame, frame_end_prev):
            y[k] = cur_y  # label prev strings
        frame = max(0, int(onsets[i][j] * 100) - 1)  # update frame: label even one frame before onset!
        for k in range(frame_end_prev, frame):
            y[k] = no_string  # label empty strings between
        cur_y = label_binarizer.transform([strings[i][j]]).astype('uint8')  # update current strings
        # TODO add pitch list?

    # write last note until the end
    for k in range(frame, total_frames):
        y[k] = cur_y  # label prev strings

    y_list.append(y)

y = np.concatenate(y_list)
    
end = time.clock()
print(end - start, '->', timestamp.strftime('%M:%S'))


0.09744886744147152 -> 00:00

In [53]:
y.shape


Out[53]:
(77699, 6)

In [39]:
y.shape


Out[39]:
(78019, 6)

In [17]:
print(X[0].shape)
print(X[0].shape[0] / 441, '->', ceil(X[0].shape[0] / 441))


(110250,)
250.0 -> 250

In [14]:
A = np.array([]).reshape(-1, 6)
A = np.vstack((A, np.zeros((1,6))))
A = np.vstack((A, np.ones((1,6)) + 1))
A = np.vstack((A, np.ones((1,6))))
A


Out[14]:
array([[ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 2.,  2.,  2.,  2.,  2.,  2.],
       [ 1.,  1.,  1.,  1.,  1.,  1.]])

In [ ]: