notebook.community

Edit and run



In [1]:

    
from keras import backend as K
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Activation, Conv1D, Dense, Dropout, Flatten, MaxPooling1D
from keras.wrappers.scikit_learn import KerasClassifier

from matplotlib import pyplot as plt
%matplotlib inline
import numpy as np
from math import ceil

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer
from sklearn.model_selection import train_test_split

# append parent folder to path for imports
import sys
import os
PACKAGE_PARENT = '..'
SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser('__file__'))))
sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))

from music_transcription.onset_detection.metrics import onset_metric
from music_transcription.onset_detection.read_data import get_wav_and_truth_files
from music_transcription.string_fret_detection.read_data import read_data_y
from music_transcription.string_fret_detection.cnn_string_detector import CnnStringDetector, CnnStringFeatureExtractor









    



Using Theano backend.



In [2]:

    
active_datasets = {1, 2, 3, 4}
active_datasets = {1}
# X_parts, y_parts, y_start_only_parts, ds_labels
wav_file_paths, truth_dataset_format_tuples = get_wav_and_truth_files(active_datasets)
wav_file_paths_train, wav_file_paths_test, truth_dataset_format_tuples_train, truth_dataset_format_tuples_test = train_test_split(
    wav_file_paths, truth_dataset_format_tuples, test_size=0.2, random_state=42
)



In [12]:

    
data_train, _, _ = read_data_y(wav_file_paths_train, truth_dataset_format_tuples_train, 44100, 1, 6, 0.05)
samples, onsets, pitches, strings = data_train



In [60]:

    
fe = CnnStringFeatureExtractor(6, 441)
X = fe.fit_transform(data_train[0])
X.shape









    



Fitting standard scaler
Standardizing samples






    Out[60]:





(77699, 441, 1)



In [13]:

    
print(len(wav_file_paths_train), len(X), X[0].shape, X[7].shape)
print(np.concatenate(X).ravel().shape) # flatten









    



320 320 (110250,) (94585,)
(34255306,)



In [14]:

    
print(len(onsets), len(pitches), len(strings))









    



320 320 320



In [15]:

    
strings









    Out[15]:





[[{1}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{2}],
 [{4}],
 [{5}],
 [{1}],
 [{5}],
 [{2, 3, 4}, {5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{1}],
 [{2, 3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{6}],
 [{6}],
 [{4}],
 [{6}],
 [{2, 3, 4, 5, 6}],
 [{6}],
 [{1}],
 [{2, 3, 4}, {5, 6}],
 [{2}],
 [{6}],
 [{2}],
 [{1}],
 [{2}],
 [{1}],
 [{4}],
 [{3}],
 [{1}],
 [{5}],
 [{3}],
 [{6}],
 [{4}],
 [{5}],
 [{1, 2, 3, 4, 5, 6}],
 [{5}],
 [{6}],
 [{3}],
 [{3}],
 [{2, 3, 4, 5, 6}],
 [{3}],
 [{3}],
 [{2}],
 [{4}],
 [{3}],
 [{2}],
 [{3}],
 [{1}],
 [{5}],
 [{3}],
 [{2}],
 [{6}],
 [{2, 3, 4, 5, 6}],
 [{6}],
 [{2, 3, 4, 5, 6}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{5}],
 [{4}],
 [{3}],
 [{5}],
 [{4}],
 [{6}],
 [{4}],
 [{4}],
 [{2, 3, 4, 5, 6}],
 [{4}],
 [{5}],
 [{2}],
 [{1}],
 [{2}],
 [{2}],
 [{2}],
 [{6}],
 [{2, 3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{6}],
 [{6}],
 [{4}],
 [{1, 2, 3, 4, 5, 6}],
 [{3}],
 [{4}],
 [{2}],
 [{1, 2, 3, 4, 5, 6}],
 [{6}],
 [{5}],
 [{3}],
 [{2}, {3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{5}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{1}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{6}],
 [{3}],
 [{3}],
 [{2}],
 [{1}],
 [{5}],
 [{5}],
 [{6}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{6}],
 [{6}],
 [{1}],
 [{2}],
 [{4}],
 [{1, 2, 3, 4, 5, 6}],
 [{2}],
 [{1}],
 [{1}],
 [{3}],
 [{2}],
 [{6}],
 [{6}],
 [{1, 2, 3, 4, 5, 6}],
 [{3}],
 [{1, 2, 3, 4, 5, 6}],
 [{4}],
 [{1}],
 [{4}],
 [{1}],
 [{1}],
 [{5}],
 [{2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{5}],
 [{1}],
 [{6}],
 [{2, 3, 4, 5, 6}],
 [{4}],
 [{4}],
 [{4}],
 [{4}],
 [{4}],
 [{3}],
 [{1}],
 [{2}],
 [{3}],
 [{6}],
 [{6}],
 [{6}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{2}],
 [{2, 3, 4, 5, 6}],
 [{3}],
 [{2, 3, 4, 5, 6}],
 [{4}],
 [{4}],
 [{2}],
 [{5}],
 [{1}],
 [{6}],
 [{1}],
 [{3}],
 [{1, 2, 3, 4, 5, 6}],
 [{5}],
 [{2, 3}, {4, 5, 6}],
 [{1}],
 [{5}],
 [{1}],
 [{1}],
 [{4}],
 [{4}],
 [{5}],
 [{3}],
 [{5}],
 [{1, 2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{6}],
 [{3}],
 [{2}],
 [{3}],
 [{2}],
 [{3}],
 [{3}],
 [{6}],
 [{2}],
 [{1, 2, 3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{4}],
 [{2, 3, 4, 5, 6}],
 [{5}],
 [{5}],
 [{4}],
 [{5}],
 [{6}],
 [{1, 2, 3, 4, 5, 6}],
 [{2}],
 [{1}],
 [{1}],
 [{2}],
 [{4}],
 [{2, 3, 4, 5, 6}],
 [{2, 3}, {4, 5, 6}],
 [{2}],
 [{1, 2, 3, 4, 5, 6}],
 [{4}],
 [{1, 2, 3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{6}],
 [{5}],
 [{3}],
 [{3}],
 [{1, 2, 3, 4, 5, 6}],
 [{2}],
 [{3}],
 [{6}],
 [{2}, {3, 4}, {5, 6}],
 [{2}],
 [{5}],
 [{4}],
 [{3}],
 [{2}],
 [{4}],
 [{1}],
 [{2}],
 [{2}],
 [{6}],
 [{4}],
 [{2}],
 [{2}],
 [{4}],
 [{5}],
 [{1}],
 [{1, 2, 3, 4, 5, 6}],
 [{3}],
 [{2}],
 [{2}, {3}, {4}, {5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{4}],
 [{3}],
 [{1}],
 [{6}],
 [{4}],
 [{4}],
 [{3}],
 [{1, 2, 3, 4, 5, 6}],
 [{3}],
 [{4}],
 [{1, 2, 3, 4, 5, 6}],
 [{6}],
 [{5}],
 [{1}],
 [{2, 3, 4, 5, 6}],
 [{4}],
 [{1}],
 [{4}],
 [{1, 2, 3, 4, 5, 6}],
 [{1, 2, 3, 4, 5, 6}],
 [{2}],
 [{2}],
 [{1, 2, 3, 4, 5, 6}],
 [{3}],
 [{5}],
 [{3}],
 [{3}],
 [{2, 3, 4, 5, 6}],
 [{1}],
 [{5}],
 [{1, 2, 3, 4, 5, 6}],
 [{3}],
 [{5}],
 [{2}],
 [{5}],
 [{2}],
 [{4}],
 [{6}],
 [{6}],
 [{2}, {3, 4, 5, 6}],
 [{5}],
 [{5}],
 [{5}],
 [{5}],
 [{6}],
 [{5}],
 [{5}],
 [{6}],
 [{3}],
 [{2}],
 [{2, 3, 4, 5, 6}],
 [{2}],
 [{1}],
 [{5}],
 [{4}],
 [{3}],
 [{5}],
 [{4}],
 [{1}],
 [{1}],
 [{6}],
 [{3}],
 [{2}],
 [{3}],
 [{3}],
 [{1, 2, 3, 4, 5, 6}],
 [{5}],
 [{1, 2, 3, 4, 5, 6}],
 [{2}],
 [{1, 2, 3, 4, 5, 6}],
 [{2, 3, 4, 5, 6}],
 [{6}],
 [{3}],
 [{6}],
 [{5}],
 [{2, 3, 4}, {5, 6}],
 [{2}],
 [{2}],
 [{2}, {3, 4, 5, 6}]]



In [16]:

    
onsets









    Out[16]:





[[0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.237],
 [0.2],
 [0.2, 0.33],
 [0.217],
 [0.198],
 [0.232],
 [0.2],
 [0.198],
 [0.2],
 [0.2],
 [0.211],
 [0.2],
 [0.2],
 [0.2],
 [0.221],
 [0.2, 0.31],
 [0.2],
 [0.2],
 [0.212],
 [0.2],
 [0.2],
 [0.226],
 [0.21],
 [0.204],
 [0.215],
 [0.241],
 [0.2],
 [0.202],
 [0.2],
 [0.2],
 [0.208],
 [0.206],
 [0.208],
 [0.2],
 [0.2],
 [0.2],
 [0.19],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.226],
 [0.207],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.198],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.204],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.168],
 [0.2],
 [0.2],
 [0.2],
 [0.236],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.189],
 [0.199],
 [0.2],
 [0.202],
 [0.205],
 [0.2],
 [0.2],
 [0.2],
 [0.205],
 [0.167],
 [0.2],
 [0.2],
 [0.2],
 [0.2, 0.256],
 [0.186],
 [0.2],
 [0.215],
 [0.2],
 [0.233],
 [0.219],
 [0.196],
 [0.193],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.227],
 [0.2],
 [0.2],
 [0.2],
 [0.225],
 [0.197],
 [0.222],
 [0.2],
 [0.227],
 [0.2],
 [0.205],
 [0.17],
 [0.2],
 [0.224],
 [0.219],
 [0.2],
 [0.2],
 [0.205],
 [0.207],
 [0.198],
 [0.2],
 [0.2],
 [0.2],
 [0.215],
 [0.2],
 [0.2],
 [0.219],
 [0.2],
 [0.199],
 [0.19],
 [0.202],
 [0.225],
 [0.2],
 [0.2],
 [0.205],
 [0.207],
 [0.215],
 [0.2],
 [0.204],
 [0.2],
 [0.214],
 [0.205],
 [0.2],
 [0.2],
 [0.202],
 [0.213],
 [0.2],
 [0.2],
 [0.2],
 [0.198],
 [0.198],
 [0.2],
 [0.2],
 [0.2],
 [0.198],
 [0.214],
 [0.212],
 [0.226],
 [0.2],
 [0.223],
 [0.2],
 [0.227],
 [0.2],
 [0.174],
 [0.207],
 [0.186, 0.29],
 [0.2],
 [0.205],
 [0.227],
 [0.227],
 [0.21],
 [0.2],
 [0.2],
 [0.209],
 [0.2],
 [0.166],
 [0.2],
 [0.224],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.198],
 [0.2],
 [0.2],
 [0.2],
 [0.207],
 [0.2],
 [0.2],
 [0.19],
 [0.2],
 [0.2],
 [0.2],
 [0.232],
 [0.2],
 [0.2],
 [0.24],
 [0.179],
 [0.198, 0.299],
 [0.226],
 [0.2],
 [0.2],
 [0.183],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.205],
 [0.212],
 [0.2],
 [0.206],
 [0.197, 0.257, 0.362],
 [0.2],
 [0.2],
 [0.202],
 [0.206],
 [0.2],
 [0.2],
 [0.223],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.208],
 [0.2],
 [0.2],
 [0.236],
 [0.198],
 [0.2],
 [0.238],
 [0.2, 0.256, 0.316, 0.369],
 [0.2],
 [0.2],
 [0.2],
 [0.206],
 [0.204],
 [0.2],
 [0.218],
 [0.219],
 [0.2],
 [0.214],
 [0.2],
 [0.2],
 [0.197],
 [0.2],
 [0.2],
 [0.216],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.211],
 [0.2],
 [0.2],
 [0.2],
 [0.22],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.211],
 [0.195],
 [0.2],
 [0.2],
 [0.21],
 [0.2],
 [0.2],
 [0.2],
 [0.215],
 [0.2],
 [0.198, 0.253],
 [0.2],
 [0.2],
 [0.215],
 [0.212],
 [0.212],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.228],
 [0.192],
 [0.2],
 [0.2],
 [0.2],
 [0.212],
 [0.2],
 [0.212],
 [0.207],
 [0.217],
 [0.2],
 [0.2],
 [0.2],
 [0.204],
 [0.2],
 [0.2],
 [0.195],
 [0.213],
 [0.186],
 [0.238],
 [0.204],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.2],
 [0.192, 0.31],
 [0.2],
 [0.221],
 [0.188, 0.24]]



In [17]:

    
label_binarizer = MultiLabelBinarizer(classes=range(1, 7))
label_binarizer.fit(None)  # fit needs to be called before transform
cur_y = label_binarizer.transform([[]])
cur_y









    Out[17]:





array([[0, 0, 0, 0, 0, 0]])



In [18]:

    
np.zeros((1,6)).astype('int')









    Out[18]:





array([[0, 0, 0, 0, 0, 0]])



In [19]:

    
label_binarizer.transform(strings[8])









    Out[19]:





array([[0, 1, 1, 1, 0, 0],
       [0, 0, 0, 0, 1, 1]])



In [40]:

    
### VERY SLOW ###
import time, datetime

start = time.clock()

y = np.array([]).reshape(-1, 6)
no_string = np.zeros((1,6)).astype('uint8')
for i in range(len(strings)):
    frame = 0
    cur_y = no_string
    for j in range(len(onsets[i])):
        frame_end_prev = max(0, int(onsets[i][j]*100)-3)  # TODO consider offset detection!
        for k in range(frame, frame_end_prev):
            y = np.vstack((y, cur_y))  # label prev strings
        frame = max(0, int(onsets[i][j]*100)-1)  # update frame: label even one frame before onset!
        for k in range(frame_end_prev, frame):
            y = np.vstack((y, no_string))  # label empty strings between
        cur_y = label_binarizer.transform([strings[i][j]]).astype('uint8')  # update current strings
        # TODO add pitch list?

    # write last note until the end
    total_frames = ceil(samples[i].shape[0] / 441)
    for k in range(frame, total_frames + 1):
        y = np.vstack((y, cur_y))  # label prev strings

end = time.clock()
value = end - start
timestamp = datetime.datetime.fromtimestamp(value)
print(value, '->', timestamp.strftime('%M:%S'))









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-40-49d591a42d33> in <module>()
     26 value = end - start
     27 timestamp = datetime.datetime.fromtimestamp(value)
---> 28 print(vlue, '->', timestamp.strftime('%H:%M:%S'))

NameError: name 'vlue' is not defined



In [52]:

    
import time, datetime

start = time.clock()

y = np.array([]).reshape(-1, 6)
no_string = np.zeros((1,6)).astype('uint8')
for i in range(len(strings)):
    frame = 0
    cur_y = no_string
    total_frames = ceil(samples[i].shape[0] / 441)
    yy = np.empty((total_frames, 6), 'uint8')
    for j in range(len(onsets[i])):
        frame_end_prev = max(0, int(onsets[i][j]*100)-3)  # TODO consider offset detection!
        for k in range(frame, frame_end_prev):
            yy[k] = cur_y  # label prev strings
        frame = max(0, int(onsets[i][j]*100)-1)  # update frame: label even one frame before onset!
        for k in range(frame_end_prev, frame):
            yy[k] = no_string  # label empty strings between
        cur_y = label_binarizer.transform([strings[i][j]]).astype('uint8')  # update current strings
        # TODO add pitch list?

    # write last note until the end
    for k in range(frame, total_frames):
        yy[k] = cur_y  # label prev strings
    
    # append yy to y
    y = np.vstack((y, yy))

end = time.clock()
value = end - start
timestamp = datetime.datetime.fromtimestamp(value)
print(value, '->', timestamp.strftime('%M:%S'))









    



0.3580416655619274 -> 00:00



In [54]:

    
start = time.clock()

y_list = []
no_string = np.zeros((1, 6)).astype('uint8')
for i in range(len(strings)):
    frame = 0
    cur_y = no_string
    total_frames = ceil(samples[i].shape[0] / 441)
    y = np.empty((total_frames, 6), 'uint8')
    for j in range(len(onsets[i])):
        frame_end_prev = max(0, int(onsets[i][j] * 100) - 3)  # TODO consider offset detection!
        for k in range(frame, frame_end_prev):
            y[k] = cur_y  # label prev strings
        frame = max(0, int(onsets[i][j] * 100) - 1)  # update frame: label even one frame before onset!
        for k in range(frame_end_prev, frame):
            y[k] = no_string  # label empty strings between
        cur_y = label_binarizer.transform([strings[i][j]]).astype('uint8')  # update current strings
        # TODO add pitch list?

    # write last note until the end
    for k in range(frame, total_frames):
        y[k] = cur_y  # label prev strings

    y_list.append(y)

y = np.concatenate(y_list)
    
end = time.clock()
print(end - start, '->', timestamp.strftime('%M:%S'))









    



0.09744886744147152 -> 00:00



In [53]:

    
y.shape









    Out[53]:





(77699, 6)



In [39]:

    
y.shape









    Out[39]:





(78019, 6)



In [17]:

    
print(X[0].shape)
print(X[0].shape[0] / 441, '->', ceil(X[0].shape[0] / 441))









    



(110250,)
250.0 -> 250



In [14]:

    
A = np.array([]).reshape(-1, 6)
A = np.vstack((A, np.zeros((1,6))))
A = np.vstack((A, np.ones((1,6)) + 1))
A = np.vstack((A, np.ones((1,6))))
A









    Out[14]:





array([[ 0.,  0.,  0.,  0.,  0.,  0.],
       [ 2.,  2.,  2.,  2.,  2.,  2.],
       [ 1.,  1.,  1.,  1.,  1.,  1.]])



In [ ]: