In [1]:
from keras import backend as K
from keras.callbacks import EarlyStopping
from keras.models import Sequential
from keras.layers import Activation, Conv1D, Dense, Dropout, Flatten, MaxPooling1D
from keras.wrappers.scikit_learn import KerasClassifier
from matplotlib import pyplot as plt
%matplotlib inline
import numpy as np
from math import ceil
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MultiLabelBinarizer
from sklearn.model_selection import train_test_split
# Make the parent folder importable so the `music_transcription` package resolves.
import sys
import os

PACKAGE_PARENT = '..'
# '__file__' is a plain string literal here (not the module attribute), so the
# joined path is "<cwd>/__file__" and its dirname is the resolved working directory.
_anchor_path = os.path.join(os.getcwd(), os.path.expanduser('__file__'))
SCRIPT_DIR = os.path.dirname(os.path.realpath(_anchor_path))
_parent_dir = os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))
sys.path.append(_parent_dir)
from music_transcription.onset_detection.metrics import onset_metric
from music_transcription.onset_detection.read_data import get_wav_and_truth_files
from music_transcription.string_fret_detection.read_data import read_data_y
from music_transcription.string_fret_detection.cnn_string_detector import CnnStringDetector, CnnStringFeatureExtractor
In [2]:
# Datasets to load. The full selection would be {1, 2, 3, 4}; only dataset 1 is active.
active_datasets = {1}

wav_file_paths, truth_dataset_format_tuples = get_wav_and_truth_files(active_datasets)

# Hold out 20% of the files for testing; the fixed random_state keeps the split repeatable.
(wav_file_paths_train,
 wav_file_paths_test,
 truth_dataset_format_tuples_train,
 truth_dataset_format_tuples_test) = train_test_split(
    wav_file_paths,
    truth_dataset_format_tuples,
    test_size=0.2,
    random_state=42
)
In [12]:
# Load the training files; read_data_y returns three values and only the first is kept.
# NOTE(review): 44100 is presumably the sample rate in Hz and 6 the number of strings;
# the meaning of 1 and 0.05 is not visible here -- confirm against read_data_y.
data_train, _, _ = read_data_y(wav_file_paths_train, truth_dataset_format_tuples_train, 44100, 1, 6, 0.05)
# data_train unpacks into four parts that the later cells index per file (samples[i], onsets[i], ...).
samples, onsets, pitches, strings = data_train
In [60]:
# Build the feature extractor and run it on the first element of data_train.
# NOTE(review): 6 and 441 are presumably the string count and the frame hop in samples
# (441 is also the divisor used for the frame counts further down) -- confirm.
fe = CnnStringFeatureExtractor(6, 441)
X = fe.fit_transform(data_train[0])
# Display the shape of the extracted features.
X.shape
Out[60]:
In [13]:
# Sanity check: number of training files, number of entries in X, and the shapes of entries 0 and 7.
print(len(wav_file_paths_train), len(X), X[0].shape, X[7].shape)
# Shape of all entries of X concatenated and flattened into one 1-D array.
print(np.concatenate(X).ravel().shape) # flatten
In [14]:
# The three label collections unpacked from data_train should have matching lengths.
print(len(onsets), len(pitches), len(strings))
In [15]:
# Inspect the string labels unpacked from data_train.
strings
Out[15]:
In [16]:
# Inspect the onset values unpacked from data_train.
onsets
Out[16]:
In [17]:
# One indicator column per class 1..6. The classes are fixed up front, so fit()
# receives no data; it still has to be called before transform() can be used.
label_binarizer = MultiLabelBinarizer(classes=range(1, 7)).fit(None)

# An empty label set gives the binarized row for "no class active".
cur_y = label_binarizer.transform([[]])
cur_y
Out[17]:
In [18]:
# Scratch check: a 1x6 row of integer zeros, for comparison with the binarized empty label set.
np.zeros((1,6)).astype('int')
Out[18]:
In [19]:
# Binarize the labels of the entry at index 8: one output row per element of strings[8].
# NOTE(review): presumably each element is the list of strings sounding at one onset -- confirm.
label_binarizer.transform(strings[8])
Out[19]:
In [40]:
### VERY SLOW ###
# Frame-wise string labels, built by appending ONE row per frame with np.vstack.
# Every append copies the whole array, so the run time grows quadratically with
# the number of frames; this cell is kept as the slow baseline.
#
# Reads `strings`, `onsets`, `samples` and `label_binarizer` from earlier cells and
# leaves the result in `y`. Because `y` starts as an empty float array, the
# stacked result is float64 even though the appended rows are uint8.
import time, datetime

# time.clock() was removed in Python 3.8; perf_counter() is the wall-clock replacement.
start = time.perf_counter()
y = np.array([]).reshape(-1, 6)
no_string = np.zeros((1, 6)).astype('uint8')
for i in range(len(strings)):
    frame = 0  # index of the next frame that still needs a label
    cur_y = no_string
    for j in range(len(onsets[i])):
        # onset * 100 is used as the frame index of the onset.
        # NOTE(review): presumably onsets are in seconds (44100 Hz / 441 samples = 100 frames/s) -- confirm.
        frame_end_prev = max(0, int(onsets[i][j] * 100) - 3)  # TODO consider offset detection!
        for k in range(frame, frame_end_prev):
            y = np.vstack((y, cur_y))  # label prev strings
        # Start the gap at the current write position. If two onsets are so close
        # that frame_end_prev lies before `frame`, starting at frame_end_prev would
        # append rows for frames that were already emitted and shift all later labels.
        gap_start = max(frame, frame_end_prev)
        frame = max(0, int(onsets[i][j] * 100) - 1)  # update frame: label even one frame before onset!
        for k in range(gap_start, frame):
            y = np.vstack((y, no_string))  # label empty strings between
        cur_y = label_binarizer.transform([strings[i][j]]).astype('uint8')  # update current strings
        # TODO add pitch list?
    # write last note until the end
    total_frames = ceil(samples[i].shape[0] / 441)
    # NOTE(review): this emits total_frames + 1 rows per file, one more than the
    # buffer-based cells below, which allocate exactly total_frames rows -- confirm which is intended.
    for k in range(frame, total_frames + 1):
        y = np.vstack((y, cur_y))  # label prev strings
end = time.perf_counter()
value = end - start
# Interpret the elapsed seconds in UTC so the mm:ss output does not depend on the local time zone.
timestamp = datetime.datetime.fromtimestamp(value, datetime.timezone.utc)
print(value, '->', timestamp.strftime('%M:%S'))
In [52]:
# Frame-wise string labels, second variant: each file gets a preallocated
# (total_frames, 6) buffer `yy` that is filled by index, and only the finished
# buffer is appended to `y` with a single np.vstack per file.
#
# Reads `strings`, `onsets`, `samples` and `label_binarizer` from earlier cells.
# `y` starts as an empty float array, so the stacked result is float64.
import time, datetime

# time.clock() was removed in Python 3.8; perf_counter() is the wall-clock replacement.
start = time.perf_counter()
y = np.array([]).reshape(-1, 6)
no_string = np.zeros((1, 6)).astype('uint8')
for i in range(len(strings)):
    frame = 0  # index of the next frame that still needs a label
    cur_y = no_string
    total_frames = ceil(samples[i].shape[0] / 441)
    # Uninitialised on purpose: the loops below write every row from 0 to total_frames - 1.
    yy = np.empty((total_frames, 6), 'uint8')
    for j in range(len(onsets[i])):
        # onset * 100 is used as the frame index of the onset.
        # NOTE(review): presumably onsets are in seconds (44100 Hz / 441 samples = 100 frames/s) -- confirm.
        frame_end_prev = max(0, int(onsets[i][j] * 100) - 3)  # TODO consider offset detection!
        for k in range(frame, frame_end_prev):
            yy[k] = cur_y  # label prev strings
        frame = max(0, int(onsets[i][j] * 100) - 1)  # update frame: label even one frame before onset!
        for k in range(frame_end_prev, frame):
            yy[k] = no_string  # label empty strings between
        cur_y = label_binarizer.transform([strings[i][j]]).astype('uint8')  # update current strings
        # TODO add pitch list?
    # write last note until the end
    for k in range(frame, total_frames):
        yy[k] = cur_y  # label prev strings
    # append yy to y
    y = np.vstack((y, yy))
end = time.perf_counter()
value = end - start
# Interpret the elapsed seconds in UTC so the mm:ss output does not depend on the local time zone.
timestamp = datetime.datetime.fromtimestamp(value, datetime.timezone.utc)
print(value, '->', timestamp.strftime('%M:%S'))
In [54]:
# Frame-wise string labels, third variant: one preallocated uint8 buffer per file,
# collected in a list and joined with a single np.concatenate at the end.
#
# Reads `strings`, `onsets`, `samples` and `label_binarizer` from earlier cells and
# leaves the uint8 label matrix in `y`.
# Imported here so the cell does not rely on an earlier cell having imported them.
import time, datetime

# time.clock() was removed in Python 3.8; perf_counter() is the wall-clock replacement.
start = time.perf_counter()
y_list = []
no_string = np.zeros((1, 6)).astype('uint8')
for i in range(len(strings)):
    frame = 0  # index of the next frame that still needs a label
    cur_y = no_string
    total_frames = ceil(samples[i].shape[0] / 441)
    # Uninitialised on purpose: the loops below write every row from 0 to total_frames - 1.
    y_file = np.empty((total_frames, 6), 'uint8')
    for j in range(len(onsets[i])):
        # onset * 100 is used as the frame index of the onset.
        # NOTE(review): presumably onsets are in seconds (44100 Hz / 441 samples = 100 frames/s) -- confirm.
        frame_end_prev = max(0, int(onsets[i][j] * 100) - 3)  # TODO consider offset detection!
        for k in range(frame, frame_end_prev):
            y_file[k] = cur_y  # label prev strings
        frame = max(0, int(onsets[i][j] * 100) - 1)  # update frame: label even one frame before onset!
        for k in range(frame_end_prev, frame):
            y_file[k] = no_string  # label empty strings between
        cur_y = label_binarizer.transform([strings[i][j]]).astype('uint8')  # update current strings
        # TODO add pitch list?
    # write last note until the end
    for k in range(frame, total_frames):
        y_file[k] = cur_y  # label prev strings
    y_list.append(y_file)
y = np.concatenate(y_list)
end = time.perf_counter()
value = end - start
# Build the timestamp from THIS cell's elapsed time; the cell previously printed a
# `timestamp` left over from an earlier cell, so the mm:ss shown was stale.
# UTC keeps the mm:ss output independent of the local time zone.
timestamp = datetime.datetime.fromtimestamp(value, datetime.timezone.utc)
print(value, '->', timestamp.strftime('%M:%S'))
In [53]:
# Shape of the label matrix y produced by whichever labelling cell ran last.
y.shape
Out[53]:
In [39]:
# Shape of the label matrix y again (this cell carries a different execution count than the one above).
y.shape
Out[39]:
In [17]:
# Shape of the first entry of X.
print(X[0].shape)
# Its first dimension divided by 441, and the rounded-up value (the same
# ceil(... / 441) expression the labelling cells use for total_frames).
print(X[0].shape[0] / 441, '->', ceil(X[0].shape[0] / 441))
In [14]:
# Scratch demo: stacking rows onto an empty (0, 6) float array gives a (3, 6)
# float64 result holding a row of zeros, a row of twos and a row of ones.
A = np.vstack((
    np.array([]).reshape(-1, 6),
    np.zeros((1, 6)),
    np.ones((1, 6)) + 1,
    np.ones((1, 6)),
))
A
Out[14]:
In [ ]: