In [1]:
import glob
import json
import os
from tomato.audio.pitchdistribution import PitchDistribution
import pickle
from tomato.io import IO

We use the features that were already computed for DLfM 2016. If you want to compute them from scratch, refer to the feature extraction and training notebooks in that repository.


In [2]:
# Locate the feature folder computed with the best performing parameters
experiment_folder = '../../../../../experiments/makam_recognition_experiments/data/features/'
parameter_str = 'pcd--7_5--15_0'
training_folder = os.path.join(experiment_folder, parameter_str)

# Collect the JSON files that store the individual data points
training_file_pattern = training_folder + '/*pdf.json'
training_files = glob.glob(training_file_pattern)

# Report how many files were found and preview the first few paths
print("Read %d training files" % len(training_files))
display(training_files[:5])



Read 1000 training files
['../../../../../experiments/makam_recognition_experiments/data/features/pcd--7_5--15_0/135415d2-0df5-454a-bd14-d0482d558390--pdf.json',
 '../../../../../experiments/makam_recognition_experiments/data/features/pcd--7_5--15_0/88e95b47-aa5a-4a45-ad7a-51b68138affc--pdf.json',
 '../../../../../experiments/makam_recognition_experiments/data/features/pcd--7_5--15_0/c79290ea-2b38-41d1-ae53-bce6c12682d1--pdf.json',
 '../../../../../experiments/makam_recognition_experiments/data/features/pcd--7_5--15_0/9df6ac0f-60fa-4ffa-ac25-a1300c5d5e07--pdf.json',
 '../../../../../experiments/makam_recognition_experiments/data/features/pcd--7_5--15_0/02fa6e35-2962-4ef4-9f75-bf8d712f9c07--pdf.json']

In [3]:
# instantiate the model: a list with one dict per training file, where the
# 'feature' entry is converted to a PitchDistribution object and the 'mode'
# label is normalized
model = []
for j in training_files:
    # read through a context manager so each feature file is closed as soon as
    # it is parsed, instead of leaking one open handle per training file
    with open(j) as feature_file:
        training_datum = json.load(feature_file)
    training_datum['feature'] = PitchDistribution.from_dict(training_datum['feature'])

    # make sure the training labels are slugified and lowercase
    training_datum['mode'] = IO.slugify_tr(training_datum['mode']).lower()

    model.append(training_datum)

# save to pickle; the context manager guarantees the file is flushed and
# closed once the dump finishes (or if it raises)
save_file = "./training_model--%s--%s.pkl" %(parameter_str, 'dlfm2016')
with open(save_file, 'wb') as model_file:
    pickle.dump(model, model_file)