In [1]:
import os
import simplejson
import logging
# Module-level registry of loaded tracks; add_track() appends to it.
tracks = []


def add_track(cls, raw_data):
    """Register one track in the global ``tracks`` list.

    Args:
        cls: Class label, stored under the 'class' key.
        raw_data: Track payload, stored under the 'raw_data' key.
    """
    record = {}
    record['class'] = cls
    record['raw_data'] = raw_data
    tracks.append(record)
def import_tracks(cls, directory):
    """Load every '.json' file found under ``directory`` as a track of class ``cls``.

    The tree rooted at ``directory`` is walked with ``os.walk``. Each file
    whose name ends in '.json' is parsed with ``simplejson`` and the result is
    passed to ``add_track`` together with ``cls``.

    Args:
        cls: Class label handed to ``add_track`` for every file found.
        directory: Root directory to search.
    """
    imported = 0
    for root, dirs, files in os.walk(directory):
        # Sorting in place fixes the order in which sub-directories are
        # visited, so tracks are registered in the same order on every run.
        dirs.sort()
        for filename in sorted(files):
            if not filename.endswith('.json'):
                continue
            with open(os.path.join(root, filename), 'r') as f:
                add_track(cls, simplejson.load(f))
            imported += 1
    if not imported:
        # os.walk yields nothing for a missing directory, so without this
        # message a mistyped path would silently contribute no tracks.
        logging.warning('No .json tracks found under %s', directory)
# Human-readable name for each numeric class label.
classes = {
    0: 'austin wintory',
    1: 'german stuff',
    2: 'hard rock',
}

DATA_DIR = os.path.join('data', 'ml_experiments')

# Load each data set from its sub-directory of DATA_DIR, in label order:
# 0 = "AUSTIN WINTORY", 1 = "GERMAN STUFF", 2 = "HARD ROCK".
for class_id, subdirectory in enumerate(['journey', 'german_stuff', 'hard']):
    import_tracks(class_id, os.path.join(DATA_DIR, subdirectory))
In [2]:
# Features to pull out of each track. Every entry is a pair:
#   (feature name, list of keys leading to the value inside the raw data)
features = [
    ('mean_spectral_energy', ['lowlevel', 'spectral_energy', 'mean']),
    ('bpm', ['rhythm', 'bpm']),
    ('beats_count', ['rhythm', 'beats_count']),
    ('danceability', ['rhythm', 'danceability']),
]
def _extract_item(dictionary, path):
    """Extracts item located at the specified path in dictionary.

    Args:
        dictionary: Nested container to descend into.
        path: Sequence of keys, applied one after another via ``item[key]``.

    Returns:
        The value reached after following every key in ``path``. With an
        empty path this is ``dictionary`` itself.

    Raises:
        Whatever the ``item[key]`` lookup raises (for example ``KeyError``
        when a dict is missing a key) propagates to the caller.
    """
    item = dictionary
    for key in path:
        item = item[key]
    if isinstance(item, dict):
        # The result is still returned; the message only flags that the path
        # ended on a dictionary rather than on a plain value.
        # logging.warning replaces the deprecated logging.warn alias, and
        # '%s' already applies str() to its argument.
        logging.warning('Extracted dictionary: %s', item)
    return item
def extract_features(features, raw_data):
    """Collect the value of every listed feature from one track's raw data.

    Args:
        features: Iterable of entries whose element at index 1 is the key
            path handed to ``_extract_item``.
        raw_data: Nested data the paths are resolved against.

    Returns:
        A list with one extracted value per entry, in the order of
        ``features``.
    """
    return [_extract_item(raw_data, entry[1]) for entry in features]
# Extracting data
import numpy as np

# One row of feature values per track, plus the matching class labels.
data = np.array([extract_features(features, track['raw_data']) for track in tracks])
target = [track['class'] for track in tracks]

X = data[:, :2]  # we only take the first two features
y = target
In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.scatter(
X[:, 0], # mean_spectral_energy
X[:, 1], # bpm
c=target
)
plt.xlabel('mean_spectral_energy')
plt.ylabel('bpm')
formatter = plt.FuncFormatter(lambda i, *args: classes[i])
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.colorbar(ticks=[0, 1, 2], format=formatter)
plt.show()
In [4]:
# TIME TO FIT!
from sklearn import svm

# Build the RBF-kernel classifier first, then train it on the two selected
# features; the result of fit() is what ends up bound to `svc`.
rbf_classifier = svm.SVC(kernel='rbf')
svc = rbf_classifier.fit(X, y)
# TODO: Implement the rest!