Machine Learning Experiments


In [1]:
import os
import simplejson
import logging

# Accumulates one record per imported track, across all data sets.
tracks = []

def add_track(cls, raw_data):
    """Append a track record to the module-level ``tracks`` list.

    Args:
        cls: Class label stored under the ``'class'`` key.
        raw_data: Track data stored unchanged under the ``'raw_data'`` key.
    """
    record = {'class': cls, 'raw_data': raw_data}
    tracks.append(record)

def import_tracks(cls, directory):
    """Load every ``.json`` file found below ``directory`` as a track of class ``cls``.

    The directory tree is walked recursively; each matching file is parsed
    with ``simplejson`` and the result is registered through ``add_track``.
    Sub-directories and files are visited in sorted order so that the order
    of the imported tracks is the same on every run.

    Args:
        cls: Class label passed on to ``add_track`` for every file.
        directory: Root directory to search.

    Returns:
        None. If ``directory`` is not an existing directory, a warning is
        logged and nothing is imported.
    """
    if not os.path.isdir(directory):
        # os.walk() yields nothing for a missing path, which would otherwise
        # leave this data set silently empty.
        logging.warning('Track directory not found: %s', directory)
        return
    for root, dirs, files in os.walk(directory):
        # os.walk() reports entries in arbitrary order. Sorting ``dirs`` in
        # place steers the traversal; sorting ``files`` fixes the order within
        # each directory.
        dirs.sort()
        for filename in sorted(files):
            if filename.endswith('.json'):
                with open(os.path.join(root, filename), 'r') as f:
                    add_track(cls, simplejson.load(f))

# Class label -> name shown for that class.
classes = {
    0: 'austin wintory',
    1: 'german stuff',
    2: 'hard rock',
}

# Root folder of the experiment data; each data set lives in its own sub-folder.
DATA_DIR = os.path.join('data', 'ml_experiments')

# (class label, sub-folder of DATA_DIR), imported in this order:
#   0 -> data set "AUSTIN WINTORY"
#   1 -> data set "GERMAN STUFF"
#   2 -> data set "HARD ROCK"
for class_id, subdir in ((0, 'journey'), (1, 'german_stuff'), (2, 'hard')):
    import_tracks(class_id, os.path.join(DATA_DIR, subdir))

In [2]:
# Features taken from each track. Every entry is a pair:
#   (name of the feature, list of keys leading to its value in the raw data)
features = [
    ('mean_spectral_energy', ['lowlevel', 'spectral_energy', 'mean']),
    ('bpm', ['rhythm', 'bpm']),
    ('beats_count', ['rhythm', 'beats_count']),
    ('danceability', ['rhythm', 'danceability']),
]

def _extract_item(dictionary, path):
    """Extracts item located at the specified path in dictionary."""
    item = dictionary
    for i in path:
        item = item[i]
    if isinstance(item, dict):
        logging.warn('Extracted dictionary: %s', str(item))
    return item

def extract_features(features, raw_data):
    """Look up every requested feature in ``raw_data``.

    Args:
        features: Sequence of entries whose element at index 1 is the path
            of the feature inside ``raw_data``.
        raw_data: Nested data of a single track.

    Returns:
        List with one extracted value per entry, in the order of ``features``.
    """
    return [_extract_item(raw_data, spec[1]) for spec in features]

import numpy as np

# Extracting data
if not tracks:
    # Without this check the 2-D slice below fails on the empty array with an
    # opaque "too many indices" IndexError.
    raise ValueError(
        'No tracks were imported; check the data set directories under DATA_DIR.'
    )

# One row of feature values per track, and the class label of each track.
feature_rows = [extract_features(features, track['raw_data']) for track in tracks]
target = [track['class'] for track in tracks]

# Rows follow the order of ``tracks``; columns follow the order of ``features``.
data = np.array(feature_rows)
X = data[:, :2]  # we only take the first two features
y = target

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt

plt.scatter(
    X[:, 0],  # mean_spectral_energy
    X[:, 1],  # bpm
    c=target
)
plt.xlabel('mean_spectral_energy')
plt.ylabel('bpm')
formatter = plt.FuncFormatter(lambda i, *args: classes[i])
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.colorbar(ticks=[0, 1, 2], format=formatter)
plt.show()



In [4]:
# TIME TO FIT!
from sklearn import svm

# Support vector classifier with an RBF kernel, trained on X with labels y.
svc = svm.SVC(kernel='rbf')
svc = svc.fit(X, y)  # fit() hands back the estimator; assigning keeps the cell output empty

# TODO: Implement the rest!