In [32]:
%matplotlib inline

from __future__ import division, print_function

import numpy as np
import matplotlib.pyplot as plt
from itertools import combinations
import tensorflow

import SHS_data
import main
import fingerprints as fp
import util

import paired_data
# Re-execute the module so that edits made to paired_data after its first
# import are picked up without restarting the kernel.
# NOTE(review): bare `reload` is a builtin only on Python 2 — confirm the kernel version.
reload(paired_data);

Load some data


In [25]:
# Read the clique data: one mapping of cliques, and one lookup keyed by URI.
clique_dict, cliques_by_uri = SHS_data.read_cliques()

# Split ratio passed to util.split_train_test_validation; the result is
# unpacked below as (train, test, validation).
# presumably the three numbers are the relative sizes in that order — TODO confirm
# Alternative setting tried earlier: ratio = (5, 15, 80)
ratio = (1, 9, 90)

splits = util.split_train_test_validation(clique_dict, ratio=ratio)
train_cliques, test_cliques, val_cliques = splits

Load pairs of covers and non-covers

def get_pairs(clique_dict):
...

In [26]:
# Build the cover pairs and non-cover pairs from the training cliques.
pairs, non_pairs = paired_data.get_pairs(train_cliques)

# Both classes must be the same size.
assert len(pairs) == len(non_pairs), 'expected as many non-cover pairs as cover pairs'

# Every entry must hold exactly two items; they are used below as keys
# into cliques_by_uri.
assert all(len(pair) == 2 for pair in pairs), 'found a cover pair that does not have 2 items'
assert all(len(non_pair) == 2 for non_pair in non_pairs), 'found a non-cover pair that does not have 2 items'

# Both members of a cover pair map to the same clique ...
assert all(cliques_by_uri[pair[0]] == cliques_by_uri[pair[1]] for pair in pairs), \
    'found a cover pair whose members belong to different cliques'

# ... and the members of a non-cover pair never do.
assert not any(cliques_by_uri[non_pair[0]] == cliques_by_uri[non_pair[1]] for non_pair in non_pairs), \
    'found a non-cover pair whose members belong to the same clique'

Cut chroma features to fixed-length arrays

def patchwork(chroma, n_patches=7, patch_len=64):
...

Strategy: cut out n_patches equally spaced (possibly overlapping) patches of length patch_len and stitch them back together.

Note that this requires some extra attention, as there are unusually short chroma files in the dataset: around 30 files are less than 64 beats long. Hence an extra test in which patch_len > len(chroma).


In [99]:
reload(paired_data)

# --- toy input: a (len_x, 1) column that is shorter than a single patch ---
len_x = 10
n_patch, patch_len = 3, 14

x_test = np.arange(len_x)[:, np.newaxis]
x_patches = paired_data.patchwork(x_test, n_patches=n_patch, patch_len=patch_len)

# The result must start and end with the first and last input rows, and
# contain n_patch * patch_len rows in total.
assert x_patches[0] == x_test[0]
assert x_patches[-1] == x_test[-1]
assert len(x_patches) == n_patch * patch_len

# --- real data: both chroma files of the first cover pair, default settings ---
test_pair = pairs[0]
chroma_1, chroma_2 = (SHS_data.read_chroma(test_pair[k]) for k in (0, 1))
patches_1, patches_2 = (paired_data.patchwork(chroma) for chroma in (chroma_1, chroma_2))

assert patches_1.shape == patches_2.shape

# --- short chroma: choose patch_len longer than the shorter of the two files ---
n_patches = 3
patch_len = min(map(len, (chroma_1, chroma_2))) + 10

patches_1, patches_2 = (
    paired_data.patchwork(chroma, n_patches=n_patches, patch_len=patch_len)
    for chroma in (chroma_1, chroma_2)
)

assert patches_1.shape == patches_2.shape
assert patches_1.shape[0] == n_patches * patch_len

Align chroma pitch dimension

def align_pitch(chroma_1, chroma_2):
...

In [28]:
# Two toy arrays, each made of two identical rows with six columns.
a = np.tile([2, 0, 1, 0, 0, 0], (2, 1))
b = np.tile([0, 0, 1, 0, 3, 0], (2, 1))

# Only the second return value is inspected below.
a_, b_ = paired_data.align_pitch(a, b)

# Show both inputs, then (after a blank line) the aligned version of b.
# In the recorded output, b_ is b circularly shifted along its columns.
print(a, b, sep='\n')
print('\n', b_)


[[2 0 1 0 0 0]
 [2 0 1 0 0 0]]
[[0 0 1 0 3 0]
 [0 0 1 0 3 0]]

 [[3 0 0 0 1 0]
 [3 0 0 0 1 0]]

Construct a dataset of cover and non-cover 'patchworks'

def dataset_of_pairs(clique_dict, chroma_dict):
...

In [29]:
# Collect the URIs of the training cliques and preload their chroma features.
train_uris = util.uris_from_clique_dict(train_cliques)
chroma_dict = SHS_data.preload_chroma(train_uris)

# dataset_of_pairs returns four values; the fourth is not needed here.
X_1, X_2, is_cover, _ = paired_data.dataset_of_pairs(train_cliques, chroma_dict)

# Each pair must contribute one entry to both inputs and one label.
assert len(X_1) == len(X_2) == len(is_cover), \
    'X_1, X_2 and is_cover must contain one entry per pair'

# NOTE(review): the recorded output of this cell shows X_1 with shape (244,)
# while X_2 has shape (244, 448, 12). X_1 therefore looks like a 1-D array of
# per-pair items rather than a stacked 3-D array — possibly because its
# patchworks differ in length. Verify dataset_of_pairs / patchwork.
print(X_1.shape, X_2.shape, is_cover.shape)


(244,) (244, 448, 12) (244,)

In [ ]: