In [2]:
%pylab inline
import numpy as np
import pandas as pd
import sys
import math
import os
sys.path.append('../../tools/music-processing-experiments')
from analysis import split_to_blocks
from files import load_wav
from time_intervals import block_labels
from spectrogram import create_window
from reassignment import chromagram
In [3]:
# Recording to process, named by its path relative to the dataset sub-folders.
song = 'The_Beatles/01_-_Please_Please_Me/08_-_Love_Me_Do'
data_dir = 'data/beatles/'
# Audio and chord annotations sit in parallel sub-trees below data_dir and
# differ only in the folder name and the file extension.
audio_file = ''.join([data_dir, 'audio-cd/', song, '.wav'])
chord_file = ''.join([data_dir, 'chordlab/', song, '.lab.pcs.tsv'])
# Show both resolved paths as the cell output.
audio_file, chord_file
Out[3]:
In [4]:
# Read the recording. load_wav returns two values: the signal `x` and `fs`,
# which later cells print and use as the sampling rate in Hz.
x, fs = load_wav(audio_file)
In [5]:
# Quick sanity report on the loaded signal; each tuple is printed as one
# space-separated line.
for report in (
    ('shape:', x.shape),
    ('sampling rate:', fs, 'Hz'),
    ('number of samples:', len(x)),
    ('duration in audio:', len(x) / fs, 'sec'),
):
    print(*report)
In [6]:
# The chord annotations are a tab-separated table. Later cells read its
# 'start', 'end', 'label', 'root' and 'bass' columns plus one column per pitch class.
chords = pd.read_csv(chord_file, sep='\t')
In [7]:
# Preview the first annotation rows.
chords.head()
Out[7]:
In [8]:
# (number of annotation rows, number of columns)
chords.shape
Out[8]:
In [9]:
# The 'end' value of the last annotation row, to compare with the audio
# duration printed earlier (presumably also in seconds -- confirm in the .tsv).
print('duration in chords:', chords['end'].iloc[-1])
In [10]:
# Names of the twelve pitch-class columns, in chromatic order starting at C.
target_cols = [
    'C', 'Db', 'D', 'Eb', 'E', 'F',
    'Gb', 'G', 'Ab', 'A', 'Bb', 'B',
]
In [11]:
# Block length (added to sample positions later, so counted in samples) and
# the hop between blocks passed to split_to_blocks (presumably also in samples).
block_size, hop_size = 4096, 2048
In [12]:
# Cut the signal into blocks. split_to_blocks returns the blocks together with
# x_times, which the following cells treat as each block's start time in seconds.
x_blocks, x_times = split_to_blocks(x, block_size, hop_size, fs)
# Show the dimensions of the resulting block array.
x_blocks.shape
Out[12]:
In [13]:
# Report how many blocks were produced.
print('number of blocks:', len(x_blocks))
# x_times holds the start time of each block, so its last entry is where the final block begins
print('last block starts at:', x_times[-1], 'sec')
In [14]:
def chords_to_blocks(chords, block_center_times, cols=None):
    """Yield, for every block time, the pitch-class row of the chord active then.

    The chord table is walked in step with the block times: the current
    segment index only ever moves forward, so both inputs must be in
    chronological order.

    Parameters
    ----------
    chords : pandas.DataFrame
        Chord segments in chronological order. Must contain an 'end' column
        (same time unit as ``block_center_times``) and every column in ``cols``.
    block_center_times : iterable of float
        Block times in non-decreasing order.
    cols : list of str, optional
        Columns returned for each block. Defaults to the notebook-level
        ``target_cols``.

    Yields
    ------
    pandas.Series
        ``chords.iloc[k][cols]`` where ``k`` is the first segment whose 'end'
        lies strictly after the block time. Only 'end' is consulted, so a time
        that falls in a gap between segments maps to the following segment,
        and times at or beyond the last 'end' map to the last segment.
    """
    if cols is None:
        # Fall back to the pitch-class column list defined at notebook level.
        cols = target_cols
    segment_ends = chords['end'].values
    last_ix = len(chords) - 1
    chord_ix = 0
    for t in block_center_times:
        # Advance past every segment that has already ended at time t,
        # but never step beyond the final segment.
        while chord_ix < last_ix and t >= segment_ends[chord_ix]:
            chord_ix += 1
        yield chords.iloc[chord_ix][cols]
In [15]:
def time_to_samples(time, sample_rate=None):
    """Convert a time (scalar, array or Series) into a rounded sample position.

    Parameters
    ----------
    time : number, numpy.ndarray or pandas.Series
        Time value(s); multiplied by the sampling rate, so expected in seconds
        when the rate is in Hz.
    sample_rate : number, optional
        Sampling rate to use. Defaults to the notebook-level ``fs``.

    Returns
    -------
    Same kind as ``time``
        ``np.round(time * sample_rate)``. The result is rounded but keeps a
        floating-point dtype; cast with ``.astype`` where integers are needed.
    """
    if sample_rate is None:
        # Keep the original behaviour: use the rate of the loaded recording.
        sample_rate = fs
    return np.round(time * sample_rate)
# Express both boundaries of every chord segment in samples as well as in time.
chords['start_sample'], chords['end_sample'] = map(
    time_to_samples, (chords['start'], chords['end'])
)
# One row per block: integer start sample, and the end one block length later.
block_starts = time_to_samples(x_times).astype(np.int64)
df_blocks = pd.DataFrame({'start': block_starts})
df_blocks['end'] = df_blocks['start'] + block_size
In [16]:
# Preview the chord table again, now including the start_sample / end_sample columns.
chords.head()
Out[16]:
In [17]:
# Preview the per-block start/end sample positions.
df_blocks.head()
Out[17]:
In [34]:
# The twelve pitch-class columns, in chromatic order starting at C.
pcs_cols = [
    'C', 'Db', 'D', 'Eb', 'E', 'F',
    'Gb', 'G', 'Ab', 'A', 'Bb', 'B',
]
# Columns that describe one chord label: its name, root, bass and pitch classes.
label_cols = ['label', 'root', 'bass']
label_cols.extend(pcs_cols)
In [50]:
# Lookup table from chord label to its root, bass and pitch-class columns.
# verify_integrity makes this fail here if one label maps to two different
# rows; otherwise the later join on 'label' would silently duplicate blocks.
label_dict = (
    chords[label_cols]
    .drop_duplicates()
    .set_index('label', verify_integrity=True)
)
In [51]:
# Show the label lookup table (one row per distinct chord description).
label_dict
Out[51]:
In [28]:
# Keep only the sample-based segment boundaries and the label, renamed to the
# 'start' / 'end' column names that df_blocks also uses.
sample_to_plain = {'start_sample': 'start', 'end_sample': 'end'}
df_labels = chords[['start_sample', 'end_sample', 'label']].rename(columns=sample_to_plain)
In [30]:
# Preview the labelled segments with their boundaries in samples.
df_labels.head()
Out[30]:
In [22]:
# Attach a chord label to the blocks and time the call.
# NOTE(review): presumably block_labels matches each block's [start, end) sample
# range against the labelled segments -- confirm in time_intervals.block_labels.
%time df_labelled_blocks = block_labels(df_blocks, df_labels)
In [23]:
# Preview only the first rows instead of dumping the whole per-block table
# into the notebook output.
df_labelled_blocks.head()
Out[23]:
In [73]:
# Look up every block's label in label_dict (join on the 'label' column against
# the lookup table's index), then keep the label and the pitch-class columns.
df_block_pcs = (
    df_labelled_blocks[['label']]
    .join(label_dict, on='label')
    [['label'] + pcs_cols]
)
In [74]:
# Preview the first 15 blocks with their pitch-class labels.
df_block_pcs.head(15)
Out[74]:
In [66]:
# The labelled table must have exactly one row per block; a mismatch means the
# labelling or the label lookup dropped or duplicated rows.
assert len(df_block_pcs) == len(df_blocks)
In [75]:
# Build the output folder name from the block parameters actually used, so the
# path cannot go stale when block_size or hop_size change.
block_labels_file = '{}chord-pcs/{}_{}/{}.pcs'.format(data_dir, block_size, hop_size, song)
print(block_labels_file)
# Create the target folder on first use, then write the table as tab-separated
# text without the row index.
os.makedirs(os.path.dirname(block_labels_file), exist_ok=True)
df_block_pcs.to_csv(block_labels_file, sep='\t', index=False)
In [26]:
# Window with the same length as one block.
w = create_window(block_size)
# Chromagram of all blocks. NOTE(review): bin_range / bin_division are passed
# through as-is; their exact meaning lives in reassignment.chromagram.
X_chromagram = chromagram(
    x_blocks,
    w,
    fs,
    to_log=True,
    bin_range=(-48, 67),
    bin_division=1,
)
In [27]:
# Build the folder name from the block parameters actually used. The
# 'bins=-48,67_div=1' part is still a literal and must be kept in sync by hand
# with the bin_range / bin_division arguments given to chromagram().
chromagram_file = '{}chromagram/block={}_hop={}_bins=-48,67_div=1/{}.npz'.format(
    data_dir, block_size, hop_size, song
)
print(chromagram_file)
# Create the target folder on first use, then store the features together with
# the block times in one compressed archive.
os.makedirs(os.path.dirname(chromagram_file), exist_ok=True)
np.savez_compressed(chromagram_file, X=X_chromagram, times=x_times)