In [40]:
import numpy as np
import pandas as pd
import sys
sys.path.append('../../tools/music-processing-experiments')
from time_intervals import block_labels
In [2]:
# Load the tab-separated segment table `01_09_true_segments.tsv` into a DataFrame.
true_segments_df = pd.read_csv('../../data/beatles/_inbox/segmentation/01_09_true_segments.tsv', sep='\t')
In [6]:
# Keep only the 'start', 'end' and 'label' columns, in that order.
# NOTE(review): this rebinds the same name, so the originally loaded columns are
# no longer reachable without re-running the loading cell.
true_segments_df = true_segments_df[['start', 'end', 'label']]
In [95]:
# Print the number of segments first and keep `.head()` as the final expression:
# a notebook cell only renders the value of its last expression, so a `.head()`
# placed before the print is evaluated and silently discarded.
print(len(true_segments_df))
true_segments_df.head()
In [12]:
# Use the 'end' value of the last row as the overall duration.
# NOTE(review): assumes the rows are ordered so that the last row ends last — confirm.
duration = true_segments_df['end'].iloc[-1]
duration
Out[12]:
In [31]:
def block_segments(duration, fs=44100.0, hop_size=2048):
    """
    Build a table of regularly spaced, contiguous time blocks.

    Every block lasts ``hop_size / fs``. Block starts begin at 0, are spaced
    by that same amount and are generated up to (not including) ``duration``.
    Each block ends where the next one starts, so the blocks do not overlap
    and the last block may end after ``duration``.

    Parameters
    ----------
    duration : upper bound for the block start times
    fs : divisor applied to ``hop_size`` (default 44100.0)
    hop_size : numerator of the block length (default 2048)

    Returns
    -------
    pandas.DataFrame with the columns 'start' and 'end' (in that order),
    one row per block.
    """
    block_duration = hop_size / fs
    starts = np.arange(0, duration, block_duration)
    return pd.DataFrame(
        {'start': starts, 'end': starts + block_duration},
        columns=['start', 'end'])
# Blocks spanning the whole duration; report the table size and its first five rows.
block_times = block_segments(duration)
print(block_times.shape)
print(block_times.head(5))
In [62]:
# `chords` is an alias (not a copy) of the segment table; it is the label table
# passed to block_labels() further down in this cell.
chords = true_segments_df
# NOTE(review): x_times and block_size are referenced only by the commented-out
# sample-based variant in this cell; label_cols is not referenced by any code
# visible in this notebook.
x_times = block_times['start']
block_size = 4096
hop_size = 2048
label_cols = ['label']
def time_to_samples(time, fs=44100.0):
    """
    Scale `time` by `fs` and round the product with `np.round`.

    Works on scalars as well as array-like inputs. The result keeps a
    floating-point type; no cast to an integer type is done here.
    Presumably `time` is in seconds and `fs` is the sampling rate in Hz —
    confirm against the callers.
    """
    sample_position = time * fs
    return np.round(sample_position)
# The commented-out lines below are an alternative variant that converts the
# segment and block boundaries to sample positions before labelling. Only the
# two uncommented statements are executed: the time-based blocks are used
# as-is and labelled directly from `chords`.
# chords['start_sample'] = time_to_samples(chords['start'])
# chords['end_sample'] = time_to_samples(chords['end'])
# df_blocks = pd.DataFrame({'start': time_to_samples(x_times).astype(np.int64)})
# df_blocks['end'] = df_blocks['start'] + block_size
df_blocks = block_times
# df_labels = chords[['start_sample', 'end_sample', 'label']].copy()
# df_labels.rename(columns={'start_sample': 'start', 'end_sample': 'end'}, inplace=True)
# df_labelled_blocks = block_labels(df_blocks, df_labels)
# block_labels comes from the project module `time_intervals`; presumably it
# attaches to each block the label of the matching segment — verify there.
df_labelled_blocks = block_labels(df_blocks, chords)
In [63]:
# Display the labelled blocks.
# NOTE(review): this shows the whole table; `.head()` would keep the output short.
df_labelled_blocks
Out[63]:
In [66]:
# Write the labelled blocks as tab-separated text, floats formatted with six decimals.
df_labelled_blocks.to_csv('../../data/beatles/_inbox/segmentation/01_09_ref_frames_4096_2048_timed_2.tsv',
sep='\t', index=None, float_format='%.6f')
In [52]:
# Check that every 'label' in the stored frame file `01_09_ref_frames_4096_2048.tsv`
# equals the label computed above.
# NOTE(review): element-wise `==` between two Series requires identically
# labelled (same-length) operands; a length mismatch raises instead of
# returning False — confirm that is acceptable here.
(pd.read_csv('../../data/beatles/_inbox/segmentation/01_09_ref_frames_4096_2048.tsv', sep='\t')['label'] == df_labelled_blocks['label']).all()
Out[52]:
In [53]:
# Scratch check: 5548032 divided by 44100 (presumably a sample count converted
# to seconds at a 44.1 kHz sampling rate — TODO confirm).
5548032/44100.
Out[53]:
In [55]:
# Inspect the last row of the segment table.
true_segments_df.iloc[-1]
Out[55]:
In [97]:
# Continue with an independent copy of the labelled blocks and preview its first rows.
df_frames = df_labelled_blocks.copy()
df_frames.head()
Out[97]:
In [104]:
def frames_to_segments(df_frames, total_duration=None):
    """
    Merge consecutive frames that share a label into segments.

    A segment starts at every frame whose 'label' differs from the label of
    the preceding frame (the first frame always starts a segment). Each
    segment ends where the next segment starts; the last segment ends at
    `total_duration` if given, otherwise at the 'end' of the last frame.

    Parameters
    ----------
    df_frames : pandas.DataFrame
        Frames in temporal order with at least the columns 'start', 'end'
        and 'label'. It is not modified.
    total_duration : number, optional
        End time of the last segment. Only `None` means "not given"; an
        explicit 0 is honoured.

    Returns
    -------
    pandas.DataFrame
        One row per segment with the same columns as `df_frames`. The rows
        keep the index values of the frames that start each segment. An
        empty input yields an empty copy.
    """
    # Nothing to merge; also avoids indexing the last row of an empty frame.
    if len(df_frames) == 0:
        return df_frames.copy()

    labels = df_frames['label']
    # Comparing with the shifted labels marks the first frame of every run;
    # the first frame is compared with a missing value and thus always marked.
    is_segment_start = labels != labels.shift(1)
    # Copy the selection so that assigning 'end' below never touches the input.
    df_segments = df_frames[is_segment_start].copy()

    if total_duration is not None:
        end_time = total_duration
    else:
        end_time = df_frames['end'].iloc[-1]

    # The start of the following segment is the end of the current one; the
    # last segment has no successor and is closed with `end_time`.
    df_segments['end'] = df_segments['start'].shift(-1).fillna(end_time)
    return df_segments
In [109]:
# Merge the frames into segments, closing the last segment at `duration`,
# and show the last rows of the result.
df_segments = frames_to_segments(df_frames, total_duration=duration)
df_segments.tail()
Out[109]:
In [99]:
# Write the reconstructed segments as tab-separated text, floats formatted with six decimals.
df_segments.to_csv('../../data/beatles/_inbox/segmentation/01_09_reconstructed_segments.tsv',
sep='\t', index=None, float_format='%.6f')
In [128]:
# Turn each line of the file into a single label string by removing every tab
# and newline character it contains.
with open('../../data/beatles/_inbox/outputs/yesterday_lstm_class.tsv') as file:
    yesterday_labels = [line.replace('\t', '').replace('\n', '') for line in file]
In [144]:
# Module-level settings read by read_labels_as_df(): the frame length is hop_size / fs.
fs = 44100.
hop_size = 2048
def read_label_file(file_name):
    """
    Read a label file that carries no time information, only the labels
    themselves (per the original note: 12 PCS labels per line).

    Every line is collapsed into one string by removing all newline and tab
    characters, i.e. the tab-separated fields of a line are concatenated.

    Returns the list of these strings, one per line, in file order.
    """
    with open(file_name) as label_file:
        raw_lines = label_file.readlines()
    labels = []
    for raw_line in raw_lines:
        labels.append(raw_line.replace('\n', '').replace('\t', ''))
    return labels
def read_labels_as_df(file_name):
    """
    Load per-frame labels with read_label_file() and attach frame times.

    The i-th label gets the start time ``i * hop_size / fs`` and an end time
    one frame length later; `hop_size` and `fs` are read from module level.

    Returns a pandas.DataFrame with the columns 'start', 'end' and 'label'
    (in that order), one row per label.
    """
    frame_labels = read_label_file(file_name)
    # frame times follow from the frame index and the constant frame length
    frame_duration = hop_size / fs
    frame_starts = frame_duration * np.arange(len(frame_labels))
    frame_ends = frame_duration + frame_starts
    return pd.DataFrame(
        {'start': frame_starts, 'end': frame_ends, 'label': frame_labels},
        columns=['start', 'end', 'label'])
def save_tsv(df, file_name):
    """
    Write `df` to `file_name` as tab-separated text.

    The call passes `index=None` and formats floating-point values with six
    decimal places ('%.6f').
    """
    tsv_options = {'sep': '\t', 'index': None, 'float_format': '%.6f'}
    df.to_csv(file_name, **tsv_options)
In [159]:
# Load the per-frame labels from `yesterday_lstm_class.tsv` and merge them into
# segments. No total_duration is passed, so the last segment ends at the 'end'
# of the last frame.
# NOTE(review): this rebinds df_frames and df_segments, which earlier cells
# assigned from the labelled blocks.
df_frames = read_labels_as_df('../../data/beatles/_inbox/outputs/yesterday_lstm_class.tsv')
df_segments = frames_to_segments(df_frames)
In [160]:
# Preview the first segments.
df_segments.head()
Out[160]:
In [161]:
def explode_pitch_classes(df):
    """
    Split the 'label' column into one column per pitch class.

    Every label must be a string of exactly 12 characters; its i-th character
    becomes the value of the i-th pitch-class column
    (C, Db, D, Eb, E, F, Gb, G, Ab, A, Bb, B). The values stay single-character
    strings, no numeric conversion is done.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a 'label' column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` with the twelve pitch-class columns appended and the
        'label' column removed.

    Raises
    ------
    ValueError
        If any label does not have exactly 12 characters.
    """
    pcs_cols = ['C', 'Db', 'D', 'Eb', 'E', 'F', 'Gb', 'G', 'Ab', 'A', 'Bb', 'B']
    df = df.copy()
    # Read the labels of the frame that was passed in, not of a notebook global.
    labels = df['label']

    wrong_length = [label for label in labels if len(label) != len(pcs_cols)]
    if wrong_length:
        raise ValueError(
            'expected labels with %d characters, got %r'
            % (len(pcs_cols), wrong_length[0]))

    # One row of characters per label; the explicit reshape keeps the result
    # two-dimensional even for an empty table. Transposed, row i holds the
    # values of pitch class i for all labels.
    pcs = np.array([list(label) for label in labels], dtype=str)
    pcs = pcs.reshape(len(labels), len(pcs_cols)).T
    for i, col in enumerate(pcs_cols):
        df[col] = pcs[i]
    del df['label']
    return df
In [164]:
# Preview the first segments.
df_segments.head()
Out[164]:
In [163]:
# Replace the 'label' column by per-pitch-class columns and save the result as TSV.
# NOTE(review): df_segments is rebound to the exploded table, which no longer
# has a 'label' column, so this cell cannot be re-run without first re-running
# the cell that builds df_segments.
df_segments = explode_pitch_classes(df_segments)
save_tsv(df_segments, '../../data/beatles/_inbox/outputs/yesterday_lstm_class_segments.tsv')
In [166]:
# Scratch calculation.
# NOTE(review): the result depends on the Python version — with Python 2
# integer division 1000/64 is 15 and the result is 1; with Python 3 true
# division the result is 1.536.
24/(1000/64)
Out[166]:
In [ ]: