Approach
Extensions
In [8]:
import librosa
import dtw
import matplotlib.pyplot as plt
import seaborn
seaborn.set(style='dark')
%matplotlib inline
import numpy as np
import scipy
import mir_eval
import mpld3
import msaf
In [9]:
from IPython.html.widgets import interact
In [10]:
def lab_to_aud(fname):
    """Map a segment-label (.lab) file path to its corresponding audio path.

    Swaps the 'seglab_tut' directory component for 'audio' and the '.lab'
    extension for '.flac'. Note that str.replace substitutes every
    occurrence of each pattern, not only the directory/extension positions.
    """
    return fname.replace('seglab_tut', 'audio').replace('.lab', '.flac')
In [11]:
def make_cost_matrix(audio_file, intervals, labels, dist):
    """Compute pairwise DTW dissimilarities between labeled segments of a track.

    Parameters
    ----------
    audio_file : str
        Path to the audio file to analyze.
    intervals : np.ndarray, shape=(n, 2)
        Segment boundaries in seconds.
    labels : list, len=n
        One label per segment (only its length is used here).
    dist : callable
        Pointwise distance function, passed through to ``dtw.dtw``.

    Returns
    -------
    D : np.ndarray, shape=(n, n), dtype=float32
        Symmetric DTW cost matrix. 0 on the diagonal; NaN for pairs where
        either segment spans fewer than 2 sub-segment frames.
    P : list of lists
        P[i][j] holds the DTW warping path (a list of two int32 index
        arrays) for each computed pair with i < j; 0 on the diagonal,
        NaN where no path was computed.
    """
    y, sr = librosa.load(audio_file)
    cqgram = librosa.logamplitude(librosa.cqt(y, sr=sr)**2, ref_power=np.max)
    # Convert interval times (seconds) to spectrogram frame indices
    intframes = librosa.time_to_frames(intervals)
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr, trim=False)
    # Sub-divide beats for finer temporal resolution
    subseg = librosa.segment.subsegment(cqgram, beats)
    subseg = librosa.util.fix_frames(subseg, x_min=0, x_max=cqgram.shape[1])
    # Aggregate (median) CQT frames within each sub-beat segment
    cqgram = librosa.feature.sync(cqgram, subseg, aggregate=np.median)
    # Snap interval boundaries onto the sub-segment grid
    intframes = librosa.util.match_events(intframes, subseg)
    n = len(labels)
    # Score matrix: NaN marks "not computed"; the diagonal is zero-cost.
    D = np.full((n, n), np.nan, dtype=np.float32)
    np.fill_diagonal(D, 0)
    # Path matrix with the same NaN / 0-diagonal convention as D.
    P = [[np.nan] * n for _ in range(n)]
    for i in range(n):
        P[i][i] = 0
    # Compute DTW scores and paths for every unordered pair (i, j), i < j.
    for i in range(n):
        # Skip segments too short to warp (fewer than 2 frames) BEFORE
        # slicing, mirroring the guard used in the inner loop.
        if intframes[i, 1] - intframes[i, 0] < 2:
            continue
        x_slice = cqgram[:, intframes[i, 0]:intframes[i, 1]].T
        for j in range(i + 1, n):
            if intframes[j, 1] - intframes[j, 0] < 2:
                continue
            y_slice = cqgram[:, intframes[j, 0]:intframes[j, 1]].T
            dtw_cost, distance, path = dtw.dtw(x_slice, y_slice, dist=dist)
            D[i, j] = dtw_cost
            D[j, i] = D[i, j]
            # Store the warping path as a pair of int32 index arrays.
            path = list(path)
            path[0] = np.asarray(path[0], dtype=np.int32)
            path[1] = np.asarray(path[1], dtype=np.int32)
            P[i][j] = path
    return D, P
In [ ]:
# NOTE(review): hardcoded absolute local path — not portable across machines.
lab_files = librosa.util.find_files('/home/bmcfee/data/beatles_iso/seglab_tut/', ext='lab')
# Index of the track under study — magic number; the cell below redefines it.
k = 39
# Bare last expression: displays the selected .lab path.
lab_files[k]
In [ ]:
# Derive the matching audio path from the .lab annotation path.
audio_file = lab_to_aud(lab_files[k])
# Load (start, end) intervals in seconds and their section labels.
intervals, labels = mir_eval.io.load_labeled_intervals(lab_files[k])
In [12]:
# NOTE(review): this cell supersedes the mir_eval-based loading above —
# audio_file, intervals, and labels are silently overwritten, and k = 39
# is redefined. Only one of the two loading paths should be kept.
# Hardcoded absolute local path — not portable across machines.
ds_path = "/home/uri/datasets/BeatlesTUT/"
file_structs = msaf.io.get_dataset_files(ds_path)
k = 39
audio_file = file_structs[k].audio_file
# Load section-level annotations ("function" context) from the JAMS reference file.
intervals, labels = msaf.jams2.converters.load_jams_range(
file_structs[k].ref_file, "sections", annotator=0, context="function")
In [13]:
# Expensive cell: loads audio and runs pairwise DTW over all segment pairs.
_D, _P = make_cost_matrix(audio_file, intervals, labels, scipy.spatial.distance.correlation)
# Alias the (start, end) interval array for the duration computations below.
boundaries = intervals
In [14]:
# Segment durations in seconds (end - start for each interval).
duration = np.diff(boundaries).ravel()
In [15]:
# Pairwise minimum of segment durations — a candidate normalizer for DTW costs.
douter = np.minimum.outer(duration, duration)
In [16]:
np.set_printoptions(precision=3)
In [17]:
# Duration-normalized costs: divide each cost by the shorter segment's length.
D = _D * douter**(-1.0)
In [18]:
# NOTE(review): immediately overwrites the normalized D above with the raw
# costs, so the normalization experiment is effectively disabled. Which D is
# live depends on cell execution order — a hidden-state hazard.
D = _D
In [19]:
# Heatmap of pairwise DTW costs, axes labeled by section labels.
seaborn.heatmap(D, xticklabels=labels, yticklabels=labels)
Out[19]:
In [20]:
# Switch mpld3 rendering off before using the slider widget below.
mpld3.disable_notebook()
# Largest finite DTW cost — used as the upper bound of the threshold slider.
maxval = float(D[np.isfinite(D)].max())
# Interactive thresholding: slide to see which segment pairs fall below a
# given dissimilarity. NOTE(review): widget state is not reproducible from
# code alone on a fresh Restart-and-Run-All.
@interact(threshold=(0, maxval, maxval/20.0))
def _plot(threshold=0):
plt.figure(figsize=(5,4))
# Binary mask: True where the pair's cost is at or below the threshold.
seaborn.heatmap((D<=threshold), xticklabels=labels, yticklabels=labels, vmin=0, vmax=1)
plt.tight_layout()
In [21]:
# Self-contained reproducer for the interact + heatmap widget: duplicates the
# imports and setup from the top of the notebook so this cell runs standalone.
import mpld3
import numpy as np
import matplotlib.pyplot as plt
import seaborn
seaborn.set(style='dark')
# NOTE(review): IPython.html.widgets is a deprecated location; modern code
# imports interact from ipywidgets.
from IPython.html.widgets import interact
%matplotlib inline
mpld3.disable_notebook()
# NOTE(review): no random seed — the dummy data differs on every run.
dummy_data = np.random.random((10, 10))
# NOTE(review): redefines _plot, silently shadowing the earlier definition.
@interact(threshold=(0, 1, 0.05))
def _plot(threshold=0):
plt.figure(figsize=(5,4))
seaborn.heatmap((dummy_data<=threshold), vmin=0, vmax=1)
plt.tight_layout()
In [272]:
lab_file = lab_files[k]
# Reload the audio for the selected track.
y, sr = librosa.load(lab_to_aud(lab_file))
#y = librosa.effects.harmonic(y)
intervals, labels = mir_eval.io.load_labeled_intervals(lab_file)
# Log-power constant-Q spectrogram, referenced to the track's peak power.
# NOTE(review): librosa.logamplitude was deprecated and later removed in
# favor of power_to_db — confirm the pinned librosa version before reuse.
cqgram = librosa.logamplitude(librosa.cqt(y, sr=sr)**2, ref_power=np.max)
#cqgram = scipy.stats.zscore(cqgram, axis=1)
In [273]:
import IPython.display
In [274]:
# Embed an audio player for the loaded waveform directly in the notebook.
IPython.display.Audio(data=y, rate=sr)
Out[274]: