Cross-repetition DTW

  • Idea: we need strong positive examples for training repetition detection
  • Approach

    • Get labeled segmentations
    • For each track, and each pair of sections within a track, run dtw
    • See if we can threshold the dtw cost to determine whether two sections are repetitions of one another
    • If so, take the path as a set of ground-truth positives
    • Sample negatives iid at random
    • win everything
  • Extensions

    • In the downstream classifier, use windows of beat-sync features
    • That way we can get some smoothing and local context

In [8]:
import librosa
import dtw
import matplotlib.pyplot as plt
import seaborn
seaborn.set(style='dark')
%matplotlib inline

import numpy as np
import scipy
import mir_eval
import mpld3

import msaf

In [9]:
from IPython.html.widgets import interact

In [10]:
def lab_to_aud(fname):
    """Map a segment-label (.lab) file path to its corresponding audio (.flac) path.

    Assumes the dataset layout keeps annotations under a 'seglab_tut'
    directory mirrored by an 'audio' directory — TODO confirm for new datasets.
    """
    # Swap the annotation directory for the audio directory, then the extension.
    audio_name = fname.replace('seglab_tut', 'audio')
    return audio_name.replace('.lab', '.flac')

In [11]:
def make_cost_matrix(audio_file, intervals, labels, dist):
    """Compute pairwise DTW alignment costs between annotated sections of a track.

    Parameters
    ----------
    audio_file : str
        Path to the audio file to analyze.
    intervals : np.ndarray, shape=(n, 2)
        Section boundaries in seconds (start, end per row) — presumably
        aligned with `labels`; verify against the annotation loader.
    labels : list
        Section labels; only its length is used here (one row/column per section).
    dist : callable
        Point-wise distance function, passed through to ``dtw.dtw``.

    Returns
    -------
    D : np.ndarray, shape=(n, n), dtype=float32
        Symmetric matrix of DTW costs; 0 on the diagonal, NaN where either
        section was too short (< 2 sub-beat frames) to align.
    P : list of lists
        P[i][j] for i < j holds the DTW path as a pair of int32 index arrays;
        0 on the diagonal, NaN where no alignment was computed.
    """
    y, sr = librosa.load(audio_file)

    # Log-power CQT features
    cqgram = librosa.logamplitude(librosa.cqt(y, sr=sr)**2, ref_power=np.max)

    # Convert section boundaries from seconds to spectrogram frames
    intframes = librosa.time_to_frames(intervals)

    tempo, beats = librosa.beat.beat_track(y=y, sr=sr, trim=False)

    # Sub-divide beats for finer temporal resolution, padded to the full
    # frame range so every frame falls in some segment
    subseg = librosa.segment.subsegment(cqgram, beats)
    subseg = librosa.util.fix_frames(subseg, x_min=0, x_max=cqgram.shape[1])

    # Synchronize (median-aggregate) the CQT over the sub-beat segments
    cqgram = librosa.feature.sync(cqgram, subseg, aggregate=np.median)

    # Snap section boundaries onto the nearest sub-beat frame indices
    intframes = librosa.util.match_events(intframes, subseg)

    n = len(labels)

    # Score matrix: NaN = not computed (section too short), 0 on the diagonal
    D = np.full((n, n), np.nan, dtype=np.float32)
    np.fill_diagonal(D, 0)

    # Path matrix, same NaN/0 convention as D
    P = [[np.nan] * n for _ in range(n)]
    for i in range(n):
        P[i][i] = 0

    # Compute DTW scores and paths for each pair of sufficiently long sections
    for i in range(n):
        # Skip sections shorter than 2 frames before slicing: DTW needs at
        # least 2 points, and slicing a skipped section is wasted work
        if intframes[i, 1] - intframes[i, 0] < 2:
            continue
        x_slice = cqgram[:, intframes[i, 0]:intframes[i, 1]].T
        for j in range(i + 1, n):
            if intframes[j, 1] - intframes[j, 0] < 2:
                continue
            y_slice = cqgram[:, intframes[j, 0]:intframes[j, 1]].T

            dtw_cost, distance, path = dtw.dtw(x_slice, y_slice, dist=dist)
            # Costs are symmetric; mirror into the lower triangle
            D[i, j] = dtw_cost
            D[j, i] = D[i, j]
            # Store the warping path as a pair of int32 index arrays
            path = list(path)
            path[0] = np.asarray(path[0], dtype=np.int32)
            path[1] = np.asarray(path[1], dtype=np.int32)
            P[i][j] = path

    return D, P

In [ ]:
# Collect all .lab segment-annotation files and pick one track (index k) to study.
lab_files = librosa.util.find_files('/home/bmcfee/data/beatles_iso/seglab_tut/', ext='lab')
k = 39
lab_files[k]

In [ ]:
# Load the audio path and reference segmentation via mir_eval.
# NOTE(review): superseded by the msaf-based loading in the next cell,
# which rebinds audio_file/intervals/labels.
audio_file = lab_to_aud(lab_files[k])
intervals, labels = mir_eval.io.load_labeled_intervals(lab_files[k])

In [12]:
# Alternative loading path via msaf: same track index, JAMS section
# annotations at the "function" level from annotator 0.
ds_path = "/home/uri/datasets/BeatlesTUT/"
file_structs = msaf.io.get_dataset_files(ds_path)
k = 39
audio_file = file_structs[k].audio_file
intervals, labels = msaf.jams2.converters.load_jams_range(
            file_structs[k].ref_file, "sections", annotator=0, context="function")

In [13]:
# Pairwise DTW costs (_D) and warping paths (_P) between all labeled sections,
# using correlation distance between feature frames.
_D, _P = make_cost_matrix(audio_file, intervals, labels, scipy.spatial.distance.correlation)
boundaries = intervals

In [14]:
# Duration (in seconds) of each section
duration = np.diff(boundaries).ravel()

In [15]:
# douter[i, j] = min(duration_i, duration_j): the shorter of each section pair,
# a candidate normalizer for the raw DTW costs
douter = np.minimum.outer(duration, duration)

In [16]:
np.set_printoptions(precision=3)

In [17]:
# Experiment: normalize each cost by the shorter section's duration.
# NOTE(review): this assignment is immediately overwritten by the next cell,
# so the plots below use the *unnormalized* costs.
D = _D * douter**(-1.0)

In [18]:
D = _D

In [19]:
# Visualize the (symmetric) cost matrix; like-labeled sections should be cheap
seaborn.heatmap(D, xticklabels=labels, yticklabels=labels)


Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x7ff4bf7e7a10>

In [20]:
# Interactive thresholding: slide `threshold` over the observed cost range and
# see which section pairs would be declared repetitions (D <= threshold).
mpld3.disable_notebook()
# Largest finite cost sets the slider range (NaNs mark uncomputed pairs)
maxval = float(D[np.isfinite(D)].max())

@interact(threshold=(0, maxval, maxval/20.0))
def _plot(threshold=0):
    plt.figure(figsize=(5,4))
    # Boolean mask of "repetition" pairs at this threshold
    seaborn.heatmap((D<=threshold), xticklabels=labels, yticklabels=labels, vmin=0, vmax=1)
    plt.tight_layout()



In [21]:
# Self-contained reproduction of the interact widget on random data —
# presumably a minimal test case for a widget/rendering issue, independent
# of the audio pipeline above.
import mpld3
import numpy as np
import matplotlib.pyplot as plt
import seaborn
seaborn.set(style='dark')
from IPython.html.widgets import interact

%matplotlib inline

mpld3.disable_notebook()

dummy_data = np.random.random((10, 10))
@interact(threshold=(0, 1, 0.05))
def _plot(threshold=0):
    plt.figure(figsize=(5,4))
    # Same thresholded-heatmap pattern as the real cost-matrix plot
    seaborn.heatmap((dummy_data<=threshold), vmin=0, vmax=1)
    plt.tight_layout()




In [272]:
# Scratch cell: recompute audio and CQT features for the selected track
# outside of make_cost_matrix, for listening/inspection below.
lab_file = lab_files[k]

y, sr = librosa.load(lab_to_aud(lab_file))
# Harmonic separation was tried and disabled
#y = librosa.effects.harmonic(y)

intervals, labels = mir_eval.io.load_labeled_intervals(lab_file)

# Log-power CQT, same features as the cost-matrix pipeline
cqgram = librosa.logamplitude(librosa.cqt(y, sr=sr)**2, ref_power=np.max)
# Per-band z-scoring was tried and disabled
#cqgram = scipy.stats.zscore(cqgram, axis=1)
#cqgram = scipy.stats.zscore(cqgram, axis=1)

In [273]:
import IPython.display

In [274]:
# Inline audio player for the loaded track
IPython.display.Audio(data=y, rate=sr)


Out[274]:

In [275]:
import mpld3

In [276]:
# Re-enable interactive (mpld3) rendering for the spectrogram below
mpld3.enable_notebook()

In [277]:
# Display the CQT spectrogram of the track
plt.figure(figsize=(12,4))
librosa.display.specshow(cqgram)
plt.colorbar()
plt.tight_layout()