In [1]:
# Code source: Brian McFee
# License: ISC
import numpy as np
import scipy
import sklearn.cluster
import librosa
import librosa.display
import matplotlib.pyplot as plt
import matplotlib.style as ms
ms.use('seaborn-muted')
%matplotlib inline

In [2]:
file_path = "../data/songData/genres/disco/disco.00000.wav"
y, sr = librosa.load(file_path)

In [3]:
BINS_PER_OCTAVE = 12 * 3
N_OCTAVE = 7

C = librosa.amplitude_to_db(librosa.cqt(y=y, sr=sr,
                                        bins_per_octave=BINS_PER_OCTAVE,
                                        n_bins=N_OCTAVE * BINS_PER_OCTAVE),
                           ref=np.max)

plt.figure(figsize=(12, 4))
librosa.display.specshow(C, y_axis='cqt_hz', sr=sr,
                        bins_per_octave=BINS_PER_OCTAVE,
                        x_axis='time')
plt.title('log power cqt')
plt.tight_layout()



In [4]:
tempo, beats = librosa.beat.beat_track(y=y, sr=sr, trim=False)
Csync = librosa.util.sync(C, beats, aggregate=np.median)

beat_times = librosa.frames_to_time(librosa.util.fix_frames(beats,
                                                           x_min=0,
                                                           x_max=C.shape[1]),
                                   sr=sr)

plt.figure(figsize=(12, 4))
librosa.display.specshow(Csync, bins_per_octave=12*3,
                        y_axis='cqt_hz', x_axis='time',
                        x_coords=beat_times)
plt.tight_layout()



In [5]:
R = librosa.segment.recurrence_matrix(Csync, width=3, mode='affinity', sym=True)
df = librosa.segment.timelag_filter(scipy.ndimage.median_filter)
Rf = df(R, size=(1, 7))

In [6]:
mfcc = librosa.feature.mfcc(y=y, sr=sr)
Msync = librosa.util.sync(mfcc, beats)

path_distance = np.sum(np.diff(Msync, axis=1)**2, axis=0)
sigma = np.median(path_distance)
path_sim = np.exp(-path_distance / sigma)

R_path = np.diag(path_sim, k=1) + np.diag(path_sim, k=-1)

In [7]:
deg_path =np.sum(R_path, axis=1)
deg_rec = np.sum(Rf, axis=1)

mu = deg_path.dot(deg_path + deg_rec) / np.sum((deg_path + deg_rec)**2)

A = mu * Rf + (1 - mu) * R_path

In [8]:
plt.figure(figsize=(8, 4))

plt.subplot(1, 3, 1)
librosa.display.specshow(Rf, cmap='inferno_r', y_axis='time', y_coords=beat_times)
plt.title('Reccurence Similarity')

plt.subplot(1, 3, 2)
librosa.display.specshow(R_path, cmap='inferno_r')
plt.title('Path similarity')

plt.subplot(1, 3, 3)
librosa.display.specshow(A, cmap='inferno_r')
plt.title('Conbined graph')

plt.tight_layout()



In [9]:
L = scipy.sparse.csgraph.laplacian(A, normed=True)

evals, evecs = scipy.linalg.eigh(L)

evecs = scipy.ndimage.median_filter(evecs, size=(9, 1))
Cnorm = np.cumsum(evecs**2, axis=1)**0.5

k =5
X = evecs[:, :k] / Cnorm[:, k-1:k]

plt.figure(figsize=(8,4))

plt.subplot(1, 2, 2)
librosa.display.specshow(Rf, cmap='inferno_r')
plt.title('Reccurence matrix')

plt.subplot(1, 2, 1)
librosa.display.specshow(X,
                        y_axis='time',
                        y_coords=beat_times)
plt.title('Structure components')

plt.tight_layout()



In [10]:
KM = sklearn.cluster.KMeans(n_clusters=k)

seg_ids = KM.fit_predict(X)

plt.figure(figsize=(12, 4))
colors = plt.get_cmap('Paired', k)

plt.subplot(1, 3, 2)
librosa.display.specshow(Rf, cmap='inferno_r')
plt.title('Reccurence matrix')

plt.subplot(1, 3, 1)
librosa.display.specshow(X,
                        y_axis='time',
                        y_coords=beat_times)
plt.title('Structure components')

plt.subplot(1, 3, 3)
librosa.display.specshow(np.atleast_2d(seg_ids).T, cmap=colors)
plt.title('Estimated segments')

plt.tight_layout()



In [11]:
bound_beats = 1 + np.flatnonzero(seg_ids[:-1] != seg_ids[1:])
bound_beats = librosa.util.fix_frames(bound_beats, x_min=0)

bound_segs = list(seg_ids[bound_beats])

bound_frames = beats[bound_beats]

bound_frames = librosa.util.fix_frames(bound_frames,
                                      x_min=None,
                                      x_max=C.shape[1]-1)


---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-11-ccd9e6384b88> in <module>()
      4 bound_segs = list(seg_ids[bound_beats])
      5 
----> 6 bound_frames = beats[bound_beats]
      7 
      8 bound_frames = librosa.util.fix_frames(bound_frames,

IndexError: index 57 is out of bounds for axis 1 with size 57

In [ ]:
import matplotlib.patches as patches

plt.figure(figsize=(12, 4))

bound_times = librosa.frames_to_time(bound_frames)
freqs = librosa.cqt_frequencies(n_bins=C.shape[0],
                               fmin=librosa.note_to_hz('C1'),
                               bins_per_octave=BINS_PER_OCTAVE)

librosa.display.specshow(C, x_axis='time', y_axis='cqt_hz',
                         sr=sr, bins_per_octave=BINS_PER_OCTAVE)

ax = plt.gca()

for interval, label in zip(zip(bound_times, bound_times[1:]), bound_segs):
    ax.add_patch(patches.Rectangle((interval[0], freqs[0]),
                                  interval[1] - interval[0],
                                  freqs[-1],
                                  facecolor=colors(label),
                                  alpha=0.5))
    
plt.tight_layout()
plt.show()