In [1]:
# Code source: Brian McFee
# License: ISC

from __future__ import print_function

import numpy as np
import scipy
import matplotlib.pyplot as plt
import matplotlib.style as ms

# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'
# and newer releases reject the old names. Try the current name first and
# fall back to the legacy one so the notebook runs on either side of the rename.
# If neither sheet is installed the default style is kept (styling is cosmetic).
for _style_name in ('seaborn-v0_8-muted', 'seaborn-muted'):
    if _style_name in ms.available:
        ms.use(_style_name)
        break
%matplotlib inline

import sklearn.cluster

import librosa
import librosa.display

In [2]:
# Location of the audio clip that every later cell analyses.
file_path = "../data/songData/genres/blues/blues.00000.wav"

# Decode the clip; the two results are kept as `y` and `sr` because the
# following cells forward them as the `y=` / `sr=` keyword arguments.
y, sr = librosa.load(path=file_path)

In [3]:
# Constant-Q resolution: 36 bins per octave (3 per semitone) over 7 octaves.
BINS_PER_OCTAVE = 12 * 3
N_OCTAVE = 7

# librosa.cqt yields complex coefficients. Take the magnitude explicitly
# before the dB conversion so amplitude_to_db is not handed complex input
# (it would warn about discarding phase). ref=np.max makes the loudest
# bin the 0 dB reference.
C = librosa.amplitude_to_db(np.abs(librosa.cqt(y=y, sr=sr,
                                               bins_per_octave=BINS_PER_OCTAVE,
                                               n_bins=N_OCTAVE * BINS_PER_OCTAVE)),
                            ref=np.max)

# Display the dB-scaled CQT on a constant-Q frequency axis against time.
plt.figure(figsize=(12, 4))
librosa.display.specshow(C, y_axis='cqt_hz', sr=sr,
                         bins_per_octave=BINS_PER_OCTAVE,
                         x_axis='time')
plt.title('log power cqt')
plt.tight_layout()



In [4]:
# Estimate beat positions (as frame indices) for the clip.
tempo, beats = librosa.beat.beat_track(y=y, sr=sr, trim=False)

# Reduce the CQT to one column per inter-beat interval, using the median
# of the frames that fall inside each interval.
Csync = librosa.util.sync(C, beats, aggregate=np.median)

# Beat boundaries in seconds, used as plot coordinates here and in later
# cells. fix_frames is given x_min=0 and x_max=C.shape[1] so the boundary
# list is bracketed by the first and last CQT frame.
beat_times = librosa.frames_to_time(librosa.util.fix_frames(beats,
                                                            x_min=0,
                                                            x_max=C.shape[1]),
                                    sr=sr)

# Plot with the same BINS_PER_OCTAVE that produced C, so the frequency axis
# stays correct if that constant is ever changed.
plt.figure(figsize=(12, 4))
librosa.display.specshow(Csync, bins_per_octave=BINS_PER_OCTAVE,
                         y_axis='cqt_hz', x_axis='time',
                         x_coords=beat_times)
plt.tight_layout()



In [5]:
# Symmetric, affinity-weighted recurrence graph over the beat-synchronous
# CQT columns.
R = librosa.segment.recurrence_matrix(
    Csync,
    mode='affinity',
    sym=True,
    width=3,
)

# Wrap scipy's median filter with librosa's time-lag adapter, then apply it
# to R with a 1 x 7 window to produce the filtered matrix Rf.
df = librosa.segment.timelag_filter(scipy.ndimage.median_filter)
Rf = df(R, size=(1, 7))

In [6]:
# MFCCs of the clip, aggregated per beat interval (sync's default aggregate).
mfcc = librosa.feature.mfcc(y=y, sr=sr)
Msync = librosa.util.sync(mfcc, beats)

# Squared Euclidean distance between each pair of consecutive columns.
path_distance = np.square(np.diff(Msync, axis=1)).sum(axis=0)

# Turn distances into similarities with an exponential kernel whose scale
# is the median distance.
sigma = np.median(path_distance)
path_sim = np.exp(-path_distance / sigma)

# Sequence graph: the similarities sit on the first sub- and super-diagonal,
# linking every column only to its immediate neighbours.
R_path = np.diag(path_sim, k=-1) + np.diag(path_sim, k=1)

In [7]:
# Row sums (node degrees) of the sequence graph and the recurrence graph.
deg_path = R_path.sum(axis=1)
deg_rec = Rf.sum(axis=1)

# Mixing weight: projection of the path degrees onto the combined degrees,
# normalised by the squared length of the combined degree vector.
mu = np.dot(deg_path, deg_path + deg_rec) / np.sum(np.square(deg_path + deg_rec))

# Convex combination of the two graphs.
A = mu * Rf + (1 - mu) * R_path

In [8]:
# Side-by-side view of the three graphs built in the previous cells.
plt.figure(figsize=(8, 4))

# Left: filtered recurrence matrix, with beat times on the y axis.
plt.subplot(1, 3, 1)
librosa.display.specshow(Rf, cmap='inferno_r', y_axis='time', y_coords=beat_times)
plt.title('Recurrence Similarity')

# Middle: neighbour-to-neighbour (path) similarity.
plt.subplot(1, 3, 2)
librosa.display.specshow(R_path, cmap='inferno_r')
plt.title('Path similarity')

# Right: the weighted combination A of the two.
plt.subplot(1, 3, 3)
librosa.display.specshow(A, cmap='inferno_r')
plt.title('Combined graph')

plt.tight_layout()



In [10]:
# Normalized graph Laplacian of the combined graph (normed=True).
L = scipy.sparse.csgraph.laplacian(A, normed=True)

# Eigen-decomposition of the symmetric Laplacian.
evals, evecs = scipy.linalg.eigh(L)

# Median-filter each eigenvector along the row axis (window of 9 rows,
# 1 column) to smooth over small discontinuities.
evecs = scipy.ndimage.median_filter(evecs, size=(9, 1))

# Cnorm[:, j] is the per-row Euclidean norm of the first j + 1 eigenvector
# components (cumulative sum of squares along the columns, then square root).
Cnorm = np.cumsum(evecs**2, axis=1)**0.5

# Keep the first k eigenvectors and scale each row by its norm over those
# k components, so every row of X has unit length.
k = 5
X = evecs[:, :k] / Cnorm[:, k-1:k]

plt.figure(figsize=(8, 4))

# Right panel: the filtered recurrence matrix for reference.
plt.subplot(1, 2, 2)
librosa.display.specshow(Rf, cmap='inferno_r')
plt.title('Recurrence matrix')

# Left panel: the k normalised components, with beat times on the y axis.
plt.subplot(1, 2, 1)
librosa.display.specshow(X,
                         y_axis='time',
                         y_coords=beat_times)
plt.title('Structure components')

plt.tight_layout()