In [1]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import seaborn
%matplotlib inline
seaborn.set()
In [202]:
import pandas as pd
In the original implementation, the choice of anchor phase is obvious: take the phase of the first non-DC frequency bin.
Several options are available:
Ideally, we'd like a robust estimator that is not susceptible to noise in any single channel.
Possible solution: `median(unwrap(anchors))`, i.e. unwrap the anchor phases across channels and take their median.
In [733]:
def phase_anchor(data, aggregate=np.median, axis=-1):
    '''Multi-dimensional phase anchoring.

    Given a multi-dimensional patch and an axis along which to apply an FFT,
    estimate a single reference ("anchor") angle from the phase of the first
    non-DC frequency bin, and rotate the phase of every non-DC bin ``k`` by
    ``-k * angle``.  Magnitudes are left untouched.  This linear phase
    rotation amounts to a (possibly fractional) circular shift of the data
    along ``axis``.

    Parameters
    ----------
    data : np.ndarray, dtype=float
        The input data.  Must have at least two samples along ``axis``.

    aggregate : callable
        Function reducing the unwrapped anchor phases (the phase of the
        first non-DC bin at every position along the remaining axes) to a
        single scalar angle.  Defaults to ``np.median``.

    axis : int
        The axis along which to compute the FFT

    Returns
    -------
    data_aligned : np.ndarray like data
        The phase-anchored data, with the same shape as ``data``
    '''
    data = np.asarray(data)

    # Remember the original length so that the inverse transform restores it
    # exactly; without it, irfft always produces an even number of samples.
    n_samples = data.shape[axis]

    F = np.fft.rfft(data, axis=axis)
    mag = np.abs(F)
    phase = np.angle(F)

    # Select the reference phase values: bin 1 along `axis`, everything along
    # the other axes.  The index must be a tuple; a list of slices is not a
    # valid multi-dimensional index.
    anchor_idx = [slice(None)] * F.ndim
    anchor_idx[axis] = 1

    # atleast_1d keeps 1-D input working, where the selection is a scalar
    anchors = np.atleast_1d(phase[tuple(anchor_idx)])
    angle = aggregate(np.unwrap(anchors))

    # Compute the phase shift for each non-DC bin, shaped so that it
    # broadcasts against `phase` along `axis`
    bins = np.arange(1, F.shape[axis])
    bcast_shape = [1] * F.ndim
    bcast_shape[axis] = -1
    shift = angle * bins.reshape(bcast_shape)

    # Apply the shift to every bin except DC
    phase_idx = [slice(None)] * F.ndim
    phase_idx[axis] = slice(1, None)
    phase_idx = tuple(phase_idx)
    phase[phase_idx] = phase[phase_idx] - shift

    # Combine the magnitudes with our new phase
    F2 = mag * np.exp(1.j * phase)

    return np.fft.irfft(F2, n=n_samples, axis=axis)
In [163]:
# List the audio files with extension 'flac' that librosa finds in the audio directory.
# NOTE(review): absolute, machine-specific path; the notebook will not run elsewhere
# without editing it.
files = librosa.util.find_files('/home/bmcfee/data/beatles_iso/audio/', ext='flac')
In [577]:
# Show the path of entry 141, the same index that is assigned to `k` in the following cell.
files[141]
Out[577]:
In [579]:
# Index of the track analysed below; it selects both the audio file (files[k]) and the
# annotation file (labfiles[k]).
# NOTE(review): assumes the two file lists are ordered identically — TODO confirm.
k = 141
In [580]:
# List the annotation files with extension 'lab' that librosa finds in the annotation directory.
# NOTE(review): absolute, machine-specific path; the notebook will not run elsewhere
# without editing it.
labfiles = librosa.util.find_files('/home/bmcfee/data/beatles_iso/seglab_tut/', ext='lab')
In [584]:
# Dump the raw annotation file for track k to inspect its layout (it is parsed with pandas below).
!cat "{labfiles[k]}"
In [592]:
# Parse the header-less, whitespace-delimited annotation file for track k.
# The separator is a raw string so that the regex \s+ is not treated as an
# (invalid) string escape sequence.
segments = pd.read_table(labfiles[k], header=None, sep=r'\s+')
In [593]:
# Display the parsed annotation table (columns are numbered 0, 1, 2, ... since header=None).
segments
Out[593]:
In [594]:
# Load the audio for track k: y is the signal and sr the sampling rate returned by librosa.
y, sr = librosa.load(files[k])
In [595]:
# Constant-Q transform of the signal, using librosa's default CQT parameters.
# NOTE(review): phase_anchor documents float input; confirm that the installed librosa
# returns a real-valued (magnitude) CQT here rather than a complex one.
cqgram = librosa.cqt(y, sr=sr)
In [596]:
# Run librosa's beat tracker on the signal; `beats` is used below to subdivide the CQT.
# `tempo` is not used in the cells shown here.
tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
In [597]:
# Sub-divide the beat intervals using the CQT content.
# NOTE(review): presumably returns boundary frame indices, which are then used for
# event matching and for synchronising the CQT — TODO confirm against the librosa version in use.
subseg = librosa.segment.subsegment(cqgram, beats)
In [598]:
# Convert column 0 of the annotation table (presumably segment start times in seconds)
# to frame indices.
# NOTE(review): relies on time_to_frames' default sample rate and hop length rather than
# passing `sr` — confirm these match the values used for the CQT above.
segment_frames = librosa.time_to_frames(segments[0].values)
In [599]:
# Pair each annotation row (columns 0, 1, 2) with the result of matching its frame
# index against the sub-segment boundaries (presumably the index of the nearest entry
# in subseg — TODO confirm).  Wrapped in list() so the pairs are displayed on Python 3
# as well, where zip() returns a lazy iterator.
list(zip(librosa.util.match_events(segment_frames, subseg), segments[0], segments[1], segments[2]))
Out[599]:
In [600]:
# Synchronise the CQT to the sub-segment boundaries, aggregating with the median.
# NOTE(review): librosa.feature.sync was relocated to librosa.util.sync in later librosa
# releases — confirm against the installed version.
cqb = librosa.feature.sync(cqgram, subseg, aggregate=np.median)
In [694]:
# Extract columns 83..163 (81 columns) of the synchronised CQT as the working patch.
# NOTE(review): the column range is a magic number, presumably read off the segment
# matching output above — TODO confirm.
patch = cqb[:, 83:164]
In [706]:
# Report the patch dimensions.  The call form of print works on both Python 2 and 3.
print(patch.shape)
In [734]:
# Phase-anchor the original patch, taking the FFT along axis 0.
p2 = phase_anchor(patch, axis=0)
In [739]:
# Generate a synthetically pitch-shifted patch: a circular shift by 8 bins along
# axis 0, so rows rolled past one edge wrap around to the other.
patch_shift = np.roll(patch, -8, axis=0)
# Disabled alternatives: take the comparison patch from other column ranges of cqb instead.
#patch_shift = cqb[:, 838:855]
#patch_shift = cqb[:, 334:334+164-83]
In [740]:
# Phase-anchor the shifted patch the same way as the original (FFT along axis 0).
p3 = phase_anchor(patch_shift, axis=0)
In [741]:
# Shape of the un-anchored patch, for comparison with the anchored outputs p2 and p3.
patch.shape
Out[741]:
In [742]:
# Shape of the anchored original patch; compare with patch.shape.
p2.shape
Out[742]:
In [743]:
# Shape of the anchored shifted patch; compare with patch.shape.
p3.shape
Out[743]:
In [744]:
# 2x2 comparison grid: the top row shows the original patch before and after
# anchoring, the bottom row shows the shifted patch before and after anchoring.
panels = [
    (patch, 'Original'),
    (p2, 'Original anchored'),
    (patch_shift, 'Shifted'),
    (p3, 'Shifted anchored'),
]

plt.figure(figsize=(12,6))

for position, (spec, caption) in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    librosa.display.specshow(spec)
    plt.title(caption)
    plt.colorbar()

plt.tight_layout()
In [745]:
# Element-wise absolute difference between the original and the shifted patch,
# before any anchoring (baseline for the anchored residual plotted afterwards).
# NOTE(review): librosa.display is used without an explicit `import librosa.display`;
# newer librosa releases require it — confirm against the installed version.
librosa.display.specshow(np.abs(patch - patch_shift))
plt.title('Residual of original and shifted')
plt.colorbar()
plt.tight_layout()
In [746]:
# NOTE(review): mpld3 is never imported in the cells shown here, so this line raises
# NameError on a fresh kernel unless it is imported elsewhere — confirm, then either
# import it or drop the call.
mpld3.disable_notebook()
# Element-wise absolute difference between the two anchored patches.
librosa.display.specshow(np.abs(p2 - p3))
plt.title('Residual of anchored original and anchored shifted')
plt.colorbar()
plt.tight_layout()
In [ ]: