In [1]:
import numpy, scipy, matplotlib.pyplot as plt, sklearn, librosa, librosa.display, stanford_mir, IPython.display
%matplotlib inline
plt.rcParams['figure.figsize'] = (14, 5)
Load two files: one harmonic and one percussive.
In [2]:
yh, fs = librosa.load('prelude_cmaj_10s.wav')
In [3]:
yp, fs = librosa.load('125_bounce.wav')
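Both calls to librosa.load resample to 22050 Hz by default, so the two signals should share one sample rate. A quick sanity check, sketched under that default-sr assumption:
print(fs)                       # sample rate shared by both signals
print(len(yh)/fs, len(yp)/fs)   # durations in seconds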
Add the two signals together, and rescale:
In [4]:
N = min(len(yh), len(yp))
# Normalize each signal by its absolute peak before mixing, then leave headroom.
x = yh[:N]/numpy.abs(yh).max() + yp[:N]/numpy.abs(yp).max()
x = 0.5 * x/numpy.abs(x).max()
In [5]:
x.max()
Out[5]:
Listen to the combined audio signal:
In [6]:
IPython.display.Audio(x, rate=fs)
Out[6]:
Compute the STFT:
In [7]:
X = librosa.stft(x)
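librosa.stft uses n_fft=2048 and hop_length=n_fft//4 by default, producing a complex array with 1 + n_fft/2 frequency bins per frame. The equivalent explicit call, as a sketch of those assumed defaults:
X = librosa.stft(x, n_fft=2048, hop_length=512)
print(X.shape, X.dtype)   # (1025, n_frames), complex-valued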
Take the log-amplitude for display purposes:
In [8]:
Xmag = librosa.amplitude_to_db(numpy.abs(X))
Display the log-magnitude spectrogram:
In [9]:
librosa.display.specshow(Xmag, sr=fs, x_axis='time', y_axis='log')
Out[9]:
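A colorbar makes the dB scale explicit. This optional sketch layers matplotlib's pyplot API on top of specshow:
librosa.display.specshow(Xmag, sr=fs, x_axis='time', y_axis='log')
plt.colorbar(format='%+2.0f dB')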
Perform harmonic-percussive source separation:
In [10]:
H, P = librosa.decompose.hpss(X)
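hpss median-filters the magnitude spectrogram across time (enhancing harmonics) and across frequency (enhancing percussives), then soft-masks the complex STFT. Two optional variants, sketched here rather than required by the walkthrough: the margin keyword trades harder separation against artifacts, and librosa.effects.hpss wraps the entire STFT/separate/ISTFT round trip on the waveform:
H_hard, P_hard = librosa.decompose.hpss(X, margin=3.0)   # harder masks, more aggressive separation
h_direct, p_direct = librosa.effects.hpss(x)             # one-step, time-domain convenience wrapper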
Compute the log-amplitudes of the outputs:
In [11]:
Hmag = librosa.amplitude_to_db(numpy.abs(H))
Pmag = librosa.amplitude_to_db(numpy.abs(P))
Display each output:
In [12]:
librosa.display.specshow(Hmag, sr=fs, x_axis='time', y_axis='log')
Out[12]:
In [13]:
librosa.display.specshow(Pmag, sr=fs, x_axis='time', y_axis='log')
Out[13]:
Transform the harmonic output back to the time domain:
In [14]:
h = librosa.istft(H)
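Because of framing, istft can return a few samples fewer than the original signal. If exact alignment with x matters, istft accepts a length keyword; a sketch, assuming a librosa version that supports it:
h = librosa.istft(H, length=len(x))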
Listen to the harmonic output:
In [15]:
IPython.display.Audio(h, rate=fs)
Out[15]:
Transform the percussive output back to the time domain:
In [16]:
p = librosa.istft(P)
Listen to the percussive output:
In [17]:
IPython.display.Audio(p, rate=fs)
Out[17]:
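As a final sanity check (a sketch, assuming the default margin): the two soft masks sum to one, so the separated components should reconstruct the mixture up to numerical precision.
print(numpy.allclose(H + P, X))   # masked STFTs add back to the original STFT
n = min(len(h), len(p), len(x))
print(numpy.max(numpy.abs(h[:n] + p[:n] - x[:n])))   # small residual from the ISTFT round trip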