This notebook will demonstrate how to do audio effects processing with librosa and IPython notebook. You will need IPython 2.0 or later.
By the end of this notebook, you'll know how to play audio inline, separate harmonic and percussive components, pitch-shift and time-stretch a signal, and decompose a spectrogram with NMF and resynthesize individual components.
In [ ]:
from __future__ import print_function
In [ ]:
import librosa
import librosa.display
import IPython.display
import numpy as np
In [ ]:
import matplotlib.pyplot as plt
import matplotlib.style as ms
# NOTE(review): the 'seaborn-muted' style name was renamed to
# 'seaborn-v0_8-muted' in matplotlib 3.6 — confirm the installed version.
ms.use('seaborn-muted')
# Render figures inline in the notebook (IPython magic; not plain Python).
%matplotlib inline
In [ ]:
# Load the example track
# Returns the audio samples `y` and their sampling rate `sr`, which every
# cell below reuses for playback and resynthesis.
# NOTE(review): librosa.util.example_audio_file() was removed in librosa 0.8;
# newer releases use librosa.example() instead — confirm the installed version.
y, sr = librosa.load(librosa.util.example_audio_file())
In [ ]:
# Play it back!
# IPython.display.Audio builds an in-notebook audio widget from raw samples
# plus their sampling rate (it is the cell's last expression, so it renders).
IPython.display.Audio(data=y, rate=sr)
In [ ]:
# How about separating harmonic and percussive components?
# Harmonic-percussive source separation: returns two waveforms the same
# length as `y` — the harmonic part and the percussive part.
y_h, y_p = librosa.effects.hpss(y)
In [ ]:
# Play the harmonic component
# (pitched content — what remains after the percussive part is removed)
IPython.display.Audio(data=y_h, rate=sr)
In [ ]:
# Play the percussive component
# (transient content — drum-like onsets separated out by hpss above)
IPython.display.Audio(data=y_p, rate=sr)
In [ ]:
# Pitch shifting? Let's gear-shift up by a perfect fifth (7 semitones).
# FIX: the original comment said "a major third (4 semitones)" while the code
# shifted by 7 semitones — the comment is corrected to match the behavior.
# Arguments after `y` are passed by keyword: they became keyword-only in
# librosa 0.10, and keywords also work with older releases.
y_shift = librosa.effects.pitch_shift(y, sr=sr, n_steps=7)
IPython.display.Audio(data=y_shift, rate=sr)
In [ ]:
# Or time-stretching? Let's slow it down to half speed (rate=0.5 doubles the
# duration; the pitch is unchanged).
# `rate` is passed by keyword: it became keyword-only in librosa 0.10, and the
# keyword also works with older releases.
y_slow = librosa.effects.time_stretch(y, rate=0.5)
IPython.display.Audio(data=y_slow, rate=sr)
In [ ]:
# How about something more advanced? Let's decompose a spectrogram with NMF, and then resynthesize an individual component
# Short-time Fourier transform: complex spectrogram (frequency x frames).
D = librosa.stft(y)
# Separate the magnitude and phase
# S is the non-negative magnitude (NMF input); `phase` is kept so the
# reconstruction cells below can restore it before inverting the STFT.
S, phase = librosa.magphase(D)
# Decompose by nmf
# `components` holds 8 spectral templates; `activations` holds their
# per-frame gains, sorted (sort=True) by peak frequency.
components, activations = librosa.decompose.decompose(S, n_components=8, sort=True)
In [ ]:
# Visualize the components and activations, just for fun
plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
# Show the spectral templates in decibels on a log-frequency axis.
# FIX: librosa.logamplitude(..., ref_power=np.max) was removed from librosa
# (deprecated in 0.5, removed in 0.6); librosa.power_to_db(..., ref=np.max)
# is the drop-in replacement with identical output.
librosa.display.specshow(librosa.power_to_db(components**2.0, ref=np.max), y_axis='log')
plt.xlabel('Component')
plt.ylabel('Frequency')
plt.title('Components')
plt.subplot(1,2,2)
# Show the per-frame activation strengths of each component over time.
librosa.display.specshow(activations, x_axis='time')
plt.xlabel('Time')
plt.ylabel('Component')
plt.title('Activations')
plt.tight_layout()
In [ ]:
# Inspect the factorization dimensions: components is (n_freq, n_components)
# and activations is (n_components, n_frames), so their product matches S.
print(components.shape, activations.shape)
In [ ]:
# Play back the reconstruction
# Rebuild the full magnitude spectrogram from all NMF components at once:
# the matrix product of the spectral templates with their activations.
D_k = np.dot(components, activations)
# Reattach the original phase, then invert the STFT back to a waveform.
y_k = librosa.istft(D_k * phase)
# Listen to the result.
print('Full reconstruction')
IPython.display.Audio(data=y_k, rate=sr)
In [ ]:
# Resynthesize. How about we isolate just first (lowest) component?
k = 0
# Rank-1 spectrogram: outer product of template k with its activation row
# (np.outer on 1-D inputs is identical to np.multiply.outer).
D_k = np.outer(components[:, k], activations[k])
# Restore the saved phase, then invert the STFT to recover audio.
y_k = librosa.istft(D_k * phase)
# Listen to this component alone.
print('Component #{}'.format(k))
IPython.display.Audio(data=y_k, rate=sr)
In [ ]:
# Resynthesize. How about we isolate a middle-frequency component?
# BUG FIX: `len(activations) / 2` is a float under Python 3 and is not a
# valid array index; floor division keeps `k` an int (same value as before).
k = len(activations) // 2
# Reconstruct a spectrogram by the outer product of component k and its activation
D_k = np.multiply.outer(components[:, k], activations[k])
# invert the stft after putting the phase back in
y_k = librosa.istft(D_k * phase)
# And playback
print('Component #{}'.format(k))
IPython.display.Audio(data=y_k, rate=sr)
In [ ]:
# Resynthesize. How about we isolate just last (highest) component?
k = -1
# Rank-1 spectrogram: outer product of template k with its activation row
# (np.outer on 1-D inputs is identical to np.multiply.outer).
D_k = np.outer(components[:, k], activations[k])
# Restore the saved phase, then invert the STFT to recover audio.
y_k = librosa.istft(D_k * phase)
# Listen to this component alone.
print('Component #{}'.format(k))
IPython.display.Audio(data=y_k, rate=sr)