notebook.community

Edit and run



In [1]:

    
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
%matplotlib inline

import IPython.display



In [2]:

    
# Location of the clip to analyse, relative to the notebook's working directory.
file_path = "../data/songData/genres/blues/blues.00000.wav"

# Decode the file: `y` is the audio time series, `sr` its sampling rate.
y, sr = librosa.load(path=file_path)



In [3]:

    
# Short-time Fourier transform of the clip, then split into its magnitude
# (S_full) and phase components.
stft_matrix = librosa.stft(y)
S_full, phase = librosa.magphase(stft_matrix)



In [4]:

    
# Frame indices bounding the 0 s - 30 s excerpt; `idx` selects those columns.
first_frame, last_frame = librosa.time_to_frames([0, 30], sr=sr)
idx = slice(first_frame, last_frame)

# Magnitude of the excerpt in dB, referenced to its own maximum.
excerpt_db = librosa.amplitude_to_db(S_full[:, idx], ref=np.max)

plt.figure(figsize=(12, 4))
librosa.display.specshow(excerpt_db, x_axis='time', y_axis='log', sr=sr)
plt.colorbar()
plt.tight_layout()



In [5]:

    
# Nearest-neighbour filtering of the magnitude spectrogram: frames are compared
# with the cosine metric and aggregated with the median.
# `width` is 2 seconds expressed in frames. `sr` is passed by keyword because
# it is keyword-only in recent librosa releases (positional use raises
# TypeError); the keyword form is also accepted by older releases.
S_filter = librosa.decompose.nn_filter(S_full,
                                       aggregate=np.median,
                                       metric='cosine',
                                       width=int(librosa.time_to_frames(2, sr=sr)))



In [6]:

    
# Clamp the filter output so it never exceeds the input magnitude
# (element-wise minimum of the two spectrograms).
S_filter = np.minimum(S_full, S_filter)



In [7]:

    
# Soft-mask settings: one margin for the background mask (margin_i), one for
# the foreground mask (margin_v), and a shared exponent.
margin_i = 2
margin_v = 10
power = 2

# Portion of the input magnitude not accounted for by the filtered spectrogram.
S_residual = S_full - S_filter

mask_i = librosa.util.softmask(S_filter, margin_i * S_residual, power=power)
mask_v = librosa.util.softmask(S_residual, margin_v * S_filter, power=power)

# Scale the input magnitude by each mask to obtain the two components.
S_background = mask_i * S_full
S_foreground = mask_v * S_full



In [8]:

    
# Stack the full, background and foreground spectrograms (columns selected by
# `idx`) in one figure, one panel per row, each with its own colorbar.
panels = (
    ('Full spectrogram', S_full),
    ('Background', S_background),
    ('Foreground', S_foreground),
)

plt.figure(figsize=(12, 8))
for row, (panel_title, magnitude) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, row)
    # Each panel is converted to dB relative to its own maximum.
    panel_db = librosa.amplitude_to_db(magnitude[:, idx], ref=np.max)
    librosa.display.specshow(panel_db, y_axis='log', sr=sr)
    plt.title(panel_title)
    plt.colorbar()

plt.tight_layout()



In [14]:

    
# Dimensions of the background magnitude spectrogram.
S_background.shape









    Out[14]:





(1025, 1293)



In [ ]:

    
# Audio playback needs a time-domain signal, not a magnitude spectrogram
# (a 2-D array would be interpreted as channels x samples). Re-attach the phase
# from the original STFT, invert it, and trim/pad to the input clip's length.
y_background = librosa.istft(S_background * phase, length=len(y))
IPython.display.Audio(data=y_background, rate=sr)