In [71]:
# Path to the input wav; produced by running ./setup (see repo).
# NOTE(review): assumed to be a 44.1 kHz int16 wav — later cells hardcode that rate.
filename = 'African Drum Music-wXV39pybgJU-sm.wav'

In [72]:
import numpy as np
from scipy.io.wavfile import read, write
from librosa.core import stft, istft
import IPython

from play import play

In [73]:
# Load wav data: read() returns (sample_rate, samples).
# Capture the sample rate instead of discarding it — downstream cells
# hardcode 44100, which is only correct if the file really is 44.1 kHz.
sample_rate, y = read(filename)

In [74]:
# Convert the time-series wav data into a complex spectrogram.
# Per Out[78], D has 1025 frequency rows, i.e. librosa's default FFT size.
D = stft(y)

In [75]:
slice = np.ones(D.shape[0], dtype=bool)

In [76]:
# get rid of huge swath of frequence data
slice[50:] = False
# of whats left, get rid of a lot more though cut every other frequency band and then every third
slice[::2] = False
slice[::3] = False

In [77]:
# only 7 complex numbers are left out of 1025
sum(slice), len(slice)


Out[77]:
(17, 1025)

In [78]:
# Full spectrogram dimensionality: (frequency bins, time frames).
D.shape


Out[78]:
(1025, 865)

In [79]:
# slice D down to lower dimensionality
Dslice = D[slice,:]

In [80]:
# Dslice is now significantly lower dimensionality than D
print Dslice.shape
print D.shape


(17, 865)
(1025, 865)

In [81]:
# now rebuild D back from Dslice (as if Dslice was the output of a NN)
Dnew = np.zeros(D.shape, dtype=D.dtype)

In [82]:
# fill in Dnew
Dnew[slice,:] = Dslice

In [83]:
# convert back to time domain
back_y = istft(Dnew)

In [84]:
# convert back_y into int16 or whatever the file format has.  otherwise back_y is float
back_y = np.array(back_y, dtype=y.dtype)

In [85]:
# a measure of how different y and y->fft->back_y are
sum(abs(y - back_y))/len(y)


Out[85]:
1145

In [86]:
# the original version
IPython.display.Audio(y, rate=44100)


Out[86]:

In [87]:
# the new low pass version
IPython.display.Audio(back_y, rate=44100)


Out[87]:

In [88]:
# write test-out for listening if you want to do that
write('test-out.wav', 44100, back_y)