In [71]:
# Path to the input wav; produced by running ./setup (see repo).
# NOTE(review): assumed to be a 44.1 kHz int16 wav — later cells hardcode that rate.
filename = 'African Drum Music-wXV39pybgJU-sm.wav'

In [72]:
import numpy as np
from scipy.io.wavfile import read, write
from librosa.core import stft, istft
import IPython

from play import play

In [73]:
# Load wav data: read() returns (sample_rate, samples).
# Capture the sample rate instead of discarding it — downstream cells
# hardcode 44100, which is only correct if the file really is 44.1 kHz.
sample_rate, y = read(filename)

In [74]:
# Convert the time-series wav data into a complex spectrogram.
# Per Out[78], D has 1025 frequency rows, i.e. librosa's default FFT size.
D = stft(y)

In [75]:
slice = np.ones(D.shape[0], dtype=bool)

In [76]:
# get rid of huge swath of frequence data
slice[50:] = False
# of whats left, get rid of a lot more though cut every other frequency band and then every third
slice[::2] = False
slice[::3] = False

In [77]:
# only 7 complex numbers are left out of 1025
sum(slice), len(slice)


Out[77]:
(17, 1025)

In [78]:
# Full spectrogram dimensionality: (frequency bins, time frames).
D.shape


Out[78]:
(1025, 865)

In [79]:
# slice D down to lower dimensionality
Dslice = D[slice,:]

In [80]:
# Dslice is now significantly lower dimensionality than D
print Dslice.shape
print D.shape


(17, 865)
(1025, 865)

In [81]:
# now rebuild D back from Dslice (as if Dslice was the output of a NN)
Dnew = np.zeros(D.shape, dtype=D.dtype)

In [82]:
# fill in Dnew
Dnew[slice,:] = Dslice

In [83]:
# convert back to time domain
back_y = istft(Dnew)

In [84]:
# convert back_y into int16 or whatever the file format has.  otherwise back_y is float
back_y = np.array(back_y, dtype=y.dtype)

In [85]:
# a measure of how different y and y->fft->back_y are
sum(abs(y - back_y))/len(y)


Out[85]:
1145

In [86]:
# the original version
IPython.display.Audio(y, rate=44100)


Out[86]:

In [87]:
# the new low pass version
IPython.display.Audio(back_y, rate=44100)


Out[87]:

In [88]:
# write test-out for listening if you want to do that
write('test-out.wav', 44100, back_y)