In [1]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [2]:
filename = 'African Drum Music-wXV39pybgJU-sm.wav'

In [3]:
from scipy.io.wavfile import read, write
from librosa.core import stft, istft

# load wav data
_, y = read(filename)


/usr/local/lib/python2.7/dist-packages/librosa/core/audio.py:37: UserWarning: Could not import scikits.samplerate. Falling back to scipy.signal
  warnings.warn('Could not import scikits.samplerate. '
/usr/lib/python2.7/dist-packages/scipy/io/wavfile.py:42: WavFileWarning: Unknown wave file format
  warnings.warn("Unknown wave file format", WavFileWarning)

In [4]:
# convert timeseries wav data into spectrogram data
D = stft(y)

In [5]:
imshowsq(np.abs(D)[:50,:])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-590806490c7c> in <module>()
----> 1 imshowsq(np.abs(D)[:50,:])

NameError: name 'imshowsq' is not defined

In [6]:
slice = np.ones(D.shape[0], dtype=bool)

In [7]:
# get rid of huge swath of frequence data
# slice[:] = False
# slice[4] = True
# slice[12] = True
# slice[18] = True
# slice[30:] = False
# slice[:10] = False
# of whats left, get rid of a lot more though cut every other frequency band and then every third
# slice[::2] = False
# slice[::3] = False

In [8]:
# only 7 complex numbers are left out of 1025
sum(slice), len(slice)


Out[8]:
(1025, 1025)

In [9]:
# slice D to lower size
D[slice,:].shape


Out[9]:
(1025, 865)

In [10]:
D2 = D[slice,:]

In [11]:
# remove phase of high freq
D2[50:] = np.abs(D2[50:]) + (0 * 1j)

In [12]:
D3 = np.zeros(D.shape, dtype='complex')

In [13]:
D3[slice,:] = D2

In [14]:
# and then back to timeseries wav
back_y = istft(D3)

In [15]:
# convert back_y into int16 or whatever the file format has.  otherwise back_y is float
back_y = np.array(back_y, dtype=y.dtype)

In [16]:
D2.shape, D.shape


Out[16]:
((1025, 865), (1025, 865))

In [17]:
play(back_y)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-0168f185c190> in <module>()
----> 1 play(back_y)

NameError: name 'play' is not defined

In [18]:
plot(y[100000:101000])


Out[18]:
[<matplotlib.lines.Line2D at 0x7fccd2691cd0>]

In [19]:
plot(back_y[100000:101000])


Out[19]:
[<matplotlib.lines.Line2D at 0x7fcccf0dc710>]

In [20]:
back_y_shift = np.concatenate([np.zeros(250), back_y])

In [21]:
imshowsq(np.abs(D)[:,100:150])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-21-4c5ef1aa943b> in <module>()
----> 1 imshowsq(np.abs(D)[:,100:150])

NameError: name 'imshowsq' is not defined

In [22]:
imshowsq(np.abs(stft(back_y))[:,100:150])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-22-95b4dbb808f7> in <module>()
----> 1 imshowsq(np.abs(stft(back_y))[:,100:150])

NameError: name 'imshowsq' is not defined

In [23]:
imshowsq(np.abs(stft(back_y_shift))[:,100:150])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-23-ec808c9ca07f> in <module>()
----> 1 imshowsq(np.abs(stft(back_y_shift))[:,100:150])

NameError: name 'imshowsq' is not defined

In [24]:
# write test-out for listening
write('test-out.wav', 44100, back_y)

In [25]:
# a measure of how different y and y->fft->back_y are
sum(abs(y - back_y))/len(y)


Out[25]:
288

In [26]:
# the shape of the input and output are the same
print y.shape
print back_y.shape


(442368,)
(442368,)

y is roughly 44100 samples/second * 60 seconds. I'm not sure why its off by 16 ...


In [27]:
y.shape[0], 44100 * 60


Out[27]:
(442368, 2646000)

Default FFT window size is 2048. Default hop_length is 2048/4


In [28]:
# roughly matches 5169 above
print D.shape
print len(y)/2048. * 4.


(1025, 865)
864.0

In [29]:
# windows are roughly 11ms long
1000/(5169/60.)


Out[29]:
11.607661056297156

In [30]:
# D has twice as many values as y
float(D.shape[0] * D.shape[1]) / len(y)


Out[30]:
2.0042702003761574

In [32]:
x = numpy.concatenate([D.real, D.imag])

In [34]:
x.shape


Out[34]:
(2050, 865)

In [107]:
# x is now shape 2, freq, samples

In [108]:
x.shape


Out[108]:
(2, 1025, 5169)

In [109]:
# and to convert back into complex for conversion back to wav

In [110]:
D2 = x[0,:,:] + (x[1,:,:] * 1j)

In [111]:
sum(D - D2)


Out[111]:
0j

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [81]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [82]:
import numpy as np
from librosa.core import stft, istft

from play import play

In [83]:
# freq = 1981
freq = 2000
gensin = np.sin(np.linspace(0, freq*np.pi, 44100)) * 2**15

In [84]:
gensin = np.array(gensin, dtype='int16')

In [85]:
play(gensin)

In [86]:
dsin = stft(gensin, n_fft=2048, hop_length=2048)

In [104]:
def imshowsq(m):
    """ A helper for showing spectrograms forces to square aspect ratio with no interpolation """
    imshow(m, aspect=float(m.shape[1]) / m.shape[0], interpolation='none')
    colorbar()

In [105]:
binlo = 38
binhi = 54

In [106]:
imshowsq(dsin.real[binlo:binhi,:])
title('bins {} through {} of real part of frequency domain'.format(binlo, binhi))


Out[106]:
<matplotlib.text.Text at 0x7fbc69a9f6d0>

In [107]:
imshowsq(dsin.imag[binlo:binhi,:])
title('bins {} through {} of imaginary part of frequency domain'.format(binlo, binhi))


Out[107]:
<matplotlib.text.Text at 0x7fbc69968b50>

In [108]:
plot(dsin.imag[binlo:binhi,10])
plot(dsin.imag[binlo:binhi,11])
plot(dsin.imag[binlo:binhi,12])
plot(dsin.imag[binlo:binhi,13])
title('a few slices of imaginary space at a few particular times')


Out[108]:
<matplotlib.text.Text at 0x7fbc697ebe50>

See here for real/imaginary vs magnitude/phase


In [113]:
imshowsq(abs(dsin)[binlo:binhi,:])
title('absolute value (maginude complex vector) of frequency domain')


Out[113]:
<matplotlib.text.Text at 0x7fbc6933ae10>

In [114]:
imshowsq(np.angle(dsin)[binlo:binhi,:])
title('angle of frequency domain')


Out[114]:
<matplotlib.text.Text at 0x7fbc6920ab10>

In [118]:
plot(np.abs(dsin)[38:54,11])
title('slice in time of magnitude')


Out[118]:
<matplotlib.text.Text at 0x7fbc68e505d0>

In [119]:
plot(np.angle(dsin)[38:54,11])
title('same time slice of angle')


Out[119]:
<matplotlib.text.Text at 0x7fbc68d72f50>

In [126]:
plot(np.sum(abs(dsin), axis=1))
title('sum of magnitudes over time')
xlim(binlo-20,binhi+20)


Out[126]:
(18, 74)

In [128]:
plot(np.sum(dsin.real, axis=1))
title('sum of real over time')
xlim(binlo-20,binhi+20)


Out[128]:
(18, 74)

In [131]:
# this appears to be some beat frequency combining sin frequency and fft window frequency
plot(dsin.imag[46,:])
title('imaginary value at target frequency bin over time')


Out[131]:
<matplotlib.text.Text at 0x7fbc68630c90>

In [132]:
plot(dsin.real[46,:])
title('real value at target frequency bin over time')


Out[132]:
<matplotlib.text.Text at 0x7fbc683ec910>

In [133]:
plot(np.abs(dsin[46,:]))
title('maginude at target frequency bin over time')


Out[133]:
<matplotlib.text.Text at 0x7fbc6837fe10>

In [134]:
plot(np.angle(dsin)[46,:])
title('angle at target frequency bin over time')


Out[134]:
<matplotlib.text.Text at 0x7fbc6855cc10>

In [78]:
np.fft.fftfreq(dsin.shape[0])*44100


Out[78]:
array([   0.        ,   43.02439024,   86.04878049, ..., -129.07317073,
        -86.04878049,  -43.02439024])

In [79]:
# given number of bins in freq space in D and sample rate, lookup
# frequence of sound in bin 46 which matches ~2khz sin wave
# generated above
(np.fft.fftfreq(dsin.shape[0])*44100)[46]


Out[79]:
1979.1219512195121

In [80]:
# http://stackoverflow.com/questions/19122157/fft-bandpass-filter-in-python
# a low pass filter requires cutting signal on both ends of
# frequency dimension coming out of fft
# plot(abs(np.fft.fftfreq(dsin.shape[0])*44100) < 22020)
plot(abs(np.fft.fftfreq(dsin.shape[0])*44100) < 2000)
xlim((0,dsin.shape[0]))


Out[80]:
(0, 1025)

In [59]:
# therefor, if there is going to be a bandpass filter 
# layer of some kind, it will likely make sense to have
# it symetric in this mirrored space in D

In [ ]:


In [1289]:
y.shape


Out[1289]:
(2646016,)

In [1290]:
def play(audio):
    import pyaudio

    # instantiate PyAudio (1)
    p = pyaudio.PyAudio()

    # open stream (2), 2 is size in bytes of int16
    stream = p.open(format=p.get_format_from_width(2),
                    channels=1,
                    rate=44100,
                    output=True)

    # play stream (3), blocking call
    stream.write(audio)

    # stop stream (4)
    stream.stop_stream()
    stream.close()

    # close PyAudio (5)
    p.terminate()

In [1291]:
back_y = np.array(back_y, dtype=y.dtype)

In [1306]:
play(back_y[:44100*5])

In [1650]:
# gend = np.eye(D.shape[0]) * 100000000
l = 250
i = 15
gend = np.zeros((D.shape[0], l))
#gend[i-1,:] = 1e7
gend[i,:] = 1e7
# gend[i,:] = 1e7 * np.random.random(l)
# gend[i+1,:] = 1e7 * -0.5
# gend[47,:] = 1e7 * -0.5
# gend[46,:] = 2**15

In [1651]:
gendi = np.zeros((D.shape[0], l))
# gendi = np.random.random((D.shape[0], l)) * 2**20
# gendi[45:47,:] = np.random.random((2, l)) * 1e7
# gendi[100,:] = np.sin(np.linspace(-100*np.pi, 100*np.pi, D.shape[0])) * 2**15

In [1652]:
gend = gend + (gendi * 1j)

In [1653]:
# gen = istft(gend, window=np.hanning)
gen = istft(gend)

In [1654]:
imshowsq(abs(gend)[38:54,:])



In [1655]:
max(gen) / 2.0**15


Out[1655]:
0.25706663727760315

In [1656]:
# run signal through 'compressor' so that signal uses full amplitude range
gen *= (2.0**15 - 1) / max(gen)

In [1657]:
max_volume = max(gen) / 2.0**15
print max_volume
assert max_volume < 1


0.999969482422

In [1658]:
gen = np.array(gen, dtype='int16')

In [1659]:
gen


Out[1659]:
array([-30390, -29620, -28785, ..., -27887, -28785, -29620], dtype=int16)

In [1660]:
len(gen) / 44100.


Out[1660]:
2.8908843537414968

In [1661]:
play(gen)

In [1662]:
plot(gen[:550])


Out[1662]:
[<matplotlib.lines.Line2D at 0x7fc0fa49cc90>]

In [1523]:
plot(gendi[100,000:600])


Out[1523]:
[<matplotlib.lines.Line2D at 0x7fc111c212d0>]

In [1524]:
gend1 = stft(gen)

In [1525]:
imshowsq(np.abs(gend1)[38:54,:10])



In [164]:
import time

# instantiate PyAudio (1)
p = pyaudio.PyAudio()

class WavIO(object):
    def __init__(self, y):
        self.y = y
        self.i = 0
    
    def getnchannels(self):
        if len(self.y.shape) == 1:
            return 1
        else:
            raise NotImplemented
    
    def getsampwidth(self):
        return 2
    
    def getframerate(self):
        return 44100

    def readframes(self, size):
        buf = self.y[self.i:self.i + size]
        self.i += size
        return buf.astype('int16')
    
    def close(self):
        pass
    
wio = WavIO(y)

# define callback (2)
def callback(in_data, frame_count, time_info, status):
    data = wio.readframes(frame_count)
    return (data, pyaudio.paContinue)

print p.get_format_from_width(1)
print p.get_format_from_width(2)
print p.get_format_from_width(4)

# open stream using callback (3)
stream = p.open(format=p.get_format_from_width(wio.getsampwidth()),
                channels=wio.getnchannels(),
                rate=wio.getframerate(),
                output=True,
                stream_callback=callback)

# start the stream (4)
stream.start_stream()

# wait for stream to finish (5)
while stream.is_active():
    time.sleep(0.1)

# stop stream (6)
stream.stop_stream()
stream.close()
wio.close()

# close PyAudio (7)
p.terminate()


32
8
1