notebook.community

Edit and run



In [1]:

    
# learning from http://samcarcagno.altervista.org/blog/basic-sound-processing-python/



In [2]:

    
import pylab
from scipy.io import wavfile



In [3]:

    
sampFreq, snd = wavfile.read('440_sine.wav')



In [4]:

    
sampFreq









    Out[4]:





44100



In [5]:

    
snd









    Out[5]:





array([[0, 0],
       [0, 0],
       [0, 0],
       ..., 
       [0, 0],
       [0, 0],
       [0, 0]], dtype=int16)



In [6]:

    
#the scipy.io.wavfile.read function reads wav files as int16 (for 16-bit wavs) or int32 (for 32-bit wavs), 24-bit wav files are not supported



In [7]:

    
#this means that the sound pressure values are mapped to integer values that can range from -2^15 to (2^15)-1



In [8]:

    
snd.shape









    Out[8]:





(5292, 2)



In [9]:

    
# ie two channels 5292 sample points



In [10]:

    
snd = snd / (2.**15) #converts our sound array to floating point values ranging from -1 to 1



In [11]:

    
5292.0 / sampFreq # considering the sampling rate (sampFreq = 44110) this corresponds to a duration of 120 ms









    Out[11]:





0.12



In [12]:

    
len(snd)









    Out[12]:





5292



In [13]:

    
snd[4]









    Out[13]:





array([ 0.,  0.])



In [14]:

    
snd[2500]









    Out[14]:





array([-0.01290894, -0.01290894])



In [15]:

    
# two values for two channels



In [16]:

    
s1 = snd[:,0] # work with one channel



In [17]:

    
#if you’re interested in having sound playback from python check out pyalsaaudio (only Linux) or PyAudio



In [18]:

    
timeArray = pylab.arange(0, 5292, 1)



In [19]:

    
timeArray









    Out[19]:





array([   0,    1,    2, ..., 5289, 5290, 5291])



In [20]:

    
timeArray = timeArray / float(sampFreq)



In [21]:

    
timeArray









    Out[21]:





array([  0.00000000e+00,   2.26757370e-05,   4.53514739e-05, ...,
         1.19931973e-01,   1.19954649e-01,   1.19977324e-01])



In [22]:

    
timeArray = timeArray * 1000



In [23]:

    
%matplotlib inline
import matplotlib.pyplot as plt



In [24]:

    
plt.plot(timeArray, s1, color='k')
plt.ylabel('Amplitude')
plt.xlabel('Time (ms)')









    Out[24]:





<matplotlib.text.Text at 0x10dbbb210>



In [25]:

    
#Another useful graphical representation is that of the frequency content, or spectrum of the tone



In [26]:

    
#obtain the frequency spectrum of the sound using the fft function, that implements a Fast Fourier Transform algorithm



In [27]:

    
# We’ll follow closely the technical document available https://web.archive.org/web/20120615002031/http://www.mathworks.com/support/tech-notes/1700/1702.html to obtain the power spectrum of our sound.



In [28]:

    
p = pylab.fft(s1) # take the fourier transform



In [29]:

    
p #array of complex numbers









    Out[29]:





array([-0.33663940+0.j        ,  0.26251154-0.3079739j ,
        0.52259076+0.24347809j, ...,  0.26140157+0.16932575j,
        0.52259076-0.24347809j,  0.26251154+0.3079739j ])



In [30]:

    
n = len(s1)
n









    Out[30]:





5292



In [31]:

    
nUniquePts = int(pylab.ceil((n+1)/2.0)) #ceil give the ceiling of a value i.e the smallest integer larger than that value



In [32]:

    
nUniquePts #number of unique points









    Out[32]:





2647



In [33]:

    
p = p[0:nUniquePts]
p









    Out[33]:





array([ -3.36639404e-01 +0.00000000e+00j,
         2.62511545e-01 -3.07973896e-01j,
         5.22590764e-01 +2.43478093e-01j, ...,
         5.57675918e-04 -3.53467288e-04j,
        -2.72537919e-04 -1.35899341e-04j,   5.18798828e-04 -4.40619763e-16j])



In [34]:

    
p = abs(p)
p









    Out[34]:





array([  3.36639404e-01,   4.04672994e-01,   5.76526399e-01, ...,
         6.60258702e-04,   3.04541538e-04,   5.18798828e-04])



In [35]:

    
len(p)









    Out[35]:





2647



In [36]:

    
p = p / float(n) # scale by the number of points so that
                 # the magnitude does not depend on the length 
                 # of the signal or on its sampling frequency  
p = p**2  # square it to get the power 

# multiply by two (see technical document for details)
# odd nfft excludes Nyquist point
if n % 2 > 0: # we've got odd number of points fft
    p[1:len(p)] = p[1:len(p)] * 2
else:
    p[1:len(p) -1] = p[1:len(p) - 1] * 2 # we've got even number of points fft

freqArray = pylab.arange(0, nUniquePts, 1.0) * (sampFreq / n);
pylab.plot(freqArray/1000, 10*pylab.log10(p), color='k')
pylab.xlabel('Frequency (kHz)')
pylab.ylabel('Power (dB)')









    Out[36]:





<matplotlib.text.Text at 0x110264a50>



In [37]:

    
sampFreq_pianonote, snd_pianonote = wavfile.read('68448__pinkyfinger__piano-g.wav')



In [38]:

    
sampFreq_pianonote









    Out[38]:





44100



In [39]:

    
snd_pianonote









    Out[39]:





array([[15893, -2626],
       [16801, -2928],
       [16678, -3737],
       ..., 
       [-3060,   810],
       [-3310,   622],
       [-3449,   447]], dtype=int16)



In [40]:

    
snd_pianonote.shape









    Out[40]:





(68608, 2)



In [41]:

    
p1 = snd_pianonote[:,0]
p1









    Out[41]:





array([15893, 16801, 16678, ..., -3060, -3310, -3449], dtype=int16)



In [42]:

    
p1 = p1 #/ (2.**15) #?
p1









    Out[42]:





array([15893, 16801, 16678, ..., -3060, -3310, -3449], dtype=int16)



In [43]:

    
timeArray = pylab.arange(0, 68608, 1) 
timeArray









    Out[43]:





array([    0,     1,     2, ..., 68605, 68606, 68607])



In [44]:

    
timeArray = (timeArray / float(sampFreq)) * 1000



In [45]:

    
plt.plot(timeArray, p1, color='k')
plt.ylabel('Amplitude')
plt.xlabel('Time (ms)')









    Out[45]:





<matplotlib.text.Text at 0x11047f850>



In [46]:

    
fp1 = pylab.fft(p1)



In [47]:

    
fp1









    Out[47]:





array([-8212633.00000000      +0.j        ,
       -6853172.97311141+2286848.53259211j,
       -5532689.89191784+4314590.84317032j, ...,
       -3599135.55162322-5381329.84500757j,
       -5532689.89191788-4314590.84317029j,
       -6853172.97311142-2286848.53259206j])



In [48]:

    
absfp1 = abs(fp1)
absfp1









    Out[48]:





array([ 8212633.        ,  7224656.11710366,  7016149.34163313, ...,
        6473985.45099751,  7016149.34163314,  7224656.11710366])



In [49]:

    
absfp1 = absfp1 / float(n) 
power = absfp1**2  


# if len(power) % 2 > 0: # we've got odd number of points fft
#     power[1:len(power)] = power[1:len(power)] * 2
# else:
#     p[1:len(power) -1] = p[1:len(power) - 1] * 2 # we've got even number of points fft

freqArray = pylab.arange(0, len(power), 1.0) * (sampFreq_pianonote / float(len(power)));
pylab.plot(freqArray/1000, 10*pylab.log10(power), color='k')
pylab.xlabel('Frequency (kHz)')
pylab.ylabel('Power (dB)')









    Out[49]:





<matplotlib.text.Text at 0x1114f5590>



In [50]:

    
power









    Out[50]:





array([ 2408380.82414395,  1863780.18112607,  1757753.52748323, ...,
        1496593.19832612,  1757753.52748323,  1863780.18112607])



In [51]:

    
#http://stackoverflow.com/questions/604453/analyze-audio-using-fast-fourier-transform



In [52]:

    
#https://www.quora.com/How-does-the-sampling-rate-influence-the-frequency-resolution-in-frequency-spectrum-in-FFT



In [53]:

    
#http://web.itu.edu.tr/hulyayalcin/Signal_Processing_Books/RichardLyons_Understanding_DSP__2001.pdf