In [10]:
from pysoundfile import SoundFile
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import scipy, pylab
import copy
from mpltools import style
# Apply the 'ggplot' style sheet via mpltools so later figures share one look.
style.use('ggplot')
# Default figure size (width, height) in inches for figures created after this line.
mpl.rcParams['figure.figsize'] = (18, 7)
def stft(x, framesamp, hop):
    """Short-time (Hamming-windowed) Fourier magnitude spectrum of a 1-D signal.

    The signal is cut into frames of ``framesamp`` samples whose starts are
    ``hop`` samples apart.  Each frame is multiplied by a Hamming window and
    transformed with a real FFT; the magnitude of every bin is kept.

    Parameters
    ----------
    x : 1-D sequence of real samples.
    framesamp : int, number of samples per frame.
    hop : int, number of samples between the starts of consecutive frames.

    Returns
    -------
    numpy.ndarray of float64 with shape ``(n_frames, framesamp // 2 + 1)``;
    row ``k`` holds the magnitudes for the frame starting at sample ``k * hop``.
    When ``x`` is shorter than one frame the result has zero rows but keeps
    its second dimension, so 2-D indexing by callers still works.
    """
    x = np.asarray(x)
    # numpy's window function; the top-level scipy alias no longer exists.
    window = np.hamming(framesamp)
    # "+ 1" so the final full frame (starting at len(x) - framesamp) is included.
    starts = range(0, len(x) - framesamp + 1, hop)
    # rfft yields complex bins.  Take the magnitude explicitly: casting the
    # complex values to float64 would silently throw away the imaginary part.
    frames = [np.abs(np.fft.rfft(window * x[i:i + framesamp])) for i in starts]
    if not frames:
        return np.empty((0, framesamp // 2 + 1), dtype=np.float64)
    return np.array(frames, dtype=np.float64)
# --- Tunable parameters -------------------------------------------------
SUBSAMPLE_STEP = 1000  # keep every 1000th value of the flattened recording
FRAME_SIZE = 200       # samples per STFT frame (rfft gives FRAME_SIZE // 2 + 1 bins)
HOP_SIZE = 1           # samples between the starts of consecutive frames
BAND_BINS = 20         # number of lowest-index frequency bins summed per frame
BAND_THRESHOLD = 2     # arbitrary cut-off applied to the summed band signal

# Read the wav file, flatten it and subsample it.
data = np.ravel(SoundFile("tickle_kitchen.wav"))[::SUBSAMPLE_STEP]

# Short-time Fourier transform, transposed for viewing: after the transpose
# each row is one frequency bin and each column is one frame.
out = stft(data, FRAME_SIZE, HOP_SIZE).T

# Band signal: sum of the first BAND_BINS frequency rows for every frame.
# The band limits still need tuning.
outRaw = np.sum(out[:BAND_BINS, :], axis=0)

# Simple detector: 1 where the band signal reaches the threshold, 0 elsewhere.
# outRaw is left untouched so both can be plotted.
outD = (outRaw >= BAND_THRESHOLD).astype(outRaw.dtype)

# Plot the four stages on a shared x axis.  The spectrogram may show a small
# lag relative to the raw signal because of the windowing.
f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True)
ax1.plot(data)
ax1.set_title("Subsampled signal")
ax2.imshow(out, aspect='auto')
ax2.set_title("Spectrogram (frequency bin vs. frame)")
ax3.plot(outRaw)
ax3.set_title("Summed band signal")
# A plain threshold; some analysis in the time domain would clearly improve it.
# Author's observation on this recording: it misses the first noise but
# catches the last two.
ax4.plot(outD)
ax4.set_title("Thresholded band signal")
ax4.set_xlabel("Sample / frame index");
Out[10]:
In [5]:
# Markov laughing: every syllable is drawn at random, with probabilities that
# depend on the syllable drawn just before it.

# Transition matrix: row = previous state, column = next state.
# Every row must sum to 1.
decision = np.array([[0.2, 0.3, 0.5],
                     [0.3, 0.4, 0.3],
                     [0.8, 0.1, 0.1]])
assert np.allclose(decision.sum(axis=1), 1.0), "each transition row must sum to 1"

# Laugh states; the list index is the Markov state.
event = ["hee", "haa", "hoo"]


def laugh_sequence(transitions, sounds, length, start=0):
    """Draw one laugh as a list of ``length`` syllables.

    ``transitions[s]`` is the probability row used to pick the state that
    follows state ``s``; ``sounds[s]`` is the syllable emitted for state ``s``.
    The chain begins in ``start``, which itself is not emitted.
    """
    state = start
    laugh = []
    for _ in range(length):
        # The weights must be passed as ``p=``: the third positional argument
        # of np.random.choice is ``replace``.  Omitting ``size`` returns a
        # scalar, which is safe to use as a list index.
        state = int(np.random.choice(len(sounds), p=transitions[state]))
        laugh.append(sounds[state])
    return laugh


# Number of laughs.
for i in range(10):
    # Three syllables per laugh; every laugh restarts from state 0.
    # The trailing "\n" leaves a blank line between laughs.
    print(" ".join(laugh_sequence(decision, event, 3)) + "\n")
In [ ]: