In [32]:
from essentia.standard import *
from pylab import plot, show, figure, imshow, axis, subplot
from numpy import *
# --- Constants ---------------------------------------------------------------
# NOTE(review): sr is not passed to any algorithm in this cell; presumably it
# records the sample rate the loader is expected to deliver -- confirm.
sr = 44100
inputDir = 'input/'
outputDir = 'output/'
# Alternative recording, kept for quick switching:
#inputFile = 'Safari_20141129_1443_CommercialDetection_irregular_short.aiff'
inputFile = 'Safari_20141212_1103_CommercialDetection_pandora.aiff'

# --- Storage -----------------------------------------------------------------
# NOTE(review): the name `essentia` is not imported explicitly above; it
# presumably arrives through `from essentia.standard import *` -- confirm.
pool = essentia.Pool()

# Written as a call so the line also parses on Python 3; with a single string
# argument it prints exactly the same text on Python 2.
print('processing: ' + inputFile)

# --- Processing module setup -------------------------------------------------
# Load the input file as a mono signal.
audio = MonoLoader(filename = inputDir+inputFile)()
w = Windowing(type = 'hann')
spectrum = Spectrum() # FFT() would return the complex FFT, here we just want the magnitude spectrum
mfcc = MFCC()
rms = RMS()
# Frame-wise level extraction; the hop is a quarter of the frame size.
levels = LevelExtractor(frameSize = 20480, hopSize = 5120)
loudness = Loudness()
# Frames whose extracted level is below this value are treated as silent.
silenceThreshold = 1.0e-4
In [33]:
# Processing stream.

# Step 1: run the level extractor over the whole signal and store its output.
frameLevels = levels(audio)
pool.add('lowlevel.levels', frameLevels)

# Step 2: flag the frames that fall below the silence threshold. The levels
# are read back from the pool's first 'lowlevel.levels' entry, and the
# comparison result is wrapped with essentia.array before being stored.
storedLevels = pool['lowlevel.levels'][0]
belowThreshold = storedLevels < silenceThreshold
pool.add('silenceDetected', essentia.array(belowThreshold))
In [39]:
# Prepare the inputs for locating where each silent stretch starts and stops.
# sil: the first 'silenceDetected' entry held in the pool.
sil = pool['silenceDetected'][0]
# silences: starts empty; the edge-detection cell below fills it with
# {frame index: edge code}.
silences = dict()
In [35]:
# Sanity check: display the dimensions of the silence-flag array.
sil.shape
Out[35]:
In [19]:
# Sanity check: display the dimensions of the thresholded level mask after it
# has been wrapped with essentia.array.
thresholdMask = pool['lowlevel.levels'][0] < silenceThreshold
essentia.array(thresholdMask).shape
Out[19]:
In [40]:
# Walk over the silence flags and mark the frames where a silent stretch
# begins or ends.
#
# Results:
#   it.operands[1] -- array allocated by the iterator, one code per frame:
#                     0 = no edge, 1 = beginning of silence, 2 = end of silence
#   silences       -- dict {frame index: 1 or 2}, holding the edge frames only
#
# Frames before the first and after the last frame are treated as non-silent.
# Consequently a silence that starts on frame 0 is reported as a beginning,
# and a silence that is still running on the last frame is reported as an end
# there (previously this case indexed past the end of `sil`).
#
# Limitation: a silence lasting exactly one frame is both a beginning and an
# end, but only a single code fits per frame; it is recorded as a beginning (1).
it = nditer([sil, None],
            flags=['c_index', 'refs_ok'],
            op_flags=[['readonly'], ['writeonly', 'allocate']])
lastFrame = len(sil) - 1
while not it.finished:
    frame = it.index
    edge = 0  # default: this frame is not an edge
    if sil[frame] == 1.0:
        # Guard both neighbours so the first and last frames never index
        # outside the array (or wrap around through a negative index).
        prevSilent = frame > 0 and sil[frame - 1] != 0
        nextSilent = frame < lastFrame and sil[frame + 1] != 0
        if not prevSilent:
            edge = 1  # beginning of silence
        elif not nextSilent:
            edge = 2  # end of silence
    it[1] = edge
    if edge:
        silences[frame] = edge
    it.iternext()
In [41]:
# Store the per-frame edge codes (the iterator's allocated second operand)
# in the pool under 'silenceEdges'.
edgeCodes = it.operands[1]
pool.add('silenceEdges', edgeCodes)
In [42]:
# Display the recorded edges: frame index -> 1 (beginning of silence) or
# 2 (end of silence).
silences
Out[42]:
In [ ]: