In [29]:
import librosa
"""
# You might consider soundfile unless loading mp3 is your concern.
import soundfile as sf
# macOS, Windows: pip install soundfile
# Linux: pip install soundfile && sudo apt-get install libsndfile1
"""
import keras
import kapre
from keras.models import Sequential
from kapre.time_frequency import Spectrogram
import numpy as np
from datetime import datetime
# Timestamp taken once, when this cell runs; print_version_info() reads it.
now = datetime.now()


def print_version_info():
    """Print today's date and the versions of Keras, its backend, and Kapre.

    The date comes from the module-level ``now``. The backend name is read
    once through the public ``keras.backend.backend()`` accessor, so the
    branch condition and the printed name always agree; the earlier code
    compared one private attribute (``_BACKEND``) and printed another
    (``_backend``), neither of which is part of the documented API.

    Returns:
        None. Everything is written to stdout.
    """
    print('%s/%s/%s' % (now.year, now.month, now.day))
    print('Keras version: {}'.format(keras.__version__))

    backend_name = keras.backend.backend()
    # The backend package is imported lazily so only the one in use is needed.
    if backend_name == 'tensorflow':
        import tensorflow
        backend_version = tensorflow.__version__
    else:
        # As before, any non-TensorFlow backend is treated as Theano.
        import theano
        backend_version = theano.__version__
    print('Keras backend: {}: {}'.format(backend_name, backend_version))

    print('Keras image dim ordering: {}'.format(keras.backend.image_dim_ordering()))
    print('Kapre version: {}'.format(kapre.__version__))


print_version_info()
In [30]:
# Load the demo clip as a single-channel signal; sr=None is passed so the
# sampling rate is taken from the file rather than forced to a fixed value.
audio_path = 'bensound-cute.mp3'
src, sr = librosa.load(audio_path, sr=None, mono=True)
# Show the signal's shape and its sampling rate, one per line.
print(src.shape, sr, sep='\n')
In [31]:
# Keep only the first `len_second` seconds of the clip and give it a leading
# axis of length 1, so `src` ends up with shape (1, n_samples).
#
# The slice is taken along the LAST axis and the result is reshaped to a
# single row. That yields the same (1, n) array whether `src` is still the
# 1-D mono signal from the load cell or has already been through this cell,
# so re-running the cell no longer stacks an extra axis onto `src` (which
# previously produced (1, 1, n) and broke `input_shape`).
# Assumes a mono signal, as loaded with mono=True above.
len_second = 1.0  # 1 second
n_keep = int(sr * len_second)
src = src[..., :n_keep].reshape(1, -1)
input_shape = src.shape
print(input_shape)
In [32]:
# Build a toy batch: the same clip repeated along a new leading axis.
n_copies = 16
x = np.array((src,) * n_copies)
print(x.shape)
In [33]:
# First layer: kapre's Spectrogram, configured with a 512-point DFT, a hop of
# 256 samples, decibel-scaled power output and a non-trainable kernel. It also
# carries the model's input shape.
stft_layer = Spectrogram(
    n_dft=512,
    n_hop=256,
    input_shape=input_shape,
    return_decibel_spectrogram=True,
    power_spectrogram=2.0,
    trainable_kernel=False,
    name='static_stft',
)

# Remaining layers, in the order they are stacked: conv -> pool -> conv ->
# flatten -> 10-way softmax.
layer_stack = [
    stft_layer,
    keras.layers.Convolution2D(32, (3, 3), name='conv1', activation='relu'),
    keras.layers.MaxPooling2D((25, 17)),
    keras.layers.Convolution2D(32, (10, 10), name='conv2', activation='relu'),
    keras.layers.Flatten(),
    keras.layers.Dense(10, activation='softmax'),
]

model = Sequential()
for layer in layer_stack:
    model.add(layer)

model.summary(line_length=80, positions=[.33, .65, .8, 1.])
In [34]:
# model.fit()  # Training is skipped in this demo, so the predictions below come from untrained weights.
In [35]:
# Run the batch through the model, then print the index of the
# highest-scoring output for each item in the batch.
y = model.predict(x)
predicted_classes = y.argmax(axis=1)
print(predicted_classes)
In [ ]: