In [1]:
from keras.models import Sequential
from keras.layers import Convolution1D,Dropout,Dense, MaxPooling1D, Activation,Reshape
from keras import metrics
In [3]:
# Dataset locations.
# NOTE(review): absolute, machine-specific paths — consider a configurable
# DATA_DIR so the notebook runs on other machines.
datasetMusan = "/home/joseildo/codigos/voiceDetection/musan/"
datasetMusanSepareted = "/media/joseildo/DATA/Bmusan"
datasetCodeTest = "/home/joseildo/codigos/voiceDetection/dataCodeTest/"
# Persist the paths across kernel restarts (reload elsewhere with `%store -r`).
%store datasetMusan
%store datasetMusanSepareted
%store datasetCodeTest
In [4]:
# Dense (MLP) model: 1600-sample windows -> 2-way softmax.
modelo_2 = Sequential([
    Dense(1600, activation="sigmoid", input_shape=(None, 1600)),
    Dense(800, activation="sigmoid"),
    Dropout(0.2),
    Dense(100, activation="relu"),
    Dense(2, activation="softmax"),
])
modelo_2.compile(
    loss="categorical_crossentropy",
    optimizer="sgd",
    metrics=[metrics.categorical_accuracy],
)
In [5]:
# 1-D convolutional model: conv/pool feature extraction -> 2-way softmax.
modelo_3 = Sequential([
    Convolution1D(1600, 100, padding="same", input_shape=(None, 1600)),
    MaxPooling1D(50, padding="same"),
    Activation("relu"),
    Convolution1D(400, 40, padding="same"),
    MaxPooling1D(50, padding="same"),
    Activation("relu"),
    Dense(100),
    Activation("tanh"),
    Dropout(0.5),
    Dense(2),
    Activation("softmax"),
])
modelo_3.compile(
    loss="categorical_crossentropy",
    optimizer="sgd",
    metrics=["acc"],
)
In [12]:
from Folder import Folder
from keras import metrics
from pydub import AudioSegment
import numpy as np
from keras.utils import np_utils

## folders
pasta = Folder(src=datasetMusanSepareted)
pasta.regex = "(?=silence)"
silencio = pasta.sliceF(0.52).loadLowerSlicedToMemory()
pasta.regex = "(?=speech)"
voz = pasta.sliceF(0.035).loadLowerSlicedToMemory()

tempo = 100       # window length, in frames of each audio segment
dataSize = 50000  # cap on examples taken per class


def _extract_windows(segments, window, label_char, margin):
    """Slide a non-overlapping window over each segment.

    Returns (samples, labels): one sample-array and one `label_char` per
    window. The last `margin` frames of each segment are never used as a
    window start. Replaces the two copy-pasted loops the original had for
    silence and speech.
    """
    samples, labels = [], []
    for segment in segments:
        for start in range(0, len(segment) - margin, window):
            samples.append(segment[start:start + window].get_array_of_samples())
            labels.append(label_char)
    return samples, labels


silencioData, silencioLabel = _extract_windows(silencio, tempo, "0", tempo)
vozData, vozLabel = _extract_windows(voz, tempo, "1", tempo)
data, label = silencioData[:dataSize] + vozData[:dataSize], silencioLabel[:dataSize] + vozLabel[:dataSize]
print("quantidade de silencio:",len(silencioData),"quantidade de voz",len(vozData))
## test model
# Wrap in an extra list dimension to match the (None, 1600) input_shape.
data, label = np.asarray([data]), np.asarray([np_utils.to_categorical(label, 2)])
print(data.shape)
modelo_2.fit(data, label, shuffle=True)
# NOTE(review): evaluation below reuses the training windows (no held-out
# split), so these scores measure fit, not generalization.
print(modelo_2.evaluate(np.asarray([vozData]),np.asarray([np_utils.to_categorical(vozLabel,2)])))
print(modelo_2.evaluate(np.asarray([silencioData]),np.asarray([np_utils.to_categorical(silencioLabel,2)])))
In [ ]:
from Folder import Folder
from keras import metrics
from pydub import AudioSegment
import numpy as np
from keras.utils import np_utils

## folders
pasta = Folder(src=datasetCodeTest)
pasta.regex = "(?=silence)"
silencio = pasta.sliceF(1).loadLowerSlicedToMemory()
pasta.regex = "(?=speech)"
voz = pasta.sliceF(1).loadLowerSlicedToMemory()

tempo = 100       # window length, in frames of each audio segment
dataSize = 50000  # cap on examples taken per class


def _windows_from_segments(segments, window, label_char, margin):
    """Slide a non-overlapping window over each segment.

    Returns (samples, labels): one sample-array and one `label_char` per
    window. Window starts stop `margin` frames before each segment's end.
    Replaces the two copy-pasted loops the original had for silence/speech.
    """
    samples, labels = [], []
    for segment in segments:
        for start in range(0, len(segment) - margin, window):
            samples.append(segment[start:start + window].get_array_of_samples())
            labels.append(label_char)
    return samples, labels


silencioData, silencioLabel = _windows_from_segments(silencio, tempo, "0", 2 * tempo)
vozData, vozLabel = _windows_from_segments(voz, tempo, "1", 2 * tempo)
data, label = silencioData[:dataSize] + vozData[:dataSize], silencioLabel[:dataSize] + vozLabel[:dataSize]
print("quantidade de silencio:",len(silencioData),"quantidade de voz",len(vozData))
## test model
data, label = np.asarray([data]), np.asarray([np_utils.to_categorical(label, 2)])
# Reshape to (examples, 1, features) as the Conv1D model expects one
# timestep per example.
data, label = data.reshape(data.shape[1], 1, data.shape[2]), label.reshape(label.shape[1], 1, label.shape[2])
print(data.shape)
print(label.shape)
modelo_3.fit(data, label, shuffle=True)
# NOTE(review): evaluation below reuses the training windows (no held-out
# split), so these scores measure fit, not generalization.
print(modelo_3.evaluate(np.asarray(vozData),np.asarray(np_utils.to_categorical(vozLabel,2))))
print(modelo_3.evaluate(np.asarray(silencioData),np.asarray(np_utils.to_categorical(silencioLabel,2))))
Note: FMA contains voice.
In [1]:
from keras.models import load_model
import data

audios = "/home/gtad/gapsSom/voiceDetection/musan/"
data.iterateOnFold(audios)

# Count examples per class. Labels in y[0] are one-hot rows; column order
# observed here: [silence, speech, music, noise].
silence_quant = 0
speech_quant = 0
music_quant = 0
noise_quant = 0
errors = 0
while data.hasNext():
    # The original used a bare `except: pass`, silently discarding any
    # failing batch; report the failures instead of hiding them.
    try:
        x, y = data.next()
        for row in y[0]:
            if row[0] == 1:
                silence_quant += 1
            elif row[1] == 1:
                speech_quant += 1
            elif row[2] == 1:
                music_quant += 1
            elif row[3] == 1:
                noise_quant += 1
    except Exception:
        errors += 1

count = silence_quant + speech_quant + music_quant + noise_quant
if errors:
    print("batches skipped due to errors:", errors)
if count:  # guard: avoid ZeroDivisionError when nothing was counted
    print("\nsilence percentage: %.2f"%(silence_quant/count))
    print("speech percentage: %.2f"%(speech_quant/count))
    print("music percentage: %.2f"%(music_quant/count))
    print("noise percentage: %.2f"%(noise_quant/count))
else:
    print("no labeled examples found")
In [9]:
from pydub import AudioSegment as AS
from pydub import silence as dub
from pydub.utils import mediainfo
import os

# Total duration (seconds) and file count per category, classified by
# substring of the filename.
music = 0
mC = 0
noise = 0
nC = 0
speech = 0
sC = 0
os.chdir("/home/joseildo/codigos/voiceDetection/musan/")
for audio in os.listdir():
    if not os.path.isfile(audio):
        continue  # skip subdirectories — mediainfo expects a media file
    sound = mediainfo(audio)
    duration = sound.get("duration")
    if duration is None:
        # Original indexed sound["duration"] directly, which raises KeyError
        # for files ffprobe cannot read; skip and report instead.
        print("no duration for:", audio)
        continue
    if "speech" in audio:
        speech += float(duration)
        sC += 1
    elif "music" in audio:
        music += float(duration)
        mC += 1
    else:
        noise += float(duration)
        nC += 1

time = music + noise + speech
total = mC + nC + sC
print("music time:",music,"noise time:",noise,"speech time:",speech,"\n")
print("music quant:",mC,"noise quant:",nC,"speech quant:",sC)
print("total time:",time)
if time and total:  # guard: avoid ZeroDivisionError on an empty folder
    print("music percentage of seconds: %.2f"%(music/time))
    print("noise percentage of seconds: %.2f"%(noise/time))
    print("speech percentage of seconds: %.2f"%(speech/time))
    print("music percentage of files: %.2f"%(mC/total))
    print("noise percentage of files: %.2f"%(nC/total))
    print("speech percentage of files: %.2f"%(sC/total))
else:
    print("no audio files found")
In [3]:
from Folder import Folder
from Data import Data

folder = Folder(src="/media/joseildo/DATA/Linux/musan",
                output="/home/joseildo/codigos/voiceDetection/Bmusan/")
i = 0  # running index so silence files get unique names across categories
with Data(folder=folder, thresh=-42) as data:
    # The original repeated this loop body verbatim for music, noise and
    # speech; iterate over the three category regexes instead.
    for regex in ("(?=music)", "(?=noise)", "(?=speech)"):
        folder.regex = regex
        for audio in iter(data):
            # Split each file into silent and sounding parts and store both;
            # sounds keep the source name, silences get a sequential name.
            audio.splitSilence().appendSilences()
            audio.splitSound().appendSounds()
            audio.storeSounds(name=audio.audiosName)
            audio.storeSilences(name="silence-"+str(i))
            i += 1
In [ ]:
import time

# Demo: in-place progress counter on a single line.
# The original `print(str("\r"), i, end="")` put the carriage return and the
# number in separate print() arguments, so the default sep inserted a space
# after "\r", and without flushing the counter may not appear until the loop
# ends (stdout is line-buffered and no newline is ever written).
for i in range(100):
    time.sleep(0.5)
    print("\r" + str(i), end="", flush=True)