In [1]:
# All the audio processing tools are wrapped in the Python Module called AudioPipe
import AudioPipe.speaker.recognition as SR # Speaker Recognition Module
import AudioPipe.fingerprint.panako as FP # Acoustic Fingerprinting Module
from AudioPipe.speaker.silence import remove_silence # tool for removing silence in the audio, not needed here
import numpy as np
from AudioPipe.features import mfcc # Feature Extraction Module, part of the shared preprocessing
import scipy.io.wavfile as wav 
from AudioPipe.speaker.rec import dia2spk, getspk # Speaker Recognition using diarization results
from AudioPipe.utils.utils import video2audio # Format converting module, part of the shared preprocessing
import commands, os # NOTE: `commands` is Python 2 only (removed in Python 3; use subprocess instead)
from AudioPipe.diarization.diarization import Diarization # Speaker Diarization Module
import AudioPipe.data.manage as DM # Data Management Module

In [2]:
# Select the video file to be processed
Video_node = DM.Node("Data/Video/",".mp4") # data node rooted at the video directory, .mp4 extension
name = "2015-08-07_0050_US_FOX-News_US_Presidential_Politics" # basename (no extension) of the recording; reused by every later cell


(0, '')

In [3]:
# Select the file for the meta information
Meta_node = DM.Node("Data/RedHen/",".seg") # node over the RedHen .seg metadata files
meta = Meta_node.Pick(name) # presumably the .seg entry matching `name` -- see AudioPipe.data.manage


(0, '')

In [4]:
# Convert the video to audio
Audio_node = DM.Node("Data/Audio/", ".wav") # destination node for the extracted .wav audio
# Flow() presumably applies video2audio to `name` from Video_node and stores the
# result under Audio_node -- confirm against AudioPipe.data.manage
audio = Video_node.Flow(video2audio, name, Audio_node, [Audio_node.ext])


(0, '')

In [5]:
# Diarization output (.rttm) for this recording; derive the path from `name`
# (defined in the video-selection cell) instead of duplicating the basename
# as a hard-coded literal that can drift out of sync.
dia = "Data/Diarization/" + name + ".rttm"

In [6]:
# Below you can find an example on training a fresh gender model.
# However, please make sure that the sample rate of your training data is the same with your testing data.
model_gender_dir = "Data/Model/Gender/"
model_gender_nm = 'gender.model'
model_gender = os.path.join(model_gender_dir, model_gender_nm)  # idiomatic path join (same result as string concat here)
if not os.path.isfile(model_gender):  # train only once; skip if a saved model already exists
    Gender = SR.GMMRec()  # create a new recognizer
    # Ordered (label, training-audio) pairs; enrollment order is preserved.
    # Add more entries here to enroll additional classes.
    training_files = [
        ('Female', os.path.join(model_gender_dir, 'female.wav')),
        ('Male', os.path.join(model_gender_dir, 'male.wav')),
    ]
    for label, wav_fn in training_files:
        Gender.enroll_file(label, wav_fn)  # enroll one labelled class of training audio
    Gender.train()  # train the GMMs after all training data is enrolled
    Gender.dump(model_gender)  # save the trained model into "gender.model" for future use

In [7]:
# Gender Identification based on Speaker Diarization
Gen_node = DM.Node("Data/Gender/",".gen") # output node for diarization-based gender labels (.gen)
# Flow() presumably runs dia2spk over the audio using the gender model, the
# .rttm diarization and the .seg metadata -- see AudioPipe.speaker.rec
gen = Audio_node.Flow(dia2spk, name, Gen_node, [model_gender, dia, meta, Gen_node.ext])


(0, '')
(0, '')

In [ ]:
# Gender Identification without Speaker Diarization
Genr_node = DM.Node("Data/Gender/",".genr") # output node for non-diarized gender labels (.genr)
# NOTE(review): the result is assigned to `gen`, overwriting the diarization-based
# result from the earlier cell -- consider a distinct name such as `genr`
gen = Audio_node.Flow(getspk, name, Genr_node, [model_gender, meta, Genr_node.ext])


(0, '')
(0, '')

In [9]:
# You can train a fresh speaker model as follows:
model_speaker_dir = "Data/Model/Speaker/"  # the training data should already be here!!
model_speaker_nm = 'speaker.model'
model_speaker = os.path.join(model_speaker_dir, model_speaker_nm)  # idiomatic path join (same result as string concat here)
if not os.path.isfile(model_speaker):  # train only once; skip if a saved model already exists
    Speaker = SR.GMMRec()  # create a new recognizer
    # Ordered (label, training-audio) pairs.  (The original comments were
    # copy-pasted from the gender cell and mislabelled these as female/male.)
    # You can add more speakers here following the same pattern.
    training_files = [
        ('Other', os.path.join(model_speaker_dir, 'Imposter.wav')),  # imposter / everyone-else class
        ('Trump', os.path.join(model_speaker_dir, 'Trump.wav')),     # target speaker
    ]
    for label, wav_fn in training_files:
        Speaker.enroll_file(label, wav_fn)  # enroll one labelled speaker
    Speaker.train()  # train the GMMs after all training data is enrolled
    Speaker.dump(model_speaker)  # save the trained model into "speaker.model" (original comment wrongly said "gender.model")

In [10]:
import time

In [11]:
# Speaker Recognition based on Speaker Diarization
start_time = time.time() # wall-clock start, for the timing report printed below
Spk_node = DM.Node("Data/Speaker/",".spk") # output node for diarization-based speaker labels (.spk)
# Flow() presumably runs dia2spk with the speaker model over the diarized segments -- see AudioPipe.speaker.rec
spk = Audio_node.Flow(dia2spk, name, Spk_node, [model_speaker, dia, meta, Spk_node.ext])
print("---running time: %s seconds ---" % (time.time() - start_time))


(0, '')
(0, '')
---running time: 75.5289838314 seconds ---

In [ ]:
# Speaker Recognition without Speaker Diarization
start_time = time.time() # wall-clock start, for the timing report printed below
Spkr_node = DM.Node("Data/Speaker/",".spkr") # output node for non-diarized speaker labels (.spkr)
spkr = Audio_node.Flow(getspk, name, Spkr_node, [model_speaker, meta, Spkr_node.ext])
print("---running time: %s seconds ---" % (time.time() - start_time))

In [64]:
def uid2time(uid, prefix='', more_entropy=0):
    """Invert uniqid(): recover the float timestamp packed into `uid`.

    Strips `prefix` from the front and `more_entropy` random characters
    from the end, then reads the first 12 hex digits as the integer part
    of the timestamp and the remaining hex digits as microseconds.
    """
    core = uid[len(prefix):]
    if more_entropy:
        core = core[:-more_entropy]
    whole = int(core[:12], 16)
    micros = int(core[12:], 16)
    return whole + micros / 1000000.0

In [65]:
import string, time, math, random
def uniqid(m, prefix='', more_entropy=0):
    """Encode a float timestamp as a hex uid (PHP uniqid-style).

    The integer part of `m` becomes 12 zero-padded hex digits and the
    fractional part becomes 5 hex digits of microseconds, so uid2time()
    can split the uid at a fixed offset of 12.

    Fixes two defects in the original:
    - '%8x' space-padded to only 8 chars, so uid2time's fixed 12-char
      slice broke for any value whose integer part is < 0x100000000000;
      '%012x' zero-pads to the expected 12 digits (identical output for
      the 14-digit date timestamps this notebook uses).
    - Formatting floats with '%x' is a TypeError on Python 3; cast to
      int explicitly.

    Args:
        m: timestamp as a float (e.g. 20150807005009.5).
        prefix: string prepended to the uid.
        more_entropy: number of random lowercase hex chars appended.
    Returns:
        str of length len(prefix) + 17 + more_entropy.
    """
    whole = int(math.floor(m))
    micros = int(round((m - whole) * 1000000))  # rounding avoids float truncation drift
    uniqid = '%012x%05x' % (whole, micros)
    if more_entropy:
        valid_chars = list(set(string.hexdigits.lower()))
        uniqid += ''.join(random.choice(valid_chars) for _ in range(more_entropy))
    return prefix + uniqid

In [75]:
# RedHen timestamps are YYYYMMDDhhmmss.mmm; parsing as float keeps millisecond precision
timestamp = '20150807005009.500'
float(timestamp)


Out[75]:
20150807005009.5

In [76]:
# Pack the timestamp into a uid with 10 random hex characters appended
uid = uniqid(float(timestamp), more_entropy=10)
uid


Out[76]:
'1253b9b187517a120d21d2c04d9'

In [77]:
# Round-trip check: decode the uid back and format to millisecond precision
timestamp1 = uid2time(uid,more_entropy=10)
'%.3f' % timestamp1


Out[77]:
'20150807005009.500'