In [1]:
# From the UBM, build an adapted GMM for each phone

In [2]:
# general imports
import os
import pickle
import numpy as np
import copy

In [3]:
# import the scikit-learn GMM library
from sklearn import mixture

In [4]:
# import custom functions
import sys
# path to libraries
# currently in ../scripts-lib/
tool_path = os.path.abspath('../scripts-lib')

if tool_path not in sys.path:
    sys.path.append(tool_path)
import lib_phones as lph

# print the loaded functions
print dir(lph)[5:]


['find_phone_index', 'load_phone_file']
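
lib_phones is a local helper module whose source is not shown in this notebook. A minimal sketch of what the two loaded functions are assumed to do (illustrative stand-ins, not the actual implementation):

In [ ]:
# assumed behaviour of the lib_phones helpers used below (hypothetical sketch)

def load_phone_file(path):
    # assumed format: one phone symbol per line -> list of phone strings
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]

def find_phone_index(start_t, phone_times, phone_list):
    # assumed: phone_times is the transposed .phn content [starts, ends, phones];
    # return the phone_list index of the segment containing start_t
    # (the real helper may also convert .phn sample counts into seconds)
    starts, ends, phones = phone_times
    for seg_start, seg_end, phone in zip(starts, ends, phones):
        if float(seg_start) <= float(start_t) < float(seg_end):
            return phone_list.index(phone)
    return phone_list.index(phones[-1])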

In [5]:
# load phone list
phone_path = os.path.abspath('../datasets/TIMIT-MFCCs/TIMIT_phone_list.txt')
phone_list = lph.load_phone_file(phone_path)
print len(phone_list), phone_list


61 ['aa', 'ae', 'ah', 'ao', 'aw', 'ax', 'ax-h', 'axr', 'ay', 'b', 'bcl', 'ch', 'd', 'dcl', 'dh', 'dx', 'eh', 'el', 'em', 'en', 'eng', 'epi', 'er', 'ey', 'f', 'g', 'gcl', 'h#', 'hh', 'hv', 'ih', 'ix', 'iy', 'jh', 'k', 'kcl', 'l', 'm', 'n', 'ng', 'nx', 'ow', 'oy', 'p', 'pau', 'pcl', 'q', 'r', 's', 'sh', 't', 'tcl', 'th', 'uh', 'uw', 'ux', 'v', 'w', 'y', 'z', 'zh']

In [6]:
# load MFCCs into sklearn observations; each frame is one observation

train_TIMIT_dir = os.path.abspath('../datasets/TIMIT-MFCCs/dev')

# create an individual observation list for each phone type
train_phone_obs_dict = {}
for phone in phone_list:
    train_phone_obs_dict[phone] = []

# complete observation list and matching labels
train_obs = []
train_obs_labels = []

# walk the directories
for (path, dirs, files) in os.walk(train_TIMIT_dir):
    print "working in path : " + path

    for file in files:
        # skip the SA (dialect) sentences
        # (dev shortcut: add e.g. '"si1573" in file and' to the condition to work on a single file)
        if ".mfcc" in file and "sa" not in file:
            # check that the corresponding .phn file exists
            if not os.path.exists(path + "/" + file[:-8] + "phn"):
                print path + "/" + file[:-8] + "phn"
                print "corresponding .phn file does not exist!"
            else:
                
                print "working on: " + file
#                 print "from path : " + path

                # open the files
                mfcc_file = open(path + "/" + file)
                phn_file = open(path + "/" + file[:-8] + "phn")

                # extract phone times
                phone_times = []
                for phn_line in phn_file:
                    phone_times.append(phn_line.split())
                # transpose for easier use
                phone_times = map(list, zip(*phone_times))

                # skip mfcc_file header
                next(mfcc_file)

                # reset frame count
                frame_cnt = 0

                # for each line of mfcc_file
                for mfcc_line in mfcc_file:

                    # increment frame count
                    frame_cnt += 1 

                    # print "frame line #:", frame_cnt 

                    # frame start time in seconds
                    start_t = mfcc_line.split(";")[1]

                    # create the frame vector (skipping the first 2 values, frame_index and frame_time)
                    frame = map( float,  mfcc_line.split(";")[2:])
                    # print numpy.shape(frame)
                    # print frame

                    # find the corresponding phone and its index in the list
                    phn_index = lph.find_phone_index(start_t, phone_times, phone_list)

                    # add the frame to the list of the corresponding phone
                    train_phone_obs_dict[phone_list[phn_index]].append(frame)
                    # add the frame and its label to the complete lists
                    train_obs.append(frame)
                    train_obs_labels.append(phone_list[phn_index])


working in path : C:\Users\FG\Desktop\PhD\Research\Reservoirs\datasets\TIMIT-MFCCs\dev
working on: si1027.mfcc.csv
working on: si1105.mfcc.csv
working on: si1657.mfcc.csv
working on: si1735.mfcc.csv
working on: si475.mfcc.csv
working on: si648.mfcc.csv
working on: sx115.mfcc.csv
working on: sx127.mfcc.csv
working on: sx205.mfcc.csv
working on: sx217.mfcc.csv
working on: sx25.mfcc.csv
working on: sx295.mfcc.csv
working on: sx307.mfcc.csv
working on: sx37.mfcc.csv
working on: sx385.mfcc.csv
working on: sx397.mfcc.csv
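
For reference, the file formats the loop above assumes: each .mfcc.csv starts with a header row and then holds one ';'-separated row per frame (frame index, frame time in seconds, then the MFCC coefficients), while each TIMIT .phn file lists one segment per line as "start end phone". A hypothetical example of how one row of each is parsed:

In [ ]:
# hypothetical example lines, parsed the same way as in the loop above
example_mfcc_line = "3;0.03;10.2;-5.1;0.7;1.3"   # frame_index;frame_time;MFCC coefficients
print map(float, example_mfcc_line.split(";")[2:])

example_phn_line = "2260 4070 sh"                # segment start, segment end, phone label
print example_phn_line.split()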

In [7]:
print len(train_phone_obs_dict), len(train_phone_obs_dict["h#"])


61 482
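
A quick, optional sanity check (not part of the original run): list the rarest and most frequent phones by frame count. The adaptation step below skips any phone with fewer frames than the UBM has components, so these counts indicate how many phones will actually get an adapted model.

In [ ]:
# optional: per-phone frame counts, from rarest to most frequent
counts = sorted((len(v), k) for k, v in train_phone_obs_dict.items())
print counts[:5], counts[-5:]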

In [8]:
# reload pickled UBM file

pickle_dir = os.path.abspath('../datasets/TIMIT Pickled Data')

# find the UBM pickle file name
for file in os.listdir(pickle_dir):
    if "TIMIT" in file and 'ubm' in file and ".pckl" in file:
        pickle_name = file

ubm = pickle.load( open(pickle_dir + os.sep + pickle_name, "rb") )
print "loaded UBM gmm from ", pickle_dir + os.sep + pickle_name


loaded UBM gmm from  C:\Users\FG\Desktop\PhD\Research\Reservoirs\datasets\TIMIT Pickled Data\TIMIT_ubm_gmm_512.pckl
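
The UBM itself is assumed to come from an earlier notebook. A minimal, commented-out sketch of how a 512-component GMM like this one could be produced with the same sklearn API, in case it ever needs to be regenerated (hypothetical, kept disabled like the stats cell below):

In [ ]:
# hypothetical sketch of how the pickled UBM may have been trained (not run here)
# ubm = mixture.GMM(n_components=512, covariance_type='diag', n_iter=100)
# ubm.fit(train_obs)   # pooled frames from all phones
# pickle.dump(ubm, open(pickle_dir + os.sep + "TIMIT_ubm_gmm_512.pckl", "wb"))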

In [9]:
# # print some stats about the UBM
# print np.round(ubm.score(train_obs).mean(), 2)
# print np.round(ubm.score(train_obs).var(), 2)
# print np.round(ubm.score(train_obs)[0:5] )

In [10]:
# dictionary containing a gmm for each phone, adapted from the UBM
agmm_dict = dict()

# adapt the UBM to each phone's observations
for phone in phone_list:
    # start from a copy of the UBM
    agmm = copy.copy(ubm)
    
    if agmm.n_components > len(train_phone_obs_dict[phone]):
        # not enough observations to fit this many components
        print "not enough obs for phone", phone
    else:
        # adapt
        agmm.fit(train_phone_obs_dict[phone])
        # add to dictionary
        agmm_dict[phone] = agmm


not enough obs for phone aa
not enough obs for phone ae
not enough obs for phone ah
not enough obs for phone ao
not enough obs for phone aw
not enough obs for phone ax
not enough obs for phone ax-h
not enough obs for phone axr
not enough obs for phone ay
not enough obs for phone b
not enough obs for phone bcl
not enough obs for phone ch
not enough obs for phone d
not enough obs for phone dcl
not enough obs for phone dh
not enough obs for phone dx
not enough obs for phone eh
not enough obs for phone el
not enough obs for phone em
not enough obs for phone en
not enough obs for phone eng
not enough obs for phone epi
not enough obs for phone er
not enough obs for phone ey
not enough obs for phone f
not enough obs for phone g
not enough obs for phone gcl
not enough obs for phone h#
not enough obs for phone hh
not enough obs for phone hv
not enough obs for phone ih
not enough obs for phone ix
not enough obs for phone iy
not enough obs for phone jh
not enough obs for phone k
not enough obs for phone kcl
not enough obs for phone l
not enough obs for phone m
not enough obs for phone n
not enough obs for phone ng
not enough obs for phone nx
not enough obs for phone ow
not enough obs for phone oy
not enough obs for phone p
not enough obs for phone pau
not enough obs for phone pcl
not enough obs for phone q
not enough obs for phone r
not enough obs for phone s
not enough obs for phone sh
not enough obs for phone t
not enough obs for phone tcl
not enough obs for phone th
not enough obs for phone uh
not enough obs for phone uw
not enough obs for phone ux
not enough obs for phone v
not enough obs for phone w
not enough obs for phone y
not enough obs for phone z
not enough obs for phone zh
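
Note that, depending on the sklearn version, GMM.fit re-initializes the parameters by default (init_params='wmc'), so the copy above mainly reuses the UBM's hyperparameters rather than MAP-adapting its parameters; in this run no phone had enough frames for a 512-component re-fit anyway, so agmm_dict stays empty. For reference, a minimal sketch of classical mean-only MAP adaptation (Reynolds-style GMM-UBM, relevance factor r), which works with any number of frames; the function name and the relevance factor value are illustrative assumptions:

In [ ]:
# hypothetical sketch: mean-only MAP adaptation of the UBM to one phone's frames
def map_adapt_means(ubm, obs, r=16.0):
    obs = np.asarray(obs)
    resp = ubm.predict_proba(obs)               # responsibilities, shape (n_frames, n_components)
    n_k = resp.sum(axis=0)                      # soft count per component
    e_k = np.dot(resp.T, obs) / np.maximum(n_k, 1e-10)[:, np.newaxis]   # per-component data mean
    alpha = n_k / (n_k + r)                     # adaptation coefficients
    adapted = copy.copy(ubm)
    adapted.means_ = alpha[:, np.newaxis] * e_k + (1.0 - alpha)[:, np.newaxis] * ubm.means_
    return adapted

# e.g. agmm_dict[phone] = map_adapt_means(ubm, train_phone_obs_dict[phone])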

In [11]:
# save the adapted GMM dictionary in pickled form

# name and location to save in
pickle_name = "TIMIT_gmm_adapted_dict" + ".pckl"
pickle_dir = os.path.abspath('..'+ os.sep + 'datasets' + os.sep + 'TIMIT Pickled Data')

if not os.path.isdir(pickle_dir):
    os.makedirs(pickle_dir)
    
pickle.dump( agmm_dict, open( pickle_dir + os.sep + pickle_name, "wb") )
print "saved adapted gmm dictionary in ", pickle_dir + os.sep + pickle_name


saved adapted gmm dictionary in  C:\Users\FG\Desktop\PhD\Research\Reservoirs\datasets\TIMIT Pickled Data\TIMIT_gmm_adapted_dict.pckl
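
For later reference, a downstream notebook would presumably reload this dictionary and score individual frames against each adapted model; a hypothetical, commented-out usage sketch, mirroring the score() call from the disabled stats cell above (frame is a placeholder for one MFCC frame):

In [ ]:
# hypothetical downstream usage (assumes agmm_dict is non-empty)
# agmm_dict = pickle.load(open(pickle_dir + os.sep + pickle_name, "rb"))
# scores = dict((phone, gmm.score([frame]).mean()) for phone, gmm in agmm_dict.items())
# best_phone = max(scores, key=scores.get)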

In [ ]: