Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.



In [ ]:

    
!git clone https://github.com/google-research/google-research.git



In [ ]:

    
import sys
import os
import tarfile
import urllib
import zipfile

sys.path.append('./google-research')

Example of model training

Below steps are taken from model_train_eval - it has more tests in streaming, non streaming, quantized and non qunatized models with TF and TFLite.

Imports



In [ ]:

    
# TF streaming
from kws_streaming.models import models
from kws_streaming.models import utils
from kws_streaming.layers.modes import Modes



In [ ]:

    
import tensorflow as tf
import numpy as np
import tensorflow.compat.v1 as tf1
import logging
from kws_streaming.models import model_params
from kws_streaming.train import model_flags
from kws_streaming.train import test
from kws_streaming.train import train
from kws_streaming.models import utils
from kws_streaming import data
tf1.disable_eager_execution()



In [ ]:

    
config = tf1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf1.Session(config=config)



In [ ]:

    
# general imports
import matplotlib.pyplot as plt
import os
import json
import numpy as np
import scipy as scipy
import scipy.io.wavfile as wav
import scipy.signal



In [ ]:

    
tf.__version__



In [ ]:

    
tf1.reset_default_graph()
sess = tf1.Session()
tf1.keras.backend.set_session(sess)
tf1.keras.backend.set_learning_phase(0)

Set path to data



In [ ]:

    
# set PATH to data sets (for example to speech commands V2):
# it can be downloaded from
# https://storage.googleapis.com/download.tensorflow.org/data/speech_commands_v0.02.tar.gz
# if you already run "00_check-data.ipynb" then folder "data2" should be located in the current dir
current_dir = os.getcwd()
DATA_PATH = os.path.join(current_dir, "data2/")



In [ ]:

    
def waveread_as_pcm16(filename):
  """Read in audio data from a wav file.  Return d, sr."""
  with tf.io.gfile.GFile(filename, 'rb') as file_handle:
    samplerate, wave_data = wav.read(file_handle)
  # Read in wav file.
  return wave_data, samplerate

def wavread_as_float(filename, target_sample_rate=16000):
  """Read in audio data from a wav file.  Return d, sr."""
  wave_data, samplerate = waveread_as_pcm16(filename)
  desired_length = int(
          round(float(len(wave_data)) / samplerate * target_sample_rate))
  wave_data = scipy.signal.resample(wave_data, desired_length)

  # Normalize short ints to floats in range [-1..1).
  data = np.array(wave_data, np.float32) / 32768.0
  return data, target_sample_rate



In [ ]:

    
# Set path to wav file to visualize it
wav_file = os.path.join(DATA_PATH, "left/012187a4_nohash_0.wav")

# read audio file
wav_data, samplerate = wavread_as_float(wav_file)



In [ ]:

    
assert samplerate == 16000



In [ ]:

    
plt.plot(wav_data)

Set path to a model with config



In [ ]:

    
# select model name should be one of
model_params.HOTWORD_MODEL_PARAMS.keys()



In [ ]:

    
MODEL_NAME = 'svdf'
MODELS_PATH = os.path.join(current_dir, "models")
MODEL_PATH = os.path.join(MODELS_PATH, MODEL_NAME + "/")
MODEL_PATH



In [ ]:

    
os.makedirs(MODEL_PATH)



In [ ]:

    
# get toy model settings
FLAGS = model_params.HOTWORD_MODEL_PARAMS[MODEL_NAME]



In [ ]:

    
# set path to data and model (where model will be stored)
FLAGS.data_dir = DATA_PATH
FLAGS.train_dir = MODEL_PATH

# set speech feature extractor properties
FLAGS.mel_upper_edge_hertz = 7000
FLAGS.window_size_ms = 40.0
FLAGS.window_stride_ms = 20.0
FLAGS.mel_num_bins = 80
FLAGS.dct_num_features = 40
FLAGS.feature_type = 'mfcc_op'
FLAGS.preprocess = 'raw'

# set training settings
FLAGS.train = 1
FLAGS.how_many_training_steps = '400,400,400,400'  # reduced number of training steps for test only
FLAGS.learning_rate = '0.001,0.0005,0.0001,0.00002'
FLAGS.lr_schedule = 'linear'
FLAGS.verbosity = logging.INFO

# data shuffling config
FLAGS.resample = 0.15
FLAGS.time_shift_ms = 100



In [ ]:

    
# model parameters are different for every model
FLAGS.model_name = MODEL_NAME
FLAGS.svdf_memory_size = "4,10,10,10,10,10"
FLAGS.svdf_units1 = "16,32,32,32,64,128"
FLAGS.svdf_act = "'relu','relu','relu','relu','relu','relu'"
FLAGS.svdf_units2 = "40,40,64,64,64,-1"
FLAGS.svdf_dropout = "0.0,0.0,0.0,0.0,0.0,0.0"
FLAGS.svdf_pad = 0
FLAGS.dropout1 = 0.0
FLAGS.units2 = ''
FLAGS.act2 = ''



In [ ]:

    
flags = model_flags.update_flags(FLAGS)



In [ ]:

    
flags.__dict__



In [ ]:

    
with open(os.path.join(flags.train_dir, 'flags.json'), 'wt') as f:
  json.dump(flags.__dict__, f)



In [ ]:

    
# visualize a model
model_non_stream_batch = models.MODELS[flags.model_name](flags)
tf.keras.utils.plot_model(
    model_non_stream_batch,
    show_shapes=True,
    show_layer_names=True,
    expand_nested=True)



In [ ]:

    
model_non_stream_batch.summary()

Model training



In [ ]:

    
# Model training
train.train(flags)

Run model evaluation

TF Run non streaming inference



In [ ]:

    
folder_name = 'tf'
test.tf_non_stream_model_accuracy(flags, folder_name)

more testing functions can be found at test



In [ ]: