In [1]:
import tensorflow as tf
import numpy as np
# preprocessed data
from datasets.cmudict import data
import data_utils
In [2]:
# load data from pickle and npy files
data_ctl, idx_words, idx_phonemes = data.load_data(PATH='datasets/cmudict/')
(trainX, trainY), (testX, testY), (validX, validY) = data_utils.split_dataset(idx_phonemes, idx_words)
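As a quick sanity check (an added, illustrative cell, not part of the original run), the shapes of the three splits can be printed; the exact numbers depend on the CMUdict preprocessing.
In [ ]:
# illustrative check: each split is a pair of index arrays (phoneme sequences, letter sequences)
print('train:', trainX.shape, trainY.shape)
print('test :', testX.shape, testY.shape)
print('valid:', validX.shape, validY.shape)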
In [3]:
# parameters
xseq_len = trainX.shape[-1]   # padded length of the phoneme (input) sequences
yseq_len = trainY.shape[-1]   # padded length of the letter (output) sequences
batch_size = 128
xvocab_size = len(data_ctl['idx2pho'])    # phoneme vocabulary size
yvocab_size = len(data_ctl['idx2alpha'])  # letter vocabulary size
emb_dim = 128
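To see what the indices encode, one training pair can be decoded back to symbols with data_utils.decode and the idx2pho / idx2alpha lookups used elsewhere in this notebook (an illustrative cell, not part of the original run).
In [ ]:
# peek at one training pair: input phoneme sequence and target spelling
print(data_utils.decode(sequence=trainX[0], lookup=data_ctl['idx2pho'], separator=' '))
print(data_utils.decode(sequence=trainY[0], lookup=data_ctl['idx2alpha'], separator=''))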
In [4]:
import seq2seq_wrapper
In [6]:
import importlib
importlib.reload(seq2seq_wrapper)   # pick up any edits made to the wrapper module
In [7]:
model = seq2seq_wrapper.Seq2Seq(xseq_len=xseq_len,
                                yseq_len=yseq_len,
                                xvocab_size=xvocab_size,
                                yvocab_size=yvocab_size,
                                ckpt_path='ckpt/cmudict/',
                                emb_dim=emb_dim,
                                num_layers=3
                                )
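The wrapper hides the graph construction. A very rough, hypothetical sketch of the underlying idea follows: embed the phoneme indices, run a stacked LSTM encoder, start a decoder LSTM from its final state, and project each decoder step to letter logits. The names and ops below are assumptions for illustration, not the wrapper's actual internals.
In [ ]:
# hypothetical sketch only -- not the wrapper's code
enc_in = tf.placeholder(tf.int32, [None, xseq_len])   # batch of phoneme-index sequences
dec_in = tf.placeholder(tf.int32, [None, yseq_len])   # batch of shifted letter-index sequences
enc_embed = tf.get_variable('enc_embed', [xvocab_size, emb_dim])
dec_embed = tf.get_variable('dec_embed', [yvocab_size, emb_dim])
enc_cell = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.BasicLSTMCell(emb_dim) for _ in range(3)])
dec_cell = tf.contrib.rnn.MultiRNNCell([tf.contrib.rnn.BasicLSTMCell(emb_dim) for _ in range(3)])
_, enc_state = tf.nn.dynamic_rnn(enc_cell, tf.nn.embedding_lookup(enc_embed, enc_in),
                                 dtype=tf.float32, scope='encoder')
dec_out, _ = tf.nn.dynamic_rnn(dec_cell, tf.nn.embedding_lookup(dec_embed, dec_in),
                               initial_state=enc_state, scope='decoder')
logits = tf.layers.dense(dec_out, yvocab_size)         # per-step logits over the letter vocabulary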
In [8]:
# random batch generators: 16 examples for evaluation, batch_size (128) for training
val_batch_gen = data_utils.rand_batch_gen(validX, validY, 16)
train_batch_gen = data_utils.rand_batch_gen(trainX, trainY, batch_size)
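Pulling a single batch confirms that the generators yield (input, target) pairs of index arrays, which is what model.predict consumes below (an illustrative cell, not part of the original run).
In [ ]:
# draw one random batch and inspect its shapes
batchX, batchY = next(train_batch_gen)
print(batchX.shape, batchY.shape)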
In [16]:
sess = model.train(train_batch_gen, val_batch_gen)   # pass sess=... to resume from an existing session
In [9]:
sess = model.restore_last_session()
In [10]:
output = model.predict(sess, next(val_batch_gen)[0])
print(output.shape)
In [11]:
output
In [12]:
for oi in output:
    print(data_utils.decode(sequence=oi, lookup=data_ctl['idx2alpha'], separator=''))