In [1]:
import os
import time
from six.moves import cPickle
import numpy as np
import tensorflow as tf
In [2]:
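# Load the character list, the char-to-index vocabulary, and the integer-encoded corpus from the pickled files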
load_dir = "data/"
load_name = os.path.join(load_dir, 'chars_vocab.pkl')
with open(load_name, 'rb') as fload:
    chars, vocab = cPickle.load(fload)
load_name = os.path.join(load_dir, 'corpus_data.pkl')
with open(load_name, 'rb') as fload:
    corpus, data = cPickle.load(fload)
In [3]:
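# Build input/target arrays: each target is the input shifted left by one character
# (next-character prediction); the final target wraps around to the first character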
batch_size = 50
seq_length = 200
num_batches = int(corpus.size / (batch_size * seq_length))
corpus_reduced = corpus[:(num_batches*batch_size*seq_length)]
xdata = corpus_reduced
ydata = np.copy(xdata)
ydata[:-1] = xdata[1:]
ydata[-1] = xdata[0]
In [4]:
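# Split the data into num_batches minibatches, each of shape (batch_size, seq_length)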
xbatches = np.split(xdata.reshape(batch_size, -1), num_batches, 1)
ybatches = np.split(ydata.reshape(batch_size, -1), num_batches, 1)
In [ ]:
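# Define a 2-layer LSTM character-level language model
# (written against the legacy tf.nn.rnn_cell / tf.nn.seq2seq APIs of TensorFlow r0.x)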
vocab_size = len(vocab)
rnn_size = 128
num_layers = 2
grad_clip = 5.
# Stack num_layers LSTM layers; build a separate cell instance per layer so the layers do not share weights
cells = [tf.nn.rnn_cell.BasicLSTMCell(rnn_size) for _ in range(num_layers)]
cell = tf.nn.rnn_cell.MultiRNNCell(cells)
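# Placeholders for one minibatch of input characters and their next-character targets, plus the initial RNN state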
input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
targets = tf.placeholder(tf.int32, [batch_size, seq_length])
istate = cell.zero_state(batch_size, tf.float32)
with tf.variable_scope('rnnlm') as scope:
    # Output projection; reuse the existing variables if the graph is built more than once in the same process
    try:
        softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
    except ValueError:
        scope.reuse_variables()
        softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
    # Character embedding; split the embedded batch into a list of seq_length tensors of shape (batch_size, rnn_size)
    embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
    inputs = tf.split(1, seq_length, tf.nn.embedding_lookup(embedding, input_data))
    inputs = [tf.squeeze(_input, [1]) for _input in inputs]
outputs, last_state = tf.nn.seq2seq.rnn_decoder(
    inputs, istate, cell, loop_function=None, scope='rnnlm')
output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
probs = tf.nn.softmax(logits)
loss = tf.nn.seq2seq.sequence_loss_by_example(
    [logits],                              # logits
    [tf.reshape(targets, [-1])],           # targets
    [tf.ones([batch_size * seq_length])],  # weights
    vocab_size)
cost = tf.reduce_sum(loss) / batch_size / seq_length
final_state = last_state
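# Clip gradients by global norm and apply them with Adam; lr is a non-trainable Variable so it can be decayed from the training loop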
lr = tf.Variable(0.0, trainable=False)
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
_optm = tf.train.AdamOptimizer(lr)
optm = _optm.apply_gradients(zip(grads, tvars))
In [ ]:
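# Training loop: decay the learning rate each epoch, visit the batches in random order,
# and periodically print the cost and save a checkpoint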
save_dir = "data/"
num_epochs = 200
print_every = 500
save_every = 1000
learning_rate = 0.001
decay_rate = 0.97
sess = tf.Session()
sess.run(tf.initialize_all_variables())
saver = tf.train.Saver()
init_time = time.time()
for epoch in range(num_epochs):
    sess.run(tf.assign(lr, learning_rate * (decay_rate ** epoch)))
    state = sess.run(istate)
    randbatchidx = np.random.permutation(num_batches)
    for iteration in range(num_batches):
        xbatch = xbatches[randbatchidx[iteration]]
        ybatch = ybatches[randbatchidx[iteration]]
        start_time = time.time()
        train_loss, state, _ = sess.run(
            [cost, final_state, optm],
            feed_dict={input_data: xbatch, targets: ybatch, istate: state})
        total_iter = epoch*num_batches + iteration
        end_time = time.time()
        duration = end_time - start_time
        if total_iter % print_every == 0:
            print("[%d/%d] cost: %.4f / Each batch learning took %.4f sec"
                  % (total_iter, num_epochs*num_batches, train_loss, duration))
        if total_iter % save_every == 0:
            ckpt_path = os.path.join(save_dir, 'model.ckpt')
            saver.save(sess, ckpt_path, global_step=total_iter)
            print("model saved to '%s'" % (ckpt_path))