In [1]:
import os
import time
from six.moves import cPickle
import numpy as np
import tensorflow as tf

In [2]:
load_dir  = "data/"
# Character/index mappings produced during preprocessing
load_name = os.path.join(load_dir, 'chars_vocab.pkl')
with open(load_name, 'rb') as fload:
    chars, vocab = cPickle.load(fload)
# Full corpus encoded as an array of character indices
load_name = os.path.join(load_dir, 'corpus_data.pkl')
with open(load_name, 'rb') as fload:
    corpus, data = cPickle.load(fload)
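
A quick look at what was loaded (not in the original notebook; it assumes, as the rest of the notebook does, that `vocab` maps characters to integer indices and `corpus` is a NumPy array of those indices):

In [ ]:
# Sanity check on the loaded objects
print("vocab size : %d" % len(vocab))
print("corpus size: %d" % corpus.size)
print("first ten indices: %s" % corpus[:10])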

In [3]:
batch_size  = 50
seq_length  = 200
num_batches = int(corpus.size / (batch_size * seq_length))

# Truncate the corpus to a whole number of batches, then build the targets:
# ydata is xdata shifted left by one character (next-character prediction),
# with the first character wrapped around to the end.
corpus_reduced = corpus[:(num_batches*batch_size*seq_length)]
xdata = corpus_reduced
ydata = np.copy(xdata)
ydata[:-1] = xdata[1:]
ydata[-1]  = xdata[0]

In [4]:
# Reshape into batch_size rows, then split along the time axis into
# num_batches blocks of shape (batch_size, seq_length)
xbatches = np.split(xdata.reshape(batch_size, -1), num_batches, 1)
ybatches = np.split(ydata.reshape(batch_size, -1), num_batches, 1)
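
A small sanity check on the batching (added here as a sketch, not part of the original notebook) confirms the shapes and the one-character shift between inputs and targets:

In [ ]:
# Each batch is a (batch_size, seq_length) block of character indices
assert len(xbatches) == num_batches
assert xbatches[0].shape == (batch_size, seq_length)
# Within a row, the target sequence is the input sequence shifted by one character
assert np.array_equal(ybatches[0][0, :-1], xbatches[0][0, 1:])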

In [ ]:
vocab_size = len(vocab)
rnn_size   = 128   # LSTM hidden-state size
num_layers = 2     # number of stacked LSTM layers
grad_clip  = 5.    # gradient-clipping threshold (global norm)

# Two-layer LSTM over integer character indices
unitcell   = tf.nn.rnn_cell.BasicLSTMCell(rnn_size)
cell       = tf.nn.rnn_cell.MultiRNNCell([unitcell] * num_layers)
input_data = tf.placeholder(tf.int32, [batch_size, seq_length])
targets    = tf.placeholder(tf.int32, [batch_size, seq_length])
istate     = cell.zero_state(batch_size, tf.float32)

with tf.variable_scope('rnnlm') as scope:
    # Reuse existing variables if this cell is re-run in the notebook
    try:
        softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
    except ValueError:
        scope.reuse_variables()
        softmax_w = tf.get_variable("softmax_w", [rnn_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
    # Embed the characters, then split into a list of seq_length
    # tensors of shape [batch_size, rnn_size] for the unrolled RNN
    embedding = tf.get_variable("embedding", [vocab_size, rnn_size])
    inputs = tf.split(1, seq_length, tf.nn.embedding_lookup(embedding, input_data))
    inputs = [tf.squeeze(_input, [1]) for _input in inputs]

# Unroll the LSTM and project its outputs onto the vocabulary
outputs, last_state = tf.nn.seq2seq.rnn_decoder(
    inputs, istate, cell, loop_function=None, scope='rnnlm')
output = tf.reshape(tf.concat(1, outputs), [-1, rnn_size])
logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
probs  = tf.nn.softmax(logits)

# Per-character cross-entropy loss
loss = tf.nn.seq2seq.sequence_loss_by_example(
    [logits],                               # Input
    [tf.reshape(targets, [-1])],            # Target
    [tf.ones([batch_size * seq_length])],   # Weight
    vocab_size)

cost        = tf.reduce_sum(loss) / batch_size / seq_length  # mean loss per character
final_state = last_state
lr          = tf.Variable(0.0, trainable=False)  # reassigned each epoch (decayed learning rate)
tvars       = tf.trainable_variables()
grads, _    = tf.clip_by_global_norm(tf.gradients(cost, tvars), grad_clip)
_optm       = tf.train.AdamOptimizer(lr)
optm        = _optm.apply_gradients(zip(grads, tvars))
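
Because `cost` is the mean cross-entropy per character (in nats), a per-character perplexity node can be added for monitoring. This cell is a sketch and is not part of the original graph:

In [ ]:
# Optional: per-character perplexity, exp(mean cross-entropy)
perplexity = tf.exp(cost)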

In [ ]:
save_dir      = "data/"
num_epochs    = 200
print_every   = 500    # iterations between progress reports
save_every    = 1000   # iterations between checkpoints
learning_rate = 0.001
decay_rate    = 0.97   # per-epoch learning-rate decay

sess = tf.Session()
sess.run(tf.initialize_all_variables())
saver = tf.train.Saver()
init_time = time.time()
for epoch in range(num_epochs):
    # Decay the learning rate and reset the LSTM state at the start of each epoch
    sess.run(tf.assign(lr, learning_rate * (decay_rate ** epoch)))
    state        = sess.run(istate)
    randbatchidx = np.random.permutation(num_batches)  # shuffle the batch order
    for iteration in range(num_batches):
        xbatch = xbatches[randbatchidx[iteration]]
        ybatch = ybatches[randbatchidx[iteration]]

        # One optimization step; the LSTM state is carried over between batches
        start_time = time.time()
        train_loss, state, _ = sess.run([cost, final_state, optm],
            feed_dict={input_data: xbatch, targets: ybatch, istate: state})
        total_iter = epoch*num_batches + iteration
        duration   = time.time() - start_time

        if total_iter % print_every == 0:
            print("[%d/%d] cost: %.4f / Each batch learning took %.4f sec"
                  % (total_iter, num_epochs*num_batches, train_loss, duration))
        if total_iter % save_every == 0:
            ckpt_path = os.path.join(save_dir, 'model.ckpt')
            saver.save(sess, ckpt_path, global_step=total_iter)
            print("model saved to '%s'" % ckpt_path)


[0/2200] cost: 9.8504 / Each batch learning took 5.7818 sec
model saved to 'data/model.ckpt'
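
To pick up from the most recent checkpoint later (for example, to continue training), the saved variables can be restored. This is a sketch, assuming the same graph and session are still available:

In [ ]:
# Restore the latest checkpoint written by the training loop above
ckpt = tf.train.latest_checkpoint(save_dir)
if ckpt is not None:
    saver.restore(sess, ckpt)
    print("restored from '%s'" % ckpt)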