In [1]:
import numpy as np
import tensorflow as tf
import random
import time

import docload  # convenient methods for loading and processing Project Gutenberg books

In [2]:
# Load and process data
files = ['../data/adventures_of_sherlock_holmes.txt',
        '../data/hound_of_the_baskervilles.txt',
        '../data/sign_of_the_four.txt']
word_array, dictionary, num_lines, num_words = docload.build_word_array(
    files, vocab_size=50000, gutenberg=True)
reverse_dict = {v: k for k, v in dictionary.items()}
print('Document loaded and processed: {} lines, {} words.'
      .format(num_lines, num_words))


Document loaded and processed: 24080 lines, 247812 words.

In [3]:
# Model hyperparameters and training configuration
class Config(object):
    """Model parameters"""
    def __init__(self, num_words):
        self.vocab_size = num_words
        self.batch_size = 32
        self.num_rnn_steps = 20  # unrolled length of RNN
        self.embed_size = 64     # input embedding
        self.rnn_size = 128      # number of RNN units
        self.hidden_size = 196   # hidden layer connected to last output of RNN
        self.rui_init = 0.01     # maxval, -minval for random_uniform_initializer
        self.vsi_init = 0.01     # stddev multiplier (factor) for variance_scaling_initializer
        self.neg_samples = 64    # for noise contrastive estimation (candidate sampling loss function)
        self.learn_rate = 0.05
        self.momentum = 0.8
        self.epochs = 1

config = Config(len(dictionary))

In [4]:
# Aliases for especially long TensorFlow calls
rui = tf.random_uniform_initializer
vsi = tf.contrib.layers.variance_scaling_initializer
# Commonly used weight and bias initializers
rui_initializer = rui(-config.rui_init, config.rui_init, dtype=tf.float32)
vsi_initializer = vsi(factor=config.vsi_init, dtype=tf.float32)
zero_initializer = tf.zeros_initializer(dtype=tf.float32)

In [5]:
def model(config, x):
    '''Embedding layer, RNN and hidden layer'''
    with tf.name_scope('embedding'):
        with tf.variable_scope('embedding', initializer=rui_initializer):
            embed_w = tf.get_variable('w', [config.vocab_size, config.embed_size])
        embed_out = tf.nn.embedding_lookup(embed_w, x, name='output')
            
    with tf.variable_scope('rnn', initializer=vsi_initializer):
        rnn_cell = tf.contrib.rnn.GRUCell(config.rnn_size, activation=tf.tanh)
        rnn_out, state = tf.nn.dynamic_rnn(rnn_cell, embed_out, dtype=tf.float32)
        
    with tf.name_scope('hidden'):
        rnn_last_output = rnn_out[:, config.num_rnn_steps-1, :]
        with tf.variable_scope('hidden'):
            hid_w = tf.get_variable('w', (config.rnn_size, config.hidden_size),
                                   initializer=vsi_initializer)
            hid_b = tf.get_variable('b', config.hidden_size, initializer=zero_initializer)
        hid_out = tf.nn.tanh(tf.matmul(rnn_last_output, hid_w) + hid_b)
            
    return hid_out

In [6]:
def loss(config, hid_out, y):
    """Loss Function: noise contrastive estimation on final output of RNN"""
    with tf.name_scope('output'):
        with tf.variable_scope('output'):
            w = tf.get_variable('w', (config.vocab_size, config.hidden_size),
                                   initializer=vsi_initializer)
            b = tf.get_variable('b', config.vocab_size, initializer=zero_initializer)
        batch_loss = tf.reduce_mean(
            tf.nn.nce_loss(w, b, inputs=hid_out, labels=y,
                           num_sampled=config.neg_samples,
                           num_classes=config.vocab_size,
                           num_true=1), name='batch_loss')
    
    return batch_loss

In [7]:
def train(config, batch_loss):
    with tf.name_scope('optimize'):
        step = tf.Variable(0, trainable=False, name='global_step')
        optimizer = tf.train.MomentumOptimizer(config.learn_rate, config.momentum)
        train_op = optimizer.minimize(batch_loss, name='minimize_op', global_step=step)
    
    return train_op, step

In [8]:
class MyGraph(object):
    def __init__(self, config, word_array):
        batch_width = len(word_array) // config.batch_size
        self.data_initializer = tf.placeholder(dtype=tf.int32,
                                    shape=(config.batch_size, batch_width))
        self.data = tf.Variable(self.data_initializer, trainable=False, collections=[])
        
        i = tf.train.range_input_producer(batch_width - config.num_rnn_steps - 1, 
                                          num_epochs=config.epochs, shuffle=True).dequeue()
        self.xq = self.data[:, (i):(i+config.num_rnn_steps)]
        self.yq = tf.reshape(self.data[:, i+config.num_rnn_steps], (-1, 1))
        
        self.data_q = tf.FIFOQueue(10000, dtypes = [tf.int32, tf.int32],
                                  shapes=[(config.batch_size, config.num_rnn_steps),
                                         (config.batch_size, 1)])
        self.data_q_close = self.data_q.close()
        self.enq_data = self.data_q.enqueue([self.xq, self.yq])
        self.x, self.y = self.data_q.dequeue()

        self.hid_out = model(config, self.x)
        self.batch_loss = loss(config, self.hid_out, self.y)
        self.train_op, self.step = train(config, self.batch_loss)
        self.init = tf.group(tf.global_variables_initializer(),
                            tf.local_variables_initializer())
        self.saver = tf.train.Saver(max_to_keep=2)

In [9]:
# Train
move_avg_len = 20  # number of batches to average loss over
move_avg_loss = np.zeros(move_avg_len)
with tf.Graph().as_default():
    g = MyGraph(config, word_array)
    with tf.Session() as sess:
        sess.run(g.init)
        batch_width = len(word_array) // config.batch_size
        # reshape data for easy slicing into shape = (batch_size, num_rnn_steps)
        data = word_array[0 : config.batch_size*batch_width].reshape((config.batch_size, batch_width))
        feeddict = {g.data_initializer: data}
        sess.run(g.data.initializer, feed_dict=feeddict)
        
        q_runner = tf.train.QueueRunner(g.data_q, [g.enq_data], g.data_q_close)
        tf.train.add_queue_runner(q_runner) 
        writer = tf.summary.FileWriter('../tf_logs/queue/', tf.get_default_graph())
        coord = tf.train.Coordinator()
        
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        start_time = time.time()
        try:
            while not coord.should_stop():
                sess.run(g.train_op)
                [_, l, step] = sess.run([g.train_op, g.batch_loss, g.step])
                move_avg_loss[step % move_avg_len] = l
        except tf.errors.OutOfRangeError as e:
            # Report exceptions to the coordinator.
            coord.request_stop(e)
        finally:
            # Terminate as usual. It is safe to call `coord.request_stop()` twice.
            coord.request_stop()
            coord.join(threads)            
        print("--- %s seconds ---" % (time.time() - start_time))
        last_saved = g.saver.save(sess, '../tf_logs/queue', global_step=1)
        writer.close()
        
print(np.mean(move_avg_loss))


--- 135.61333346366882 seconds ---
2.63432950974

In [10]:
# 7723
print(len(g.train_op))


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-10-75787ff65993> in <module>()
      1 # 7723
----> 2 print(len(g.train_op))

TypeError: object of type 'Operation' has no len()

In [ ]: