Check Training Time with QueueRunner

Patrick Coady (pcoady@alum.mit.edu)

Quick look to see whether using a QueueRunner is substantially faster than using feed_dict.

Using QueueRunner was not substantially faster (< 1%), but I have not tried it on a GPU.

Also, this is an old version of the model from early in the project, so its run time is not comparable to that of the present model.


In [ ]:
import numpy as np
import tensorflow as tf
import time

from wordvector import WordVector
import docload

In [ ]:
# Text files that make up the training corpus.
files = [
    '../data/adventures_of_sherlock_holmes.txt',
    '../data/hound_of_the_baskervilles.txt',
    '../data/sign_of_the_four.txt',
]

# Map the documents to an integer word array plus its dictionary.
# NOTE(review): vocab_size presumably caps the dictionary size and gutenberg
# presumably enables Project Gutenberg-specific handling -- confirm in docload.
word_array, dictionary, num_lines, num_words = docload.build_word_array(
    files,
    vocab_size=50000,
    gutenberg=True)

print('Document loaded and processed: {} lines, {} words.'.format(
    num_lines, num_words))

In [ ]:
class Config(object):
    """Hyper-parameters shared by the producer, model, loss and train steps."""

    def __init__(self, num_words):
        """Store the hyper-parameters.

        Args:
        num_words: int, stored as vocab_size
        """
        # Data / batching
        self.vocab_size = num_words
        self.batch_size = 20
        self.num_steps = 20

        # Network size
        self.hidden_size = 200

        # Initializer scales
        self.embed_init = 0.1
        self.softmax_init = 0.1
        self.rnn_init = 0.03

        # Training
        self.learn_rate = 0.01
        self.neg_samples = 64
        
# One Config instance for the whole notebook; the number of dictionary
# entries is passed in as num_words.
config = Config(len(dictionary))

# Global scratch space: stash a tensor, variable or op here for debugging.
debug_dict = {}

In [ ]:
def producer(config, word_array):
    """Build queue-fed input and target tensors from an integer-mapped text.

    The word array is truncated to a multiple of config.batch_size and reshaped
    into config.batch_size rows. A range_input_producer (single epoch, no
    shuffling) supplies a column offset; each dequeue yields a window of
    config.num_steps columns (the RNN input) and the same window shifted right
    by one column (the RNN training target).
    (Used TensorFlow RNN PTB tutorial as starting point for this producer.)

    Args:
    config: object with int attributes batch_size and num_steps (unroll length)
    word_array: np.array (int), as generated by docload.build_word_array()

    Returns:
    x: Tensor, config.batch_size rows by config.num_steps columns, RNN input
    y: Tensor, same window as x offset by 1 column, RNN target
    epoch_size: scalar Tensor, (batch_len - 1) // config.num_steps. Note this
        is not the number of batches the queue yields: the queue produces
        batch_len - config.num_steps - 1 offsets.
    """
    raw = tf.convert_to_tensor(word_array, name="raw_data", dtype=tf.int32)

    # Columns per row once the data is split into batch_size rows.
    batch_len = tf.size(raw) // config.batch_size
    usable = config.batch_size * batch_len
    data = tf.reshape(raw[0:usable], [config.batch_size, batch_len])
    epoch_size = (batch_len - 1) // config.num_steps

    # Queue of window start offsets; exhausting it ends the single epoch.
    offsets = tf.train.range_input_producer(
        batch_len - config.num_steps - 1, num_epochs=1, shuffle=False)
    offset = offsets.dequeue()
    stop = offset + config.num_steps

    x = data[:, offset:stop]
    y = data[:, offset + 1:stop + 1]

    return x, y, epoch_size

In [ ]:
def model(config, x):
    """Embed word ids and run the embeddings through a basic tanh RNN.

    Args:
    config: object supplying vocab_size, hidden_size, batch_size, embed_init
        and rnn_init
    x: integer Tensor of word ids; its first dimension must match
        config.batch_size because the RNN zero state is built for that size
        (presumably the x returned by producer() -- confirm against caller)

    Returns:
    Tensor of RNN outputs, the first value returned by tf.nn.dynamic_rnn
    """
    embed_initializer = tf.random_uniform_initializer(
        -config.embed_init, config.embed_init, dtype=tf.float32)
    rnn_initializer = tf.contrib.layers.variance_scaling_initializer(
        factor=config.rnn_init, dtype=tf.float32)

    # Embedding table: one hidden_size-wide row per vocabulary entry.
    with tf.variable_scope('embed', initializer=embed_initializer):
        embed_w = tf.get_variable('w', [config.vocab_size, config.hidden_size])
        embed_out = tf.nn.embedding_lookup(embed_w, x)

    # Recurrent layer, started from an all-zero state; the final state is
    # not needed by the caller and is discarded.
    with tf.variable_scope('rnn', initializer=rnn_initializer):
        cell = tf.contrib.rnn.BasicRNNCell(config.hidden_size,
                                           activation=tf.tanh)
        zero_state = cell.zero_state(config.batch_size, tf.float32)
        rnn_out, _ = tf.nn.dynamic_rnn(cell, embed_out,
                                       initial_state=zero_state)

    return rnn_out

In [ ]:
def loss(config, rnn_out, y):
    """Loss function: noise contrastive estimation on the last output.

    Only the final time step (index config.num_steps - 1) of the RNN output
    and of the targets is used.

    Args:
    config: object supplying num_steps, batch_size, vocab_size, hidden_size,
        softmax_init and neg_samples
    rnn_out: Tensor of RNN outputs, indexed as [batch, step, hidden]
    y: Tensor of target word ids, indexed as [batch, step]

    Returns:
    Scalar Tensor, mean NCE loss over the batch
    """
    last_step = config.num_steps - 1
    rnn_last_output = rnn_out[:, last_step, :]
    # One label per batch row, so the label column has batch_size entries.
    # (Reshaping to [num_steps, 1] only works when num_steps == batch_size.)
    y_last = tf.reshape(y[:, last_step], [config.batch_size, 1])

    # NOTE(review): stddev is softmax_init / hidden_size**2, which is very
    # small (2.5e-6 for softmax_init=0.1, hidden_size=200) -- confirm intended.
    nce_w = tf.Variable(tf.random_normal([config.vocab_size,
                                          config.hidden_size],
                                         stddev=config.softmax_init/config.hidden_size**2))
    nce_b = tf.Variable(tf.zeros(config.vocab_size))

    batch_loss = tf.reduce_mean(
        tf.nn.nce_loss(nce_w, nce_b, inputs=rnn_last_output, labels=y_last,
                       num_sampled=config.neg_samples,
                       num_classes=config.vocab_size,
                       num_true=1))

    return batch_loss

In [ ]:
def train(config, batch_loss):
    """Return an op that takes one gradient-descent step on batch_loss.

    Args:
    config: object supplying learn_rate
    batch_loss: scalar Tensor to minimize

    Returns:
    The op returned by GradientDescentOptimizer.minimize()
    """
    return tf.train.GradientDescentOptimizer(
        config.learn_rate).minimize(batch_loss)

In [ ]:
# Build the graph, train until the input queue is exhausted (one epoch) and
# report the mean batch loss plus the wall-clock time. The timing includes
# graph construction and variable initialization.
start = time.time()
with tf.Graph().as_default():
    with tf.variable_scope('top', dtype=tf.float32):
        x, y, epoch_size = producer(config, word_array)
        rnn_out = model(config, x)
        batch_loss = loss(config, rnn_out, y)
        train_op = train(config, batch_loss)
        # Local variables are initialized as well: an input producer created
        # with num_epochs keeps its epoch counter in a local variable.
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        with tf.Session() as sess:
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            # Counters are set up before the first step so that every batch
            # is included in the average and the except clause below never
            # sees undefined names.
            tot_loss, batches = 0, 0
            try:
                while True:
                    _, step_loss = sess.run([train_op, batch_loss])
                    tot_loss += step_loss
                    batches += 1
            except tf.errors.OutOfRangeError:
                # Raised when the queue is closed and empty: epoch finished.
                if batches:  # avoid dividing by zero if no batch ever ran
                    print(tot_loss/batches)
                print('Done training -- epoch limit reached')
            finally:
                coord.request_stop()
            coord.join(threads)
end = time.time()
print('elapsed time = {}'.format(end - start))