Patrick Coady (pcoady@alum.mit.edu)
Quick look to see whether QueueRunner is substantially faster than using feed_dict.
Using QueueRunner was not substantially faster (< 1%). But I have not tried on a GPU.
Also, this is an old version of the model from early in the project. Run time not comparable to present model.
In [ ]:
import numpy as np
import tensorflow as tf
import time
from wordvector import WordVector
import docload
In [ ]:
# Load and integer-encode the three Sherlock Holmes novels.
files = ['../data/adventures_of_sherlock_holmes.txt',
         '../data/hound_of_the_baskervilles.txt',
         '../data/sign_of_the_four.txt']
(word_array, dictionary,
 num_lines, num_words) = docload.build_word_array(files,
                                                  vocab_size=50000,
                                                  gutenberg=True)
print('Document loaded and processed: {} lines, {} words.'
      .format(num_lines, num_words))
In [ ]:
class Config(object):
    """Bundle of model and training hyperparameters.

    Args:
        num_words: int, vocabulary size (number of unique words in corpus)
    """

    def __init__(self, num_words):
        # model / batching geometry
        self.vocab_size = num_words
        self.hidden_size = 200
        self.num_steps = 20       # RNN unroll length
        self.batch_size = 20
        # weight-initializer scales
        self.embed_init = 0.1
        self.softmax_init = 0.1
        self.rnn_init = 0.03
        # optimization
        self.learn_rate = 0.01
        self.neg_samples = 64     # negative samples for NCE loss
# Shared hyperparameter object; vocabulary size = number of unique words.
config = Config(len(dictionary))
debug_dict = dict()  # global place to put a tensor var or op for debugging
In [ ]:
def producer(config, word_array):
    """Return queued batches of training examples from integer-mapped text.

    Builds a QueueRunner-backed input pipeline: the first returned tensor is
    the input to the RNN; the second is the same sequence offset by 1 and is
    the training target. (Used TensorFlow RNN PTB tutorial as starting point
    for this producer.)

    Args:
        config: Config object; uses config.batch_size (batch size) and
            config.num_steps (unroll length)
        word_array: np.array (int), as generated by docload.build_word_array()

    Returns:
        x: int32 tensor, shape (batch_size, num_steps) — RNN input
        y: int32 tensor, shape (batch_size, num_steps) — x shifted by one word
        epoch_size: int tensor, nominal number of batches per epoch
    """
    word_array_tensor = tf.convert_to_tensor(word_array, name="raw_data", dtype=tf.int32)
    data_len = tf.size(word_array_tensor)
    batch_len = data_len // config.batch_size
    # Fold the flat word stream into batch_size parallel sub-sequences,
    # truncating the remainder that doesn't fill a full row.
    data = tf.reshape(word_array_tensor[0 : config.batch_size*batch_len],
                      [config.batch_size, batch_len])
    epoch_size = (batch_len-1) // config.num_steps
    # NOTE(review): the queue below yields batch_len - num_steps - 1 positions
    # per epoch (stride 1), which differs from epoch_size computed above
    # (stride num_steps) — confirm which count callers should rely on.
    i = tf.train.range_input_producer(batch_len - config.num_steps - 1,
                                      num_epochs=1, shuffle=False).dequeue()
    x = data[:, (i):(i+config.num_steps)]
    y = data[:, (i+1):(i+config.num_steps+1)]
    return x, y, epoch_size
In [ ]:
def model(config, x):
    """Embed word ids and run them through a single-layer vanilla RNN.

    Args:
        config: Config object (vocab_size, hidden_size, batch_size, init scales)
        x: int tensor of word ids, shape (batch_size, num_steps)

    Returns:
        rnn_out: RNN outputs, shape (batch_size, num_steps, hidden_size)
    """
    # Embedding layer: uniform init in [-embed_init, embed_init].
    embed_initializer = tf.random_uniform_initializer(-config.embed_init,
                                                      config.embed_init,
                                                      dtype=tf.float32)
    with tf.variable_scope('embed', initializer=embed_initializer):
        embed_w = tf.get_variable('w', [config.vocab_size, config.hidden_size])
        embed_out = tf.nn.embedding_lookup(embed_w, x)
    # Recurrent layer: variance-scaling init, tanh activation.
    rnn_initializer = tf.contrib.layers.variance_scaling_initializer(
        factor=config.rnn_init, dtype=tf.float32)
    with tf.variable_scope('rnn', initializer=rnn_initializer):
        rnn_cell = tf.contrib.rnn.BasicRNNCell(config.hidden_size, activation=tf.tanh)
        initial_state = rnn_cell.zero_state(config.batch_size, tf.float32)
        rnn_out, state = tf.nn.dynamic_rnn(rnn_cell, embed_out,
                                           initial_state=initial_state)
    return rnn_out
In [ ]:
def loss(config, rnn_out, y):
    """Loss function: noise-contrastive estimation (NCE) on the last output.

    Args:
        config: Config object
        rnn_out: RNN outputs, shape (batch_size, num_steps, hidden_size)
        y: int targets, shape (batch_size, num_steps)

    Returns:
        batch_loss: scalar tensor, mean NCE loss over the batch
    """
    # Only the final time step of each sequence is scored.
    rnn_last_output = rnn_out[:, config.num_steps-1, :]
    # BUG FIX: tf.nn.nce_loss expects labels of shape (batch_size, num_true).
    # The original reshaped to [num_steps, 1], which only worked by coincidence
    # because batch_size == num_steps == 20 in this configuration.
    y_last = tf.reshape(y[:, config.num_steps-1], [config.batch_size, 1])
    nce_w = tf.Variable(tf.random_normal([config.vocab_size,
                                          config.hidden_size],
                                         stddev=config.softmax_init/config.hidden_size**2))
    nce_b = tf.Variable(tf.zeros(config.vocab_size))
    batch_loss = tf.reduce_mean(
        tf.nn.nce_loss(nce_w, nce_b, inputs=rnn_last_output, labels=y_last,
                       num_sampled=config.neg_samples,
                       num_classes=config.vocab_size,
                       num_true=1))
    return batch_loss
In [ ]:
def train(config, batch_loss):
    """Build and return a plain SGD op that minimizes batch_loss."""
    sgd = tf.train.GradientDescentOptimizer(config.learn_rate)
    return sgd.minimize(batch_loss)
In [ ]:
# Build the graph, run one queue epoch of training, and report elapsed time.
start = time.time()
with tf.Graph().as_default():
    with tf.variable_scope('top', dtype=tf.float32):
        x, y, epoch_size = producer(config, word_array)
        rnn_out = model(config, x)
        batch_loss = loss(config, rnn_out, y)
        train_op = train(config, batch_loss)
        # local_variables_initializer is required for the epoch counter inside
        # range_input_producer.
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
    with tf.Session() as sess:
        sess.run(init_op)
        # Start the QueueRunner threads that feed the input pipeline.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            # NOTE(review): this extra run consumes one batch before counting
            # starts — presumably a warm-up whose loss is discarded; confirm.
            [_, l] = sess.run([train_op, batch_loss])
            tot_loss, batches, i = (0, 0, 1)  # NOTE(review): i is never used
            # Train until the queue is exhausted (num_epochs=1), which raises
            # OutOfRangeError.
            while True:
                [_, l] = sess.run([train_op, batch_loss])
                tot_loss += l
                batches += 1
        except tf.errors.OutOfRangeError:
            print(tot_loss/batches)
            print('Done training -- epoch limit reached')
        finally:
            # Always stop and join the queue threads, even on error.
            coord.request_stop()
            coord.join(threads)
end = time.time()
print('elapsed time = {}'.format(end - start))