Shakespeare generation using RNNs + Estimators

This notebook illustrates how to:

  1. Create a Recurrent Neural Network in TensorFlow
  2. Create a Custom Estimator in tf.contrib.learn

Dependencies


In [2]:
#!/usr/bin/env python

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# original code from: https://github.com/GoogleCloudPlatform/training-data-analyst/tree/master/blogs/timeseries
# modified by: Marianne Linhares, monteirom@google.com, May 2017

# tensorflow
import tensorflow as tf
import tensorflow.contrib.learn as tflearn
import tensorflow.contrib.layers as tflayers
from tensorflow.contrib.learn.python.learn import learn_runner
import tensorflow.contrib.metrics as metrics
import tensorflow.contrib.rnn as rnn

# helpers
import numpy as np
import csv

# enable tensorflow logs
tf.logging.set_verbosity(tf.logging.INFO)

Getting the data

In this notebook we'll use Shakespeare data, but you can use basically any text file you like!
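
The file path below assumes a local shakespeare.txt. The data length printed by the next cell (1,115,394 characters) matches the widely used tinyshakespeare corpus, so one way to fetch a copy is the sketch below (the URL is an assumption, not part of the original notebook):

# hypothetical download helper -- not part of the original notebook
import urllib.request  # Python 3; use urllib2 on Python 2

SHAKESPEARE_URL = ('https://raw.githubusercontent.com/karpathy/'
                   'char-rnn/master/data/tinyshakespeare/input.txt')
urllib.request.urlretrieve(SHAKESPEARE_URL, 'shakespeare.txt')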


In [9]:
# load file
file_path = 'shakespeare.txt'
with open(file_path, 'r') as f:
    data = f.read()
    print("Data length:", len(data))

# make all letters lower case:
# this makes the problem simpler,
# but we lose information
data = data.lower()
print(data[:20])


Data length: 1115394
first citizen:
befor

Preprocess data


In [15]:
# vocabulary
# (note: iteration order of a set is arbitrary, so the
#  char <-> index mapping can change between runs)
vocab = list(set(data))
vocab_size = len(vocab)
print(vocab)

# embedding characters as one-hot vectors
def text_to_onehot(data_, vocab):
    data = np.zeros((len(data_), len(vocab)))
    for cnt, s in enumerate(data_):
        data[cnt, vocab.index(s)] = 1.0
    return data

def onehot_to_text(array, vocab):
    # np.argmax also works on numpy rows, unlike list.index(1)
    return vocab[int(np.argmax(array))]

# (data_embed is not consumed by the CSV pipeline below)
data_embed = text_to_onehot(data, vocab)


['\n', '!', ' ', '$', "'", '&', '-', ',', '.', '3', ';', ':', '?', 'a', 'c', 'b', 'e', 'd', 'g', 'f', 'i', 'h', 'k', 'j', 'm', 'l', 'o', 'n', 'q', 'p', 's', 'r', 'u', 't', 'w', 'v', 'y', 'x', 'z']
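
A quick sanity check of the two helpers above (a minimal usage sketch; the example character is arbitrary):

# round trip: char -> one-hot -> char
row = text_to_onehot('a', vocab)[0]       # one-hot row for 'a'
assert onehot_to_text(row, vocab) == 'a'
print(row.shape)                          # (vocab_size,)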

Read datasets


In [102]:
BATCH_SIZE = 20
TIMESERIES_COL = 'data'

# how many characters should be used by the LSTM as input
N_INPUTS = 10

# how many characters should the LSTM predict
N_OUTPUTS = 1

# DEFAULTS was missing from the original cell; this definition is an
# assumption based on how tf.decode_csv uses it below: one float column
# per input and per output value. (the logged run further down appears
# to have been made with N_INPUTS = 8 and N_OUTPUTS = 2)
DEFAULTS = [[0.0] for _ in range(N_INPUTS + N_OUTPUTS)]

# -------- read data and convert to needed format -----------
def read_dataset(filename, mode=tf.estimator.ModeKeys.TRAIN):
  def _input_fn():
    num_epochs = 100 if mode == tf.estimator.ModeKeys.TRAIN else 1

    # could be a path to one file or a file pattern.
    input_file_names = tf.train.match_filenames_once(filename)
    filename_queue = tf.train.string_input_producer(
        input_file_names, num_epochs=num_epochs, shuffle=True)

    reader = tf.TextLineReader()
    _, value = reader.read_up_to(filename_queue, num_records=BATCH_SIZE)

    value_column = tf.expand_dims(value, -1)
    print('readcsv={}'.format(value_column))

    # all_data is a list of tensors, one per CSV column
    all_data = tf.decode_csv(value_column, record_defaults=DEFAULTS)
    inputs = all_data[:len(all_data) - N_OUTPUTS]  # first N_INPUTS values
    label = all_data[len(all_data) - N_OUTPUTS:]   # last N_OUTPUTS values

    # from a list of tensors to a single tensor with one more dimension
    inputs = tf.concat(inputs, axis=1)
    label = tf.concat(label, axis=1)
    print(inputs)
    print('inputs={}'.format(inputs))

    return {TIMESERIES_COL: inputs}, label   # dict of features, label
  return _input_fn
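
The pipeline above expects CSV files with N_INPUTS + N_OUTPUTS numeric columns per row, but the notebook never shows how train.csv, valid.csv, and test.csv are produced. One plausible sketch, entirely an assumption, encodes each character as its vocabulary index scaled to [0, 1] and writes sliding windows:

# hypothetical data-prep helper -- not part of the original notebook
def write_windows_csv(filename, text, vocab,
                      n_inputs=N_INPUTS, n_outputs=N_OUTPUTS):
    # each row: n_inputs input values followed by n_outputs targets
    window = n_inputs + n_outputs
    scale = float(len(vocab) - 1)
    with open(filename, 'w') as f:
        writer = csv.writer(f)
        for i in range(0, len(text) - window, window):
            chunk = text[i:i + window]
            writer.writerow([vocab.index(c) / scale for c in chunk])

write_windows_csv('train.csv', data[:100000], vocab)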

RNN Model


In [103]:
LSTM_SIZE = 3  # number of hidden units in the LSTM cell

def simple_rnn(features, targets, mode, params):
  # 0. Reformat input shape to become a sequence
  x = tf.split(features[TIMESERIES_COL], N_INPUTS, 1)
    
  # 1. configure the RNN
  lstm_cell = rnn.BasicLSTMCell(LSTM_SIZE, forget_bias=1.0)
  outputs, _ = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

  # slice to keep only the last cell of the RNN
  outputs = outputs[-1]
  # print('last outputs={}'.format(outputs))
  
  # output is result of linear activation of last layer of RNN
  weight = tf.Variable(tf.random_normal([LSTM_SIZE, N_OUTPUTS]))
  bias = tf.Variable(tf.random_normal([N_OUTPUTS]))
  predictions = tf.matmul(outputs, weight) + bias
    
  # 2. Define the loss function for training/evaluation
  # print('targets={}'.format(targets))
  # print('preds={}'.format(predictions))
  loss = tf.losses.mean_squared_error(targets, predictions)
  eval_metric_ops = {
      "rmse": tf.metrics.root_mean_squared_error(targets, predictions)
  }
  
  # 3. Define the training operation/optimizer
  train_op = tf.contrib.layers.optimize_loss(
      loss=loss,
      global_step=tf.contrib.framework.get_global_step(),
      learning_rate=0.01,
      optimizer="SGD")

  # 4. Create predictions
  predictions_dict = {"predicted": predictions}
  
  # 5. return ModelFnOps
  return tflearn.ModelFnOps(
      mode=mode,
      predictions=predictions_dict,
      loss=loss,
      train_op=train_op,
      eval_metric_ops=eval_metric_ops)

def serving_input_fn():
    feature_placeholders = {
        TIMESERIES_COL: tf.placeholder(tf.float32, [None, N_INPUTS])
    }
  
    features = {
      key: tf.expand_dims(tensor, -1)
      for key, tensor in feature_placeholders.items()
    }

    return tflearn.utils.input_fn_utils.InputFnOps(
      features,
      None,
      feature_placeholders
    )
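
serving_input_fn only comes into play when exporting the trained model for serving; a minimal usage sketch with the contrib.learn API of this era (the export directory is an assumption, and nn is the estimator created in the next cell):

# hypothetical export call -- run after training the `nn` estimator below
nn.export_savedmodel('/tmp/shakespeare_export', serving_input_fn)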

Running model
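
The cell below calls get_train(), get_valid(), and get_test(), which are never defined in the notebook. They presumably wrap read_dataset with the appropriate file and mode; a minimal sketch (the filenames train.csv and valid.csv are assumptions; test.csv is also read in the visualization cell further down):

# assumed helpers -- the original notebook omits these definitions
def get_train():
    return read_dataset('train.csv', mode=tf.estimator.ModeKeys.TRAIN)

def get_valid():
    return read_dataset('valid.csv', mode=tf.estimator.ModeKeys.EVAL)

def get_test():
    return read_dataset('test.csv', mode=tf.estimator.ModeKeys.EVAL)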


In [104]:
nn = tf.contrib.learn.Estimator(model_fn=simple_rnn)

# ---------- Training -------------
print('---------- Training ------------')
nn.fit(input_fn=get_train(), steps=10000)

# ---------- Evaluating -------------
print('---------- Evaluating ------------')
ev = nn.evaluate(input_fn=get_valid())
print(ev)

# ---------- Testing ----------------
print('---------- Testing ------------')
predictions = []
for p in nn.predict(input_fn=get_test()):
    print(p)
    predictions.append(p["predicted"])


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_every_n_hours': 10000, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f93e2739a90>, '_master': '', '_save_checkpoints_secs': 600, '_is_chief': True, '_num_worker_replicas': 0, '_save_summary_steps': 100, '_evaluation_master': '', '_task_id': 0, '_environment': 'local', '_model_dir': None, '_tf_random_seed': None, '_save_checkpoints_steps': None, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_num_ps_replicas': 0, '_task_type': None, '_keep_checkpoint_max': 5}
WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmp8_mypdvr
WARNING:tensorflow:Estimator's model_fn (<function simple_rnn at 0x7f94200ab488>) includes params argument, but params are not passed to Estimator.
---------- Training ------------
readcsv=Tensor("ExpandDims:0", shape=(?, 1), dtype=string)
Tensor("concat:0", shape=(?, 8), dtype=float32)
inputs=Tensor("concat:0", shape=(?, 8), dtype=float32)
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmp8_mypdvr/model.ckpt.
INFO:tensorflow:loss = 2.5161, step = 1
INFO:tensorflow:global_step/sec: 318.828
INFO:tensorflow:loss = 0.549084, step = 101 (0.315 sec)
...
INFO:tensorflow:loss = 0.00777032, step = 9901 (0.192 sec)
INFO:tensorflow:Saving checkpoints for 10000 into /tmp/tmp8_mypdvr/model.ckpt.
INFO:tensorflow:Loss for final step: 0.00584779.
---------- Evaluating ------------
readcsv=Tensor("ExpandDims:0", shape=(?, 1), dtype=string)
Tensor("concat:0", shape=(?, 8), dtype=float32)
inputs=Tensor("concat:0", shape=(?, 8), dtype=float32)
INFO:tensorflow:Starting evaluation at 2017-05-25-23:04:07
INFO:tensorflow:Restoring parameters from /tmp/tmp8_mypdvr/model.ckpt-10000
INFO:tensorflow:Finished evaluation at 2017-05-25-23:04:07
INFO:tensorflow:Saving dict for global step 10000: global_step = 10000, loss = 0.00976282, rmse = 0.0996007
WARNING:tensorflow:Skipping summary for global_step, must be a float or np.float32.
{'rmse': 0.099600725, 'global_step': 10000, 'loss': 0.0097628236}
---------- Testing ------------
readcsv=Tensor("ExpandDims:0", shape=(?, 1), dtype=string)
Tensor("concat:0", shape=(?, 8), dtype=float32)
inputs=Tensor("concat:0", shape=(?, 8), dtype=float32)
INFO:tensorflow:Restoring parameters from /tmp/tmp8_mypdvr/model.ckpt-10000
{'predicted': array([-0.8021934 , -0.77287835], dtype=float32)}
{'predicted': array([ 0.95930284,  0.71554035], dtype=float32)}
{'predicted': array([ 0.48363847,  0.67111224], dtype=float32)}
{'predicted': array([ 0.75698453,  0.85404211], dtype=float32)}
{'predicted': array([ 1.21760607,  0.95413655], dtype=float32)}
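
Despite the title, the notebook stops at one-step-ahead prediction. Generating a longer sequence would mean feeding each prediction back into the input window. A rough sketch under the same contrib.learn API (make_window_fn and the zero seed are assumptions, and since the model is a regression trained with MSE, the outputs are continuous values, not decoded characters):

# hypothetical generation loop -- not part of the original notebook
def make_window_fn(window):
    # an input_fn that serves a single window as the features dict
    def _fn():
        return {TIMESERIES_COL: tf.constant([window], dtype=tf.float32)}
    return _fn

window = [0.0] * N_INPUTS   # arbitrary seed window
generated = []
for _ in range(20):
    # note: each predict() call rebuilds the graph; fine for a short demo
    p = next(nn.predict(input_fn=make_window_fn(window)))
    nxt = float(p['predicted'][0])
    generated.append(nxt)
    window = window[1:] + [nxt]   # slide the window forward one step
print(generated)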

Visualizing predictions


In [99]:
# plotting helpers (these imports were missing from the original cell)
import matplotlib.pyplot as plt
import seaborn as sns

# read test csv
def read_csv(filename):
    with open(filename, 'rt') as csvfile:
        reader = csv.reader(csvfile)
        data = []
        for row in reader:
            data.append([float(x) for x in row])
        return data

test_data = read_csv('test.csv')

# prepend the input features to each prediction,
# i.e. preds[i] = test_data[i][:N_INPUTS] + predictions[i]
preds = []
for i in range(len(predictions)):
    preds.append(list(test_data[i][:N_INPUTS]) + list(predictions[i]))

# visualizing only the predicted part of each series
for d in test_data: sns.tsplot(d[N_INPUTS:])
for p in preds: sns.tsplot(p[N_INPUTS:], color="red")
plt.show()

# visualizing the full series
for d in test_data: sns.tsplot(d)
for p in preds: sns.tsplot(p, color="red")
plt.show()


