Time series prediction using RNNs + Estimators

This notebook illustrates how to:

  1. Create a Recurrent Neural Network in TensorFlow
  2. Create a Custom Estimator in tf.contrib.learn


In [1]:
#!/usr/bin/env python

# original code from: https://github.com/GoogleCloudPlatform/training-data-analyst/tree/master/blogs/timeseries
# modified by: Marianne Linhares, monteirom@google.com, May 2017

# tensorflow
import tensorflow as tf
import tensorflow.contrib.learn as tflearn
import tensorflow.contrib.layers as tflayers
from tensorflow.contrib.learn.python.learn import learn_runner
import tensorflow.contrib.metrics as metrics
import tensorflow.contrib.rnn as rnn

# visualization
import seaborn as sns
import matplotlib.pyplot as plt

# helpers
import numpy as np
import csv

# enable tensorflow logs

Generating time-series data

The data is essentially a set of sinusoids with random amplitudes and frequencies.

Each series will consist of 10 (SEQ_LEN) numbers.

In [2]:
TRAIN = 10000  # number of series written to train.csv
VALID = 50  # number of series written to valid.csv
TEST = 5  # number of series written to test.csv

SEQ_LEN = 10  # number of samples in each generated series

def create_time_series():
    """Generate one sinusoid with a random frequency and amplitude.

    Returns:
        A numpy array of SEQ_LEN samples: amplitude * sin(frequency * t)
        for t = 0 .. SEQ_LEN - 1.
    """
    frequency = 0.1 + 0.5 * np.random.random()  # 0.1 to 0.6
    amplitude = 0.5 + np.random.random()  # 0.5 to 1.5
    time_steps = np.arange(SEQ_LEN)
    return amplitude * np.sin(time_steps * frequency)

def to_csv(filename, N):
    """Write N freshly generated time series to `filename`.

    Each series becomes one line of comma-separated values.
    """
    with open(filename, 'w') as out_file:
        for _ in range(N):
            values = create_time_series()
            out_file.write(",".join(str(v) for v in values) + '\n')
# Creating datasets: one CSV file per split, sized by the constants above
for csv_name, n_series in (('train.csv', TRAIN),
                           ('valid.csv', VALID),
                           ('test.csv', TEST)):
    to_csv(csv_name, n_series)

# Example: overlay five freshly generated series on one plot
for i in range(5):
    sns.tsplot(create_time_series())

Read datasets

In [3]:
# One float default per CSV column (SEQ_LEN columns per line); passed to
# tf.decode_csv as record_defaults.
DEFAULTS = [[0.0] for _ in range(SEQ_LEN)]
TIMESERIES_COL = 'rawdata'  # key of the input tensor in the features dict
N_OUTPUTS = 2  # in each sequence, values 1-8 are features and 9-10 are the label
# Number of leading values of each sequence used as features; the remaining
# N_OUTPUTS values are the label.
N_INPUTS = SEQ_LEN - N_OUTPUTS
# Maximum number of CSV lines read per batch by read_dataset.
# NOTE(review): N_INPUTS and BATCH_SIZE are referenced by later cells but were
# not defined anywhere in the notebook; 20 is a chosen batch size, tune as needed.
BATCH_SIZE = 20

# -------- read data and convert to needed format -----------
def read_dataset(filename, mode=tf.estimator.ModeKeys.TRAIN):
  """Build an input function that reads batches of series from CSV.

  Args:
    filename: path to one file, or a file pattern.
    mode: a tf.estimator.ModeKeys value; TRAIN reads the data for 100 epochs,
      any other mode reads it once.

  Returns:
    A zero-argument function returning ({TIMESERIES_COL: inputs}, label).
  """
  def _input_fn():
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    epochs = 100 if is_training else 1

    # Queue of the file names matching `filename`, shuffled.
    matched_files = tf.train.match_filenames_once(filename)
    file_queue = tf.train.string_input_producer(
        matched_files, num_epochs=epochs, shuffle=True)

    # Read up to BATCH_SIZE text lines at a time.
    line_reader = tf.TextLineReader()
    _, lines = line_reader.read_up_to(file_queue, num_records=BATCH_SIZE)

    # decode_csv yields a list of tensors, one per CSV column.
    columns = tf.decode_csv(tf.expand_dims(lines, -1), record_defaults=DEFAULTS)
    n_features = len(columns) - N_OUTPUTS
    feature_cols, label_cols = columns[:n_features], columns[n_features:]

    # Join the per-column tensors along axis 1: first few values are the
    # inputs, last few values are the label.
    features = {TIMESERIES_COL: tf.concat(feature_cols, axis=1)}
    label = tf.concat(label_cols, axis=1)
    return features, label   # dict of features, label
  return _input_fn

def get_train():
  """Return the input function over train.csv, built in TRAIN mode."""
  train_mode = tf.estimator.ModeKeys.TRAIN
  return read_dataset('train.csv', mode=train_mode)

def get_valid():
  """Return the input function over valid.csv, built in EVAL mode."""
  eval_mode = tf.estimator.ModeKeys.EVAL
  return read_dataset('valid.csv', mode=eval_mode)

def get_test():
  """Return the input function over test.csv, built in EVAL mode."""
  eval_mode = tf.estimator.ModeKeys.EVAL
  return read_dataset('test.csv', mode=eval_mode)

RNN Model

In [4]:
LSTM_SIZE = 3  # number of hidden units in the LSTM cell (the size passed to BasicLSTMCell)

def simple_rnn(features, targets, mode, params):
  # Model function handed to the Estimator: runs an LSTM over the input
  # sequence and maps its last output to N_OUTPUTS values with a linear layer.
  #   features: dict holding the input tensor under TIMESERIES_COL
  #   targets:  tensor compared against the predictions in the loss and metric
  #   mode, params: not referenced in the lines visible in this cell
  # NOTE(review): the eval_metric_ops dict, the optimize_loss call and the
  # ModelFnOps call below are left open in this cell as shown -- confirm the
  # cell was not truncated.

  # 0. Reformat input shape to become a sequence:
  #    split along axis 1 into a list of N_INPUTS tensors, one per time step
  x = tf.split(features[TIMESERIES_COL], N_INPUTS, 1)
  # 1. configure the RNN
  lstm_cell = rnn.BasicLSTMCell(LSTM_SIZE, forget_bias=1.0)
  outputs, _ = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)

  # keep only the RNN output of the last time step
  outputs = outputs[-1]
  #print 'last outputs={}'.format(outputs)
  # predictions are a linear projection (no activation) of that last output
  weight = tf.Variable(tf.random_normal([LSTM_SIZE, N_OUTPUTS]))
  bias = tf.Variable(tf.random_normal([N_OUTPUTS]))
  predictions = tf.matmul(outputs, weight) + bias
  # 2. Define the loss function for training/evaluation
  #print 'targets={}'.format(targets)
  #print 'preds={}'.format(predictions)
  loss = tf.losses.mean_squared_error(targets, predictions)
  eval_metric_ops = {
      "rmse": tf.metrics.root_mean_squared_error(targets, predictions)
  # 3. Define the training operation/optimizer
  train_op = tf.contrib.layers.optimize_loss(

  # 4. Create predictions
  predictions_dict = {"predicted": predictions}
  # 5. return ModelFnOps
  return tflearn.ModelFnOps(

def serving_input_fn():
    # Builds the inputs used when serving the model.
    # NOTE(review): both dict literals and the InputFnOps call below are left
    # open in this cell as shown -- confirm the cell was not truncated.

    # One float32 placeholder per feature, shaped [None, N_INPUTS].
    feature_placeholders = {
        TIMESERIES_COL: tf.placeholder(tf.float32, [None, N_INPUTS])
    # Same keys, with a trailing axis of size 1 added to every tensor.
    features = {
      key: tf.expand_dims(tensor, -1)
      for key, tensor in feature_placeholders.items()

    return tflearn.utils.input_fn_utils.InputFnOps(

Running model

In [5]:
# Custom Estimator driven by the simple_rnn model function
nn = tf.contrib.learn.Estimator(model_fn=simple_rnn)

# ---------- Training -------------
print('---------- Training ------------')
nn.fit(input_fn=get_train(), steps=10000)

# ---------- Evaluating -------------
print('---------- Evaluating ------------')
ev = nn.evaluate(input_fn=get_valid())

# ---------- Testing ----------------
print('---------- Testing ------------')
predictions = []
# NOTE(review): the body of this loop is not present in the cell as shown;
# presumably it appends each prediction to `predictions` -- confirm.
for p in nn.predict(input_fn=get_test()):

Visualizing predictions

In [6]:
# read test csv
def read_csv(filename):
    """Load a CSV file of numbers as a list of float rows.

    Args:
        filename: path of the CSV file to read.

    Returns:
        A list with one list of floats per non-blank line of the file.

    Raises:
        ValueError: if a field cannot be converted to float.
    """
    with open(filename, 'rt') as csvfile:
        # csv.reader yields [] for a blank line; skip those so every returned
        # row is an actual series (an empty row would break the slicing and
        # plotting done on the result).
        return [[float(x) for x in row] for row in csv.reader(csvfile) if row]

# load the test series back from disk
test_data = read_csv('test.csv')

# update predictions with features:
# preds[i] = first N_INPUTS values of test_data[i] followed by predictions[i]
preds = []
for i, predicted_tail in enumerate(predictions):
    known_head = list(test_data[i][:N_INPUTS])
    preds.append(known_head + list(predicted_tail))

# visualizing predictions (only the values after the first N_INPUTS)
for d in test_data:
    sns.tsplot(d[N_INPUTS:], color="black")
for p in preds:
    sns.tsplot(p[N_INPUTS:], color="red")
# visualizing all the series
for d in test_data:
    sns.tsplot(d, color="black")
for p in preds:
    sns.tsplot(p, color="red")