This model tries to predict the next value in a short sequence based on historical data. It can be used, for example, to forecast demand from a couple of weeks of sales data.
In [ ]:
import numpy as np
import utils_datagen
import utils_display
from matplotlib import pyplot as plt
import tensorflow as tf
# Show the installed TensorFlow version; the code below relies on TF 1.x APIs (tf.layers, tf.Session).
print("Tensorflow version: " + tf.__version__)
In [ ]:
# Number of samples generated per waveform.
DATA_SEQ_LEN = 128 * 1024
# Generate one time series per available waveform and join them end to end.
data = np.concatenate(
    [
        utils_datagen.create_time_series(waveform, DATA_SEQ_LEN)
        for waveform in utils_datagen.Waveforms
    ]
)
utils_display.picture_this_1(data, DATA_SEQ_LEN)
In [ ]:
# Hyperparameters shared by the model, dataset and training cells below.
NB_EPOCHS = 5 # number of times the data is repeated during training
RNN_CELLSIZE = 32 # size of the RNN cells
SEQLEN = 16 # unrolled sequence length
BATCHSIZE = 32 # mini-batch size
In [ ]:
# Visual check of the data as it will be cut into batches of BATCHSIZE sequences of length SEQLEN.
utils_display.picture_this_2(data, BATCHSIZE, SEQLEN) # execute multiple times to see different sample sequences
In [ ]:
# three simplistic predictive models: can you beat them ?
def simplistic_models(X):
    """Build three naive baseline predictions for a batch of sequences.

    X is indexed as [batch, time]. Returns a tuple (Yrnd, Ysal, Ytfl),
    each holding one prediction per sequence in the batch:
      * Yrnd: a uniform random guess drawn between -2.0 and 2.0,
      * Ysal: "same as last", the final value of the sequence repeated,
      * Ytfl: "trend from last two", a linear extrapolation of the last two values.
    """
    batch_size = tf.shape(X)[0]  # dynamic, so any batch size works
    random_guess = tf.random_uniform([batch_size], -2.0, 2.0)
    same_as_last = X[:, -1]
    trend_from_last_two = X[:, -1] + (X[:, -1] - X[:, -2])
    return random_guess, same_as_last, trend_from_last_two
In [ ]:
# linear model (RMSE: 0.36, with shuffling: 0.17)
def linear_model(X):
    """Linear regression: one dense unit applied to the whole input sequence.

    X has shape [BATCHSIZE, SEQLEN]; the result has shape [BATCHSIZE, 1].
    """
    return tf.layers.dense(inputs=X, units=1)
In [ ]:
# 2-layer dense model (RMSE: 0.15-0.18, if training data is not shuffled: 0.38)
def DNN_model(X):
    """Exercise placeholder for a dense network.

    As shipped, the "hidden" part only scales X by one trainable scalar, and a
    final dense layer maps the result to a single predicted value per sequence.

    X has shape [BATCHSIZE, SEQLEN]; the result has shape [BATCHSIZE, 1].
    """
    # --- dummy model: please implement a real one ---
    # to test it, do not forget to use this function (DNN_model) when instantiating the model
    dummy_scale = tf.Variable(tf.ones([]), name="dummy1")
    hidden = X * dummy_scale  # shape [BATCHSIZE, SEQLEN]
    # --- end of dummy model ---
    # regression layer with no activation: predicts vectors of 1 element
    return tf.layers.dense(hidden, 1, activation=None)
In [ ]:
# convolutional (RMSE: 0.31, with shuffling: 0.16)
def CNN_model(X):
    """1-D convolutional regressor over a batch of sequences.

    X has shape [BATCHSIZE, SEQLEN]; the result has shape [BATCHSIZE, 1].
    The stack is: three ReLU convolutions, max-pool, one ReLU convolution,
    max-pool, then a "valid" convolution spanning the whole remaining
    sequence, which acts as the final linear regression.
    """

    def relu_conv(inputs, filters, kernel_size):
        # "same" padding keeps the sequence length unchanged
        return tf.layers.conv1d(inputs, filters=filters, kernel_size=kernel_size,
                                activation=tf.nn.relu, padding="same")

    net = tf.expand_dims(X, axis=2)  # [BATCHSIZE, SEQLEN, 1]: conv layers need a channel axis
    net = relu_conv(net, filters=8, kernel_size=4)   # [BATCHSIZE, SEQLEN, 8]
    net = relu_conv(net, filters=16, kernel_size=3)  # [BATCHSIZE, SEQLEN, 16]
    net = relu_conv(net, filters=8, kernel_size=1)   # [BATCHSIZE, SEQLEN, 8]
    net = tf.layers.max_pooling1d(net, pool_size=2, strides=2)  # [BATCHSIZE, SEQLEN//2, 8]
    net = relu_conv(net, filters=8, kernel_size=3)   # [BATCHSIZE, SEQLEN//2, 8]
    net = tf.layers.max_pooling1d(net, pool_size=2, strides=2)  # [BATCHSIZE, SEQLEN//4, 8]
    # mis-using a conv layer as linear regression :-)
    # the kernel covers all SEQLEN//4 remaining steps, so exactly one output position is produced
    prediction = tf.layers.conv1d(net, filters=1, kernel_size=SEQLEN//4,
                                  activation=None, padding="valid")  # [BATCHSIZE, 1, 1]
    return tf.squeeze(prediction, axis=-1)  # [BATCHSIZE, 1]
In [ ]:
# RNN model (RMSE: 0.38, with shuffling 0.14, the same with loss on last 8)
def RNN_model(X, n=1):
    """Exercise placeholder for a recurrent regressor.

    X has shape [BATCHSIZE, SEQLEN]. A value is predicted at every time step
    and the last n of them are returned, so the result has shape [BATCHSIZE, n].
    As shipped, the recurrent part is a dummy that only broadcasts each input
    value to RNN_CELLSIZE channels.
    """
    X = tf.expand_dims(X, axis=2)  # [BATCHSIZE, SEQLEN, 1] is necessary for RNN model
    batchsize = tf.shape(X)[0]  # dynamic, to allow for variable batch size

    # --- dummy model: please implement a real RNN model ---
    # to test it, do not forget to use this function (RNN_model) when instantiating the model
    states = X * tf.ones([RNN_CELLSIZE], name="dummy2")  # [BATCHSIZE, SEQLEN, RNN_CELLSIZE]
    # TODO: create a tf.nn.rnn_cell.GRUCell
    # TODO: unroll the cell using tf.nn.dynamic_rnn(..., dtype=tf.float32)
    # --- end of dummy model ---

    # This is the regression layer. It is already implemented.
    # Batch and time axes are merged so that one dense layer is shared by all time steps.
    flat_states = tf.reshape(states, [batchsize*SEQLEN, RNN_CELLSIZE])
    flat_preds = tf.layers.dense(flat_states, 1)  # [BATCHSIZE*SEQLEN, 1] predicting vectors of 1 element
    step_preds = tf.reshape(flat_preds, [batchsize, SEQLEN, 1])  # [BATCHSIZE, SEQLEN, 1]

    # In this RNN model, you can compute the loss on the last predicted item or the last n predicted items
    # Last n with n=SEQLEN//2 is slightly better. This is a hyperparameter you can adjust in the RNN_model_N
    # function below.
    last_preds = step_preds[:, -n:SEQLEN, :]  # last item(s) in sequence: [BATCHSIZE, n, 1]
    return tf.squeeze(last_preds, axis=-1)  # drop the trailing size-1 axis: [BATCHSIZE, n]
In [ ]:
def RNN_model_N(X):
    """RNN_model variant that returns predictions for the last SEQLEN//2 time steps."""
    return RNN_model(X, n=SEQLEN//2)
In [ ]:
def model_fn(features, labels, model):
    """Wire a model function to its loss, optimizer and evaluation metrics.

    Args:
        features: input sequences, shape [BATCHSIZE, SEQLEN].
        labels: target sequences, indexed as [batch, time].
        model: callable taking the features and returning predictions of shape
            [BATCHSIZE, p]; p is 1 for most models here and n for RNN_model(X, n).

    Returns:
        A tuple (Yout, loss, eval_metrics, train_op):
        Yout is the last prediction of each sequence, loss is the mean squared
        error on the last p labels, eval_metrics is a dict of RMSE tensors (the
        model plus three naive baselines), and train_op is one Adam step on loss.
    """
    X = features  # shape [BATCHSIZE, SEQLEN]
    Y = model(X)
    last_label = labels[:, -1]  # last item in sequence: the target value to predict
    # last p items in sequence (as many as in Y), useful for RNN_model(X, n>1)
    last_labels = labels[:, -tf.shape(Y)[1]:SEQLEN]
    # tf.losses.mean_squared_error expects (labels, predictions): pass them by keyword
    # so the roles cannot be swapped.
    loss = tf.losses.mean_squared_error(labels=last_labels, predictions=Y)  # loss computed on last label(s)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    train_op = optimizer.minimize(loss)
    Yrnd, Ysal, Ytfl = simplistic_models(X)
    eval_metrics = {
        "RMSE": tf.sqrt(loss),
        # compare against three simplistic predictive models: can you beat them ?
        "RMSErnd": tf.sqrt(tf.losses.mean_squared_error(labels=last_label, predictions=Yrnd)),
        "RMSEsal": tf.sqrt(tf.losses.mean_squared_error(labels=last_label, predictions=Ysal)),
        "RMSEtfl": tf.sqrt(tf.losses.mean_squared_error(labels=last_label, predictions=Ytfl)),
    }
    Yout = Y[:, -1]  # keep only the final prediction of each sequence
    return Yout, loss, eval_metrics, train_op
In [ ]:
# training to predict the same sequence shifted by one (next value)
# NOTE: np.roll wraps around, so the very last label is the first value of the data.
labeldata = np.roll(data, shift=-1)
# slice data into sequences: one row per run of SEQLEN consecutive values
traindata = data.reshape(-1, SEQLEN)
labeldata = labeldata.reshape(-1, SEQLEN)
# also make an evaluation dataset by randomly subsampling our fake data
# NOTE(review): the factor 4 presumably stands for the number of waveforms in the data - confirm.
EVAL_SEQUENCES = DATA_SEQ_LEN*4//SEQLEN//4
# Pair each feature row with its label row so both are sampled together.
joined_data = np.stack((traindata, labeldata), axis=1)  # shape [N_sequences, 2 (features/labels), SEQLEN]
joined_evaldata = joined_data[
    np.random.choice(len(joined_data), size=EVAL_SEQUENCES, replace=False)  # distinct rows only
]
evaldata = joined_evaldata[:, 0]
evallabels = joined_evaldata[:, 1]
def datasets(nb_epochs):
    """Build the training and evaluation input pipelines behind one shared iterator.

    Args:
        nb_epochs: number of times the training data is repeated before the
            training dataset raises tf.errors.OutOfRangeError.

    Returns:
        A tuple (features, labels, dataset_init_op, evaldataset_init_op).
        features and labels are the tensors yielding the next batch; running one
        of the two init ops selects whether they read from the training dataset
        (shuffled, batches of BATCHSIZE) or from the evaluation dataset (a single
        batch of EVAL_SEQUENCES sequences).
    """
    # Dataset API for batching, shuffling, repeating
    dataset = tf.data.Dataset.from_tensor_slices((traindata, labeldata))
    # Honor the caller's epoch count (previously the global NB_EPOCHS was used and
    # the nb_epochs argument was silently ignored).
    dataset = dataset.repeat(nb_epochs)
    # NOTE: repeating before shuffling lets sequences from different epochs mix in the buffer.
    dataset = dataset.shuffle(DATA_SEQ_LEN*4//SEQLEN)  # important ! Number of sequences in shuffle buffer: all of them
    dataset = dataset.batch(BATCHSIZE)

    # Dataset API for batching
    evaldataset = tf.data.Dataset.from_tensor_slices((evaldata, evallabels))
    evaldataset = evaldataset.repeat()
    evaldataset = evaldataset.batch(EVAL_SEQUENCES)  # just one batch with everything

    # Some boilerplate code...

    # this creates a Tensorflow iterator of the correct type and shape
    # compatible with both our training and eval datasets
    tf_iter = tf.data.Iterator.from_structure(dataset.output_types, dataset.output_shapes)
    # it can be initialized to iterate through the training dataset
    dataset_init_op = tf_iter.make_initializer(dataset)
    # or it can be initialized to iterate through the eval dataset
    evaldataset_init_op = tf_iter.make_initializer(evaldataset)

    # Returns the tensorflow nodes needed by our model_fn.
    features, labels = tf_iter.get_next()
    # When these nodes will be executed (sess.run) in the training or eval loop,
    # they will output the next batch of data.

    # Note: when you do not need to swap the dataset (like here between train/eval) just use
    # features, labels = dataset.make_one_shot_iterator().get_next()
    # TODO: easier with tf.estimator.inputs.numpy_input_fn ???

    return features, labels, dataset_init_op, evaldataset_init_op
In [ ]:
# Build the whole graph (input pipeline + model) from a clean slate, so this cell can be re-run safely.
tf.reset_default_graph() # restart model graph from scratch
# instantiate the dataset
features, labels, dataset_init_op, evaldataset_init_op = datasets(NB_EPOCHS)
# instantiate the model
# (swap linear_model for DNN_model, CNN_model, RNN_model or RNN_model_N to try another architecture)
Yout, loss, eval_metrics, train_op = model_fn(features, labels, linear_model)
In [ ]:
# variable initialization
# Open a session on the graph built above and give every variable its initial value.
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
In [ ]:
# Training loop: run batches until the training dataset is exhausted,
# recording the loss every 10 batches for the plot below.
count = 0
losses = []
indices = []

sess.run(dataset_init_op)  # point the shared iterator at the training dataset
while True:
    try:
        loss_, _ = sess.run([loss, train_op])
    except tf.errors.OutOfRangeError:
        # raised once all repeated epochs have been consumed
        break

    # print progress
    if count % 300 == 0:
        epoch = count // (DATA_SEQ_LEN*4//BATCHSIZE//SEQLEN)  # batches seen / batches per epoch
        print("epoch %s, batch %s, loss=%s" % (epoch, count, loss_))

    if count % 10 == 0:
        losses.append(np.mean(loss_))
        indices.append(count)

    count += 1

# final evaluation
sess.run(evaldataset_init_op)  # switch the iterator to the evaluation dataset
eval_metrics_, Yout_ = sess.run([eval_metrics, Yout])
print("Final accuracy on eval dataset:")
print(eval_metrics_)
In [ ]:
# Plot the recorded training loss against the batch index.
# NOTE: np.amax raises on an empty slice, so this needs at least two recorded loss values.
plt.ylim(ymax=np.amax(losses[1:])) # ignore first value(s) for scaling
plt.plot(indices, losses)
plt.show()
In [ ]:
# Visual comparison of the predictions from the final evaluation run with the evaluation data and labels.
# execute multiple times to see different sample sequences
utils_display.picture_this_3(Yout_, evaldata, evallabels, SEQLEN)
Copyright 2018 Google LLC
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.