An RNN for short-term predictions

This model will try to predict the next value in a short sequence based on historical data. It can be used, for example, to forecast demand from a couple of weeks of sales data.

This is the solution notebook. The corresponding work notebook is here:


In [1]:
# using Tensorflow 2
%tensorflow_version 2.x
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
print("Tensorflow version: " + tf.__version__)


TensorFlow 2.x selected.
Tensorflow version: 2.0.0

In [0]:
#@title Display utilities [RUN ME]

from enum import IntEnum
import numpy as np

class Waveforms(IntEnum):
    SINE1 = 0
    SINE2 = 1
    SINE3 = 2
    SINE4 = 3

def create_time_series(waveform, datalen):
    # Generates a sequence of length datalen
    # There are four available waveforms in the Waveforms enum
    # frequency pairs defining the four waveforms
    frequencies = [(0.2, 0.15), (0.35, 0.3), (0.6, 0.55), (0.4, 0.25)]
    freq1, freq2 = frequencies[waveform]
    noise = [np.random.random()*0.2 for i in range(datalen)]
    x1 = np.sin(np.arange(0,datalen) * freq1)  + noise
    x2 = np.sin(np.arange(0,datalen) * freq2)  + noise
    x = x1 + x2
    return x.astype(np.float32)

from matplotlib import transforms as plttrans

plt.rcParams['figure.figsize']=(16.8,6.0)
plt.rcParams['axes.grid']=True
plt.rcParams['axes.linewidth']=0
plt.rcParams['grid.color']='#DDDDDD'
plt.rcParams['axes.facecolor']='white'
plt.rcParams['xtick.major.size']=0
plt.rcParams['ytick.major.size']=0

def picture_this_1(data, datalen):
    plt.subplot(211)
    plt.plot(data[datalen-512:datalen+512])
    plt.axvspan(0, 512, color='black', alpha=0.06)
    plt.axvspan(512, 1024, color='grey', alpha=0.04)
    plt.subplot(212)
    plt.plot(data[3*datalen-512:3*datalen+512])
    plt.axvspan(0, 512, color='grey', alpha=0.04)
    plt.axvspan(512, 1024, color='black', alpha=0.06)
    plt.show()
    
def picture_this_2(data, batchsize, seqlen):
    samples = np.reshape(data, [-1, batchsize, seqlen])
    rndsample = samples[np.random.choice(samples.shape[0], 8, replace=False)]
    print("Tensor shape of a batch of training sequences: " + str(rndsample[0].shape))
    print("Random excerpt:")
    subplot = 241
    for i in range(8):
        plt.subplot(subplot)
        plt.plot(rndsample[i, 0]) # first sequence in random batch
        subplot += 1
    plt.show()
    
def picture_this_3(predictions, evaldata, evallabels, seqlen):
    subplot = 241
    for i in range(8):
        plt.subplot(subplot)
        #k = int(np.random.rand() * evaldata.shape[0])
        l0, = plt.plot(evaldata[i, 1:], label="data")
        plt.plot([seqlen-2, seqlen-1], evallabels[i, -2:], ":")
        l1, = plt.plot([seqlen-1], [predictions[i]], "o", label='Predicted')
        l2, = plt.plot([seqlen-1], [evallabels[i][-1]], "o", label='Ground Truth')
        if i==0:
            plt.legend(handles=[l0, l1, l2])
        subplot += 1
    plt.show()
    
def histogram_helper(data, title, last_label=None):
  labels = ['RND', 'LAST', 'LAST2', 'LINEAR', 'DNN', 'CNN', 'RNN', 'RNN_N']
  colors = ['#4285f4', '#34a853', '#fbbc05', '#ea4334',
            '#4285f4', '#34a853', '#fbbc05', '#ea4334',
            '#4285f4', '#34a853', '#fbbc05', '#ea4334']
  fig = plt.figure(figsize=(7,4))
  plt.xticks(rotation='40')
  ymax = data[1]*1.3
  plt.ylim(0, ymax)
  plt.title(title, pad="20")
  # remove data points where data is None
  filtered = filter(lambda tup: tup[1] is not None, zip(labels, data, colors))
  # split back into lists
  labels, data, colors = map(list, zip(*filtered))
  # replace the last label if appropriate
  if last_label is not None:
    labels[-1] = last_label
  # histogram plot
  plt.bar(labels, data, color=colors)
  # add values on histogram bars
  for i, (_, v, color) in enumerate(zip(labels, data, colors)):
      plt.gca().text(i-0.3, min(v, ymax)+0.02, "{0:.4f}".format(v), color=color, fontweight="bold")
  plt.show()

def picture_this_hist_yours(data):
  histogram_helper(data, 'RMSE: your model vs. other approaches',
                   last_label='Yours')

def picture_this_hist_all(data):
  histogram_helper(data, 'RMSE: final comparison')

Generate fake dataset


In [3]:
DATA_SEQ_LEN = 1024*128
data = np.concatenate([create_time_series(waveform, DATA_SEQ_LEN) for waveform in Waveforms]) # 4 different waveforms
picture_this_1(data, DATA_SEQ_LEN)
DATA_LEN = DATA_SEQ_LEN * 4 # since we concatenated 4 sequences


Hyperparameters


In [0]:
RNN_CELLSIZE = 32   # size of the RNN cells
SEQLEN = 16         # unrolled sequence length
BATCHSIZE = 32      # mini-batch size
LAST_N = SEQLEN//2  # loss computed on the last N elements of the sequence in the advanced RNN model

Visualize training sequences

This is what the neural network will see during training.


In [5]:
picture_this_2(data, BATCHSIZE, SEQLEN) # execute multiple times to see different sample sequences


Tensor shape of a batch of training sequences: (32, 16)
Random excerpt:

Prepare datasets


In [0]:
# training to predict the same sequence shifted by one (next value)
labeldata = np.roll(data, -1)

# cut data into sequences
traindata = np.reshape(data, [-1, SEQLEN])
labeldata = np.reshape(labeldata, [-1, SEQLEN])

# make an evaluation dataset by cutting the sequences differently
evaldata = np.roll(data, -SEQLEN//2)
evallabels = np.roll(evaldata, -1)
evaldata = np.reshape(evaldata, [-1, SEQLEN])
evallabels = np.reshape(evallabels, [-1, SEQLEN])

def get_training_dataset(last_n=1):
  dataset = tf.data.Dataset.from_tensor_slices(
      (
          traindata, # features
          labeldata[:,-last_n:SEQLEN] # targets: the last element or last n elements in the shifted sequence
      )
  )
  # Dataset API used here to put the dataset into shape
  dataset = dataset.repeat()
  dataset = dataset.shuffle(DATA_LEN//SEQLEN) # shuffling is important! (number of sequences in the shuffle buffer: all of them)
  dataset = dataset.batch(BATCHSIZE, drop_remainder = True)
  return dataset

def get_evaluation_dataset(last_n=1):
  dataset = tf.data.Dataset.from_tensor_slices(
      (
          evaldata, # features       
          evallabels[:,-last_n:SEQLEN] # targets: the last element or last n elements in the shifted sequence
      )
  )
  # Dataset API used here to put the dataset into shape
  dataset = dataset.batch(evaldata.shape[0], drop_remainder = True) # just one batch with everything
  return dataset
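
To see concretely what the shift-by-one labeling above does, here is a tiny illustration on a toy array (a sketch, separate from the variables used in the pipeline):

# Toy illustration of next-value labels: the labels are the inputs shifted by one step.
toy = np.array([10., 11., 12., 13., 14., 15.])
toy_labels = np.roll(toy, -1)
print(toy)         # [10. 11. 12. 13. 14. 15.]
print(toy_labels)  # [11. 12. 13. 14. 15. 10.]  (np.roll wraps the first value to the end;
                   #  with DATA_LEN values this affects only the very last label)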

Peek at the data


In [9]:
train_ds = get_training_dataset()
for features, labels in train_ds.take(10):
  print("input_shape:", features.numpy().shape, ", shape of labels:", labels.numpy().shape)


input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)

Benchmark model definitions

We will compare the RNNs against these models. For the time being, you can regard them as black boxes.


In [10]:
# this is how to create a Keras model from neural network layers
def compile_keras_sequential_model(list_of_layers, model_name):
  
    # a tf.keras.Sequential model is a sequence of layers
    model = tf.keras.Sequential(list_of_layers, name=model_name)    
    
    # to finalize the model, specify the loss, the optimizer and metrics
    model.compile(
       loss = 'mean_squared_error',
       optimizer = 'rmsprop',
       metrics = ['RootMeanSquaredError'])
    
    # this prints a description of the model
    model.summary()
    
    return model
  
#
# three very simplistic "models" that require no training. Can you beat them?
#

# SIMPLISTIC BENCHMARK MODEL 1
predict_same_as_last_value = lambda x: x[:,-1] # shape of x is [BATCHSIZE,SEQLEN]
# SIMPLISTIC BENCHMARK MODEL 2
predict_trend_from_last_two_values = lambda x: x[:,-1] + (x[:,-1] - x[:,-2])
# SIMPLISTIC BENCHMARK MODEL 3
predict_random_value = lambda x: tf.random.uniform(tf.shape(x)[0:1], -2.0, 2.0)
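# For intuition, on a hypothetical input row ending in [..., 1.0, 1.2]:
#   predict_same_as_last_value         -> 1.2
#   predict_trend_from_last_two_values -> 1.2 + (1.2 - 1.0) = 1.4  (linear extrapolation)
#   predict_random_value               -> a uniform random value in [-2, 2)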

def model_layers_from_lambda(lambda_fn, input_shape, output_shape):
  return [tf.keras.layers.Lambda(lambda_fn, input_shape=input_shape),
          tf.keras.layers.Reshape(output_shape)]

model_layers_RAND  = model_layers_from_lambda(predict_random_value,               input_shape=[SEQLEN,], output_shape=[1,])
model_layers_LAST  = model_layers_from_lambda(predict_same_as_last_value,         input_shape=[SEQLEN,], output_shape=[1,])
model_layers_LAST2 = model_layers_from_lambda(predict_trend_from_last_two_values, input_shape=[SEQLEN,], output_shape=[1,])

#
# three neural network models for comparison, in increasing order of complexity
#

# BENCHMARK MODEL 4: linear model (RMSE: 0.215 after 10 epochs)
model_layers_LINEAR = [tf.keras.layers.Dense(1, input_shape=[SEQLEN,])] # output shape [BATCHSIZE, 1]

# BENCHMARK MODEL 5: 2-layer dense model (RMSE: 0.197 after 10 epochs)
model_layers_DNN = [tf.keras.layers.Dense(SEQLEN//2, activation='relu', input_shape=[SEQLEN,]), # input  shape [BATCHSIZE, SEQLEN]
                    tf.keras.layers.Dense(1)] # output shape [BATCHSIZE, 1]

# BENCHMARK MODEL 6: convolutional (RMSE: 0.186 after 10 epochs)
model_layers_CNN = [
    tf.keras.layers.Reshape([SEQLEN, 1], input_shape=[SEQLEN,]), # [BATCHSIZE, SEQLEN, 1] is necessary for conv model
    tf.keras.layers.Conv1D(filters=8, kernel_size=4, activation='relu', padding="same"), # [BATCHSIZE, SEQLEN, 8]
    tf.keras.layers.Conv1D(filters=16, kernel_size=3, activation='relu', padding="same"), # [BATCHSIZE, SEQLEN, 16]
    tf.keras.layers.Conv1D(filters=8, kernel_size=1, activation='relu', padding="same"), # [BATCHSIZE, SEQLEN, 8]
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2),  # [BATCHSIZE, SEQLEN//2, 8]
    tf.keras.layers.Conv1D(filters=8, kernel_size=3, activation='relu', padding="same"),  # [BATCHSIZE, SEQLEN//2, 8]
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2),  # [BATCHSIZE, SEQLEN//4, 8]
    # mis-using a conv layer as linear regression :-)
    tf.keras.layers.Conv1D(filters=1, kernel_size=SEQLEN//4, activation=None, padding="valid"), # output shape [BATCHSIZE, 1, 1]
    tf.keras.layers.Reshape([1,]) ] # output shape [BATCHSIZE, 1]

# instantiate the benchmark models and train those that need training
steps_per_epoch = DATA_LEN // SEQLEN // BATCHSIZE
NB_BENCHMARK_EPOCHS = 10
model_RAND   = compile_keras_sequential_model(model_layers_RAND, "RAND") # Simplistic model without parameters. It needs no training.
model_LAST   = compile_keras_sequential_model(model_layers_LAST, "LAST") # Simplistic model without parameters. It needs no training.
model_LAST2  = compile_keras_sequential_model(model_layers_LAST2, "LAST2") # Simplistic model without parameters. It needs no training.
model_LINEAR = compile_keras_sequential_model(model_layers_LINEAR, "LINEAR")
model_LINEAR.fit(get_training_dataset(), steps_per_epoch=steps_per_epoch, epochs=NB_BENCHMARK_EPOCHS)
model_DNN = compile_keras_sequential_model(model_layers_DNN, "DNN")
model_DNN.fit(get_training_dataset(), steps_per_epoch=steps_per_epoch, epochs=NB_BENCHMARK_EPOCHS)
model_CNN = compile_keras_sequential_model(model_layers_CNN, "CNN")
model_CNN.fit(get_training_dataset(), steps_per_epoch=steps_per_epoch, epochs=NB_BENCHMARK_EPOCHS)

# evaluate the benchmark models
benchmark_models = [model_RAND, model_LAST, model_LAST2, model_LINEAR, model_DNN, model_CNN]
benchmark_rmses = []
for model in benchmark_models:
  _, rmse = model.evaluate(get_evaluation_dataset(), steps=1)
  benchmark_rmses.append(rmse)


Model: "RAND"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lambda (Lambda)              (None,)                   0         
_________________________________________________________________
reshape (Reshape)            (None, 1)                 0         
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________
Model: "LAST"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lambda_1 (Lambda)            (None,)                   0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 1)                 0         
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________
Model: "LAST2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lambda_2 (Lambda)            (None,)                   0         
_________________________________________________________________
reshape_2 (Reshape)          (None, 1)                 0         
=================================================================
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________
Model: "LINEAR"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 1)                 17        
=================================================================
Total params: 17
Trainable params: 17
Non-trainable params: 0
_________________________________________________________________
Train for 1024 steps
Epoch 1/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.6609 - RootMeanSquaredError: 0.8129
Epoch 2/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0770 - RootMeanSquaredError: 0.2775
Epoch 3/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0586 - RootMeanSquaredError: 0.2421
Epoch 4/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0519 - RootMeanSquaredError: 0.2278
Epoch 5/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0492 - RootMeanSquaredError: 0.2219
Epoch 6/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0481 - RootMeanSquaredError: 0.2194
Epoch 7/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0474 - RootMeanSquaredError: 0.2177
Epoch 8/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0475 - RootMeanSquaredError: 0.2180
Epoch 9/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0466 - RootMeanSquaredError: 0.2159
Epoch 10/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0473 - RootMeanSquaredError: 0.2174
Model: "DNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_1 (Dense)              (None, 8)                 136       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 9         
=================================================================
Total params: 145
Trainable params: 145
Non-trainable params: 0
_________________________________________________________________
Train for 1024 steps
Epoch 1/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.1960 - RootMeanSquaredError: 0.4427
Epoch 2/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0547 - RootMeanSquaredError: 0.2340
Epoch 3/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0478 - RootMeanSquaredError: 0.2186
Epoch 4/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0450 - RootMeanSquaredError: 0.2122
Epoch 5/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0435 - RootMeanSquaredError: 0.2086
Epoch 6/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0432 - RootMeanSquaredError: 0.2078
Epoch 7/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0416 - RootMeanSquaredError: 0.2039
Epoch 8/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0409 - RootMeanSquaredError: 0.2022
Epoch 9/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0397 - RootMeanSquaredError: 0.1994
Epoch 10/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0394 - RootMeanSquaredError: 0.1984
Model: "CNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
reshape_3 (Reshape)          (None, 16, 1)             0         
_________________________________________________________________
conv1d (Conv1D)              (None, 16, 8)             40        
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 16, 16)            400       
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 16, 8)             136       
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 8, 8)              0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 8, 8)              200       
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 4, 8)              0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 1, 1)              33        
_________________________________________________________________
reshape_4 (Reshape)          (None, 1)                 0         
=================================================================
Total params: 809
Trainable params: 809
Non-trainable params: 0
_________________________________________________________________
Train for 1024 steps
Epoch 1/10
1024/1024 [==============================] - 4s 4ms/step - loss: 0.1119 - RootMeanSquaredError: 0.3346
Epoch 2/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0449 - RootMeanSquaredError: 0.2118
Epoch 3/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0414 - RootMeanSquaredError: 0.2034
Epoch 4/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0394 - RootMeanSquaredError: 0.1984
Epoch 5/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0389 - RootMeanSquaredError: 0.1973
Epoch 6/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0375 - RootMeanSquaredError: 0.1937
Epoch 7/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0368 - RootMeanSquaredError: 0.1917
Epoch 8/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0365 - RootMeanSquaredError: 0.1911
Epoch 9/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0359 - RootMeanSquaredError: 0.1896
Epoch 10/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0355 - RootMeanSquaredError: 0.1884
1/1 [==============================] - 0s 119ms/step - loss: 2.3955 - RootMeanSquaredError: 1.5477
1/1 [==============================] - 0s 271ms/step - loss: 0.1690 - RootMeanSquaredError: 0.4111
1/1 [==============================] - 0s 123ms/step - loss: 0.1124 - RootMeanSquaredError: 0.3352
1/1 [==============================] - 0s 111ms/step - loss: 0.0462 - RootMeanSquaredError: 0.2150
1/1 [==============================] - 0s 113ms/step - loss: 0.0378 - RootMeanSquaredError: 0.1944
1/1 [==============================] - 0s 341ms/step - loss: 0.0339 - RootMeanSquaredError: 0.1841

RNN models

Tensor shapes used in the RNN models:
X (inputs):       [BATCHSIZE, SEQLEN, 1]
Y (targets):      [BATCHSIZE, SEQLEN, 1]
H (hidden state): [BATCHSIZE, RNN_CELLSIZE*N_LAYERS]
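
A quick way to check these shapes is to push a dummy batch through GRU layers. This is a minimal sketch reusing the hyperparameters defined above (not one of the benchmarked models):

x = tf.zeros([BATCHSIZE, SEQLEN, 1])                              # X: (32, 16, 1)
h_seq = tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True)(x)
print(h_seq.shape)                                                # (32, 16, 32): one output per time step
h_last = tf.keras.layers.GRU(RNN_CELLSIZE)(h_seq)
print(h_last.shape)                                               # (32, 32): only the last time step is kept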

In [11]:
# RNN model (RMSE: 0.164 after 10 epochs)
model_RNN = tf.keras.Sequential([
    tf.keras.layers.Reshape([SEQLEN, 1], input_shape=[SEQLEN,]), # [BATCHSIZE, SEQLEN, 1] is necessary for RNN model
    tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True),  # output shape [BATCHSIZE, SEQLEN, RNN_CELLSIZE]
    tf.keras.layers.GRU(RNN_CELLSIZE), # keep only last output in sequence: output shape [BATCHSIZE, RNN_CELLSIZE]
    tf.keras.layers.Dense(1) # output shape [BATCHSIZE, 1]
])

model_RNN.compile(
    loss = 'mean_squared_error',
    optimizer = 'rmsprop',
    metrics = ['RootMeanSquaredError'])

model_RNN.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
reshape_5 (Reshape)          (None, 16, 1)             0         
_________________________________________________________________
gru (GRU)                    (None, 16, 32)            3360      
_________________________________________________________________
gru_1 (GRU)                  (None, 32)                6336      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 33        
=================================================================
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0
_________________________________________________________________

In [13]:
# RNN model with loss computed on last N elements (RMSE: 0.163 after 10 epochs)
model_RNN_N = tf.keras.Sequential([
    tf.keras.layers.Reshape([SEQLEN, 1], input_shape=[SEQLEN,]), # [BATCHSIZE, SEQLEN, 1] is necessary for RNN model
    tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True),
    tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True), # output shape [BATCHSIZE, SEQLEN, RNN_CELLSIZE]
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)),              # output shape [BATCHSIZE, SEQLEN, 1]
    tf.keras.layers.Lambda(lambda x: x[:,-LAST_N:SEQLEN,0]) # last N item(s) in sequence: output shape [BATCHSIZE, LAST_N]
])

model_RNN_N.compile(
    loss = 'mean_squared_error',
    optimizer = 'rmsprop',
    metrics = ['RootMeanSquaredError'])

model_RNN_N.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
reshape_7 (Reshape)          (None, 16, 1)             0         
_________________________________________________________________
gru_4 (GRU)                  (None, 16, 32)            3360      
_________________________________________________________________
gru_5 (GRU)                  (None, 16, 32)            6336      
_________________________________________________________________
time_distributed (TimeDistri (None, 16, 1)             33        
_________________________________________________________________
lambda_3 (Lambda)            (None, 8)                 0         
=================================================================
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0
_________________________________________________________________

Training loop


In [14]:
# You can re-execute this cell to continue training

steps_per_epoch = DATA_LEN // SEQLEN // BATCHSIZE
NB_EPOCHS = 10      # use NB_EPOCHS=1 when coding your models
                    # use NB_EPOCHS=10 when training for real (benchmark models were trained for 10 epochs)

model = model_RNN_N # train your model: model_RNN or model_RNN_N
train_ds = get_training_dataset(last_n=LAST_N) # use last_n=LAST_N for model_RNN_N

history = model.fit(train_ds, steps_per_epoch=steps_per_epoch, epochs=NB_EPOCHS)


Train for 1024 steps
Epoch 1/10
1024/1024 [==============================] - 17s 16ms/step - loss: 0.0704 - RootMeanSquaredError: 0.2653
Epoch 2/10
1024/1024 [==============================] - 14s 13ms/step - loss: 0.0386 - RootMeanSquaredError: 0.1966
Epoch 3/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0367 - RootMeanSquaredError: 0.1915
Epoch 4/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0345 - RootMeanSquaredError: 0.1858
Epoch 5/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0329 - RootMeanSquaredError: 0.1815
Epoch 6/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0315 - RootMeanSquaredError: 0.1773
Epoch 7/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0303 - RootMeanSquaredError: 0.1739
Epoch 8/10
1024/1024 [==============================] - 14s 13ms/step - loss: 0.0287 - RootMeanSquaredError: 0.1695
Epoch 9/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0276 - RootMeanSquaredError: 0.1662
Epoch 10/10
1024/1024 [==============================] - 14s 14ms/step - loss: 0.0265 - RootMeanSquaredError: 0.1627

In [15]:
plt.plot(history.history['loss'])
plt.show()


Evaluation


In [16]:
# Here "evaluating" using the training dataset
eval_ds = get_evaluation_dataset(last_n=LAST_N)  # use last_n=LAST_N for model_RNN_N
loss, your_rmse = model.evaluate(eval_ds, steps=1)

# NOTE: benchmark models were trained for 10 epochs

picture_this_hist_yours(benchmark_rmses + [your_rmse])


1/1 [==============================] - 2s 2s/step - loss: 0.0287 - RootMeanSquaredError: 0.1696
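
The reported RootMeanSquaredError can also be recomputed by hand from the model's predictions. A minimal sketch, assuming model is still the trained model_RNN_N (whose targets are the last LAST_N values of each shifted sequence):

preds = model.predict(evaldata)                 # shape [num_sequences, LAST_N]
targets = evallabels[:, -LAST_N:]               # matching targets
manual_rmse = np.sqrt(np.mean((preds - targets) ** 2))
print("manual RMSE:", manual_rmse)              # should be close to the RMSE from model.evaluate above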

Predictions


In [17]:
# execute multiple times to see different sample sequences
subset = np.random.choice(DATA_LEN//SEQLEN, 8) # pick 8 eval sequences at random

predictions = model.predict(evaldata[subset], steps=1) # prediction directly from numpy array
picture_this_3(predictions[:,-1], evaldata[subset], evallabels[subset], SEQLEN) # [:,-1] keeps only the prediction for the last time step


Benchmark

Benchmark all the algorithms.


In [20]:
your_RNN_layers = [
    tf.keras.layers.Reshape([SEQLEN, 1], input_shape=[SEQLEN,]), # [BATCHSIZE, SEQLEN, 1] is necessary for RNN model
    tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True),  # output shape [BATCHSIZE, SEQLEN, RNN_CELLSIZE]
    tf.keras.layers.GRU(RNN_CELLSIZE), # keep only last output in sequence: output shape [BATCHSIZE, RNN_CELLSIZE]
    tf.keras.layers.Dense(1) # output shape [BATCHSIZE, 1]
]
assert len(your_RNN_layers)>0, "the model has no layers"
your_RNN_model = compile_keras_sequential_model(your_RNN_layers, 'RNN')

your_RNN_N_layers = [
    tf.keras.layers.Reshape([SEQLEN, 1], input_shape=[SEQLEN,]), # [BATCHSIZE, SEQLEN, 1] is necessary for RNN model
    tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True),
    tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True), # output shape [BATCHSIZE, SEQLEN, RNN_CELLSIZE]
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)), # output shape [BATCHSIZE, SEQLEN, 1]
    tf.keras.layers.Lambda(lambda x: x[:,-LAST_N:SEQLEN,0]) # last N item(s) in sequence: output shape [BATCHSIZE, LAST_N]
]
assert len(your_RNN_N_layers)>0, "the model has no layers"
your_RNN_N_model = compile_keras_sequential_model(your_RNN_N_layers, 'RNN_N')

# train your models from scratch
your_RNN_model.fit(get_training_dataset(), steps_per_epoch=steps_per_epoch, epochs=NB_BENCHMARK_EPOCHS)
your_RNN_N_model.fit(get_training_dataset(last_n=LAST_N), steps_per_epoch=steps_per_epoch, epochs=NB_BENCHMARK_EPOCHS)

# evaluate all models
rmses = []
benchmark_models = [model_RAND, model_LAST, model_LAST2, model_LINEAR, model_DNN, model_CNN]
for model in benchmark_models:
  _, rmse = model.evaluate(get_evaluation_dataset(), steps=1)
  rmses.append(rmse)
_, rmse = your_RNN_model.evaluate(get_evaluation_dataset(), steps=1)
rmses.append(rmse)
_, rmse = your_RNN_N_model.evaluate(get_evaluation_dataset(last_n=LAST_N), steps=1)
rmses.append(rmse)


Model: "RNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
reshape_10 (Reshape)         (None, 16, 1)             0         
_________________________________________________________________
gru_10 (GRU)                 (None, 16, 32)            3360      
_________________________________________________________________
gru_11 (GRU)                 (None, 32)                6336      
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 33        
=================================================================
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0
_________________________________________________________________
Model: "RNN_N"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
reshape_11 (Reshape)         (None, 16, 1)             0         
_________________________________________________________________
gru_12 (GRU)                 (None, 16, 32)            3360      
_________________________________________________________________
gru_13 (GRU)                 (None, 16, 32)            6336      
_________________________________________________________________
time_distributed_2 (TimeDist (None, 16, 1)             33        
_________________________________________________________________
lambda_5 (Lambda)            (None, 8)                 0         
=================================================================
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0
_________________________________________________________________
Train for 1024 steps
Epoch 1/10
1024/1024 [==============================] - 17s 16ms/step - loss: 0.0746 - RootMeanSquaredError: 0.2731
Epoch 2/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0384 - RootMeanSquaredError: 0.1959
Epoch 3/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0352 - RootMeanSquaredError: 0.1876
Epoch 4/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0336 - RootMeanSquaredError: 0.1833
Epoch 5/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0329 - RootMeanSquaredError: 0.1813
Epoch 6/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0316 - RootMeanSquaredError: 0.1778
Epoch 7/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0304 - RootMeanSquaredError: 0.1743
Epoch 8/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0294 - RootMeanSquaredError: 0.1713
Epoch 9/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0285 - RootMeanSquaredError: 0.1689
Epoch 10/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0282 - RootMeanSquaredError: 0.1679
Train for 1024 steps
Epoch 1/10
1024/1024 [==============================] - 17s 17ms/step - loss: 0.0664 - RootMeanSquaredError: 0.2578
Epoch 2/10
1024/1024 [==============================] - 14s 13ms/step - loss: 0.0387 - RootMeanSquaredError: 0.1968
Epoch 3/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0366 - RootMeanSquaredError: 0.1912
Epoch 4/10
1024/1024 [==============================] - 14s 14ms/step - loss: 0.0342 - RootMeanSquaredError: 0.1850
Epoch 5/10
1024/1024 [==============================] - 14s 14ms/step - loss: 0.0322 - RootMeanSquaredError: 0.1796
Epoch 6/10
1024/1024 [==============================] - 14s 13ms/step - loss: 0.0305 - RootMeanSquaredError: 0.1747
Epoch 7/10
1024/1024 [==============================] - 14s 13ms/step - loss: 0.0292 - RootMeanSquaredError: 0.1710
Epoch 8/10
1024/1024 [==============================] - 14s 14ms/step - loss: 0.0282 - RootMeanSquaredError: 0.1679
Epoch 9/10
1024/1024 [==============================] - 14s 14ms/step - loss: 0.0273 - RootMeanSquaredError: 0.1651
Epoch 10/10
1024/1024 [==============================] - 14s 14ms/step - loss: 0.0267 - RootMeanSquaredError: 0.1633
1/1 [==============================] - 0s 68ms/step - loss: 2.3761 - RootMeanSquaredError: 1.5415
1/1 [==============================] - 0s 71ms/step - loss: 0.1690 - RootMeanSquaredError: 0.4111
1/1 [==============================] - 0s 64ms/step - loss: 0.1124 - RootMeanSquaredError: 0.3352
1/1 [==============================] - 0s 63ms/step - loss: 0.0462 - RootMeanSquaredError: 0.2150
1/1 [==============================] - 0s 65ms/step - loss: 0.0378 - RootMeanSquaredError: 0.1944
1/1 [==============================] - 0s 240ms/step - loss: 0.0339 - RootMeanSquaredError: 0.1841
1/1 [==============================] - 2s 2s/step - loss: 0.0289 - RootMeanSquaredError: 0.1701
1/1 [==============================] - 2s 2s/step - loss: 0.0263 - RootMeanSquaredError: 0.1621

In [21]:
picture_this_hist_all(rmses)


Copyright 2019 Google LLC

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.