An RNN for short-term predictions

This model will try to predict the next value in a short sequence based on historical data. This can be used, for example, to forecast demand based on a couple of weeks of sales data.

This is the solution notebook. The corresponding work notebook is here:

In [1]:
# using Tensorflow 2
%tensorflow_version 2.x
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
print("Tensorflow version: " + tf.__version__)

TensorFlow 2.x selected.
Tensorflow version: 2.0.0

In [0]:
#@title Display utilities [RUN ME]

from enum import IntEnum
import numpy as np

class Waveforms(IntEnum):
    """Identifiers of the synthetic waveforms.

    The integer value of each member is used by ``create_time_series`` as an
    index into its table of frequency pairs.
    """
    SINE1 = 0
    SINE2 = 1
    SINE3 = 2
    SINE4 = 3

def create_time_series(waveform, datalen):
    """Generate a noisy synthetic signal made of two summed sine waves.

    Args:
        waveform: index into the frequency table below. Any member of the
            Waveforms enum (values 0 to 3) is valid.
        datalen: number of samples to generate.

    Returns:
        A 1-D np.float32 array of length datalen.
    """
    # One (freq1, freq2) pair per waveform: four waveforms are available.
    frequencies = [(0.2, 0.15), (0.35, 0.3), (0.6, 0.55), (0.4, 0.25)]
    freq1, freq2 = frequencies[waveform]
    timesteps = np.arange(0, datalen)
    # Uniform noise in [0, 0.2), drawn in a single vectorized call.
    # The same noise vector is added to both sine components, so it is
    # counted twice in the final sum (kept as in the original design).
    noise = np.random.random(datalen) * 0.2
    x1 = np.sin(timesteps * freq1) + noise
    x2 = np.sin(timesteps * freq2) + noise
    x = x1 + x2
    return x.astype(np.float32)

from matplotlib import transforms as plttrans


def picture_this_1(data, datalen):
    """Plot the signal around two junctions between concatenated waveforms.

    Two stacked panels each show a 1024-sample window centred on a boundary
    between consecutive segments of length ``datalen``. The shaded spans mark
    the two halves of the window, i.e. the waveform before and after the
    junction.

    NOTE(review): the plotting calls were missing from this function; the
    choice of boundaries (1st/2nd and 3rd/4th segment) is a reconstruction.
    Assumes ``data`` holds at least four segments of ``datalen`` samples and
    that ``datalen >= 512`` -- confirm against the caller.

    Args:
        data: 1-D array of concatenated waveforms.
        datalen: length of each individual waveform segment.
    """
    # top panel: junction between the 1st and 2nd waveform
    plt.subplot(211)
    plt.plot(data[datalen-512:datalen+512])
    plt.axvspan(0, 512, color='black', alpha=0.06)
    plt.axvspan(512, 1024, color='grey', alpha=0.04)
    # bottom panel: junction between the 3rd and 4th waveform
    plt.subplot(212)
    plt.plot(data[3*datalen-512:3*datalen+512])
    plt.axvspan(0, 512, color='grey', alpha=0.04)
    plt.axvspan(512, 1024, color='black', alpha=0.06)
    plt.show()
def picture_this_2(data, batchsize, seqlen):
    """Display the first sequence of 8 randomly chosen training batches.

    Args:
        data: 1-D array whose length is a multiple of batchsize * seqlen
            (required by the reshape below).
        batchsize: number of sequences per batch.
        seqlen: number of samples per sequence.
    """
    samples = np.reshape(data, [-1, batchsize, seqlen])
    # 8 distinct batches; requires at least 8 batches in the data
    rndsample = samples[np.random.choice(samples.shape[0], 8, replace=False)]
    print("Tensor shape of a batch of training sequences: " + str(rndsample[0].shape))
    print("Random excerpt:")
    subplot = 241  # 2 rows x 4 columns grid, starting at cell 1
    for i in range(8):
        # select the grid cell; without this all curves overlap on one axes
        plt.subplot(subplot)
        plt.plot(rndsample[i, 0]) # first sequence in random batch
        subplot += 1
    plt.show()
def picture_this_3(predictions, evaldata, evallabels, seqlen):
    """Plot 8 evaluation sequences with their predicted and true next value.

    Args:
        predictions: one predicted value per sequence (indexable by 0..7).
        evaldata: 2-D array of input sequences, at least 8 rows.
        evallabels: 2-D array of target sequences aligned with evaldata.
        seqlen: length of each sequence; used to place points on the x axis.
    """
    subplot = 241  # 2 rows x 4 columns grid, starting at cell 1
    for i in range(8):
        # select the grid cell; without this all curves overlap on one axes
        plt.subplot(subplot)
        l0, = plt.plot(evaldata[i, 1:], label="data")
        # dotted segment linking the last known value to the ground truth
        plt.plot([seqlen-2, seqlen-1], evallabels[i, -2:], ":")
        l1, = plt.plot([seqlen-1], [predictions[i]], "o", label='Predicted')
        l2, = plt.plot([seqlen-1], [evallabels[i][-1]], "o", label='Ground Truth')
        if i==0:
            # legend on the first panel only
            plt.legend(handles=[l0, l1, l2])
        subplot += 1
    plt.show()
def histogram_helper(data, title, last_label=None):
  """Draw a bar chart of RMSE values, one bar per model.

  Args:
    data: list of RMSE values in the order of the labels below; entries that
      are None are skipped. Must have at least two entries and data[1] must
      be a number, because it sets the y-axis scale.
    title: chart title.
    last_label: optional replacement for the label of the last plotted bar.
  """
  labels = ['RND', 'LAST', 'LAST2', 'LINEAR', 'DNN', 'CNN', 'RNN', 'RNN_N']
  colors = ['#4285f4', '#34a853', '#fbbc05', '#ea4334',
            '#4285f4', '#34a853', '#fbbc05', '#ea4334',
            '#4285f4', '#34a853', '#fbbc05', '#ea4334']
  plt.figure(figsize=(7,4))
  # y axis is scaled from the second entry; taller bars are clipped
  ymax = data[1]*1.3
  plt.ylim(0, ymax)
  plt.title(title, pad=20)
  # remove data points where data is None (zip also truncates to the shortest list)
  filtered = [tup for tup in zip(labels, data, colors) if tup[1] is not None]
  # split back into lists
  labels, data, colors = map(list, zip(*filtered))
  # replace last label if appropriate
  if last_label is not None:
    labels[-1] = last_label
  # histogram plot
  plt.bar(labels, data, color=colors)
  # add values on histogram bars (capped at ymax so the text stays visible)
  for i, (v, color) in enumerate(zip(data, colors)):
    plt.gca().text(i-0.3, min(v, ymax)+0.02, "{0:.4f}".format(v), color=color, fontweight="bold")
  plt.show()

def picture_this_hist_yours(data):
  """Plot benchmark RMSEs followed by the user's model as the last bar.

  Args:
    data: list of RMSE values; the last entry is the user's model.
  """
  # NOTE(review): the closing argument of this call was missing; the label
  # text is a reconstruction -- adjust if a different wording is expected.
  histogram_helper(data, 'RMSE: your model vs. other approaches',
                   last_label='Your model')

def picture_this_hist_all(data):
  """Plot the RMSE of every model using the default bar labels.

  Args:
    data: list of RMSE values, forwarded unchanged to histogram_helper.
  """
  histogram_helper(data, title='RMSE: final comparison')

Generate fake dataset

In [3]:
DATA_SEQ_LEN = 1024*128  # number of samples generated per waveform
# one noisy series per member of Waveforms, concatenated end to end
data = np.concatenate([create_time_series(waveform, DATA_SEQ_LEN) for waveform in Waveforms])
picture_this_1(data, DATA_SEQ_LEN)
# total length: derived from the enum instead of a hard-coded 4 so it stays
# correct if waveforms are added or removed
DATA_LEN = DATA_SEQ_LEN * len(Waveforms)


In [0]:
RNN_CELLSIZE = 32   # number of units passed to each GRU layer
SEQLEN = 16         # length of the sequences the data is cut into (unrolled RNN length)
BATCHSIZE = 32      # mini-batch size
LAST_N = SEQLEN//2  # number of trailing sequence elements used as targets by the RNN_N model

Visualize training sequences

This is what the neural network will see during training.

In [5]:
# Shows the first sequence of 8 randomly picked batches.
picture_this_2(data, BATCHSIZE, SEQLEN) # execute multiple times to see different sample sequences

Tensor shape of a batch of training sequences: (32, 16)
Random excerpt:

Prepare datasets

In [0]:
# training to predict the same sequence shifted by one (next value)
# NOTE: np.roll wraps around, so the very last label is data[0].
labeldata = np.roll(data, -1)

# cut data into sequences of SEQLEN samples (one row per sequence)
traindata = np.reshape(data, [-1, SEQLEN])
labeldata = np.reshape(labeldata, [-1, SEQLEN])

# make an evaluation dataset by cutting the sequences differently
# NOTE: this is the same series re-cut with a half-sequence offset, not held-out data.
# NOTE: -SEQLEN//2 parses as (-SEQLEN)//2, which equals -(SEQLEN//2) only for even SEQLEN.
evaldata = np.roll(data, -SEQLEN//2)
evallabels = np.roll(evaldata, -1)
evaldata = np.reshape(evaldata, [-1, SEQLEN])
evallabels = np.reshape(evallabels, [-1, SEQLEN])

def get_training_dataset(last_n=1):
  """Build the repeating, shuffled, batched training dataset.

  Args:
    last_n: number of trailing elements of each label sequence to keep as
      targets (1 for next-value prediction only).

  Returns:
    A tf.data.Dataset yielding (features, targets) batches of shapes
    [BATCHSIZE, SEQLEN] and [BATCHSIZE, last_n]. It repeats indefinitely, so
    callers must bound training with steps_per_epoch.
  """
  dataset = tf.data.Dataset.from_tensor_slices((
          traindata, # features
          labeldata[:,-last_n:SEQLEN] # targets: the last element or last n elements in the shifted sequence
  ))
  # Dataset API used here to put the dataset into shape
  dataset = dataset.repeat()
  dataset = dataset.shuffle(DATA_LEN//SEQLEN) # shuffling is important ! (Number of sequences in shuffle buffer: all of them)
  dataset = dataset.batch(BATCHSIZE, drop_remainder = True)
  return dataset

def get_evaluation_dataset(last_n=1):
  """Build the evaluation dataset as one single batch holding every sequence.

  Args:
    last_n: number of trailing elements of each label sequence to keep as
      targets (1 for next-value prediction only).

  Returns:
    A tf.data.Dataset yielding one (features, targets) batch of shapes
    [num_sequences, SEQLEN] and [num_sequences, last_n].
  """
  dataset = tf.data.Dataset.from_tensor_slices((
          evaldata, # features
          evallabels[:,-last_n:SEQLEN] # targets: the last element or last n elements in the shifted sequence
  ))
  # Dataset API used here to put the dataset into shape
  dataset = dataset.batch(evaldata.shape[0], drop_remainder = True) # just one batch with everything
  return dataset

Peek at the data

In [9]:
# Sanity check: pull a few batches and print the feature and label shapes.
train_ds = get_training_dataset()
# take(10) is needed because the training dataset repeats indefinitely
for features, labels in train_ds.take(10):
  print("input_shape:", features.numpy().shape, ", shape of labels:", labels.numpy().shape)

input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)
input_shape: (32, 16) , shape of labels: (32, 1)

Benchmark model definitions

We will compare the RNNs against these models. For the time being you can regard them as black boxes.

In [10]:
# this is how to create a Keras model from neural network layers
def compile_keras_sequential_model(list_of_layers, model_name):
    """Create, compile and summarize a Keras Sequential model.

    Args:
        list_of_layers: list of Keras layers, in order.
        model_name: name given to the model (shown in the summary).

    Returns:
        The compiled tf.keras.Sequential model, using mean squared error
        loss, the rmsprop optimizer and the RootMeanSquaredError metric.
    """
    # a tf.keras.Sequential model is a sequence of layers
    model = tf.keras.Sequential(list_of_layers, name=model_name)
    # to finalize the model, specify the loss, the optimizer and metrics
    model.compile(
       loss = 'mean_squared_error',
       optimizer = 'rmsprop',
       metrics = ['RootMeanSquaredError'])
    # this prints a description of the model
    model.summary()
    return model
# Three very simplistic "models" that require no training. Can you beat them ?
# Each one receives x of shape [BATCHSIZE, SEQLEN] and returns one value per row.

def predict_same_as_last_value(x):
  """Predict that the next value equals the last value of each sequence."""
  return x[:,-1]

def predict_trend_from_last_two_values(x):
  """Extrapolate linearly from the last two values of each sequence."""
  last, previous = x[:,-1], x[:,-2]
  return last + (last - previous)

def predict_random_value(x):
  """Ignore the input values and draw one uniform value in [-2, 2) per row."""
  return tf.random.uniform(tf.shape(x)[0:1], -2.0, 2.0)

def model_layers_from_lambda(lambda_fn, input_shape, output_shape):
  """Wrap a plain function into a list of Keras layers.

  Args:
    lambda_fn: function applied to the input batch by a Lambda layer.
    input_shape: per-example input shape declared on the Lambda layer.
    output_shape: per-example shape the Lambda output is reshaped to.

  Returns:
    A two-layer list: the Lambda layer followed by a Reshape layer.
  """
  return [tf.keras.layers.Lambda(lambda_fn, input_shape=input_shape),
          tf.keras.layers.Reshape(output_shape)]

# Layer lists for the three training-free baselines: each maps a [SEQLEN] input to a [1] output.
model_layers_RAND  = model_layers_from_lambda(predict_random_value,               input_shape=[SEQLEN,], output_shape=[1,])
model_layers_LAST  = model_layers_from_lambda(predict_same_as_last_value,         input_shape=[SEQLEN,], output_shape=[1,])
model_layers_LAST2 = model_layers_from_lambda(predict_trend_from_last_two_values, input_shape=[SEQLEN,], output_shape=[1,])

# three neural network models for comparison, in increasing order of complexity

# BENCHMARK MODEL 4: linear model (RMSE: 0.215 after 10 epochs)
model_layers_LINEAR = [tf.keras.layers.Dense(1, input_shape=[SEQLEN,])] # output shape [BATCHSIZE, 1]

# BENCHMARK MODEL 5: 2-layer dense model (RMSE: 0.197 after 10 epochs)
model_layers_DNN = [tf.keras.layers.Dense(SEQLEN//2, activation='relu', input_shape=[SEQLEN,]), # input  shape [BATCHSIZE, SEQLEN]
                    tf.keras.layers.Dense(1)] # output shape [BATCHSIZE, 1]

# BENCHMARK MODEL 6: convolutional (RMSE: 0.186 after 10 epochs)
model_layers_CNN = [
    tf.keras.layers.Reshape([SEQLEN, 1], input_shape=[SEQLEN,]), # [BATCHSIZE, SEQLEN, 1] is necessary for conv model
    tf.keras.layers.Conv1D(filters=8, kernel_size=4, activation='relu', padding="same"), # [BATCHSIZE, SEQLEN, 8]
    tf.keras.layers.Conv1D(filters=16, kernel_size=3, activation='relu', padding="same"), # [BATCHSIZE, SEQLEN, 16]
    tf.keras.layers.Conv1D(filters=8, kernel_size=1, activation='relu', padding="same"), # [BATCHSIZE, SEQLEN, 8]
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2),  # [BATCHSIZE, SEQLEN//2, 8]
    tf.keras.layers.Conv1D(filters=8, kernel_size=3, activation='relu', padding="same"),  # [BATCHSIZE, SEQLEN//2, 8]
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2),  # [BATCHSIZE, SEQLEN//4, 8]
    # mis-using a conv layer as linear regression :-)
    # (kernel spans all SEQLEN//4 remaining steps with "valid" padding, leaving a single output step)
    tf.keras.layers.Conv1D(filters=1, kernel_size=SEQLEN//4, activation=None, padding="valid"), # output shape [BATCHSIZE, 1, 1]
    tf.keras.layers.Reshape([1,]) ] # output shape [BATCHSIZE, 1]

# instantiate the benchmark models and train those that need training
steps_per_epoch = DATA_LEN // SEQLEN // BATCHSIZE  # one pass over all training sequences
NB_BENCHMARK_EPOCHS = 10  # benchmark models are trained for 10 epochs

model_RAND   = compile_keras_sequential_model(model_layers_RAND, "RAND") # Simplistic model without parameters. It needs no training.
model_LAST   = compile_keras_sequential_model(model_layers_LAST, "LAST") # Simplistic model without parameters. It needs no training.
model_LAST2  = compile_keras_sequential_model(model_layers_LAST2, "LAST2") # Simplistic model without parameters. It needs no training.

# the training dataset repeats forever, hence the explicit steps_per_epoch
model_LINEAR = compile_keras_sequential_model(model_layers_LINEAR, "LINEAR")
model_LINEAR.fit(get_training_dataset(), steps_per_epoch=steps_per_epoch, epochs=NB_BENCHMARK_EPOCHS)
model_DNN = compile_keras_sequential_model(model_layers_DNN, "DNN")
model_DNN.fit(get_training_dataset(), steps_per_epoch=steps_per_epoch, epochs=NB_BENCHMARK_EPOCHS)
model_CNN = compile_keras_sequential_model(model_layers_CNN, "CNN")
model_CNN.fit(get_training_dataset(), steps_per_epoch=steps_per_epoch, epochs=NB_BENCHMARK_EPOCHS)

# evaluate the benchmark models
benchmark_models = [model_RAND, model_LAST, model_LAST2, model_LINEAR, model_DNN, model_CNN]
benchmark_rmses = []
# loop variable deliberately not called `model`, to avoid clobbering the
# notebook-global `model` used by the training and evaluation cells
for benchmark_model in benchmark_models:
  # evaluate returns [loss, RootMeanSquaredError]; keep the RMSE for the bar chart
  _, rmse = benchmark_model.evaluate(get_evaluation_dataset(), steps=1)
  benchmark_rmses.append(rmse)

Model: "RAND"
Layer (type)                 Output Shape              Param #   
lambda (Lambda)              (None,)                   0         
reshape (Reshape)            (None, 1)                 0         
Total params: 0
Trainable params: 0
Non-trainable params: 0
Model: "LAST"
Layer (type)                 Output Shape              Param #   
lambda_1 (Lambda)            (None,)                   0         
reshape_1 (Reshape)          (None, 1)                 0         
Total params: 0
Trainable params: 0
Non-trainable params: 0
Model: "LAST2"
Layer (type)                 Output Shape              Param #   
lambda_2 (Lambda)            (None,)                   0         
reshape_2 (Reshape)          (None, 1)                 0         
Total params: 0
Trainable params: 0
Non-trainable params: 0
Model: "LINEAR"
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1)                 17        
Total params: 17
Trainable params: 17
Non-trainable params: 0
Train for 1024 steps
Epoch 1/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.6609 - RootMeanSquaredError: 0.8129
Epoch 2/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0770 - RootMeanSquaredError: 0.2775
Epoch 3/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0586 - RootMeanSquaredError: 0.2421
Epoch 4/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0519 - RootMeanSquaredError: 0.2278
Epoch 5/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0492 - RootMeanSquaredError: 0.2219
Epoch 6/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0481 - RootMeanSquaredError: 0.2194
Epoch 7/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0474 - RootMeanSquaredError: 0.2177
Epoch 8/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0475 - RootMeanSquaredError: 0.2180
Epoch 9/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0466 - RootMeanSquaredError: 0.2159
Epoch 10/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0473 - RootMeanSquaredError: 0.2174
Model: "DNN"
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 8)                 136       
dense_2 (Dense)              (None, 1)                 9         
Total params: 145
Trainable params: 145
Non-trainable params: 0
Train for 1024 steps
Epoch 1/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.1960 - RootMeanSquaredError: 0.4427
Epoch 2/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0547 - RootMeanSquaredError: 0.2340
Epoch 3/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0478 - RootMeanSquaredError: 0.2186
Epoch 4/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0450 - RootMeanSquaredError: 0.2122
Epoch 5/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0435 - RootMeanSquaredError: 0.2086
Epoch 6/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0432 - RootMeanSquaredError: 0.2078
Epoch 7/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0416 - RootMeanSquaredError: 0.2039
Epoch 8/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0409 - RootMeanSquaredError: 0.2022
Epoch 9/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0397 - RootMeanSquaredError: 0.1994
Epoch 10/10
1024/1024 [==============================] - 2s 2ms/step - loss: 0.0394 - RootMeanSquaredError: 0.1984
Model: "CNN"
Layer (type)                 Output Shape              Param #   
reshape_3 (Reshape)          (None, 16, 1)             0         
conv1d (Conv1D)              (None, 16, 8)             40        
conv1d_1 (Conv1D)            (None, 16, 16)            400       
conv1d_2 (Conv1D)            (None, 16, 8)             136       
max_pooling1d (MaxPooling1D) (None, 8, 8)              0         
conv1d_3 (Conv1D)            (None, 8, 8)              200       
max_pooling1d_1 (MaxPooling1 (None, 4, 8)              0         
conv1d_4 (Conv1D)            (None, 1, 1)              33        
reshape_4 (Reshape)          (None, 1)                 0         
Total params: 809
Trainable params: 809
Non-trainable params: 0
Train for 1024 steps
Epoch 1/10
1024/1024 [==============================] - 4s 4ms/step - loss: 0.1119 - RootMeanSquaredError: 0.3346
Epoch 2/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0449 - RootMeanSquaredError: 0.2118
Epoch 3/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0414 - RootMeanSquaredError: 0.2034
Epoch 4/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0394 - RootMeanSquaredError: 0.1984
Epoch 5/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0389 - RootMeanSquaredError: 0.1973
Epoch 6/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0375 - RootMeanSquaredError: 0.1937
Epoch 7/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0368 - RootMeanSquaredError: 0.1917
Epoch 8/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0365 - RootMeanSquaredError: 0.1911
Epoch 9/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0359 - RootMeanSquaredError: 0.1896
Epoch 10/10
1024/1024 [==============================] - 3s 3ms/step - loss: 0.0355 - RootMeanSquaredError: 0.1884
1/1 [==============================] - 0s 119ms/step - loss: 2.3955 - RootMeanSquaredError: 1.5477
1/1 [==============================] - 0s 271ms/step - loss: 0.1690 - RootMeanSquaredError: 0.4111
1/1 [==============================] - 0s 123ms/step - loss: 0.1124 - RootMeanSquaredError: 0.3352
1/1 [==============================] - 0s 111ms/step - loss: 0.0462 - RootMeanSquaredError: 0.2150
1/1 [==============================] - 0s 113ms/step - loss: 0.0378 - RootMeanSquaredError: 0.1944
1/1 [==============================] - 0s 341ms/step - loss: 0.0339 - RootMeanSquaredError: 0.1841

RNN models


In [11]:
# RNN model (RMSE: 0.164 after 10 epochs)
# Two stacked GRU layers; only the last output of the second one feeds the Dense head.
model_RNN = tf.keras.Sequential([
    tf.keras.layers.Reshape([SEQLEN, 1], input_shape=[SEQLEN,]), # [BATCHSIZE, SEQLEN, 1] is necessary for RNN model
    tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True),  # output shape [BATCHSIZE, SEQLEN, RNN_CELLSIZE]
    tf.keras.layers.GRU(RNN_CELLSIZE), # keep only last output in sequence: output shape [BATCHSIZE, RNN_CELLSIZE]
    tf.keras.layers.Dense(1) # output shape [BATCHSIZE, 1]
])

model_RNN.compile(
    loss = 'mean_squared_error',
    optimizer = 'rmsprop',
    metrics = ['RootMeanSquaredError'])

model_RNN.summary()


Model: "sequential"
Layer (type)                 Output Shape              Param #   
reshape_5 (Reshape)          (None, 16, 1)             0         
gru (GRU)                    (None, 16, 32)            3360      
gru_1 (GRU)                  (None, 32)                6336      
dense_3 (Dense)              (None, 1)                 33        
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0

In [13]:
# RNN model with loss computed on last N elements (RMSE: 0.163 after 10 epochs)
# Both GRU layers return full sequences; a per-step Dense produces one value
# per time step and the final Lambda keeps the last LAST_N of them.
model_RNN_N = tf.keras.Sequential([
    tf.keras.layers.Reshape([SEQLEN, 1], input_shape=[SEQLEN,]), # [BATCHSIZE, SEQLEN, 1] is necessary for RNN model
    tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True),
    tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True), # output shape [BATCHSIZE, SEQLEN, RNN_CELLSIZE]
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)),              # output shape [BATCHSIZE, SEQLEN, 1]
    tf.keras.layers.Lambda(lambda x: x[:,-LAST_N:SEQLEN,0]) # last N item(s) in sequence: output shape [BATCHSIZE, LAST_N]
])

model_RNN_N.compile(
    loss = 'mean_squared_error',
    optimizer = 'rmsprop',
    metrics = ['RootMeanSquaredError'])

model_RNN_N.summary()


Model: "sequential_1"
Layer (type)                 Output Shape              Param #   
reshape_7 (Reshape)          (None, 16, 1)             0         
gru_4 (GRU)                  (None, 16, 32)            3360      
gru_5 (GRU)                  (None, 16, 32)            6336      
time_distributed (TimeDistri (None, 16, 1)             33        
lambda_3 (Lambda)            (None, 8)                 0         
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0

Training loop

In [14]:
# You can re-execute this cell to continue training

steps_per_epoch = DATA_LEN // SEQLEN // BATCHSIZE  # one pass over all training sequences
NB_EPOCHS = 10      # use NB_EPOCHS=1 when coding your models
                    # use NB_EPOCHS=10 when training for real (benchmark models were trained for 10 epochs)

model = model_RNN_N # train your model: model_RNN or model_RNN_N
train_ds = get_training_dataset(last_n=LAST_N) # use last_n=LAST_N for model_RNN_N

# the training dataset repeats forever, hence the explicit steps_per_epoch
history = model.fit(train_ds, steps_per_epoch=steps_per_epoch, epochs=NB_EPOCHS)

Train for 1024 steps
Epoch 1/10
1024/1024 [==============================] - 17s 16ms/step - loss: 0.0704 - RootMeanSquaredError: 0.2653
Epoch 2/10
1024/1024 [==============================] - 14s 13ms/step - loss: 0.0386 - RootMeanSquaredError: 0.1966
Epoch 3/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0367 - RootMeanSquaredError: 0.1915
Epoch 4/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0345 - RootMeanSquaredError: 0.1858
Epoch 5/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0329 - RootMeanSquaredError: 0.1815
Epoch 6/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0315 - RootMeanSquaredError: 0.1773
Epoch 7/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0303 - RootMeanSquaredError: 0.1739
Epoch 8/10
1024/1024 [==============================] - 14s 13ms/step - loss: 0.0287 - RootMeanSquaredError: 0.1695
Epoch 9/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0276 - RootMeanSquaredError: 0.1662
Epoch 10/10
1024/1024 [==============================] - 14s 14ms/step - loss: 0.0265 - RootMeanSquaredError: 0.1627

In [15]:


In [16]:
# Here "evaluating" using the training dataset
# (the evaluation sequences are the same series, re-cut with a half-sequence offset)
eval_ds = get_evaluation_dataset(last_n=LAST_N)  # use last_n=LAST_N for model_RNN_N
# the evaluation dataset is a single batch, hence steps=1
loss, your_rmse = model.evaluate(eval_ds, steps=1)

# NOTE: benchmark models were trained for 10 epochs

# bar chart: benchmark RMSEs followed by this model's RMSE as the last bar
picture_this_hist_yours(benchmark_rmses + [your_rmse])

1/1 [==============================] - 2s 2s/step - loss: 0.0287 - RootMeanSquaredError: 0.1696


In [17]:
# execute multiple times to see different sample sequences
# NOTE: np.random.choice samples with replacement by default, so a sequence may be picked twice
subset = np.random.choice(DATA_LEN//SEQLEN, 8) # pick 8 eval sequences at random

predictions = model.predict(evaldata[subset], steps=1) # prediction directly from numpy array
# predictions[:,-1] keeps only the last predicted value of each sequence
picture_this_3(predictions[:,-1], evaldata[subset], evallabels[subset], SEQLEN)


Benchmark all the algorithms.

In [20]:
# Fresh copy of the RNN model: only the last GRU output feeds the Dense head.
your_RNN_layers = [
    tf.keras.layers.Reshape([SEQLEN, 1], input_shape=[SEQLEN,]), # [BATCHSIZE, SEQLEN, 1] is necessary for RNN model
    tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True),  # output shape [BATCHSIZE, SEQLEN, RNN_CELLSIZE]
    tf.keras.layers.GRU(RNN_CELLSIZE), # keep only last output in sequence: output shape [BATCHSIZE, RNN_CELLSIZE]
    tf.keras.layers.Dense(1) # output shape [BATCHSIZE, 1]
]
assert len(your_RNN_layers)>0, "the model has no layers"
your_RNN_model = compile_keras_sequential_model(your_RNN_layers, 'RNN')

# Fresh copy of the RNN_N model: predictions for the last LAST_N time steps.
your_RNN_N_layers = [
    tf.keras.layers.Reshape([SEQLEN, 1], input_shape=[SEQLEN,]), # [BATCHSIZE, SEQLEN, 1] is necessary for RNN model
    tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True),
    tf.keras.layers.GRU(RNN_CELLSIZE, return_sequences=True), # output shape [BATCHSIZE, SEQLEN, RNN_CELLSIZE]
    tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1)), # output shape [BATCHSIZE, SEQLEN, 1]
    tf.keras.layers.Lambda(lambda x: x[:,-LAST_N:SEQLEN,0]) # last N item(s) in sequence: output shape [BATCHSIZE, LAST_N]
]
# check the RNN_N list here (the RNN list was already checked above)
assert len(your_RNN_N_layers)>0, "the model has no layers"
your_RNN_N_model = compile_keras_sequential_model(your_RNN_N_layers, 'RNN_N')

# train your models from scratch
your_RNN_model.fit(get_training_dataset(), steps_per_epoch=steps_per_epoch, epochs=NB_BENCHMARK_EPOCHS)
your_RNN_N_model.fit(get_training_dataset(last_n=LAST_N), steps_per_epoch=steps_per_epoch, epochs=NB_BENCHMARK_EPOCHS)

# evaluate all models
rmses = []
benchmark_models = [model_RAND, model_LAST, model_LAST2, model_LINEAR, model_DNN, model_CNN]
# loop variable deliberately not called `model`, to avoid clobbering the
# notebook-global `model` used by the training and evaluation cells
for benchmark_model in benchmark_models:
  _, rmse = benchmark_model.evaluate(get_evaluation_dataset(), steps=1)
  rmses.append(rmse)
_, rmse = your_RNN_model.evaluate(get_evaluation_dataset(), steps=1)
rmses.append(rmse)
# the RNN_N model outputs LAST_N values per sequence, so it needs matching targets
_, rmse = your_RNN_N_model.evaluate(get_evaluation_dataset(last_n=LAST_N), steps=1)
rmses.append(rmse)

Model: "RNN"
Layer (type)                 Output Shape              Param #   
reshape_10 (Reshape)         (None, 16, 1)             0         
gru_10 (GRU)                 (None, 16, 32)            3360      
gru_11 (GRU)                 (None, 32)                6336      
dense_7 (Dense)              (None, 1)                 33        
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0
Model: "RNN_N"
Layer (type)                 Output Shape              Param #   
reshape_11 (Reshape)         (None, 16, 1)             0         
gru_12 (GRU)                 (None, 16, 32)            3360      
gru_13 (GRU)                 (None, 16, 32)            6336      
time_distributed_2 (TimeDist (None, 16, 1)             33        
lambda_5 (Lambda)            (None, 8)                 0         
Total params: 9,729
Trainable params: 9,729
Non-trainable params: 0
Train for 1024 steps
Epoch 1/10
1024/1024 [==============================] - 17s 16ms/step - loss: 0.0746 - RootMeanSquaredError: 0.2731
Epoch 2/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0384 - RootMeanSquaredError: 0.1959
Epoch 3/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0352 - RootMeanSquaredError: 0.1876
Epoch 4/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0336 - RootMeanSquaredError: 0.1833
Epoch 5/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0329 - RootMeanSquaredError: 0.1813
Epoch 6/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0316 - RootMeanSquaredError: 0.1778
Epoch 7/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0304 - RootMeanSquaredError: 0.1743
Epoch 8/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0294 - RootMeanSquaredError: 0.1713
Epoch 9/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0285 - RootMeanSquaredError: 0.1689
Epoch 10/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0282 - RootMeanSquaredError: 0.1679
Train for 1024 steps
Epoch 1/10
1024/1024 [==============================] - 17s 17ms/step - loss: 0.0664 - RootMeanSquaredError: 0.2578
Epoch 2/10
1024/1024 [==============================] - 14s 13ms/step - loss: 0.0387 - RootMeanSquaredError: 0.1968
Epoch 3/10
1024/1024 [==============================] - 13s 13ms/step - loss: 0.0366 - RootMeanSquaredError: 0.1912
Epoch 4/10
1024/1024 [==============================] - 14s 14ms/step - loss: 0.0342 - RootMeanSquaredError: 0.1850
Epoch 5/10
1024/1024 [==============================] - 14s 14ms/step - loss: 0.0322 - RootMeanSquaredError: 0.1796
Epoch 6/10
1024/1024 [==============================] - 14s 13ms/step - loss: 0.0305 - RootMeanSquaredError: 0.1747
Epoch 7/10
1024/1024 [==============================] - 14s 13ms/step - loss: 0.0292 - RootMeanSquaredError: 0.1710
Epoch 8/10
1024/1024 [==============================] - 14s 14ms/step - loss: 0.0282 - RootMeanSquaredError: 0.1679
Epoch 9/10
1024/1024 [==============================] - 14s 14ms/step - loss: 0.0273 - RootMeanSquaredError: 0.1651
Epoch 10/10
1024/1024 [==============================] - 14s 14ms/step - loss: 0.0267 - RootMeanSquaredError: 0.1633
1/1 [==============================] - 0s 68ms/step - loss: 2.3761 - RootMeanSquaredError: 1.5415
1/1 [==============================] - 0s 71ms/step - loss: 0.1690 - RootMeanSquaredError: 0.4111
1/1 [==============================] - 0s 64ms/step - loss: 0.1124 - RootMeanSquaredError: 0.3352
1/1 [==============================] - 0s 63ms/step - loss: 0.0462 - RootMeanSquaredError: 0.2150
1/1 [==============================] - 0s 65ms/step - loss: 0.0378 - RootMeanSquaredError: 0.1944
1/1 [==============================] - 0s 240ms/step - loss: 0.0339 - RootMeanSquaredError: 0.1841
1/1 [==============================] - 2s 2s/step - loss: 0.0289 - RootMeanSquaredError: 0.1701
1/1 [==============================] - 2s 2s/step - loss: 0.0263 - RootMeanSquaredError: 0.1621

In [21]:

