Race LSTM prediction

The aim of this notebook is to train a LSTM model being able to predict speed of the runner at the next data point.


In [1]:
import glob
import os
import pandas as pd
import random
import sys
import dateutil.parser
from datetime import datetime

import numpy as np
from matplotlib import pyplot as plt

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils import np_utils, normalize

from sklearn import metrics as me
from sklearn.utils import shuffle
import sklearn.linear_model as lm

%matplotlib inline


Using TensorFlow backend.

In [2]:
# Load the dataset
PATH_DATA = "../data/running/APE_running_data"

def convert(date):
    dt = dateutil.parser.parse(date).replace(tzinfo=None)
    epoch = datetime.utcfromtimestamp(0)
    delta = dt - epoch
    return delta.total_seconds()

colnames = np.array(['time', 'elevation', 'distance', 'speed'])
datasets_all = []

os.chdir(PATH_DATA)
for file in glob.glob("*.tab"):
    print("Processing {}".format(file))
    dataset = np.genfromtxt(file, skip_header=1,delimiter='\t', converters={0: convert})
    dataset[:,0] -= dataset[0,0]
    dataset = pd.DataFrame(dataset,columns=colnames)
    slope = np.array([])
    window_size_half = 8
    for j in dataset.index:
        index = np.arange(j-window_size_half+1, j+window_size_half+1)
        index = index[(index >= 0) & (index < len(dataset))]
        dataset_part = dataset.iloc[index].dropna()
        regr = lm.LinearRegression()
        regr.fit(dataset_part.distance[:,np.newaxis], np.array(dataset_part.elevation))
        slope = np.append(slope,regr.coef_)
    dataset['slope'] = slope
    if (len(dataset) > 300) == (len(dataset) < 900):
        datasets_all.append(dataset)

print('\nDataset sample')
print(datasets_all[0][:10])
print(len(datasets_all))


Processing activity_437398751.tab
Processing activity_437398728.tab
Processing activity_404227667.tab
Processing activity_437398762.tab
Processing activity_377304158.tab
Processing activity_447149230.tab
Processing activity_437398739.tab
Processing activity_437398771.tab
Processing activity_396872154.tab
Processing activity_447656990.tab
Processing activity_373735080.tab
Processing activity_373734948.tab
Processing activity_437398768.tab
Processing activity_437398746.tab
Processing activity_404227656.tab
Processing activity_437398721.tab
Processing activity_437398717.tab
Processing activity_437398782.tab
Processing activity_373735106.tab
Processing activity_437398753.tab
Processing activity_437398761.tab
Processing activity_377304230.tab
Processing activity_437398778.tab
Processing activity_437398785.tab
Processing activity_379843442.tab
Processing activity_404227695.tab
Processing activity_367230665.tab
Processing activity_447149252.tab
Processing activity_447149261.tab
Processing activity_447149239.tab
Processing activity_404227677.tab
Processing activity_384747476.tab
Processing activity_373735010.tab

Dataset sample
   time   elevation   distance  speed     slope
0   0.0  858.400024   0.500000  2.372  0.057380
1   1.0  858.400024   1.740000  2.187  0.056713
2   4.0  859.200012  11.300000  2.425  0.056653
3   5.0  859.400024  15.210000  2.690  0.057973
4  11.0  861.000000  36.490002  3.122  0.054931
5  15.0  861.400024  47.270000  3.382  0.053070
6  20.0  861.799988  61.230000  2.904  0.051486
7  25.0  862.400024  72.169998  2.486  0.050327
8  29.0  863.200012  81.059998  2.401  0.048273
9  34.0  863.799988  94.839996  2.426  0.043934
30

In [12]:
# Plot features of one race 
RACE_NUMBER = 0 
# Retrieve the data of race 
data = datasets_all[RACE_NUMBER]

print(data[:10])

# Plot the speed and the slope
# There are two different scales (left and right y axis)
fig_speed_slope, ax1_ss = plt.subplots(figsize=(20, 4))
plt.title('Speed and slope of race ' + str(RACE_NUMBER))
plt.xlabel('time [s]')

speed, = ax1_ss.plot(data['time'], data['speed'], color='darkgreen', linestyle='-')
ax1_ss.set_ylabel('speed [m/s]', color='darkgreen')
ax1_ss.tick_params('y', colors='darkgreen')
ax2_ss = ax1_ss.twinx()
slope, = ax2_ss.plot(data['time'], data['slope'], color='darkblue', linestyle='-')
ax2_ss.set_ylabel('slope', color='darkblue')
ax2_ss.tick_params('y', colors='darkblue')
p = [speed, slope]
ax1_ss.legend(p, [p_.get_label() for p_ in p], loc='upper right')

# Plot the elevation and the distance
fig_elevation_distance, ax1_ed = plt.subplots(figsize=(20, 4))
plt.title('Elevation and distance of race ' + str(RACE_NUMBER))
plt.xlabel('time [s]')

elevation, = ax1_ed.plot(data['time'], data['elevation'], color='darkmagenta', linestyle='-')
ax1_ed.set_ylabel('elevation [m]', color='darkmagenta')
ax1_ed.tick_params('y', colors='darkmagenta')
ax2_ed = ax1_ed.twinx()
distance, = ax2_ed.plot(data['time'], data['distance'], color='teal', linestyle='-')
ax2_ed.set_ylabel('distance [m]', color='teal')
ax2_ed.tick_params('y', colors='teal')
p = [elevation, distance]
ax1_ed.legend(p, [p_.get_label() for p_ in p], loc='upper center')


   time   elevation   distance  speed     slope
0   0.0  858.400024   0.500000  2.372  0.057380
1   1.0  858.400024   1.740000  2.187  0.056713
2   4.0  859.200012  11.300000  2.425  0.056653
3   5.0  859.400024  15.210000  2.690  0.057973
4  11.0  861.000000  36.490002  3.122  0.054931
5  15.0  861.400024  47.270000  3.382  0.053070
6  20.0  861.799988  61.230000  2.904  0.051486
7  25.0  862.400024  72.169998  2.486  0.050327
8  29.0  863.200012  81.059998  2.401  0.048273
9  34.0  863.799988  94.839996  2.426  0.043934
Out[12]:
<matplotlib.legend.Legend at 0x7f9fb4a9db38>

In [16]:
# Statistics on data
# Concatenation of all races and statistics
# Count is not the same on every column due to missing values (NaN)
df = pd.concat(datasets_all)
print(df.describe())


               time     elevation      distance         speed         slope
count  19691.000000  19685.000000  19685.000000  19685.000000  19691.000000
mean    1420.064496    770.292253   4218.044999      3.098012      0.002012
std      859.987948     97.435802   2640.126082      1.142250      0.043974
min        0.000000    412.000000      0.000000      0.000000     -0.339632
25%      687.000000    758.000000   1974.310059      2.819000     -0.024790
50%     1389.000000    796.599976   4050.669922      3.104000      0.001888
75%     2094.000000    823.400024   6278.560059      3.348000      0.028552
max     3826.000000    884.000000  12011.019531     54.685001      0.208548

In [7]:
# Number of points taken for each prediction
NB_POINTS = 10
# Number of races taken for the training set
NB_TRAINING =  20
# Number of races taken for the testing set
NB_TEST = 10
# Number of next points for the average goal
NB_POINTS_AVG = 1 # Must not be greater than NB_POINTS
# Shuffle data or not ?
SHUFFLE = True
# List of features (columns of dataframe)
FEATURES = ['time', 'speed', 'slope']

# Remove NaN values in dataset
dataset = [i.dropna() for i in datasets_all]

# Filter features
dataset = [x[FEATURES] for x in dataset]

# Shuffle the dataset
random.seed(42)
random.shuffle(dataset)

# Deep copy of the races
dataset_not_normalized = []
for race in dataset:
    dataset_not_normalized.append(race.copy())
    
# Normalization of each feature
# Min and max values for normalization are saved in order
# to normalize the data of the test set with those values

# Contains, for each feature the min and the max value of the feature of the training set
# These min and max values will be used to normalize the dataset
max_feature = dict.fromkeys(FEATURES, -sys.maxsize - 1)
min_feature = dict.fromkeys(FEATURES, sys.maxsize)
# Get the maximal and minimal values for each column of the training set
for race in range(NB_TRAINING):
    for column in FEATURES:
        max_local = np.amax(dataset[race][column])
        min_local = np.amin(dataset[race][column])
        if max_feature[column] < max_local:
            max_feature[column] = max_local
        if min_feature[column] > min_local:
            min_feature[column] = min_local
    
# Min-max normalisation
def norm_min_max(x, min_local, max_local):
    return (x - min_local) / (max_local - min_local)

# Apply the normalization
for race in dataset:
    for column in FEATURES:
        race[column] = race[column].apply(norm_min_max, min_local=min_feature[column], max_local=max_feature[column])
    
seq = []
next_speed = []

# Creation of train set
for race in range(NB_TRAINING):
    print('Race={} ({} samples) (train)'.format(race, len(dataset[race])))
    seq_local = []
    next_speed_local = []
    for i in range(len(dataset[race])-(NB_POINTS+1)):
        # Creation of vector containing the avg slope of the NB_POINTS+1 to NB_POINTS+NB_POINTS_AVG points (the ones we want to predict)
        # This vector is added in the input of the model, because we know the slope on all the run
        slopes_avg = np.mean(dataset[race][i+NB_POINTS:i+NB_POINTS+NB_POINTS_AVG]['slope'].as_matrix())
        slopes = np.reshape(NB_POINTS * [slopes_avg], (NB_POINTS, 1))
        matrix = np.hstack((dataset[race][i:i+NB_POINTS].as_matrix(), slopes))
        seq_local.append(matrix)
        # The goal (speed) is not the normalized value
        # The goal is the average of the next 5 speeds
        speeds_avg = np.mean(dataset_not_normalized[race][i+NB_POINTS:i+NB_POINTS+NB_POINTS_AVG]['speed'].as_matrix())
        next_speed_local.append(speeds_avg)
    seq += seq_local
    next_speed += next_speed_local
        
# Transform the data into numpy arrays 
trainX = np.array(seq)
trainY = np.array(next_speed)

# Shuffle data
if SHUFFLE:
    trainX, trainY = shuffle(trainX, trainY, random_state=42)
        
seq_test = []
next_speed_test = []
# Creation of test set
for race in range(NB_TRAINING, NB_TRAINING + NB_TEST):
    print('Race={} ({} samples) (test)'.format(race, len(dataset[race])))
    seq_test_local = []
    next_speed_test_local = []
    for i in range(len(dataset[race])-(NB_POINTS+1)):
        slopes_avg = np.mean(dataset[race][i+NB_POINTS:i+NB_POINTS+NB_POINTS_AVG]['slope'].as_matrix())
        slopes = np.reshape(NB_POINTS * [slopes_avg], (NB_POINTS, 1))
        matrix = np.hstack((dataset[race][i:i+NB_POINTS].as_matrix(), slopes))
        seq_test_local.append(matrix)
        # The goal (speed) is not the normalized value
        # The goal is the average of the next 5 speeds
        speeds_avg = np.mean(dataset_not_normalized[race][i+NB_POINTS:i+NB_POINTS+NB_POINTS_AVG]['speed'].as_matrix())
        next_speed_test_local.append(speeds_avg)
    seq_test += seq_test_local
    next_speed_test += next_speed_test_local
        
# Transform the data into numpy arrays 
testX = np.array(seq_test)
testY = np.array(next_speed_test)
# Shuffle data
if SHUFFLE:
    testX, testY = shuffle(testX, testY, random_state=42)


Race=0 (685 samples) (train)
Race=1 (583 samples) (train)
Race=2 (527 samples) (train)
Race=3 (726 samples) (train)
Race=4 (709 samples) (train)
Race=5 (823 samples) (train)
Race=6 (732 samples) (train)
Race=7 (595 samples) (train)
Race=8 (583 samples) (train)
Race=9 (536 samples) (train)
Race=10 (860 samples) (train)
Race=11 (603 samples) (train)
Race=12 (773 samples) (train)
Race=13 (577 samples) (train)
Race=14 (715 samples) (train)
Race=15 (527 samples) (train)
Race=16 (725 samples) (train)
Race=17 (566 samples) (train)
Race=18 (682 samples) (train)
Race=19 (418 samples) (train)
Race=20 (605 samples) (test)
Race=21 (853 samples) (test)
Race=22 (655 samples) (test)
Race=23 (435 samples) (test)
Race=24 (581 samples) (test)
Race=25 (734 samples) (test)
Race=26 (693 samples) (test)
Race=27 (616 samples) (test)
Race=28 (735 samples) (test)
Race=29 (833 samples) (test)

In [8]:
BATCH_SIZE = 10 
NB_EPOCHS = 100
NB_UNITS = 4 

# Create and fit the LSTM network
model = Sequential()
model.add(LSTM(NB_UNITS, input_shape=(NB_POINTS, len(FEATURES) + 1)))
model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm_1 (LSTM)                (None, 4)                 144       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 5         
=================================================================
Total params: 149
Trainable params: 149
Non-trainable params: 0
_________________________________________________________________

In [9]:
history = model.fit(trainX, trainY, epochs=NB_EPOCHS, batch_size=BATCH_SIZE, verbose=1, 
                   validation_data=(testX, testY))


Train on 12725 samples, validate on 6630 samples
Epoch 1/100
12725/12725 [==============================] - 9s - loss: 0.8797 - val_loss: 0.1559
Epoch 2/100
12725/12725 [==============================] - 10s - loss: 0.3051 - val_loss: 0.1343
Epoch 3/100
12725/12725 [==============================] - 10s - loss: 0.2880 - val_loss: 0.1195
Epoch 4/100
12725/12725 [==============================] - 10s - loss: 0.2749 - val_loss: 0.1334
Epoch 5/100
12725/12725 [==============================] - 10s - loss: 0.2618 - val_loss: 0.1152
Epoch 6/100
12725/12725 [==============================] - 13s - loss: 0.2422 - val_loss: 0.1028
Epoch 7/100
12725/12725 [==============================] - 20s - loss: 0.2234 - val_loss: 0.0912
Epoch 8/100
12725/12725 [==============================] - 19s - loss: 0.2095 - val_loss: 0.0864
Epoch 9/100
12725/12725 [==============================] - 15s - loss: 0.2028 - val_loss: 0.0837
Epoch 10/100
12725/12725 [==============================] - 14s - loss: 0.1958 - val_loss: 0.0809
Epoch 11/100
12725/12725 [==============================] - 10s - loss: 0.1896 - val_loss: 0.0800
Epoch 12/100
12725/12725 [==============================] - 11s - loss: 0.1822 - val_loss: 0.0751
Epoch 13/100
12725/12725 [==============================] - 9s - loss: 0.1764 - val_loss: 0.0724
Epoch 14/100
12725/12725 [==============================] - 8s - loss: 0.1674 - val_loss: 0.0706
Epoch 15/100
12725/12725 [==============================] - 7s - loss: 0.1596 - val_loss: 0.0689
Epoch 16/100
12725/12725 [==============================] - 7s - loss: 0.1523 - val_loss: 0.0743
Epoch 17/100
12725/12725 [==============================] - 8s - loss: 0.1447 - val_loss: 0.0638
Epoch 18/100
12725/12725 [==============================] - 8s - loss: 0.1376 - val_loss: 0.0569
Epoch 19/100
12725/12725 [==============================] - 13s - loss: 0.1311 - val_loss: 0.0571
Epoch 20/100
12725/12725 [==============================] - 9s - loss: 0.1252 - val_loss: 0.0519
Epoch 21/100
12725/12725 [==============================] - 9s - loss: 0.1193 - val_loss: 0.0494
Epoch 22/100
12725/12725 [==============================] - 7s - loss: 0.1144 - val_loss: 0.0493
Epoch 23/100
12725/12725 [==============================] - 8s - loss: 0.1095 - val_loss: 0.0462
Epoch 24/100
12725/12725 [==============================] - 10s - loss: 0.1047 - val_loss: 0.0444
Epoch 25/100
12725/12725 [==============================] - 10s - loss: 0.1002 - val_loss: 0.0418
Epoch 26/100
12725/12725 [==============================] - 11s - loss: 0.0959 - val_loss: 0.0479
Epoch 27/100
12725/12725 [==============================] - 11s - loss: 0.0916 - val_loss: 0.0416
Epoch 28/100
12725/12725 [==============================] - 11s - loss: 0.0879 - val_loss: 0.0370
Epoch 29/100
12725/12725 [==============================] - 10s - loss: 0.0849 - val_loss: 0.0366
Epoch 30/100
12725/12725 [==============================] - 10s - loss: 0.0817 - val_loss: 0.0354
Epoch 31/100
12725/12725 [==============================] - 9s - loss: 0.0791 - val_loss: 0.0343
Epoch 32/100
12725/12725 [==============================] - 7s - loss: 0.0774 - val_loss: 0.0353
Epoch 33/100
12725/12725 [==============================] - 9s - loss: 0.0753 - val_loss: 0.0318
Epoch 34/100
12725/12725 [==============================] - 8s - loss: 0.0728 - val_loss: 0.0321
Epoch 35/100
12725/12725 [==============================] - 8s - loss: 0.0715 - val_loss: 0.0324
Epoch 36/100
12725/12725 [==============================] - 8s - loss: 0.0703 - val_loss: 0.0310
Epoch 37/100
12725/12725 [==============================] - 8s - loss: 0.0688 - val_loss: 0.0317
Epoch 38/100
12725/12725 [==============================] - 7s - loss: 0.0679 - val_loss: 0.0295
Epoch 39/100
12725/12725 [==============================] - 7s - loss: 0.0671 - val_loss: 0.0298
Epoch 40/100
12725/12725 [==============================] - 9s - loss: 0.0668 - val_loss: 0.0318
Epoch 41/100
12725/12725 [==============================] - 10s - loss: 0.0658 - val_loss: 0.0291
Epoch 42/100
12725/12725 [==============================] - 13s - loss: 0.0651 - val_loss: 0.0285
Epoch 43/100
12725/12725 [==============================] - 13s - loss: 0.0650 - val_loss: 0.0289
Epoch 44/100
12725/12725 [==============================] - 10s - loss: 0.0643 - val_loss: 0.0285
Epoch 45/100
12725/12725 [==============================] - 10s - loss: 0.0641 - val_loss: 0.0280
Epoch 46/100
12725/12725 [==============================] - 9s - loss: 0.0634 - val_loss: 0.0300
Epoch 47/100
12725/12725 [==============================] - 8s - loss: 0.0628 - val_loss: 0.0300
Epoch 48/100
12725/12725 [==============================] - 8s - loss: 0.0630 - val_loss: 0.0277
Epoch 49/100
12725/12725 [==============================] - 9s - loss: 0.0628 - val_loss: 0.0280
Epoch 50/100
12725/12725 [==============================] - 11s - loss: 0.0621 - val_loss: 0.0303
Epoch 51/100
12725/12725 [==============================] - 10s - loss: 0.0624 - val_loss: 0.0292
Epoch 52/100
12725/12725 [==============================] - 10s - loss: 0.0621 - val_loss: 0.0277
Epoch 53/100
12725/12725 [==============================] - 9s - loss: 0.0623 - val_loss: 0.0277
Epoch 54/100
12725/12725 [==============================] - 7s - loss: 0.0619 - val_loss: 0.0296
Epoch 55/100
12725/12725 [==============================] - 8s - loss: 0.0620 - val_loss: 0.0298
Epoch 56/100
12725/12725 [==============================] - 9s - loss: 0.0619 - val_loss: 0.0298
Epoch 57/100
12725/12725 [==============================] - 10s - loss: 0.0615 - val_loss: 0.0285
Epoch 58/100
12725/12725 [==============================] - 12s - loss: 0.0612 - val_loss: 0.0294
Epoch 59/100
12725/12725 [==============================] - 13s - loss: 0.0615 - val_loss: 0.0277
Epoch 60/100
12725/12725 [==============================] - 14s - loss: 0.0615 - val_loss: 0.0281
Epoch 61/100
12725/12725 [==============================] - 9s - loss: 0.0613 - val_loss: 0.0284
Epoch 62/100
12725/12725 [==============================] - 8s - loss: 0.0615 - val_loss: 0.0277
Epoch 63/100
12725/12725 [==============================] - 8s - loss: 0.0615 - val_loss: 0.0289
Epoch 64/100
12725/12725 [==============================] - 9s - loss: 0.0611 - val_loss: 0.0279
Epoch 65/100
12725/12725 [==============================] - 8s - loss: 0.0612 - val_loss: 0.0281
Epoch 66/100
12725/12725 [==============================] - 8s - loss: 0.0612 - val_loss: 0.0298
Epoch 67/100
12725/12725 [==============================] - 8s - loss: 0.0615 - val_loss: 0.0280
Epoch 68/100
12725/12725 [==============================] - 8s - loss: 0.0609 - val_loss: 0.0280
Epoch 69/100
12725/12725 [==============================] - 9s - loss: 0.0608 - val_loss: 0.0281
Epoch 70/100
12725/12725 [==============================] - 10s - loss: 0.0610 - val_loss: 0.0292
Epoch 71/100
12725/12725 [==============================] - 10s - loss: 0.0607 - val_loss: 0.0281
Epoch 72/100
12725/12725 [==============================] - 14s - loss: 0.0611 - val_loss: 0.0340
Epoch 73/100
12725/12725 [==============================] - 10s - loss: 0.0608 - val_loss: 0.0294
Epoch 74/100
12725/12725 [==============================] - 10s - loss: 0.0605 - val_loss: 0.0341
Epoch 75/100
12725/12725 [==============================] - 10s - loss: 0.0610 - val_loss: 0.0277
Epoch 76/100
12725/12725 [==============================] - 9s - loss: 0.0608 - val_loss: 0.0401
Epoch 77/100
12725/12725 [==============================] - 8s - loss: 0.0611 - val_loss: 0.0277
Epoch 78/100
12725/12725 [==============================] - 8s - loss: 0.0605 - val_loss: 0.0306
Epoch 79/100
12725/12725 [==============================] - 10s - loss: 0.0606 - val_loss: 0.0278
Epoch 80/100
12725/12725 [==============================] - 12s - loss: 0.0608 - val_loss: 0.0440
Epoch 81/100
12725/12725 [==============================] - 16s - loss: 0.0608 - val_loss: 0.0303
Epoch 82/100
12725/12725 [==============================] - 9s - loss: 0.0607 - val_loss: 0.0331
Epoch 83/100
12725/12725 [==============================] - 9s - loss: 0.0603 - val_loss: 0.0279
Epoch 84/100
12725/12725 [==============================] - 9s - loss: 0.0603 - val_loss: 0.0282
Epoch 85/100
12725/12725 [==============================] - 9s - loss: 0.0604 - val_loss: 0.0282
Epoch 86/100
12725/12725 [==============================] - 8s - loss: 0.0601 - val_loss: 0.0278
Epoch 87/100
12725/12725 [==============================] - 8s - loss: 0.0602 - val_loss: 0.0302
Epoch 88/100
12725/12725 [==============================] - 9s - loss: 0.0603 - val_loss: 0.0278
Epoch 89/100
12725/12725 [==============================] - 11s - loss: 0.0602 - val_loss: 0.0339
Epoch 90/100
12725/12725 [==============================] - 12s - loss: 0.0602 - val_loss: 0.0276
Epoch 91/100
12725/12725 [==============================] - 15s - loss: 0.0600 - val_loss: 0.0325
Epoch 92/100
12725/12725 [==============================] - 11s - loss: 0.0605 - val_loss: 0.0280
Epoch 93/100
12725/12725 [==============================] - 11s - loss: 0.0599 - val_loss: 0.0278
Epoch 94/100
12725/12725 [==============================] - 10s - loss: 0.0599 - val_loss: 0.0278
Epoch 95/100
12725/12725 [==============================] - 8s - loss: 0.0599 - val_loss: 0.0278
Epoch 96/100
12725/12725 [==============================] - 9s - loss: 0.0599 - val_loss: 0.0310
Epoch 97/100
12725/12725 [==============================] - 10s - loss: 0.0602 - val_loss: 0.0290
Epoch 98/100
12725/12725 [==============================] - 11s - loss: 0.0600 - val_loss: 0.0276
Epoch 99/100
12725/12725 [==============================] - 10s - loss: 0.0599 - val_loss: 0.0277
Epoch 100/100
12725/12725 [==============================] - 10s - loss: 0.0595 - val_loss: 0.0282

In [10]:
# Plot the training and testing
plt.plot(history.history['loss'], label='Training')
plt.plot(history.history['val_loss'], label='Testing')
plt.xlabel('epochs')
plt.ylabel('mse')
plt.legend()
plt.grid()



In [11]:
# Plot one race with its prediction
RACE_NUMBER = NB_TRAINING # First race of testing set 
# Actual values
actual = dataset_not_normalized[RACE_NUMBER]
print("Actual values")
print(actual[:10])
# Predict the future values
# testX contains all the testing races, we need to extract data from the wanted race
# Calculation of starting line of test set depending on race number
start = sum(len(dataset_not_normalized[race]) for race in range(NB_TRAINING, RACE_NUMBER))
predictY = model.predict(testX[start:start + len(actual)])
print("Predictions")
print(predictY[:10])
print("Standard deviation=" + str(np.std(predictY)))

# Plot the results
plt.figure(figsize=(20,4))
plt.suptitle('Prediction race ' + str(RACE_NUMBER))
plt.title('History=' + str(NB_POINTS) + ', Future=' + str(1))
plt.xlabel('time [s]')
plt.ylabel('speed [m/s]')

# Plot the predictions
plt.plot(np.arange(len(predictY)), predictY, 'g-', label='Predicted')
# Plot the actual values
plt.plot(np.arange(len(predictY)), actual['speed'], 'r-', label='Actual')
plt.legend()


Actual values
   time  speed     slope
0   0.0  2.398 -0.005980
1   1.0  2.261  0.008163
2   3.0  1.913  0.011956
3   9.0  2.111  0.014740
4  11.0  1.891  0.014677
5  13.0  1.885  0.014280
6  14.0  2.114  0.014118
7  16.0  2.321  0.014391
8  22.0  2.257  0.015162
9  27.0  2.627  0.015852
Predictions
[[ 3.03785133]
 [ 3.23672628]
 [ 2.77741647]
 [ 2.47870064]
 [ 3.33603144]
 [ 3.03858519]
 [ 3.1397438 ]
 [ 3.30418205]
 [ 3.22521305]
 [ 3.51283741]]
Standard deviation=0.355538
Out[11]:
<matplotlib.legend.Legend at 0x7f9fb4eb2080>

In [8]:
# Information about software version
%load_ext version_information
%reload_ext version_information

%version_information numpy, matplotlib, keras, pandas, sklearn, tensorflow


Out[8]:
SoftwareVersion
Python3.6.1 64bit [GCC 6.3.1 20170306]
IPython6.0.0
OSLinux 4.11.2 1 ARCH x86_64 with arch
numpy1.12.0
matplotlib2.0.0
keras2.0.3
pandas0.19.2
sklearn0.18.1
tensorflow1.1.0
Thu Jun 01 14:12:55 2017 CEST