In [1]:
from __future__ import print_function
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10, 10)
path='/home/ubuntu/data/training/time_series/'
In [2]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
In [15]:
# Read series
with open(path + 'cif2016_trn.txt') as file:
    values = {}
    forecast = {}
    period = {}
    for line in file:
        l = line.rstrip().split(";")
        forecast[l[0]] = int(l[1])
        period[l[0]] = l[2]
        values[l[0]] = [float(i) for i in l[3:]]
print(forecast)
print(period)
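Each line of cif2016_trn.txt is parsed as semicolon-separated fields: series id, forecast horizon, period, then the observed values. A minimal sketch on a made-up line (the numbers and period label are illustrative, not real CIF 2016 data):
sample = 'ts1;12;monthly;100.0;101.5;99.8'
fields = sample.rstrip().split(';')
print(fields[0], int(fields[1]), fields[2], [float(v) for v in fields[3:]])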
In [4]:
# Plot the series
plt.rcParams['figure.figsize'] = (10, 40)
fig = plt.figure()
for i in range(1, len(values)+1):
    fig.add_subplot(len(values)//4, 4, i)
    plt.plot(values['ts'+str(i)])
In [5]:
# Series to model
serie='ts71'
In [6]:
# Use the previous 14 values as model input
x_length = 14
# Forecast horizon
y_length = forecast[serie]
print(x_length, y_length)
# Hold out the last 3 windows for validation
num_validation_cases = 3
max_index_train = len(values[serie]) - y_length - x_length - num_validation_cases
max_index_valid = max_index_train + num_validation_cases
# Create data
X_trn = []
X_val = []
y_trn = []
y_val = []
for i in range(max_index_train):
    X_trn += [values[serie][i:i+x_length]]
    y_trn += [values[serie][i+x_length:i+x_length+y_length]]
for i in range(max_index_train, max_index_valid):
    X_val += [values[serie][i:i+x_length]]
    y_val += [values[serie][i+x_length:i+x_length+y_length]]
X_trn = np.array(X_trn)
X_val = np.array(X_val)
y_trn = np.array(y_trn)
y_val = np.array(y_val)
print(X_trn.shape, X_val.shape, y_trn.shape, y_val.shape)
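As a quick sanity check, the assertions below just restate the shapes implied by the slicing above (a minimal sketch, nothing new is computed):
assert X_trn.shape == (max_index_train, x_length)
assert y_trn.shape == (max_index_train, y_length)
assert X_val.shape == (num_validation_cases, x_length)
assert y_val.shape == (num_validation_cases, y_length)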
In [7]:
print(np.max(X_trn),np.max(y_trn))
In [8]:
# Normalize
max_trn = max((np.max(X_trn),np.max(y_trn)))
print('max_trn: ', max_trn)
X_trn = X_trn/max_trn
X_val = X_val/max_trn
y_trn = y_trn/max_trn
y_val = y_val/max_trn
In [9]:
# Model
lstm_feat = 256
gpu_options = tf.GPUOptions(allow_growth = True)
sess = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=True))
def dense(x, input_size=10, output_size=1):
    W = tf.Variable(tf.truncated_normal([input_size, output_size], stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[output_size]))
    return tf.matmul(x, W) + b
# Inputs
x_input = tf.placeholder(tf.float32, shape=[None, x_length], name='x')
x_input_lstm = tf.reshape(x_input, [-1, x_length, 1])
y_input = tf.placeholder(tf.float32, shape=[None, y_length], name='y')
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
lstm1 = tf.contrib.rnn.LSTMCell(lstm_feat, initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=123))
lstm1 = tf.contrib.rnn.DropoutWrapper(lstm1, output_keep_prob=keep_prob)
lstm_out, _ = tf.nn.dynamic_rnn(lstm1, x_input_lstm, dtype=tf.float32, scope='lstm16')
# Final dense layer
y_pred = dense(lstm_out[:,-1,:], input_size=lstm_feat, output_size=y_length)
print(y_pred)
# Loss function
cost = tf.reduce_sum(tf.square(y_pred - y_input))
numerator = tf.abs(y_pred-y_input)
denominator = tf.divide(tf.abs(y_pred) + tf.abs(y_input), 2)
SMAPE_cost = tf.divide(tf.reduce_sum(tf.divide(numerator, denominator)), y_length)
# Trainer
learning_rate = tf.placeholder(tf.float32, name='learning_rate')
train_step = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(SMAPE_cost)
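For reference, the SMAPE-style loss above can be written in plain NumPy (a minimal sketch for checking the graph version; smape_np and its argument names are illustrative, not from the original notebook):
def smape_np(y_true, y_hat):
    numerator = np.abs(y_hat - y_true)
    denominator = (np.abs(y_hat) + np.abs(y_true)) / 2
    # Sum over all windows and horizon steps, divided by the horizon length,
    # matching tf.reduce_sum(...) / y_length above.
    return np.sum(numerator / denominator) / y_true.shape[1]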
In [10]:
sess.run(tf.global_variables_initializer())
In [11]:
# Train graph
num_epoch=1000
lr=0.001
i=0
for epoch in range(num_epoch):
    feed_dict = {x_input: X_trn, y_input: y_trn, learning_rate: lr, keep_prob: 0.9}
    _, c = sess.run([train_step, cost], feed_dict=feed_dict)
    i += 1
    if i % 100 == 0:
        c_tst = cost.eval(feed_dict={x_input: X_val, y_input: y_val, keep_prob: 1})
        print('Epoch: ', epoch, ' - LR: ', lr, ' - Cost: ', c, ' - Cost test: ', c_tst)
    lr *= 0.999
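After training, the same SMAPE-style loss can be evaluated on the held-out windows (a small sketch reusing the ops already defined above):
smape_val = SMAPE_cost.eval(feed_dict={x_input: X_val, y_input: y_val, keep_prob: 1})
print('Validation SMAPE-style loss:', smape_val)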
In [12]:
# Evaluate the last validation prediction and plot it against the real series
y_pred_val = y_pred.eval(feed_dict={x_input: X_val, y_input: y_val, keep_prob: 1})
pred_val = np.concatenate((values[serie][:-y_length-1], y_pred_val[-1]*max_trn), axis=0)
plt.rcParams['figure.figsize'] = (10, 3)
real_vs_pred = np.array([ values[serie][:-1], pred_val]).T
plt.plot(real_vs_pred)
Out[12]:
In [13]:
# Evaluate predictions for all validation windows
y_pred_val = y_pred.eval(feed_dict={x_input: X_val, y_input: y_val, keep_prob: 1})
pred_val = np.concatenate((X_val,y_pred_val), axis=1)
val = np.concatenate((X_val,y_val), axis=1)
plt.rcParams['figure.figsize'] = (10, 10)
fig = plt.figure()
for i in range(3):
    fig.add_subplot(3, 1, i+1)
    real_vs_pred = np.array([val[i], pred_val[i]]).T
    plt.plot(real_vs_pred)
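A possible final step, not in the original notebook (a minimal sketch under the assumption that the competition forecast starts right after the last observed value): feed the last x_length observations through the trained network and rescale by max_trn to recover the original units.
x_last = np.array([values[serie][-x_length:]]) / max_trn
y_fcst = y_pred.eval(feed_dict={x_input: x_last, keep_prob: 1})[0] * max_trn
print(y_fcst)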