notebook.community

Edit and run



In [1]:

    
import tensorflow as tf 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 

from tensorflow.contrib import rnn

% matplotlib inline
plt.style.use('ggplot')



In [2]:

    
data_df = pd.read_csv('tsco.csv')
# We only care about close price for now 
data = data_df.Open.values

plt.subplot(1,2,1)
plt.plot(list(range(len(data))), data)
plt.xlabel('Timestep')
plt.ylabel('Close Price')
plt.title('Close Price')
plt.subplot(1,2,2)
plt.hist(data, 100)
plt.xlabel('Close Price')
plt.ylabel('Frequency')
plt.title('Close Price Distribution')
fig = plt.gcf()
fig.set_size_inches(18.5, 5.5)



In [26]:

    
# Normalise the data 
data_normal = data - np.mean(data)
data_normal = data_normal / np.std(data_normal)

print('Mean: {:2.2f} STD:{:2.2f}'.format(np.mean(data_normal), np.std(data_normal)))









    



Mean: -0.00 STD:1.00



In [27]:

    
# You should defintly review this section again

timesteps = list(range(len(data_normal)))
n_step = 3*7 # last three weeks
n_input = 1
n_output = 1 # next day stock only


useable_t = timesteps[n_step:-n_output]

past = np.empty((len(timesteps), n_step))
future = np.empty((len(timesteps), n_output))
for i, v in enumerate(useable_t):
    #print('Index: {}, Values: {} Procurement: {}' .format(i, v, proc_s.iloc[v]))
    past[v,:] = data_normal[v-n_step:v]
    future[v,:] = data_normal[v:v+n_output]
    
training_ratio = 0.8
training_size = int(len(useable_t)*training_ratio)
useable_past = past[useable_t]
useable_future = future[useable_t]

train_t = useable_t[:training_size]
test_t = useable_t[training_size:]


train_past = useable_past[:training_size]
train_future = useable_future[:training_size]

test_past = useable_past[training_size:]
test_future = useable_future[training_size:]

train_past = train_past.reshape((len(train_past), n_step, n_input))
test_past = test_past.reshape((len(test_past), n_step, n_input))



In [28]:

    
def my_rnn(x, W, b):
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(x, n_step, axis=0)
    
    lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32)
    return tf.nn.bias_add(tf.matmul(outputs[-1], W), b)



In [29]:

    
learning_rate = 1e-3
n_epoch = 5
epoch_size = 100
n_hidden = 256
n_iter = n_epoch * epoch_size

tf.reset_default_graph()

x = tf.placeholder(tf.float32, [None, n_step, n_input])
y = tf.placeholder(tf.float32, [None, n_output])

W = tf.Variable(tf.truncated_normal([n_hidden, n_output]))
b = tf.Variable(tf.truncated_normal([n_output]))

h = my_rnn(x, W, b)

individual_losses = tf.reduce_mean(tf.squared_difference(h,y), 1)
loss = tf.reduce_mean(individual_losses)
optimiser = tf.train.AdamOptimizer(learning_rate).minimize(loss)



In [30]:

    
%%time 
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()

for iter in range(n_iter+1):
    # No mini Batch yet
    optimiser.run(feed_dict={x:train_past, y:train_future})
    
    if iter% epoch_size ==0:
        print('Epoch: {} Loss: {}' .format(int(iter/epoch_size), loss.eval(feed_dict={x:train_past, y:train_future})))









    



Epoch: 0 Loss: 0.18119347095489502
Epoch: 1 Loss: 0.047518037259578705
Epoch: 2 Loss: 0.045090217143297195
Epoch: 3 Loss: 0.043372515588998795
Epoch: 4 Loss: 0.04218125343322754
Epoch: 5 Loss: 0.041353434324264526
CPU times: user 25.1 s, sys: 7.71 s, total: 32.8 s
Wall time: 42.8 s



In [31]:

    
error = loss.eval(feed_dict={x:test_past, y:test_future})
print('Error on Test Set: {}' .format(error))









    



Error on Test Set: 0.008012385107576847

Measure Performance



In [32]:

    
plt.plot(test_t, test_future, label='Actual')
plt.xlabel('Timestep')
plt.ylabel('Closing Price Normalised')
fcast = sess.run(h, feed_dict={x:test_past})
plt.plot(test_t, fcast, label='Forecast')
plt.plot(np.array(test_t)+1, test_future, label='Same as Previous Day')
plt.legend()
fig = plt.gcf()
fig.set_size_inches(36, 10.5)



In [11]:

    
def msre(x,y):
    return np.mean((x-y)**2)

print('Fcast Erro: {}' .format(msre(test_future, fcast)))
print('Fcast Erro: {}' .format(msre(test_future[:-1], test_future[1:])))









    



Fcast Erro: 0.014119948148640916
Fcast Erro: 0.0022516281552023548



In [ ]:



In [ ]: