Predicting Alphabet Inc. stock price using a Recurrent Neural Network
Dataset from Google Finance
Based on the RNN project "Time Series Prediction" from Udacity's Artificial Intelligence Nanodegree
In [1]:
%matplotlib inline
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import keras
import helper
#helper.reproducible(seed=42)
sns.set()
In [2]:
# Load the raw price history from the local CSV file
df = pd.read_csv('data/alphabet_stock.csv')
# the number of rows is reported as the number of days
print("Days: {}".format(df.shape[0]))
# preview the first rows
df.head(3)
Out[2]:
In [3]:
# Convert the 'Date' column from text to timestamps.
# With errors='coerce', values that do not match the given format become NaT
# instead of raising.
df['Date'] = pd.to_datetime(
    df['Date'],
    format='%d-%b-%y',
    errors='coerce',
)
df.head(3)
Out[3]:
In [4]:
# Order the rows by ascending date, then chart the closing price over time
df = df.sort_values(by='Date')
# df.plot returns the Axes it drew on, so the y label can be set on it directly
df.plot(x='Date', y='Close').set_ylabel("Share Price ($)");
In [5]:
# Only Close prices will be used
data = df['Close'].values

# Normalize data to the [-1, 1] range.
# The 1-D price vector is reshaped into a single column before scaling, and
# `data` is intentionally kept as that 2-D column afterwards: it is passed to
# `scaler.inverse_transform(data)` again when the results are plotted.
# NOTE(review): the scaler is fit on the whole series, including the tail that
# is later held out as the test set - consider fitting on the training part only.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(-1, 1), copy=True)
data = scaler.fit_transform(data.reshape(-1, 1))
Run a sliding window along the input series and create the associated input/output pairs
In [6]:
def window_transform_series(series, window_size):
    """Turn a series into sliding-window input/output pairs.

    Each input row holds `window_size` consecutive values of `series`; the
    matching output is the single value that immediately follows that window.

    Parameters
    ----------
    series : array-like
        Either a 1-D sequence of values or a 2-D single-column array of
        shape (n, 1).
    window_size : int
        Number of consecutive values in each input window. Must be >= 1.

    Returns
    -------
    X : numpy.ndarray, shape (len(series) - window_size, window_size)
        One window per row.
    y : numpy.ndarray, shape (len(series) - window_size, 1)
        The value following each window.
        When `series` has no more than `window_size` values no pair can be
        formed, and empty arrays of shape (0, window_size) and (0, 1) are
        returned.

    Raises
    ------
    ValueError
        If `window_size` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be >= 1, got {}".format(window_size))

    series = np.asarray(series)
    # a series that is not longer than the window yields zero pairs
    n_pairs = max(len(series) - window_size, 0)

    # fill input/output lists from the given sequence
    X = [series[i:i + window_size] for i in range(n_pairs)]  # size: sliding window
    y = [series[i + window_size] for i in range(n_pairs)]    # size: scalar

    # reshape() instead of assigning to `.shape`: it also gives the empty case
    # a well-defined 2-D shape, so callers can always slice with X[a:b, :]
    X = np.asarray(X, dtype=series.dtype).reshape(n_pairs, window_size)
    y = np.asarray(y, dtype=series.dtype).reshape(n_pairs, 1)
    return X, y
# Build the input/output pairs from the scaled series
window_size = 28  # Four weeks
X, y = window_transform_series(data, window_size)
In [7]:
# Positional split (no shuffling): the first 90% of the windows are used for
# training and the remaining tail is kept for testing
train_test_split = int(0.9 * len(X))
X_train, X_test = X[:train_test_split, :], X[train_test_split:, :]
y_train, y_test = y[:train_test_split], y[train_test_split:]

# Keras's LSTM module requires input reshaped to [samples, window size, stepsize]
X_train = X_train.reshape(X_train.shape[0], window_size, 1)
X_test = X_test.reshape(X_test.shape[0], window_size, 1)
In [8]:
from keras.models import Sequential
from keras.layers import Dense, LSTM
model = Sequential()
model.add(LSTM(256, input_shape=(window_size, 1)))
model.add(Dense(1, activation=None))
model.summary()
model.compile(loss='mean_squared_error', optimizer='rmsprop')
# train the model
print("\nTraining ...")
callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=0)]
%time history = model.fit(X_train, y_train, epochs=100, batch_size=1024, verbose=0, \
validation_split=0.2, callbacks=callbacks)
helper.show_training(history)
model_path = os.path.join("models", "simple_stock_prediction.h5")
model.save(model_path)
print("\nModel saved at", model_path)
In [9]:
# Reload the network from the file it was saved to
model = keras.models.load_model(model_path)
print("Model loaded:", model_path)

# Evaluate the model on both partitions, then report the two values
training_error = model.evaluate(X_train, y_train, verbose=0)
testing_error = model.evaluate(X_test, y_test, verbose=0)
print('\nTraining error = {:.4f}'.format(training_error))
print('Test error = {:.4f}'.format(testing_error))
In [10]:
# Model outputs for the training and the test windows (still in scaled units)
train_predict, test_predict = [
    model.predict(inputs) for inputs in (X_train, X_test)
]
In [11]:
# Undo the min-max scaling for the full series and for both sets of predictions
data_or, train_predict_or, test_predict_or = (
    scaler.inverse_transform(values)
    for values in (data, train_predict, test_predict)
)

plt.figure(figsize=(14, 6))

# x positions of the predictions within the original series: the first
# predicted value follows the first window, so the training fit starts at
# `window_size`, and the testing fit starts right after the training targets
split_pt = train_test_split + window_size
train_days = np.arange(window_size, split_pt)
test_days = np.arange(split_pt, split_pt + len(test_predict_or))

plt.plot(data_or, color='k', label='original series')
plt.plot(train_days, train_predict_or, color='b', label='training fit')
plt.plot(test_days, test_predict_or, color='r', label='testing fit')

plt.xlabel('day')
plt.ylabel('stock price')
# legend placed outside the axes, to the right of the plot
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5));
The stock prices are predicted one day at a time, each prediction using the actual (not previously predicted) prices from the last 4 weeks. That explains why the predictions seem so accurate.