In [35]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import model_selection
from keras.layers.core import Dense, Dropout
from keras.layers.recurrent import GRU
from keras.models import Sequential, load_model
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline
In [12]:
# Location of the price table. NOTE(review): machine-specific absolute path;
# the notebook only runs where this file exists at exactly this location.
PRICES_CSV = 'D:/project/ml/course/deepai/dl_course/Stock-Prediction-RNN/prices_stock.csv'

# Load the table, using its `date` column as the row index.
df = pd.read_csv(PRICES_CSV, index_col=['date'])
In [13]:
# Preview the first rows of the freshly loaded price table.
df.head()
Out[13]:
In [7]:
# Number of rows per distinct value of the `symbol` column (first few entries only).
df.symbol.value_counts().head()
Out[7]:
In [14]:
# Line plot of the `open` column for the rows whose symbol is AMZN.
df.loc[df['symbol'] == 'AMZN', 'open'].plot()
Out[14]:
In [123]:
# Build the modelling frame: AMZN rows only, the four price columns, with a
# datetime index restricted to the closed window 2010-01-01 .. 2012-04-04.
is_amzn = df['symbol'] == 'AMZN'
target = df.loc[is_amzn, ['open', 'low', 'high', 'close']].copy()
target.index = pd.to_datetime(target.index)

# Boolean masks are label-based selection, so use .loc rather than the
# positional .iloc. The explicit .copy() makes `target` an independent frame,
# so adding a column to it later does not raise SettingWithCopyWarning.
in_window = (target.index >= '2010-01-01') & (target.index <= '2012-04-04')
target = target.loc[in_window].copy()
target.shape
Out[123]:
In [122]:
# Show the smallest and largest index values present in `target`.
print(target.index.min(), target.index.max())
In [16]:
# Preview the first rows of the filtered AMZN frame.
target.head()
Out[16]:
In [124]:
# Look-ahead copy of `target`: every row receives the values found three rows
# further down, which leaves the final three rows filled with NaN.
target_shift = target.shift(periods=-3)
target_shift.head()
Out[124]:
In [125]:
# Attach the regression label: the `close` value taken from the shifted frame
# (i.e. the close found three rows ahead of each row). Mutates `target` in place.
target['label'] = target_shift['close']
In [126]:
# Inspect the last rows; the trailing labels are NaN because of the shift.
target.tail()
Out[126]:
In [127]:
# Discard the rows that have no label (the NaN tail produced by the shift).
target = target.dropna(subset=['label'])
In [128]:
# Preview the labelled frame (price columns plus `label`).
target.head()
Out[128]:
In [129]:
# Row/column count after the unlabeled rows were removed.
target.shape
Out[129]:
In [130]:
# Inputs are the four price columns; the output is the look-ahead close.
feature_cols = ['open', 'low', 'high', 'close']
x = target[feature_cols]
y = target['label']

# Positional hold-out: the first n rows train the model, the rest test it.
n = 300
X_train, y_train = x[:n], y[:n]
X_test, y_test = x[n:], y[n:]

# Remember the test rows' index before they become bare arrays; it is the
# x-axis of the plots further down.
index = X_test.index

# Standardize features and labels with statistics fitted on the training part
# only; the test part is transformed with those same statistics.
x_scale = preprocessing.StandardScaler()
y_scale = preprocessing.StandardScaler()
X_train = x_scale.fit_transform(X_train.values)
y_train = y_scale.fit_transform(y_train.values.reshape(-1, 1))
X_test = x_scale.transform(X_test.values)
y_test = y_scale.transform(y_test.values.reshape(-1, 1))

# Reshape to (samples, 1, features): each sample is a sequence of length one,
# matching the input_shape the recurrent model is built with.
X_train = X_train.reshape((-1, 1, len(feature_cols)))
X_test = X_test.reshape((-1, 1, len(feature_cols)))
In [61]:
# First ten index entries of the test split.
index[:10]
Out[61]:
In [121]:
# Shape of the training tensor after the reshape to (samples, 1, 4).
X_train.shape
Out[121]:
In [131]:
# Two stacked GRU layers with dropout, followed by a single-unit regression head.
model = Sequential()
# First GRU returns the full sequence so the second GRU receives 3-D input.
model.add(GRU(units=512, return_sequences=True, input_shape=(1, 4)))
model.add(Dropout(0.2))
model.add(GRU(units=256))
model.add(Dropout(0.2))
# The label is standardized (zero mean, unit variance), so it takes negative
# values and values above 1. A sigmoid output is confined to (0, 1) and cannot
# represent them; a regression head needs an unbounded (linear) activation.
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='adam')
In [120]:
# Print the layer-by-layer overview of the network defined above.
model.summary()
In [132]:
# Train for five epochs; a tenth of the training samples is held out by Keras
# for validation. The returned History object is the cell's output.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=250,
    epochs=5,
    validation_split=0.1,
    verbose=1,
)
Out[132]:
In [133]:
# Loss on the held-out split, computed in standardized label units
# (y_test is still scaled at this point).
score = model.evaluate(X_test, y_test)
print('Score: {}'.format(score))
# Predict, then map the predictions back to the label's original units.
pred = model.predict(X_test)
pred = y_scale.inverse_transform(pred)
# NOTE(review): y_test is overwritten with its inverse-transformed version, so
# this cell is not idempotent -- running it a second time would evaluate
# against unscaled labels and apply the inverse transform twice.
y_test = y_scale.inverse_transform(y_test)
In [134]:
# Last ten test labels, in original units after the inverse transform.
y_test[-10:]
Out[134]:
In [135]:
# Distribution of the true test labels.
plt.hist(y_test)
Out[135]:
In [136]:
# Distribution of the model's predictions on the test split.
plt.hist(pred)
Out[136]:
In [113]:
# Last ten predictions. The name `yhat` used here previously is never assigned
# anywhere in the notebook (leftover kernel state) and fails on a fresh run;
# the predictions are stored in `pred`.
pred[-10:]
Out[113]:
In [92]:
# plt.plot(index[-100:], yhat[-100:], label='pred')
def plot(x, y, label='truth'):
    """Draw ``y`` against the dates ``x`` on a new figure with monthly ticks.

    Parameters
    ----------
    x : sequence of date-like values
        Horizontal coordinates; the axis is formatted with matplotlib date
        locators/formatters, so these are expected to be dates.
    y : sequence
        Values to plot, one per entry of ``x``.
    label : str, optional
        Legend entry for the series. Defaults to ``'truth'`` (the label that
        was previously hard-coded), so existing calls are unaffected; pass
        e.g. ``'pred'`` when plotting model output.

    Returns
    -------
    None
        The figure is rendered by the notebook's inline backend.
    """
    f, ax = plt.subplots()
    ax.plot(x, y, label=label)
    ax.legend()

    # One major tick per calendar month, rendered as YYYYMM.
    months = mdates.MonthLocator()
    monthsFmt = mdates.DateFormatter('%Y%m')
    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(monthsFmt)

    # Full date for the interactive cursor read-out.
    ax.format_xdata = mdates.DateFormatter('%Y-%m-%d')

    # Rotate and align the date tick labels so they do not overlap.
    f.autofmt_xdate()
In [137]:
# Last rows of the labelled frame, for comparison with the arrays shown below.
target.tail()
Out[137]:
In [145]:
# Last ten training labels, mapped back from standardized to original units.
y_scale.inverse_transform(y_train[-10:])
Out[145]:
In [138]:
# Last ten true test labels (original units), to compare with the predictions.
y_test[-10:]
Out[138]:
In [142]:
# Last fifty predictions (original units).
pred[-50:]
Out[142]:
In [140]:
# True labels for the last 100 test rows.
plot(index[-100:], y_test[-100:])
In [141]:
# Model predictions for the last 100 test rows.
# NOTE(review): as called here the legend entry reads 'truth' even though the
# series is the prediction.
plot(index[-100:], pred[-100:])