In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
data = pd.read_csv("../data/EURUSD_daily.csv", index_col='Date')
In [3]:
data.index = pd.to_datetime(data.index)
data.columns = ['close']
In [4]:
split_date = pd.Timestamp('01-01-2015')
In [5]:
data['log_ret'] = np.log(data.close) - np.log(data.close.shift(1))
In [6]:
data['pct_change'] = data.close.pct_change()
In [7]:
data.head(5)
Out[7]:
In [8]:
# Fit the normalisation statistics on the training period only (rows strictly
# before split_date). Computing them over the whole series would let
# information from the test period leak into the features the model trains on.
train_log_ret = data.loc[data.index < split_date, 'log_ret']
mean = train_log_ret.mean()
std = train_log_ret.std()
In [9]:
data['normalized'] = 1/(1+np.exp(-(data.log_ret-mean)/std))
In [10]:
data['5MA'] = data.normalized.rolling(5).mean()
In [11]:
data.dropna(inplace=True)
In [12]:
data_n = data.drop('close', axis=1).drop('log_ret', axis=1).drop('pct_change', axis=1)
In [13]:
# Split with a boolean mask rather than label slices: label-based slicing
# includes both endpoints, so a row stamped exactly split_date would end up in
# train AND test. With the mask every row belongs to exactly one partition
# (rows on or after split_date go to test).
is_train = data_n.index < split_date
train = data_n[is_train]
test = data_n[~is_train]
In [14]:
train.count()
Out[14]:
In [15]:
x_train = train[:-1]
y_train = train['5MA'][1:]
x_test = test[:-1]
y_test = test['5MA'][1:]
In [16]:
x_train.count()
Out[16]:
In [17]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import keras.backend as K
from keras.callbacks import EarlyStopping
In [18]:
x_train_np = x_train.values
y_train_np = y_train.values
x_test_np = x_test.values
y_test_np = y_test.values
In [19]:
x_train_t = x_train_np.reshape(x_train.shape[0], 1, 2)
x_test_t = x_test_np.reshape(x_test.shape[0], 1, 2)
In [20]:
early_stop = EarlyStopping(monitor='loss', patience=2, verbose=1)
In [21]:
K.clear_session()
model = Sequential()
model.add(LSTM(100, input_shape= (x_train_t.shape[1], x_train_t.shape[2]), activation='relu', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(150, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='mse', optimizer='adam')
In [22]:
model.summary()
In [23]:
history = model.fit(x_train_t, y_train, epochs = 1000, batch_size=32, verbose = 1, callbacks=[early_stop])
In [24]:
y_pred = model.predict(x_test_t, batch_size=32)
In [25]:
# Overlay the predicted and the actual next-step '5MA' for the test period.
fig, ax = plt.subplots(figsize=(16, 9))
ax.plot(y_pred, label='predicted')
ax.plot(y_test_np, label='real')
ax.legend()
Out[25]:
In [26]:
evaluation_df = pd.DataFrame (y_pred, columns = ['predicted'])
In [27]:
evaluation_df['real'] = y_test_np
In [28]:
evaluation_df['test_x'] = x_test_np[:,1]
In [29]:
evaluation_df['predicted_move'] = (evaluation_df.predicted - evaluation_df.test_x) > 0
evaluation_df['real_move'] = (evaluation_df.real - evaluation_df.test_x) > 0
In [30]:
evaluation_df['guess_correct'] = evaluation_df.predicted_move == evaluation_df.real_move
In [31]:
evaluation_df.head(20)
Out[31]:
In [32]:
evaluation_df[evaluation_df['guess_correct']==False].count() / evaluation_df.count()
Out[32]:
In [33]:
evaluation_df[evaluation_df['guess_correct']==True].count() / evaluation_df.count()
Out[33]:
In [34]:
def R_squared(y_pred, y_real):
    """Return the coefficient of determination (R^2) of predictions vs. observations.

    Both inputs are converted to flat 1-D float arrays and compared
    position by position. Flattening matters because a column vector of
    shape (n, 1) subtracted from a vector of shape (n,) would otherwise
    broadcast to an (n, n) matrix and silently yield a wrong score.

    Parameters
    ----------
    y_pred : array-like
        Predicted values (list, NumPy array of any shape, or pandas Series).
    y_real : array-like
        Observed values with the same number of elements as ``y_pred``.

    Returns
    -------
    float
        ``1 - SS_res / SS_tot``. ``nan`` when the score is undefined, i.e.
        when the input is empty or ``y_real`` has zero variance.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    pred = np.asarray(y_pred, dtype=float).ravel()
    real = np.asarray(y_real, dtype=float).ravel()
    if pred.shape != real.shape:
        raise ValueError(
            "y_pred and y_real must have the same number of elements "
            "(got {} and {})".format(pred.size, real.size)
        )
    if real.size == 0:
        return float('nan')

    ss_res = np.sum((real - pred) ** 2)
    ss_tot = np.sum((real - real.mean()) ** 2)
    if ss_tot == 0:
        # A constant target has no variance to explain; avoid dividing by zero.
        return float('nan')
    return 1 - (ss_res / ss_tot)
In [35]:
r_sq = R_squared(evaluation_df['predicted'], evaluation_df['real'])
In [36]:
r_sq
Out[36]:
In [68]:
def logit(x, mean, std):
    """Inverse of the sigmoid normalisation ``1 / (1 + exp(-(v - mean) / std))``.

    Parameters
    ----------
    x : float or array-like
        Sigmoid-scaled value(s). Only values strictly inside (0, 1) have a
        finite inverse.
    mean, std : float
        Location and scale that were used in the forward transformation.

    Returns
    -------
    Same kind as ``x`` (NumPy float for scalar input)
        ``log(x / (1 - x)) * std + mean``. The boundaries map to ``-inf``
        (x == 0) and ``+inf`` (x == 1); values outside [0, 1] give ``nan``.
    """
    if isinstance(x, (int, float)):
        # A plain Python float equal to 1 would raise ZeroDivisionError in
        # x / (1 - x); a NumPy scalar yields inf instead.
        x = np.float64(x)
    # Out-of-domain inputs are reported through inf/nan in the result rather
    # than through NumPy runtime warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.log(x / (1 - x)) * std + mean
In [38]:
reverse_df = evaluation_df.drop('real_move', axis=1).drop('predicted_move', axis=1).drop('guess_correct', axis=1)
In [39]:
reverse_df.columns = ['predicted_MA', 'real_MA', 'test_step_MA']
In [40]:
reverse_df['test_step_val'] = x_test_np[:,0]
In [41]:
for s in range(1,4):
reverse_df['test_step-{}'.format(s)] = reverse_df.test_step_val.shift(s)
In [42]:
reverse_df['predicted_value'] = reverse_df.predicted_MA*5 - (reverse_df.test_step_val + reverse_df['test_step-1'] + reverse_df['test_step-2'] + reverse_df['test_step-3'])
In [43]:
reverse_df['real_value'] = reverse_df.real_MA*5 - (reverse_df.test_step_val + reverse_df['test_step-1'] + reverse_df['test_step-2'] + reverse_df['test_step-3'])
In [74]:
reverse_df['predicted_close_ret'] = reverse_df['predicted_value'].apply(logit,mean=mean,std=std)
reverse_df['real_close_ret'] = reverse_df['real_value'].apply(logit,mean=mean,std=std)
In [116]:
reverse_df.head(700)
Out[116]:
In [107]:
pred_close_change = reverse_df['predicted_close_ret']
In [108]:
pred_close_change.dropna(inplace=True)
In [109]:
close = [100]
In [110]:
for i in range(1,pred_close_change.count()):
close.append(close[i-1]*np.e**pred_close_change.values[i])
In [111]:
real_close_change = reverse_df['real_close_ret'].dropna()
In [126]:
pred_close_change.plot(figsize=(16,9))
#real_close_change.plot()
Out[126]:
In [112]:
close_real = [100]
In [113]:
for i in range(1,real_close_change.count()):
close_real.append(close_real[i-1]*np.e**real_close_change.values[i])
In [128]:
# Compare the price path implied by the predictions with the observed one.
fig, ax = plt.subplots(figsize=(16, 9))
ax.plot(close, label='predicted')
ax.plot(close_real, label='real')
ax.legend()
Out[128]:
In [ ]: