In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
data = pd.read_csv("../data/EURUSD_daily.csv", index_col='Date')

In [3]:
data.index = pd.to_datetime(data.index)
data.columns = ['close']

In [4]:
split_date = pd.Timestamp('2015-01-01')  # ISO format avoids day-first/month-first ambiguity

In [5]:
data['log_ret'] = np.log(data.close) - np.log(data.close.shift(1))

In [6]:
data['pct_change'] = data.close.pct_change()
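
As a quick sanity check (a sketch, not from the original run): the two return
definitions are tied by log_ret = log(1 + pct_change), which is why the head()
values below differ only in the sixth decimal place.

# log(1 + r) ≈ r for small daily moves; the identity itself is exact
assert np.allclose(np.log1p(data['pct_change'].dropna()),
                   data['log_ret'].dropna())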

In [7]:
data.head(5)


Out[7]:
close log_ret pct_change
Date
2000-01-03 1.0276 NaN NaN
2000-01-04 1.0299 0.002236 0.002238
2000-01-05 1.0317 0.001746 0.001748
2000-01-06 1.0299 -0.001746 -0.001745
2000-01-07 1.0283 -0.001555 -0.001554

In [8]:
mean = data.log_ret.mean()
std = data.log_ret.std()

In [9]:
data['normalized'] = 1 / (1 + np.exp(-(data.log_ret - mean) / std))  # sigmoid of the z-scored log return
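
The z-scored log returns are squashed through a sigmoid so every value lands in
(0, 1), matching the sigmoid output layer used further down. A quick property
check (illustrative, not part of the original run):

# a z-score of 0 maps to 0.5; large moves saturate toward 0 or 1
assert data['normalized'].dropna().between(0, 1).all()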

In [10]:
data['5MA'] = data.normalized.rolling(5).mean()

In [11]:
data.dropna(inplace=True)

In [12]:
data_n = data.drop(['close', 'log_ret', 'pct_change'], axis=1)

In [13]:
train = data_n[:split_date]
test = data_n[split_date:]
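
Note that label-based datetime slicing is inclusive at both endpoints, so a row
stamped exactly at split_date would land in both frames. 2015-01-01 is a market
holiday and is likely absent from daily FX data, but a strictly disjoint split
could be written as, for example:

# illustrative variant: start the test set strictly after the split date
test_strict = data_n[split_date + pd.Timedelta(days=1):]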

In [14]:
train.count()


Out[14]:
normalized    3909
5MA           3909
dtype: int64

In [15]:
x_train = train[:-1]
y_train = train['5MA'][1:]

x_test = test[:-1]
y_test = test['5MA'][1:]
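
The slicing pairs today's features with tomorrow's target: row i of x_train
holds (normalized, 5MA) at step i, and y_train holds the 5MA at step i+1. An
equivalent formulation via shift(-1), as a sketch:

# predict the next day's smoothed value
y_train_alt = train['5MA'].shift(-1).dropna()
assert (y_train.values == y_train_alt.values).all()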

In [16]:
x_train.count()


Out[16]:
normalized    3908
5MA           3908
dtype: int64

In [17]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import keras.backend as K
from keras.callbacks import EarlyStopping


Using TensorFlow backend.

In [18]:
x_train_np = x_train.values
y_train_np = y_train.values

x_test_np = x_test.values
y_test_np = y_test.values

In [19]:
x_train_t = x_train_np.reshape(x_train.shape[0], 1, 2)
x_test_t = x_test_np.reshape(x_test.shape[0], 1, 2)
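
Keras LSTM layers expect input of shape (samples, timesteps, features); here
each sample is a single timestep carrying the two features:

x_train_t.shape   # (3908, 1, 2): normalized and 5MA at one step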

In [20]:
early_stop = EarlyStopping(monitor='loss', patience=2, verbose=1)  # no validation data, so monitor the training loss

In [21]:
K.clear_session()

model = Sequential()

model.add(LSTM(100, input_shape=(x_train_t.shape[1], x_train_t.shape[2]), activation='relu', return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(150, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='mse', optimizer='adam')

In [22]:
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm_1 (LSTM)                (None, 1, 100)            41200     
_________________________________________________________________
dropout_1 (Dropout)          (None, 1, 100)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 150)               150600    
_________________________________________________________________
dropout_2 (Dropout)          (None, 150)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 151       
=================================================================
Total params: 191,951
Trainable params: 191,951
Non-trainable params: 0
_________________________________________________________________
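
The parameter counts follow the standard LSTM formula
4 * ((input_dim + units) * units + units), i.e. four gates, each with a kernel,
a recurrent kernel, and a bias:

4 * ((2 + 100) * 100 + 100)     # lstm_1:  41200
4 * ((100 + 150) * 150 + 150)   # lstm_2: 150600
150 * 1 + 1                     # dense_1:   151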

In [23]:
history = model.fit(x_train_t, y_train_np, epochs=1000, batch_size=32, verbose=1, callbacks=[early_stop])


Epoch 1/1000
3908/3908 [==============================] - 5s - loss: 0.0071     
Epoch 2/1000
3908/3908 [==============================] - 0s - loss: 0.0047     
Epoch 3/1000
3908/3908 [==============================] - 0s - loss: 0.0034     
Epoch 4/1000
3908/3908 [==============================] - 0s - loss: 0.0032     
Epoch 5/1000
3908/3908 [==============================] - 0s - loss: 0.0031     
Epoch 6/1000
3908/3908 [==============================] - 0s - loss: 0.0031     
Epoch 7/1000
3908/3908 [==============================] - 0s - loss: 0.0031     
Epoch 8/1000
3908/3908 [==============================] - 0s - loss: 0.0031     
Epoch 9/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 10/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 11/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 12/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 13/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 14/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 15/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 16/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 17/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 18/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 19/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 20/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 21/1000
3908/3908 [==============================] - 0s - loss: 0.0030     
Epoch 00020: early stopping

In [24]:
y_pred = model.predict(x_test_t, batch_size=32)

In [25]:
fig = plt.figure(figsize = (16,9))
plt.plot(y_pred)
plt.plot(y_test_np)
plt.legend(['predicted', 'real'])


Out[25]:
<matplotlib.legend.Legend at 0x7f22f4162c18>

In [26]:
evaluation_df = pd.DataFrame(y_pred, columns=['predicted'])

In [27]:
evaluation_df['real'] = y_test_np

In [28]:
evaluation_df['test_x'] = x_test_np[:, 1]  # the 5MA feature at the input step

In [29]:
evaluation_df['predicted_move'] = (evaluation_df.predicted - evaluation_df.test_x) > 0
evaluation_df['real_move'] = (evaluation_df.real - evaluation_df.test_x) > 0

In [30]:
evaluation_df['guess_correct'] = evaluation_df.predicted_move == evaluation_df.real_move

In [31]:
evaluation_df.head(20)


Out[31]:
predicted real test_x predicted_move real_move guess_correct
0 0.422422 0.392086 0.409029 True False False
1 0.393396 0.366916 0.392086 True False False
2 0.386127 0.341283 0.366916 True False False
3 0.381560 0.333727 0.341283 True False False
4 0.371154 0.306021 0.333727 True False False
5 0.368070 0.395491 0.306021 True True True
6 0.419422 0.435629 0.395491 True True True
7 0.439373 0.417601 0.435629 True False False
8 0.416933 0.472164 0.417601 False True False
9 0.467582 0.417783 0.472164 False False True
10 0.396697 0.350642 0.417783 False False True
11 0.382444 0.377069 0.350642 True True True
12 0.406854 0.384413 0.377069 True True True
13 0.398362 0.403700 0.384413 True True True
14 0.424238 0.392009 0.403700 True False False
15 0.377724 0.356232 0.392009 False False True
16 0.368132 0.361565 0.356232 True True True
17 0.399543 0.460263 0.361565 True True True
18 0.467338 0.379172 0.460263 True False False
19 0.388359 0.491167 0.379172 True True True

In [32]:
evaluation_df[evaluation_df['guess_correct']==False].count() / evaluation_df.count()


Out[32]:
predicted         0.414127
real              0.414127
test_x            0.414127
predicted_move    0.414127
real_move         0.414127
guess_correct     0.414127
dtype: float64

In [33]:
evaluation_df[evaluation_df['guess_correct']==True].count() / evaluation_df.count()


Out[33]:
predicted         0.585873
real              0.585873
test_x            0.585873
predicted_move    0.585873
real_move         0.585873
guess_correct     0.585873
dtype: float64
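
Since guess_correct is boolean, its mean is the hit rate directly; a one-line
equivalent of the two ratio cells above:

evaluation_df['guess_correct'].mean()   # ≈ 0.586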

In [34]:
def R_squared(y_pred, y_real):
    # coefficient of determination: 1 - SS_res / SS_tot
    ss_res = np.sum((y_real - y_pred)**2)
    ss_tot = np.sum((y_real - np.mean(y_real))**2)
    return 1 - (ss_res / ss_tot)

In [35]:
r_sq = R_squared(evaluation_df['predicted'], evaluation_df['real'])

In [36]:
r_sq


Out[36]:
0.62323181856346033
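
As a cross-check (assuming scikit-learn is available), r2_score computes the
same statistic; note that it takes y_true first:

from sklearn.metrics import r2_score
r2_score(evaluation_df['real'], evaluation_df['predicted'])   # ≈ 0.623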

In [68]:
# inverse of the sigmoid normalization: maps (0, 1) back to log-return space
def logit(x, mean, std):
    return np.log(x/(1-x))*std + mean
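
Round trip: logit inverts the normalization from In [9], so applying it to the
normalized column should recover the raw log returns exactly:

# logit(sigmoid((x - mean) / std)) == x, up to float error
assert np.allclose(logit(data['normalized'], mean, std), data['log_ret'])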

In [38]:
reverse_df = evaluation_df.drop(['real_move', 'predicted_move', 'guess_correct'], axis=1)

In [39]:
reverse_df.columns = ['predicted_MA', 'real_MA', 'test_step_MA']

In [40]:
reverse_df['test_step_val'] = x_test_np[:, 0]  # the normalized-return feature at the input step

In [41]:
# the three previous normalized returns, needed below to invert the 5-day MA
for s in range(1, 4):
    reverse_df['test_step-{}'.format(s)] = reverse_df.test_step_val.shift(s)

In [42]:
reverse_df['predicted_value'] = (reverse_df.predicted_MA * 5
    - (reverse_df.test_step_val + reverse_df['test_step-1']
       + reverse_df['test_step-2'] + reverse_df['test_step-3']))

In [43]:
reverse_df['real_value'] = (reverse_df.real_MA * 5
    - (reverse_df.test_step_val + reverse_df['test_step-1']
       + reverse_df['test_step-2'] + reverse_df['test_step-3']))
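
If the bookkeeping is right, inverting the moving average on the real series
simply recovers the next row's newest normalized value (compare real_value in
row 3 with test_step_val in row 4 of the table below). A quick check, as a
sketch:

rv = reverse_df['real_value'].values
tv = reverse_df['test_step_val'].values
# rows 0-2 are NaN (shifts undefined); the last row's successor is not in x_test
assert np.allclose(rv[3:-1], tv[4:])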

In [74]:
reverse_df['predicted_close_ret'] = reverse_df['predicted_value'].apply(logit, mean=mean, std=std)
reverse_df['real_close_ret'] = reverse_df['real_value'].apply(logit, mean=mean, std=std)

In [116]:
reverse_df.head(700)


Out[116]:
predicted_MA real_MA test_step_MA test_step_val test_step-1 test_step-2 test_step-3 predicted_value real_value predicted_close predicted_close_ret real_close_ret
0 0.422422 0.392086 0.409029 0.495526 NaN NaN NaN NaN NaN NaN NaN NaN
1 0.393396 0.366916 0.392086 0.224067 0.495526 NaN NaN NaN NaN NaN NaN NaN
2 0.386127 0.341283 0.366916 0.291382 0.224067 0.495526 NaN NaN NaN NaN NaN NaN
3 0.381560 0.333727 0.341283 0.370673 0.291382 0.224067 0.495526 0.526151 0.286988 0.000682 0.000682 -0.005647
4 0.371154 0.306021 0.333727 0.286988 0.370673 0.291382 0.224067 0.682658 0.356994 0.004806 0.004806 -0.003641
5 0.368070 0.395491 0.306021 0.356994 0.286988 0.370673 0.291382 0.534311 0.671419 0.000886 0.000886 0.004486
6 0.419422 0.435629 0.395491 0.671419 0.356994 0.286988 0.370673 0.411036 0.492069 -0.002214 -0.002214 -0.000169
7 0.439373 0.417601 0.435629 0.492069 0.671419 0.356994 0.286988 0.389394 0.280532 -0.002777 -0.002777 -0.005845
8 0.416933 0.472164 0.417601 0.280532 0.492069 0.671419 0.356994 0.283648 0.559804 -0.005749 -0.005749 0.001528
9 0.467582 0.417783 0.472164 0.559804 0.280532 0.492069 0.671419 0.334087 0.085091 -0.004273 -0.004273 -0.014783
10 0.396697 0.350642 0.417783 0.085091 0.559804 0.280532 0.492069 0.565987 0.335712 0.001685 0.001685 -0.004227
11 0.382444 0.377069 0.350642 0.335712 0.085091 0.559804 0.280532 0.651079 0.624207 0.003919 0.003919 0.003194
12 0.406854 0.384413 0.377069 0.624207 0.335712 0.085091 0.559804 0.429457 0.317248 -0.001743 -0.001743 -0.004751
13 0.398362 0.403700 0.384413 0.317248 0.624207 0.335712 0.085091 0.629552 0.656239 0.003336 0.003336 0.004061
14 0.424238 0.392009 0.403700 0.656239 0.317248 0.624207 0.335712 0.187786 0.026640 -0.009104 -0.009104 -0.022412
15 0.377724 0.356232 0.392009 0.026640 0.656239 0.317248 0.624207 0.264284 0.156827 -0.006356 -0.006356 -0.010461
16 0.368132 0.361565 0.356232 0.156827 0.026640 0.656239 0.317248 0.683706 0.650873 0.004836 0.004836 0.003914
17 0.399543 0.460263 0.361565 0.650873 0.156827 0.026640 0.656239 0.507136 0.810735 0.000207 0.000207 0.009102
18 0.467338 0.379172 0.460263 0.810735 0.650873 0.156827 0.026640 0.691614 0.250787 0.005066 0.005066 -0.006796
19 0.388359 0.491167 0.379172 0.250787 0.810735 0.650873 0.156827 0.072574 0.586616 -0.015861 -0.015861 0.002212
20 0.481275 0.550382 0.491167 0.586616 0.250787 0.810735 0.650873 0.107364 0.452901 -0.013180 -0.013180 -0.001149
21 0.526129 0.545608 0.550382 0.452901 0.586616 0.250787 0.810735 0.529606 0.627004 0.000768 0.000768 0.003268
22 0.527031 0.560010 0.545608 0.627004 0.452901 0.586616 0.250787 0.717847 0.882743 0.005853 0.005853 0.012618
23 0.543036 0.555299 0.560010 0.882743 0.627004 0.452901 0.586616 0.165913 0.227232 -0.010042 -0.010042 -0.007605
24 0.522114 0.588140 0.555299 0.227232 0.882743 0.627004 0.452901 0.420689 0.750818 -0.001966 -0.001966 0.006908
25 0.561612 0.518038 0.588140 0.750818 0.227232 0.882743 0.627004 0.320264 0.102391 -0.004664 -0.004664 -0.013510
26 0.479556 0.490989 0.518038 0.102391 0.750818 0.227232 0.882743 0.434595 0.491758 -0.001612 -0.001612 -0.000177
27 0.479208 0.412791 0.490989 0.491758 0.102391 0.750818 0.227232 0.823839 0.491757 0.009649 0.009649 -0.000177
28 0.424551 0.455822 0.412791 0.491757 0.491758 0.102391 0.750818 0.286029 0.442387 -0.005676 -0.005676 -0.001415
29 0.452187 0.469776 0.455822 0.442387 0.491757 0.491758 0.102391 0.732641 0.820585 0.006316 0.006316 0.009510
... ... ... ... ... ... ... ... ... ... ... ... ...
670 0.504186 0.545638 0.526914 0.321977 0.741094 0.520511 0.393565 0.543782 0.751040 0.001124 0.001124 0.006915
671 0.529099 0.613733 0.545638 0.751040 0.321977 0.741094 0.520511 0.310874 0.734044 -0.004936 -0.004936 0.006361
672 0.579810 0.589979 0.613733 0.734044 0.751040 0.321977 0.741094 0.350894 0.401738 -0.003807 -0.003807 -0.002455
673 0.553431 0.579544 0.589979 0.401738 0.734044 0.751040 0.321977 0.558354 0.688922 0.001491 0.001491 0.004988
674 0.553777 0.618970 0.579544 0.688922 0.401738 0.734044 0.751040 0.193138 0.519106 -0.008888 -0.008888 0.000506
675 0.577574 0.512271 0.618970 0.519106 0.688922 0.401738 0.734044 0.544061 0.217545 0.001131 0.001131 -0.007954
676 0.486202 0.478093 0.512271 0.217545 0.519106 0.688922 0.401738 0.603701 0.563153 0.002654 0.002654 0.001613
677 0.471620 0.473489 0.478093 0.563153 0.217545 0.519106 0.688922 0.369373 0.378717 -0.003307 -0.003307 -0.003058
678 0.462860 0.437518 0.473489 0.378717 0.563153 0.217545 0.519106 0.635781 0.509068 0.003503 0.003503 0.000255
679 0.441195 0.443654 0.437518 0.509068 0.378717 0.563153 0.217545 0.537492 0.549785 0.000966 0.000966 0.001275
680 0.446566 0.534474 0.443654 0.549785 0.509068 0.378717 0.563153 0.232105 0.671647 -0.007433 -0.007433 0.004492
681 0.519004 0.493852 0.534474 0.671647 0.549785 0.509068 0.378717 0.485804 0.360044 -0.000325 -0.000325 -0.003558
682 0.479006 0.486909 0.493852 0.360044 0.671647 0.549785 0.509068 0.304485 0.344003 -0.005123 -0.005123 -0.003997
683 0.472973 0.503748 0.486909 0.344003 0.360044 0.671647 0.549785 0.439385 0.593260 -0.001491 -0.001491 0.002383
684 0.491673 0.471422 0.503748 0.593260 0.344003 0.360044 0.671647 0.489409 0.388156 -0.000235 -0.000235 -0.002809
685 0.461337 0.459020 0.471422 0.388156 0.593260 0.344003 0.360044 0.621223 0.609637 0.003114 0.003114 0.002809
686 0.459420 0.518946 0.459020 0.609637 0.388156 0.593260 0.344003 0.362046 0.659676 -0.003504 -0.003504 0.004157
687 0.506099 0.510673 0.518946 0.659676 0.609637 0.388156 0.593260 0.279770 0.302635 -0.005868 -0.005868 -0.005177
688 0.490336 0.531101 0.510673 0.302635 0.659676 0.609637 0.388156 0.491576 0.695401 -0.000181 -0.000181 0.005177
689 0.516633 0.545109 0.531101 0.695401 0.302635 0.659676 0.609637 0.315817 0.458195 -0.004792 -0.004792 -0.001016
690 0.522304 0.590445 0.545109 0.458195 0.695401 0.302635 0.659676 0.495614 0.836316 -0.000080 -0.000080 0.010201
691 0.565472 0.596518 0.590445 0.836316 0.458195 0.695401 0.302635 0.534811 0.690040 0.000899 0.000899 0.005020
692 0.566188 0.631077 0.596518 0.690040 0.836316 0.458195 0.695401 0.150989 0.475433 -0.010741 -0.010741 -0.000584
693 0.584702 0.540210 0.631077 0.475433 0.690040 0.836316 0.458195 0.463524 0.241064 -0.000883 -0.000883 -0.007123
694 0.511241 0.555740 0.540210 0.241064 0.475433 0.690040 0.836316 0.313350 0.535846 -0.004864 -0.004864 0.000925
695 0.532336 0.464436 0.555740 0.535846 0.241064 0.475433 0.690040 0.719298 0.379796 0.005897 0.005897 -0.003030
696 0.455764 0.453809 0.464436 0.379796 0.535846 0.241064 0.475433 0.646681 0.636903 0.003799 0.003799 0.003534
697 0.456547 0.464541 0.453809 0.636903 0.379796 0.535846 0.241064 0.489126 0.529094 -0.000242 -0.000242 0.000756
698 0.461208 0.512733 0.464541 0.529094 0.636903 0.379796 0.535846 0.224399 0.482023 -0.007706 -0.007706 -0.000420
699 0.497348 0.560295 0.512733 0.482023 0.529094 0.636903 0.379796 0.458924 0.773661 -0.000998 -0.000998 0.007694

700 rows × 12 columns


In [107]:
pred_close_change = reverse_df['predicted_close_ret']

In [108]:
pred_close_change = pred_close_change.dropna()  # avoid inplace dropna on a DataFrame slice

In [109]:
close = [100]

In [110]:
for r in pred_close_change.values:
    # compound each predicted log return onto the running price level
    close.append(close[-1] * np.exp(r))
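
The loop compounds the log returns one step at a time; an equivalent vectorized
form (a sketch) exponentiates their cumulative sum:

# price path from a base of 100: 100 * exp(cumulative log return)
close_vec = np.concatenate([[100], 100 * np.exp(np.cumsum(pred_close_change.values))])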

In [111]:
real_close_change = reverse_df['real_close_ret'].dropna()

In [126]:
pred_close_change.plot(figsize=(16,9))
#real_close_change.plot()


Out[126]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f22c4f695c0>

In [112]:
close_real = [100]

In [113]:
for r in real_close_change.values:
    close_real.append(close_real[-1] * np.exp(r))

In [128]:
fig = plt.figure(figsize = (16,9))
plt.plot(close)
plt.plot(close_real)
plt.legend(['predicted', 'real'])


Out[128]:
<matplotlib.legend.Legend at 0x7f22c4cebdd8>
