In this notebook, the predictor will be used to estimate the new states and rewards for the Dyna (hallucinated) iterations of the Q-learning agent.



In [1]:

    
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
from time import time
from sklearn.metrics import r2_score, median_absolute_error

%matplotlib inline

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('../../')

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23. Prefer the standalone package and fall back to the bundled copy only on
# old installations where `joblib` is not importable on its own.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib









    



Populating the interactive namespace from numpy and matplotlib

First, let's try to instantiate the best predictor that was found



In [2]:

    
best_params_df = pd.read_pickle('../../data/best_params_final_df.pkl')
best_params_df









    Out[2]:







  
    
      
      GOOD_DATA_RATIO
      SAMPLES_GOOD_DATA_RATIO
      ahead_days
      base_days
      model
      mre
      r2
      step_days
      train_days
      train_val_time
      x_filename
      y_filename
    
    
      ahead_days
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1.0
      0.99
      0.9
      1.0
      112.0
      linear
      0.015856
      0.986599
      7.0
      504.0
      -1.0
      x_base112_ahead1.pkl
      y_base112_ahead1.pkl
    
    
      7.0
      0.99
      0.9
      7.0
      112.0
      linear
      0.042367
      0.923348
      7.0
      756.0
      -1.0
      x_base112_ahead7.pkl
      y_base112_ahead7.pkl
    
    
      14.0
      0.99
      0.9
      14.0
      112.0
      linear
      0.060167
      0.865259
      7.0
      756.0
      -1.0
      x_base112_ahead14.pkl
      y_base112_ahead14.pkl
    
    
      28.0
      0.99
      0.9
      28.0
      112.0
      linear
      0.091966
      0.758046
      7.0
      756.0
      -1.0
      x_base112_ahead28.pkl
      y_base112_ahead28.pkl
    
    
      56.0
      0.99
      0.9
      56.0
      112.0
      linear
      0.127913
      0.590426
      7.0
      756.0
      -1.0
      x_base112_ahead56.pkl
      y_base112_ahead56.pkl



In [3]:

    
# Train the 1-day-ahead linear predictor with the best parameters found and
# report its train/test metrics.
# The names defined at notebook level in this cell (start_date, end_date,
# base_days, x_sub_df, y_sub_df, x_test, y_test, estimator, ...) are read by
# later cells, so this cell must run before them.
import predictor.feature_extraction as fe
from predictor.linear_predictor import LinearPredictor
import utils.misc as misc
import predictor.evaluation as ev

# Row key into best_params_df (which is indexed by ahead_days).
ahead_days = 1

# Get some parameters
train_days = int(best_params_df.loc[ahead_days, 'train_days'])
# NOTE: `ahead_days` is rebound by the unpacking below. `x_filename` and
# `y_filename` are unpacked but not used afterwards: the paths are rebuilt
# from `pid` instead.
GOOD_DATA_RATIO, \
train_val_time, \
base_days, \
step_days, \
ahead_days, \
SAMPLES_GOOD_DATA_RATIO, \
x_filename, \
y_filename = misc.unpack_params(best_params_df.loc[ahead_days,:])

# Identifier shared by the four dataset file names read below.
pid = 'base{}_ahead{}'.format(base_days, ahead_days)

# Get the datasets
x_train = pd.read_pickle('../../data/x_{}.pkl'.format(pid))
y_train = pd.read_pickle('../../data/y_{}.pkl'.format(pid))
x_test = pd.read_pickle('../../data/x_{}_test.pkl'.format(pid)).sort_index()
y_test = pd.DataFrame(pd.read_pickle('../../data/y_{}_test.pkl'.format(pid))).sort_index()

# Let's cut the training set to use only the required number of samples
# end_date is the last entry of the first index level of x_train
# (presumably the latest sample date -- TODO confirm the level is sorted).
end_date = x_train.index.levels[0][-1]
# Step back `train_days` from end_date to get the first date to keep.
start_date = fe.add_market_days(end_date, -train_days)
# Keep every row whose first-level index value is >= start_date.
x_sub_df = x_train.loc[(slice(start_date,None),slice(None)),:]
y_sub_df = pd.DataFrame(y_train.loc[(slice(start_date,None),slice(None))])

# Create the estimator and train
estimator = LinearPredictor()
estimator.fit(x_sub_df, y_sub_df)

# Get the training and test predictions
y_train_pred = estimator.predict(x_sub_df)
y_test_pred = estimator.predict(x_test)

# Get the training and test metrics for each symbol
metrics_train = ev.get_metrics_df(y_sub_df, y_train_pred)
metrics_test = ev.get_metrics_df(y_test, y_test_pred)

# Show the mean metrics
metrics_df = pd.DataFrame(columns=['train', 'test'])
metrics_df['train'] = metrics_train.mean()
metrics_df['test'] = metrics_test.mean()
print('Mean metrics: \n{}\n{}'.format(metrics_df,'-'*70))

# Plot the metrics in time
# get_metrics_in_time's result is indexed positionally below: element [2] is
# used as the x axis, [0] is plotted as r^2 and [1] as MRE.
metrics_train_time = ev.get_metrics_in_time(y_sub_df, y_train_pred, base_days + ahead_days)
metrics_test_time = ev.get_metrics_in_time(y_test, y_test_pred, base_days + ahead_days)
plt.plot(metrics_train_time[2], metrics_train_time[0], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[0], label='test', marker='.')
plt.title('$r^2$ metrics')
plt.legend()
# New figure so the MRE curves are not drawn on top of the r^2 ones.
plt.figure()
plt.plot(metrics_train_time[2], metrics_train_time[1], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[1], label='test', marker='.')
plt.title('MRE metrics')
plt.legend()









    



Mean metrics: 
        train      test
r2   0.983486  0.976241
mre  0.008762  0.013906
----------------------------------------------------------------------






    Out[3]:





<matplotlib.legend.Legend at 0x7f8529da1438>

Let's compare the date range of the predictor's training data with the start of the recommender's test set (to check that no data from the recommender test set is in the training set for the predictor)



In [4]:

    
print('The first training day for the predictor is: {}.'.format(start_date))









    



The first training day for the predictor is: 2012-07-16 00:00:00.



In [5]:

    
print('The last training day for the predictor is: {}.'.format(fe.add_market_days(end_date, base_days)))









    



The last training day for the predictor is: 2014-12-26 00:00:00.



In [6]:

    
print('The testing data for the recommender')
total_data_test_df = pd.read_pickle('../../data/data_test_df.pkl').stack(level='feature')
total_data_test_df.head()









    



The testing data for the recommender






    Out[6]:







  
    
      
      
      A
      AAL
      AAP
      AAPL
      ABBV
      ABC
      ABT
      ACN
      ADBE
      ADI
      ...
      XLNX
      XOM
      XRAY
      XRX
      XYL
      YHOO
      YUM
      ZBH
      ZION
      ZTS
    
    
      date
      feature
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2015-01-02
      Close
      40.56
      53.91
      158.56
      109.33
      65.89
      90.46
      44.90
      88.84
      72.34
      55.54
      ...
      43.60
      92.83
      51.93
      13.75
      38.08
      50.17
      72.35
      112.59
      28.29
      43.31
    
    
      High
      41.31
      54.60
      162.50
      111.44
      66.40
      91.32
      45.45
      90.09
      73.20
      56.25
      ...
      43.93
      93.05
      53.12
      14.02
      38.59
      50.78
      73.51
      114.61
      28.72
      43.70
    
    
      Low
      40.37
      53.07
      157.47
      107.35
      65.49
      89.82
      44.64
      88.43
      71.89
      54.97
      ...
      43.11
      91.81
      51.88
      13.66
      37.50
      49.47
      72.00
      111.78
      28.06
      43.06
    
    
      Open
      41.18
      54.28
      160.85
      111.39
      65.62
      90.61
      45.25
      89.67
      72.70
      55.68
      ...
      43.51
      92.25
      52.13
      13.88
      38.40
      50.66
      73.44
      114.21
      28.67
      43.46
    
    
      Volume
      1530798.00
      10756705.00
      509983.00
      53204626.00
      5087291.00
      1124780.00
      3217165.00
      2021565.00
      2356405.00
      1329619.00
      ...
      2402443.00
      10220410.00
      1829006.00
      3912022.00
      606118.00
      11924473.00
      1641557.00
      909491.00
      2299118.00
      1784851.00
    
  

5 rows × 499 columns



In [7]:

    
# The original indexed with `-0`, which is simply 0 but reads like "last element".
# The index is a (date, feature) MultiIndex, so the printed value is a tuple.
print('The first TEST day for the recommender is: {}'.format(total_data_test_df.index[0]))









    



The first TEST day for the recommender is: (Timestamp('2015-01-02 00:00:00'), 'Close')

Good!

The predictor will be used as it is, without retraining, for simplicity and computational performance



In [8]:

    
joblib.dump(estimator, '../../data/best_predictor.pkl')









    Out[8]:





['../../data/best_predictor.pkl']

Let's test the saved predictor... just in case.



In [9]:

    
# Round-trip check: reload the predictor that was just dumped and repeat the
# same evaluation that was run on the in-memory estimator. The metrics printed
# here are expected to match the ones obtained before saving.
# NOTE: this cell rebinds y_train_pred, y_test_pred, metrics_* and metrics_df.
estimator_reloaded = joblib.load('../../data/best_predictor.pkl')

# Get the training and test predictions
y_train_pred = estimator_reloaded.predict(x_sub_df)
y_test_pred = estimator_reloaded.predict(x_test)

# Get the training and test metrics for each symbol
metrics_train = ev.get_metrics_df(y_sub_df, y_train_pred)
metrics_test = ev.get_metrics_df(y_test, y_test_pred)

# Show the mean metrics
metrics_df = pd.DataFrame(columns=['train', 'test'])
metrics_df['train'] = metrics_train.mean()
metrics_df['test'] = metrics_test.mean()
print('Mean metrics: \n{}\n{}'.format(metrics_df,'-'*70))

# Plot the metrics in time
# get_metrics_in_time's result is indexed positionally below: element [2] is
# used as the x axis, [0] is plotted as r^2 and [1] as MRE.
metrics_train_time = ev.get_metrics_in_time(y_sub_df, y_train_pred, base_days + ahead_days)
metrics_test_time = ev.get_metrics_in_time(y_test, y_test_pred, base_days + ahead_days)
plt.plot(metrics_train_time[2], metrics_train_time[0], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[0], label='test', marker='.')
plt.title('$r^2$ metrics')
plt.legend()
# New figure so the MRE curves are not drawn on top of the r^2 ones.
plt.figure()
plt.plot(metrics_train_time[2], metrics_train_time[1], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[1], label='test', marker='.')
plt.title('MRE metrics')
plt.legend()









    



Mean metrics: 
        train      test
r2   0.983486  0.976241
mre  0.008762  0.013906
----------------------------------------------------------------------






    Out[9]:





<matplotlib.legend.Legend at 0x7f8529b37f60>

Looks good to me.

Let's assume that the data comes as real values for one ticker



In [17]:

    
# Load the train/validation data and keep Close and Volume for a single ticker
SYMBOL = 'SPY'
# Stack the 'feature' column level into the row index
# (presumably leaving one column per ticker -- confirm against the pickle).
total_data_train_df = pd.read_pickle('../../data/data_train_val_df.pkl').stack(level='feature')
# Select the ticker, move the features back to columns, keep the two of interest.
symbol_features_df = total_data_train_df[SYMBOL].unstack()
data_train_df = symbol_features_df[['Close', 'Volume']]



In [18]:

    
data_train_df.head()



In [41]:

    
def generate_samples(data_df):
    """Turn a window of Close/Volume history into two one-row samples.

    Each feature column is laid out as a single row (labelled with the first
    index value of ``data_df``, with positional column labels 0..n-1) and
    divided by its own first value, so every sample starts at 1.0.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame with 'Close' and 'Volume' columns; must have at least one row.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(close_sample, volume_sample)``, each of shape ``(1, len(data_df))``.
    """
    first_day = data_df.index[0]

    def _normalized_row(feature):
        # A one-column frame named after the first day, transposed into a single row.
        row = pd.DataFrame(data_df[feature].values, columns=[first_day]).T
        # Express every value relative to the first one in the window.
        return row / row.iloc[0, 0]

    return _normalized_row('Close'), _normalized_row('Volume')



In [42]:

    
data_df = data_train_df[:112]



In [43]:

    
# Inline version of the body of generate_samples(), run directly on data_df so
# its result can be compared with the function's output in the following cells.
# NOTE: this rebinds the notebook-level `start_date` that was set when the
# predictor's training window was computed.
start_date = data_df.index[0]
# One row labelled with the first date; values divided by the first value.
close_sample = pd.DataFrame(data_df['Close'].values, columns=[start_date]).T
close_sample = close_sample / close_sample.iloc[0,0]
volume_sample = pd.DataFrame(data_df['Volume'].values, columns=[start_date]).T
volume_sample = volume_sample / volume_sample.iloc[0,0]



In [44]:

    
close_sample









    Out[44]:







  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      102
      103
      104
      105
      106
      107
      108
      109
      110
      111
    
  
  
    
      1993-01-29
      1.0
      1.007055
      1.009103
      1.0198
      1.024124
      1.023441
      1.023441
      1.016386
      1.017751
      1.022758
      ...
      1.019117
      1.030496
      1.025489
      1.025489
      1.022758
      1.017069
      1.006372
      1.009103
      1.020482
      1.023441
    
  

1 rows × 112 columns



In [45]:

    
close_sample, volume_sample = generate_samples(data_df)



In [46]:

    
close_sample









    Out[46]:







  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      102
      103
      104
      105
      106
      107
      108
      109
      110
      111
    
  
  
    
      1993-01-29
      1.0
      1.007055
      1.009103
      1.0198
      1.024124
      1.023441
      1.023441
      1.016386
      1.017751
      1.022758
      ...
      1.019117
      1.030496
      1.025489
      1.025489
      1.022758
      1.017069
      1.006372
      1.009103
      1.020482
      1.023441
    
  

1 rows × 112 columns



In [47]:

    
volume_sample









    Out[47]:







  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      102
      103
      104
      105
      106
      107
      108
      109
      110
      111
    
  
  
    
      1993-01-29
      1.0
      0.478967
      0.200658
      0.527711
      0.529805
      0.49053
      0.594199
      0.121711
      0.378389
      0.019438
      ...
      0.044657
      0.438497
      0.206838
      0.436204
      0.603768
      0.28449
      0.245614
      0.342604
      0.247408
      0.376994
    
  

1 rows × 112 columns

Now, let's predict one step



In [155]:

    
history_df = data_train_df[:112]



In [156]:

    
# Load the two estimators that predict_one_step() reads as notebook-level names.
# NOTE: only best_predictor.pkl is written by this notebook; the volume
# predictor file must already exist (presumably produced by another notebook).
# joblib.load unpickles its input, so only load files from a trusted source.
estimator_close = joblib.load('../../data/best_predictor.pkl')
estimator_volume = joblib.load('../../data/best_volume_predictor.pkl')



In [157]:

    
h_history_df = history_df.copy()



In [167]:

    
def predict_one_step(h_history_df, keep=False):
    """Roll the history window forward by one predicted market day.

    The window is normalised with ``generate_samples`` and passed to the
    notebook-level ``estimator_close`` and ``estimator_volume``. Each prediction
    is multiplied by the first value of its column to undo the normalisation.
    The first row of the window is then dropped and the estimates are stored
    under the date returned by ``fe.add_market_days(last_date, 1)``.

    Parameters
    ----------
    h_history_df : pandas.DataFrame
        Date-indexed window with 'Close' and 'Volume' columns.
    keep : bool, optional
        Currently unused.

    Returns
    -------
    pandas.DataFrame
        A new frame with the first row removed and the predicted row added;
        the frame passed in is not modified.
    """
    norm_close, norm_volume = generate_samples(h_history_df)

    # The estimators work on values relative to the first day of the window.
    close_ratio = estimator_close.predict(norm_close).iloc[0, 0]
    volume_ratio = estimator_volume.predict(norm_volume).iloc[0, 0]
    next_values = {
        'Close': close_ratio * h_history_df['Close'].iloc[0],
        'Volume': volume_ratio * h_history_df['Volume'].iloc[0],
    }
    next_day = fe.add_market_days(h_history_df.index[-1], 1)

    # drop() returns a new frame, so the caller's window is left untouched.
    rolled_df = h_history_df.drop(h_history_df.index[0])
    rolled_df.loc[next_day, :] = next_values
    return rolled_df



In [159]:

    
# Inline version of the first half of predict_one_step(), kept to inspect the
# intermediate values: normalise the window, predict, then multiply by the first
# value of each column to get back to the original scale.
close_sample, volume_sample = generate_samples(h_history_df)
estimated_close = estimator_close.predict(close_sample).iloc[0,0] * h_history_df['Close'].iloc[0]
estimated_volume = estimator_volume.predict(volume_sample).iloc[0,0] * h_history_df['Volume'].iloc[0]



In [160]:

    
estimator_close.predict(close_sample).iloc[0,0]









    Out[160]:





1.0263297800006368



In [161]:

    
predicted_date = fe.add_market_days(h_history_df.index[-1], 1)
predicted_date









    Out[161]:





Timestamp('1993-07-12 00:00:00')



In [162]:

    
history_df









    Out[162]:







  
    
      feature
      Close
      Volume
    
    
      date
      
      
    
  
  
    
      1993-01-29
      43.94
      1003200.0
    
    
      1993-02-01
      44.25
      480500.0
    
    
      1993-02-02
      44.34
      201300.0
    
    
      1993-02-03
      44.81
      529400.0
    
    
      1993-02-04
      45.00
      531500.0
    
    
      1993-02-05
      44.97
      492100.0
    
    
      1993-02-08
      44.97
      596100.0
    
    
      1993-02-09
      44.66
      122100.0
    
    
      1993-02-10
      44.72
      379600.0
    
    
      1993-02-11
      44.94
      19500.0
    
    
      1993-02-12
      44.59
      42500.0
    
    
      1993-02-16
      43.47
      374800.0
    
    
      1993-02-17
      43.44
      210900.0
    
    
      1993-02-18
      43.41
      378100.0
    
    
      1993-02-19
      43.56
      34900.0
    
    
      1993-02-22
      43.72
      513600.0
    
    
      1993-02-23
      43.69
      373700.0
    
    
      1993-02-24
      44.25
      26300.0
    
    
      1993-02-25
      44.34
      44500.0
    
    
      1993-02-26
      44.41
      66200.0
    
    
      1993-03-01
      44.28
      66500.0
    
    
      1993-03-02
      44.94
      182400.0
    
    
      1993-03-03
      45.12
      280100.0
    
    
      1993-03-04
      44.88
      89500.0
    
    
      1993-03-05
      44.75
      40000.0
    
    
      1993-03-08
      45.75
      50800.0
    
    
      1993-03-09
      45.59
      169300.0
    
    
      1993-03-10
      45.69
      194400.0
    
    
      1993-03-11
      45.56
      70900.0
    
    
      1993-03-12
      45.09
      643600.0
    
    
      ...
      ...
      ...
    
    
      1993-05-27
      45.44
      53800.0
    
    
      1993-05-28
      45.22
      79100.0
    
    
      1993-06-01
      45.66
      28300.0
    
    
      1993-06-02
      45.59
      20300.0
    
    
      1993-06-03
      45.44
      21600.0
    
    
      1993-06-04
      45.28
      32000.0
    
    
      1993-06-07
      45.12
      121400.0
    
    
      1993-06-08
      44.72
      104500.0
    
    
      1993-06-09
      44.88
      43300.0
    
    
      1993-06-10
      44.91
      17900.0
    
    
      1993-06-11
      45.09
      647400.0
    
    
      1993-06-14
      45.03
      64200.0
    
    
      1993-06-15
      44.94
      142400.0
    
    
      1993-06-16
      45.03
      330900.0
    
    
      1993-06-17
      45.19
      37400.0
    
    
      1993-06-18
      44.50
      58500.0
    
    
      1993-06-21
      44.59
      29300.0
    
    
      1993-06-22
      44.62
      137500.0
    
    
      1993-06-23
      44.22
      227600.0
    
    
      1993-06-24
      44.81
      243700.0
    
    
      1993-06-25
      44.78
      44800.0
    
    
      1993-06-28
      45.28
      439900.0
    
    
      1993-06-29
      45.06
      207500.0
    
    
      1993-06-30
      45.06
      437600.0
    
    
      1993-07-01
      44.94
      605700.0
    
    
      1993-07-02
      44.69
      285400.0
    
    
      1993-07-06
      44.22
      246400.0
    
    
      1993-07-07
      44.34
      343700.0
    
    
      1993-07-08
      44.84
      248200.0
    
    
      1993-07-09
      44.97
      378200.0
    
  

112 rows × 2 columns



In [163]:

    
# Inline version of the second half of predict_one_step(): drop the first row
# and store the estimates under predicted_date.
# NOTE: not idempotent -- re-running this cell drops one more row each time,
# while predicted_date (computed in an earlier cell) stays the same.
h_history_df = h_history_df.drop(h_history_df.index[0])
h_history_df.loc[predicted_date,:] = {'Close': estimated_close,'Volume': estimated_volume}
h_history_df









    Out[163]:







  
    
      feature
      Close
      Volume
    
    
      date
      
      
    
  
  
    
      1993-02-01
      44.250000
      480500.000000
    
    
      1993-02-02
      44.340000
      201300.000000
    
    
      1993-02-03
      44.810000
      529400.000000
    
    
      1993-02-04
      45.000000
      531500.000000
    
    
      1993-02-05
      44.970000
      492100.000000
    
    
      1993-02-08
      44.970000
      596100.000000
    
    
      1993-02-09
      44.660000
      122100.000000
    
    
      1993-02-10
      44.720000
      379600.000000
    
    
      1993-02-11
      44.940000
      19500.000000
    
    
      1993-02-12
      44.590000
      42500.000000
    
    
      1993-02-16
      43.470000
      374800.000000
    
    
      1993-02-17
      43.440000
      210900.000000
    
    
      1993-02-18
      43.410000
      378100.000000
    
    
      1993-02-19
      43.560000
      34900.000000
    
    
      1993-02-22
      43.720000
      513600.000000
    
    
      1993-02-23
      43.690000
      373700.000000
    
    
      1993-02-24
      44.250000
      26300.000000
    
    
      1993-02-25
      44.340000
      44500.000000
    
    
      1993-02-26
      44.410000
      66200.000000
    
    
      1993-03-01
      44.280000
      66500.000000
    
    
      1993-03-02
      44.940000
      182400.000000
    
    
      1993-03-03
      45.120000
      280100.000000
    
    
      1993-03-04
      44.880000
      89500.000000
    
    
      1993-03-05
      44.750000
      40000.000000
    
    
      1993-03-08
      45.750000
      50800.000000
    
    
      1993-03-09
      45.590000
      169300.000000
    
    
      1993-03-10
      45.690000
      194400.000000
    
    
      1993-03-11
      45.560000
      70900.000000
    
    
      1993-03-12
      45.090000
      643600.000000
    
    
      1993-03-15
      45.310000
      310800.000000
    
    
      ...
      ...
      ...
    
    
      1993-05-28
      45.220000
      79100.000000
    
    
      1993-06-01
      45.660000
      28300.000000
    
    
      1993-06-02
      45.590000
      20300.000000
    
    
      1993-06-03
      45.440000
      21600.000000
    
    
      1993-06-04
      45.280000
      32000.000000
    
    
      1993-06-07
      45.120000
      121400.000000
    
    
      1993-06-08
      44.720000
      104500.000000
    
    
      1993-06-09
      44.880000
      43300.000000
    
    
      1993-06-10
      44.910000
      17900.000000
    
    
      1993-06-11
      45.090000
      647400.000000
    
    
      1993-06-14
      45.030000
      64200.000000
    
    
      1993-06-15
      44.940000
      142400.000000
    
    
      1993-06-16
      45.030000
      330900.000000
    
    
      1993-06-17
      45.190000
      37400.000000
    
    
      1993-06-18
      44.500000
      58500.000000
    
    
      1993-06-21
      44.590000
      29300.000000
    
    
      1993-06-22
      44.620000
      137500.000000
    
    
      1993-06-23
      44.220000
      227600.000000
    
    
      1993-06-24
      44.810000
      243700.000000
    
    
      1993-06-25
      44.780000
      44800.000000
    
    
      1993-06-28
      45.280000
      439900.000000
    
    
      1993-06-29
      45.060000
      207500.000000
    
    
      1993-06-30
      45.060000
      437600.000000
    
    
      1993-07-01
      44.940000
      605700.000000
    
    
      1993-07-02
      44.690000
      285400.000000
    
    
      1993-07-06
      44.220000
      246400.000000
    
    
      1993-07-07
      44.340000
      343700.000000
    
    
      1993-07-08
      44.840000
      248200.000000
    
    
      1993-07-09
      44.970000
      378200.000000
    
    
      1993-07-12
      45.096931
      317005.444336
    
  

112 rows × 2 columns



In [164]:

    
# Smoke test of predict_one_step(): start from a fresh copy of the real history
# and roll the window forward 20 times, feeding each result into the next call.
h_history_df = history_df.copy()

for i in range(20):
    h_history_df = predict_one_step(h_history_df.copy())

Just for fun, let's see some predictions...



In [177]:

    
# Roll the window forward 112 times, feeding each prediction back in as input,
# and collect the newly predicted row after every step.
h_history_df = history_df.copy()
predicted_rows = []

for i in range(112):
    h_history_df = predict_one_step(h_history_df.copy())
    # The last row of the rolled window is the freshly predicted day.
    predicted_rows.append(h_history_df.iloc[-1])

# Build the frame once: `DataFrame.append` was removed in pandas 2.0, and
# appending inside the loop copied the whole frame on every iteration.
# Each collected Series is named after its date, which becomes the row label.
predicted_df = pd.DataFrame(predicted_rows)



In [178]:

    
predicted_df









    Out[178]:







  
    
      
      Close
      Volume
    
  
  
    
      1993-07-12
      45.096931
      317005.444336
    
    
      1993-07-13
      45.272855
      234663.131714
    
    
      1993-07-14
      45.352824
      214566.215515
    
    
      1993-07-15
      45.394431
      221401.904297
    
    
      1993-07-16
      45.528650
      214040.344238
    
    
      1993-07-19
      45.531005
      225595.892334
    
    
      1993-07-20
      45.584072
      202098.783875
    
    
      1993-07-21
      45.672890
      176303.114319
    
    
      1993-07-22
      45.680850
      190819.433594
    
    
      1993-07-23
      45.593667
      159563.713074
    
    
      1993-07-26
      45.715988
      205371.704102
    
    
      1993-07-27
      45.832896
      194119.818115
    
    
      1993-07-28
      45.977559
      166442.243958
    
    
      1993-07-29
      46.106970
      188824.995422
    
    
      1993-07-30
      46.025468
      177809.684753
    
    
      1993-08-02
      46.132740
      157451.440430
    
    
      1993-08-03
      46.226434
      163128.808594
    
    
      1993-08-04
      46.340224
      168103.535461
    
    
      1993-08-05
      46.380099
      159743.972778
    
    
      1993-08-06
      46.412681
      174360.876465
    
    
      1993-08-09
      46.569444
      134458.137512
    
    
      1993-08-10
      46.657768
      157866.064453
    
    
      1993-08-11
      46.764914
      161390.016174
    
    
      1993-08-12
      46.899871
      161131.683350
    
    
      1993-08-13
      46.843561
      168693.847656
    
    
      1993-08-16
      46.917006
      153741.778564
    
    
      1993-08-17
      46.908179
      145202.856445
    
    
      1993-08-18
      46.957220
      163674.975586
    
    
      1993-08-19
      47.179815
      156884.208679
    
    
      1993-08-20
      47.193003
      174177.392578
    
    
      ...
      ...
      ...
    
    
      1993-11-04
      50.929699
      156432.595825
    
    
      1993-11-05
      50.940004
      150538.153076
    
    
      1993-11-08
      51.015914
      156254.121399
    
    
      1993-11-09
      51.037842
      151454.243469
    
    
      1993-11-10
      51.226688
      156661.633301
    
    
      1993-11-11
      51.266941
      162682.617188
    
    
      1993-11-12
      51.362671
      156605.184937
    
    
      1993-11-15
      51.450026
      152773.208618
    
    
      1993-11-16
      51.535433
      160660.469055
    
    
      1993-11-17
      51.582946
      152807.156372
    
    
      1993-11-18
      51.620447
      173595.584106
    
    
      1993-11-19
      51.739458
      176346.240234
    
    
      1993-11-22
      51.773475
      176841.870117
    
    
      1993-11-23
      51.815487
      177174.087524
    
    
      1993-11-24
      51.931993
      168392.449951
    
    
      1993-11-26
      52.041623
      165126.640320
    
    
      1993-11-29
      52.150271
      168954.719543
    
    
      1993-11-30
      52.288375
      168614.578247
    
    
      1993-12-01
      52.279436
      169158.032227
    
    
      1993-12-02
      52.379620
      165528.286743
    
    
      1993-12-03
      52.437111
      153546.093750
    
    
      1993-12-06
      52.454606
      165083.322144
    
    
      1993-12-07
      52.541034
      164385.871887
    
    
      1993-12-08
      52.627165
      169782.336426
    
    
      1993-12-09
      52.645518
      173587.911987
    
    
      1993-12-10
      52.763918
      166172.686768
    
    
      1993-12-13
      52.897331
      160526.953125
    
    
      1993-12-14
      53.000237
      163752.575684
    
    
      1993-12-15
      53.074286
      164596.865845
    
    
      1993-12-16
      53.126134
      167482.305908
    
  

112 rows × 2 columns



In [180]:

    
# Real data: the 112-day history window followed by the next 112 rows of data_train_df.
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the supported equivalent.
real_df = pd.concat([history_df, data_train_df[112:224]])
plt.plot(real_df.index, real_df['Close'], 'b', label='real')
plt.plot(predicted_df.index, predicted_df['Close'], 'r', label='predicted')
# Title and axis labels so the figure can be read on its own.
plt.title('Real vs. predicted Close price')
plt.xlabel('date')
plt.ylabel('Close')
plt.legend()
plt.show()



In [ ]:



In [ ]:



In [ ]:

	GOOD_DATA_RATIO	SAMPLES_GOOD_DATA_RATIO	ahead_days	base_days	model	mre	r2	step_days	train_days	train_val_time	x_filename	y_filename
ahead_days
1.0	0.99	0.9	1.0	112.0	linear	0.015856	0.986599	7.0	504.0	-1.0	x_base112_ahead1.pkl	y_base112_ahead1.pkl
7.0	0.99	0.9	7.0	112.0	linear	0.042367	0.923348	7.0	756.0	-1.0	x_base112_ahead7.pkl	y_base112_ahead7.pkl
14.0	0.99	0.9	14.0	112.0	linear	0.060167	0.865259	7.0	756.0	-1.0	x_base112_ahead14.pkl	y_base112_ahead14.pkl
28.0	0.99	0.9	28.0	112.0	linear	0.091966	0.758046	7.0	756.0	-1.0	x_base112_ahead28.pkl	y_base112_ahead28.pkl
56.0	0.99	0.9	56.0	112.0	linear	0.127913	0.590426	7.0	756.0	-1.0	x_base112_ahead56.pkl	y_base112_ahead56.pkl

		A	AAL	AAP	AAPL	ABBV	ABC	ABT	ACN	ADBE	ADI	...	XLNX	XOM	XRAY	XRX	XYL	YHOO	YUM	ZBH	ZION	ZTS
date	feature
2015-01-02	Close	40.56	53.91	158.56	109.33	65.89	90.46	44.90	88.84	72.34	55.54	...	43.60	92.83	51.93	13.75	38.08	50.17	72.35	112.59	28.29	43.31
	High	41.31	54.60	162.50	111.44	66.40	91.32	45.45	90.09	73.20	56.25	...	43.93	93.05	53.12	14.02	38.59	50.78	73.51	114.61	28.72	43.70
	Low	40.37	53.07	157.47	107.35	65.49	89.82	44.64	88.43	71.89	54.97	...	43.11	91.81	51.88	13.66	37.50	49.47	72.00	111.78	28.06	43.06
	Open	41.18	54.28	160.85	111.39	65.62	90.61	45.25	89.67	72.70	55.68	...	43.51	92.25	52.13	13.88	38.40	50.66	73.44	114.21	28.67	43.46
	Volume	1530798.00	10756705.00	509983.00	53204626.00	5087291.00	1124780.00	3217165.00	2021565.00	2356405.00	1329619.00	...	2402443.00	10220410.00	1829006.00	3912022.00	606118.00	11924473.00	1641557.00	909491.00	2299118.00	1784851.00

feature	Close	Volume
date
1993-01-29	43.94	1003200.0
1993-02-01	44.25	480500.0
1993-02-02	44.34	201300.0
1993-02-03	44.81	529400.0
1993-02-04	45.00	531500.0

feature	Close	Volume
date
1993-02-01	44.250000	480500.000000
1993-02-02	44.340000	201300.000000
1993-02-03	44.810000	529400.000000
1993-02-04	45.000000	531500.000000
1993-02-05	44.970000	492100.000000
1993-02-08	44.970000	596100.000000
1993-02-09	44.660000	122100.000000
1993-02-10	44.720000	379600.000000
1993-02-11	44.940000	19500.000000
1993-02-12	44.590000	42500.000000
1993-02-16	43.470000	374800.000000
1993-02-17	43.440000	210900.000000
1993-02-18	43.410000	378100.000000
1993-02-19	43.560000	34900.000000
1993-02-22	43.720000	513600.000000
1993-02-23	43.690000	373700.000000
1993-02-24	44.250000	26300.000000
1993-02-25	44.340000	44500.000000
1993-02-26	44.410000	66200.000000
1993-03-01	44.280000	66500.000000
1993-03-02	44.940000	182400.000000
1993-03-03	45.120000	280100.000000
1993-03-04	44.880000	89500.000000
1993-03-05	44.750000	40000.000000
1993-03-08	45.750000	50800.000000
1993-03-09	45.590000	169300.000000
1993-03-10	45.690000	194400.000000
1993-03-11	45.560000	70900.000000
1993-03-12	45.090000	643600.000000
1993-03-15	45.310000	310800.000000
...	...	...
1993-05-28	45.220000	79100.000000
1993-06-01	45.660000	28300.000000
1993-06-02	45.590000	20300.000000
1993-06-03	45.440000	21600.000000
1993-06-04	45.280000	32000.000000
1993-06-07	45.120000	121400.000000
1993-06-08	44.720000	104500.000000
1993-06-09	44.880000	43300.000000
1993-06-10	44.910000	17900.000000
1993-06-11	45.090000	647400.000000
1993-06-14	45.030000	64200.000000
1993-06-15	44.940000	142400.000000
1993-06-16	45.030000	330900.000000
1993-06-17	45.190000	37400.000000
1993-06-18	44.500000	58500.000000
1993-06-21	44.590000	29300.000000
1993-06-22	44.620000	137500.000000
1993-06-23	44.220000	227600.000000
1993-06-24	44.810000	243700.000000
1993-06-25	44.780000	44800.000000
1993-06-28	45.280000	439900.000000
1993-06-29	45.060000	207500.000000
1993-06-30	45.060000	437600.000000
1993-07-01	44.940000	605700.000000
1993-07-02	44.690000	285400.000000
1993-07-06	44.220000	246400.000000
1993-07-07	44.340000	343700.000000
1993-07-08	44.840000	248200.000000
1993-07-09	44.970000	378200.000000
1993-07-12	45.096931	317005.444336