In [1]:

    
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
from time import time
from sklearn.metrics import r2_score, median_absolute_error

%matplotlib inline

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('../../')
import predictor.feature_extraction as fe
import utils.preprocessing as pp
import utils.misc as misc









    



Populating the interactive namespace from numpy and matplotlib

In the previous notebook some hyperparameter exploration was made for the Random Forest Regressor. Let's see which are now the best predictors for each number of "ahead days".



In [24]:

    
best_raw_params = pd.read_pickle('../../data/best_dataset_params_raw_df.pkl')

def keep_max_r2(record):
    return record.loc[np.argmax(record['r2']),:]

best_ini_pred_df = best_raw_params.groupby('ahead_days').apply(keep_max_r2)
best_ini_pred_df









    Out[24]:







  
    
      
      GOOD_DATA_RATIO
      SAMPLES_GOOD_DATA_RATIO
      ahead_days
      base_days
      model
      mre
      r2
      step_days
      train_days
      train_val_time
      x_filename
      y_filename
    
    
      ahead_days
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1.0
      0.99
      0.9
      1.0
      112.0
      linear
      0.015856
      0.986599
      7.0
      504.0
      -1.0
      x_base112_ahead1.pkl
      y_base112_ahead1.pkl
    
    
      7.0
      0.99
      0.9
      7.0
      112.0
      linear
      0.042367
      0.923348
      7.0
      756.0
      -1.0
      x_base112_ahead7.pkl
      y_base112_ahead7.pkl
    
    
      14.0
      0.99
      0.9
      14.0
      112.0
      linear
      0.060167
      0.865259
      7.0
      756.0
      -1.0
      x_base112_ahead14.pkl
      y_base112_ahead14.pkl
    
    
      28.0
      0.99
      0.9
      28.0
      112.0
      linear
      0.091966
      0.758046
      7.0
      756.0
      -1.0
      x_base112_ahead28.pkl
      y_base112_ahead28.pkl
    
    
      56.0
      0.99
      0.9
      56.0
      112.0
      linear
      0.127913
      0.590426
      7.0
      756.0
      -1.0
      x_base112_ahead56.pkl
      y_base112_ahead56.pkl

Those were the best predictors before the hyperparameters tuning



In [25]:

    
hyper1_df = pd.read_pickle('../../data/hyper_ahead1_random_forest_df.pkl')
hyper1_df









    Out[25]:







  
    
      
      n_estimators
      max_depth
      n_jobs
      scores
      r2
      mre
    
  
  
    
      0
      50
      5
      -1
      (0.983814083021, 0.0191439772135)
      0.983814
      0.019144
    
    
      1
      50
      10
      -1
      (0.986125708821, 0.0168504555546)
      0.986126
      0.016850
    
    
      2
      100
      5
      -1
      (0.984182203771, 0.0190912449575)
      0.984182
      0.019091
    
    
      3
      100
      10
      -1
      (0.98606697491, 0.0168202507896)
      0.986067
      0.016820



In [26]:

    
from sklearn.metrics import r2_score

hyper_1_best_series = hyper1_df.iloc[np.argmax(hyper1_df['r2'])].copy()
hyper_1_best_series.name = 1
hyper_1_best_series









    Out[26]:





n_estimators                                   50
max_depth                                      10
n_jobs                                         -1
scores          (0.986125708821, 0.0168504555546)
r2                                       0.986126
mre                                     0.0168505
Name: 1, dtype: object



In [27]:

    
ahead_days = [1, 7, 14, 28, 56]
best_hyper_df = pd.DataFrame()
best_hyper_df.index.name = 'ahead_days'

for ahead in ahead_days:
    hyper_df = pd.read_pickle('../../data/hyper_ahead{}_random_forest_df.pkl'.format(ahead))
    hyper_best_series = hyper_df.iloc[np.argmax(hyper_df['r2'])].copy()
    hyper_best_series.name = ahead
    best_hyper_df = best_hyper_df.append(hyper_best_series)
best_hyper_df









    Out[27]:







  
    
      
      max_depth
      mre
      n_estimators
      n_jobs
      r2
      scores
    
    
      ahead_days
      
      
      
      
      
      
    
  
  
    
      1
      10.0
      0.016850
      50.0
      -1.0
      0.986126
      (0.986125708821, 0.0168504555546)
    
    
      7
      5.0
      0.042002
      100.0
      -1.0
      0.929017
      (0.929016577116, 0.0420024748489)
    
    
      14
      5.0
      0.059515
      100.0
      -1.0
      0.868927
      (0.868927025792, 0.0595149788733)
    
    
      28
      5.0
      0.090310
      50.0
      -1.0
      0.765112
      (0.76511197092, 0.0903104492391)
    
    
      56
      5.0
      0.126410
      50.0
      -1.0
      0.615412
      (0.615412026805, 0.126409538141)

Let's compare the new best Random Forest predictors with the old Linear Regressors.



In [28]:

    
def join_and_compare(df1, df2, column, labels):
    tj1 = pd.DataFrame(df1[column].copy())
    tj1.rename(columns = {column: labels[0]}, inplace=True)
    tj2 = pd.DataFrame(df2[column].copy())
    tj2.rename(columns = {column: labels[1]}, inplace=True)
    comp_df = tj1.join(tj2)
    comp_df['diff'] = comp_df[labels[1]] - comp_df[labels[0]]
    
    return comp_df

First the $r^2$ metrics



In [29]:

    
comp_r2_df = join_and_compare(best_ini_pred_df, best_hyper_df, 'r2', ['linear', 'random_forest'])
comp_r2_df['best'] = comp_r2_df.apply(lambda x: np.argmax(x), axis=1)
comp_r2_df









    Out[29]:







  
    
      
      linear
      random_forest
      diff
      best
    
    
      ahead_days
      
      
      
      
    
  
  
    
      1.0
      0.986599
      0.986126
      -0.000474
      linear
    
    
      7.0
      0.923348
      0.929017
      0.005669
      random_forest
    
    
      14.0
      0.865259
      0.868927
      0.003668
      random_forest
    
    
      28.0
      0.758046
      0.765112
      0.007066
      random_forest
    
    
      56.0
      0.590426
      0.615412
      0.024986
      random_forest

The values are very similar in both cases. A minor difference can be seen only in the case of 56 days ahead, in which the random forest seems to be a bit better than the linear predictor. In any case, as the linear predictor is much simpler, and faster, it's probably better to keep it as the best predictor. It can be seen that this scenario is different from the one before hyperparameter tuning, in which the linear predictor was always better.

And then the MRE metrics



In [30]:

    
comp_mre_df = join_and_compare(best_ini_pred_df, best_hyper_df, 'mre', ['linear', 'random_forest'])
comp_mre_df['best'] = comp_mre_df.apply(lambda x: np.argmax(x), axis=1)
comp_mre_df









    Out[30]:







  
    
      
      linear
      random_forest
      diff
      best
    
    
      ahead_days
      
      
      
      
    
  
  
    
      1.0
      0.015856
      0.016850
      0.000994
      random_forest
    
    
      7.0
      0.042367
      0.042002
      -0.000365
      linear
    
    
      14.0
      0.060167
      0.059515
      -0.000652
      linear
    
    
      28.0
      0.091966
      0.090310
      -0.001656
      linear
    
    
      56.0
      0.127913
      0.126410
      -0.001504
      linear

The values for the MRE metrics are almost the same for both predictors.

Conlcusion: The linear predictor will be chosen for all the predictions. In the case of the 56 ahead days prediction, a better $r^2$ metric could be achieved by the random forest predictor, but the linear predictor is still far simpler and faster.

Testing the chosen predictor

Let's get the test data



In [31]:

    
data_test_df = pd.read_pickle('../../data/data_test_df.pkl')
data_test_df.head()









    Out[31]:







  
    
      feature
      Close
      ...
      Volume
    
    
      
      SPY
      MMM
      ABT
      ABBV
      ACN
      ATVI
      AYI
      ADBE
      AMD
      AAP
      ...
      XEL
      XRX
      XLNX
      XL
      XYL
      YHOO
      YUM
      ZBH
      ZION
      ZTS
    
    
      date
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2015-01-02
      205.43
      164.06
      44.90
      65.89
      88.84
      20.13
      139.88
      72.34
      NaN
      158.56
      ...
      2535289.0
      3912022.0
      2402443.0
      NaN
      606118.0
      11924473.0
      1641557.0
      909491.0
      2299118.0
      1784851.0
    
    
      2015-01-05
      201.72
      160.36
      44.91
      64.65
      87.34
      19.85
      136.52
      71.98
      2.66
      156.47
      ...
      3107187.0
      7032861.0
      2611059.0
      NaN
      1369903.0
      14389308.0
      3176619.0
      2163761.0
      5326879.0
      3116681.0
    
    
      2015-01-06
      199.82
      158.65
      44.40
      64.33
      86.71
      19.48
      134.81
      70.53
      2.63
      156.36
      ...
      4749648.0
      7170289.0
      3430462.0
      NaN
      1336249.0
      16204304.0
      3597727.0
      1782098.0
      9096223.0
      3987015.0
    
    
      2015-01-07
      202.31
      159.80
      44.76
      66.93
      88.53
      19.06
      137.20
      71.11
      2.58
      159.72
      ...
      2833770.0
      4836408.0
      2110610.0
      NaN
      1039030.0
      11788031.0
      3273992.0
      1462026.0
      2759850.0
      2481935.0
    
    
      2015-01-08
      205.90
      163.63
      45.68
      67.63
      89.88
      19.25
      142.00
      72.92
      2.61
      161.12
      ...
      2516764.0
      6229982.0
      2824232.0
      NaN
      821836.0
      14704771.0
      3061324.0
      1408433.0
      1831484.0
      3121258.0
    
  

5 rows × 2495 columns

When generating the datasets, some symbols were removed from the training set (because they contained too many missing points). The same symbols should be removed from the test set.

Let's generate datasets for the test set, with the best parameters found.



In [33]:

    
best_ini_pred_df









    Out[33]:







  
    
      
      GOOD_DATA_RATIO
      SAMPLES_GOOD_DATA_RATIO
      ahead_days
      base_days
      model
      mre
      r2
      step_days
      train_days
      train_val_time
      x_filename
      y_filename
    
    
      ahead_days
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1.0
      0.99
      0.9
      1.0
      112.0
      linear
      0.015856
      0.986599
      7.0
      504.0
      -1.0
      x_base112_ahead1.pkl
      y_base112_ahead1.pkl
    
    
      7.0
      0.99
      0.9
      7.0
      112.0
      linear
      0.042367
      0.923348
      7.0
      756.0
      -1.0
      x_base112_ahead7.pkl
      y_base112_ahead7.pkl
    
    
      14.0
      0.99
      0.9
      14.0
      112.0
      linear
      0.060167
      0.865259
      7.0
      756.0
      -1.0
      x_base112_ahead14.pkl
      y_base112_ahead14.pkl
    
    
      28.0
      0.99
      0.9
      28.0
      112.0
      linear
      0.091966
      0.758046
      7.0
      756.0
      -1.0
      x_base112_ahead28.pkl
      y_base112_ahead28.pkl
    
    
      56.0
      0.99
      0.9
      56.0
      112.0
      linear
      0.127913
      0.590426
      7.0
      756.0
      -1.0
      x_base112_ahead56.pkl
      y_base112_ahead56.pkl



In [34]:

    
best_ini_pred_df.to_pickle('../../data/best_params_final_df.pkl')

Some playing with the data to remove the same symbols as in the training set



In [54]:

    
params = best_ini_pred_df.loc[1]



In [55]:

    
train_val_time = int(params['train_val_time'])
base_days = int(params['base_days'])
step_days = int(params['step_days'])
ahead_days = int(params['ahead_days'])
    
print('Generating: base{}_ahead{}'.format(base_days, ahead_days))
pid = 'base{}_ahead{}'.format(base_days, ahead_days)









    



Generating: base112_ahead1



In [56]:

    
y_train_df = pd.read_pickle('../../data/y_{}.pkl'.format(pid))
y_train_df.head()









    Out[56]:





1993-01-29  AAPL    0.945761
            ABT     1.416988
            ADBE    0.914591
            ADM     0.947878
            ADP     0.944196
Name: 112, dtype: float64



In [57]:

    
kept_symbols = y_train_df.index.get_level_values(1).unique().tolist()
len(kept_symbols)









    Out[57]:





285



In [58]:

    
len(data_test_df.columns.get_level_values(1).unique().tolist())









    Out[58]:





499



In [59]:

    
filtered_data_test_df = data_test_df.loc[:, (slice(None), kept_symbols)]



In [60]:

    
len(filtered_data_test_df.columns.get_level_values(1).unique().tolist())









    Out[60]:





285

OK, let's create a function to generate one test dataset



In [62]:

    
def generate_one_test_set(params, data_df):
    # print(('-'*70 + '\n {}, {} \n' + '-'*70).format(params['base_days'].values, params['ahead_days'].values))
    tic = time()
    
    train_val_time = int(params['train_val_time'])
    base_days = int(params['base_days'])
    step_days = int(params['step_days'])
    ahead_days = int(params['ahead_days'])
    
    print('Generating: base{}_ahead{}'.format(base_days, ahead_days))
    pid = 'base{}_ahead{}'.format(base_days, ahead_days)
    
    # Getting the data
    today = data_df.index[-1]  # Real date
    print(pid + ') data_df loaded')

    # Drop symbols with many missing points
    y_train_df = pd.read_pickle('../../data/y_{}.pkl'.format(pid))
    kept_symbols = y_train_df.index.get_level_values(1).unique().tolist()
    data_df = data_df.loc[:, (slice(None), kept_symbols)]
    print(pid + ') Irrelevant symbols dropped.')
    
    # Generate the intervals for the predictor
    x, y = fe.generate_train_intervals(data_df, 
                                       train_val_time, 
                                       base_days, 
                                       step_days,
                                       ahead_days, 
                                       today, 
                                       fe.feature_close_one_to_one)    
    print(pid + ') Intervals generated')
    
    # Drop "bad" samples and fill missing data
    x_y_df = pd.concat([x, y], axis=1)
    x_y_df = pp.drop_irrelevant_samples(x_y_df, params['SAMPLES_GOOD_DATA_RATIO'])
    x = x_y_df.iloc[:, :-1]
    y = x_y_df.iloc[:, -1]
    x = pp.fill_missing(x)
    print(pid + ') Irrelevant samples dropped and missing data filled.')
    
    # Pickle that
    x.to_pickle('../../data/x_{}_test.pkl'.format(pid))
    y.to_pickle('../../data/y_{}_test.pkl'.format(pid))
    
    toc = time()
    print('%s) %i intervals generated in: %i seconds.' % (pid, x.shape[0], (toc-tic)))
    
    return pid, x, y



In [64]:

    
for ind in range(best_ini_pred_df.shape[0]):
    pid, x, y = generate_one_set(best_ini_pred_df.iloc[ind,:], data_test_df)









    



Generating: base112_ahead1
base112_ahead1) data_df loaded
base112_ahead1) Irrelevant symbols dropped.
base112_ahead1) Intervals generated
base112_ahead1) Irrelevant samples dropped and missing data filled.
base112_ahead1) 15957 intervals generated in: 2 seconds.
Generating: base112_ahead7
base112_ahead7) data_df loaded
base112_ahead7) Irrelevant symbols dropped.
base112_ahead7) Intervals generated
base112_ahead7) Irrelevant samples dropped and missing data filled.
base112_ahead7) 15673 intervals generated in: 2 seconds.
Generating: base112_ahead14
base112_ahead14) data_df loaded
base112_ahead14) Irrelevant symbols dropped.
base112_ahead14) Intervals generated
base112_ahead14) Irrelevant samples dropped and missing data filled.
base112_ahead14) 15388 intervals generated in: 2 seconds.
Generating: base112_ahead28
base112_ahead28) data_df loaded
base112_ahead28) Irrelevant symbols dropped.
base112_ahead28) Intervals generated
base112_ahead28) Irrelevant samples dropped and missing data filled.
base112_ahead28) 14818 intervals generated in: 2 seconds.
Generating: base112_ahead56
base112_ahead56) data_df loaded
base112_ahead56) Irrelevant symbols dropped.
base112_ahead56) Intervals generated
base112_ahead56) Irrelevant samples dropped and missing data filled.
base112_ahead56) 13678 intervals generated in: 2 seconds.



In [67]:

    
x = pd.read_pickle('../../data/x_base112_ahead7_test.pkl')
x









    Out[67]:







  
    
      
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      102
      103
      104
      105
      106
      107
      108
      109
      110
      111
    
  
  
    
      2015-05-14
      AES
      1.0
      0.996948
      0.990166
      0.975246
      0.976263
      0.975076
      0.956087
      0.933876
      0.940827
      0.924890
      ...
      0.802136
      0.874364
      0.874025
      0.883011
      0.870973
      0.884368
      0.882333
      0.889115
      0.881655
      0.866904
    
    
      2015-05-26
      LH
      1.0
      0.996912
      0.992405
      0.984393
      0.990486
      0.984811
      0.979553
      0.999249
      0.996077
      1.005926
      ...
      0.979720
      0.981305
      0.968453
      0.913954
      0.934318
      0.982641
      1.007094
      1.024954
      1.029461
      1.024370
    
    
      2015-01-13
      AET
      1.0
      1.014385
      1.020850
      1.027477
      1.026830
      1.036690
      1.035720
      1.042832
      1.043640
      1.038468
      ...
      0.871020
      0.877808
      0.871666
      0.867141
      0.868434
      0.873768
      0.886213
      0.880394
      0.876677
      0.862130
    
    
      2015-03-05
      DHR
      1.0
      0.972875
      0.954134
      0.936873
      0.950847
      0.939832
      0.948545
      0.965806
      0.954299
      0.990301
      ...
      0.831991
      0.812428
      0.795002
      0.801249
      0.771823
      0.794016
      0.762946
      0.787112
      0.777577
      0.787112
    
    
      2015-11-02
      TIF
      1.0
      0.997919
      1.005277
      1.003047
      0.998216
      0.998216
      1.011075
      1.002378
      0.984391
      0.983574
      ...
      1.052401
      1.060428
      1.070685
      1.050840
      1.078564
      1.055820
      1.057009
      1.052921
      1.058644
      1.078118
    
    
      2015-09-14
      MCHP
      1.0
      1.015770
      1.018873
      1.003878
      0.956050
      0.942089
      0.875905
      0.826784
      0.838935
      0.830920
      ...
      0.616856
      0.605481
      0.639866
      0.592296
      0.608583
      0.654085
      0.664943
      0.630817
      0.650465
      0.630300
    
    
      2015-05-14
      MAS
      1.0
      1.004813
      1.000633
      1.008360
      1.003293
      1.005446
      0.997847
      0.992527
      0.998480
      0.998987
      ...
      1.048132
      1.051552
      1.055731
      1.045725
      1.035592
      1.036225
      1.047118
      1.046485
      1.043065
      1.050412
    
    
      2015-03-05
      FMC
      1.0
      0.976036
      0.965069
      0.936231
      0.922015
      0.939480
      0.934200
      0.963038
      0.949228
      0.965069
      ...
      0.756702
      0.758733
      0.738830
      0.742486
      0.746141
      0.741673
      0.736393
      0.749797
      0.726645
      0.728676
    
    
      2015-03-25
      UNM
      1.0
      0.996394
      1.003778
      1.020350
      1.006354
      0.995363
      1.005753
      1.012021
      1.008844
      1.011850
      ...
      0.843294
      0.822772
      0.798643
      0.780268
      0.762494
      0.786193
      0.800876
      0.800618
      0.786622
      0.770393
    
    
      2015-03-05
      TROW
      1.0
      0.984706
      0.980882
      0.964118
      0.959412
      0.974412
      0.963529
      0.972353
      0.967647
      0.987941
      ...
      1.023529
      1.021765
      1.019412
      1.017059
      1.016765
      1.007059
      1.006176
      1.022941
      1.019118
      1.000588
    
    
      2015-06-15
      R
      1.0
      1.013201
      1.018557
      1.035202
      1.031758
      1.031184
      1.035393
      1.027549
      1.034437
      1.044959
      ...
      1.185575
      1.173522
      1.189593
      1.183662
      1.168357
      1.142912
      1.160895
      1.158408
      1.182323
      1.175818
    
    
      2016-06-14
      BK
      1.0
      1.004254
      1.004254
      1.013370
      1.025828
      1.027347
      1.025220
      1.050744
      0.973868
      0.931936
      ...
      1.046187
      1.040717
      1.169553
      1.150106
      1.130963
      1.176542
      1.182315
      1.169857
      1.180188
      1.181708
    
    
      2015-05-26
      WEC
      1.0
      1.007366
      1.009881
      1.005390
      1.006827
      1.009342
      1.022458
      1.008624
      1.017068
      1.008264
      ...
      0.944305
      0.953647
      0.954366
      0.971254
      0.983651
      0.975386
      0.972332
      0.996227
      0.989759
      0.972691
    
    
      2015-07-24
      KMB
      1.0
      0.995829
      1.009385
      1.011470
      1.016684
      1.022941
      1.027112
      1.005214
      1.018770
      1.009385
      ...
      1.093326
      1.059437
      1.074296
      1.097497
      1.102972
      1.106882
      1.106621
      1.111575
      1.102972
      1.090459
    
    
      2015-04-24
      CTL
      1.0
      0.997253
      0.994217
      0.990458
      0.972676
      0.978459
      0.981061
      0.974122
      0.980049
      0.981350
      ...
      0.906318
      0.912245
      0.898655
      0.897788
      0.899957
      0.912823
      0.898655
      0.911811
      0.917450
      0.921209
    
    
      2015-04-06
      ORCL
      1.0
      0.998578
      1.002642
      1.003658
      1.001422
      0.996952
      0.993700
      0.995326
      0.996545
      0.981101
      ...
      0.913432
      0.907539
      0.883154
      0.904085
      0.908352
      0.900427
      0.926235
      0.921357
      0.920545
      0.928876
    
    
      2016-03-24
      MCHP
      1.0
      1.025679
      1.026045
      1.031548
      0.990462
      0.984593
      0.946809
      0.930668
      0.931768
      0.907924
      ...
      1.025679
      1.029714
      1.041453
      1.073734
      1.043287
      1.045488
      1.012472
      1.025312
      1.117021
      1.103081
    
    
      2015-11-20
      NSC
      1.0
      0.979500
      0.969147
      0.977860
      0.977655
      0.974375
      0.971812
      0.944547
      0.954387
      0.943624
      ...
      0.852399
      0.846966
      0.936142
      0.936552
      0.953772
      0.955617
      0.942804
      0.923637
      0.929889
      0.913694
    
    
      2016-02-24
      SWKS
      1.0
      1.007463
      1.022162
      1.011986
      1.037540
      1.037766
      1.037313
      1.045228
      1.043193
      1.031886
      ...
      1.241067
      1.231343
      1.239484
      1.235188
      1.225011
      1.221845
      1.228177
      1.224785
      1.233605
      1.224107
    
    
      2016-04-05
      TMO
      1.0
      1.006903
      0.985634
      0.996082
      0.999813
      1.021642
      1.033582
      1.034515
      1.036567
      1.041045
      ...
      1.194216
      1.204291
      1.206716
      1.205970
      1.214179
      1.213619
      1.209701
      1.209888
      1.200933
      1.207276
    
    
      2015-12-11
      WMT
      1.0
      0.956508
      0.980670
      0.981959
      0.956830
      0.948131
      0.951353
      0.961501
      0.971649
      0.986147
      ...
      0.618073
      0.591817
      0.585696
      0.570876
      0.572970
      0.591978
      0.619523
      0.618073
      0.637242
      0.651740
    
    
      2015-03-05
      LH
      1.0
      0.987184
      0.982884
      0.984101
      0.983452
      1.008112
      1.003650
      1.019387
      1.016142
      1.028634
      ...
      1.018981
      1.032528
      1.033014
      1.030256
      1.032933
      1.018008
      1.011843
      1.015493
      1.009815
      1.005435
    
    
      2015-04-06
      LNT
      1.0
      0.992061
      1.000541
      1.015698
      1.021112
      1.036810
      1.037532
      1.051425
      1.049441
      1.036268
      ...
      1.100686
      1.088777
      1.059726
      1.066402
      1.072717
      1.047997
      1.068928
      1.056117
      1.050523
      1.052508
    
    
      2015-02-24
      BLL
      1.0
      0.983608
      0.979711
      0.969395
      0.964580
      0.958161
      0.960912
      0.975928
      0.955238
      0.959652
      ...
      1.021492
      1.028198
      1.028485
      1.076055
      1.089752
      1.105857
      1.115028
      1.106717
      1.127063
      1.124427
    
    
      2015-03-16
      CSCO
      1.0
      0.997821
      0.989902
      0.995801
      0.996226
      0.978953
      0.977252
      0.980122
      0.983736
      0.989476
      ...
      0.933298
      0.921552
      0.923625
      0.924103
      0.921020
      0.924794
      0.930215
      0.908318
      0.884773
      0.863247
    
    
      2015-01-23
      KSS
      1.0
      1.006128
      1.008014
      1.000943
      1.018032
      0.997407
      1.000000
      1.026046
      1.019564
      1.074720
      ...
      1.009193
      1.023807
      1.032174
      1.022157
      1.026164
      1.037831
      1.002711
      1.010371
      1.015203
      1.017796
    
    
      2016-04-25
      STI
      1.0
      1.013857
      1.022624
      1.014989
      0.944853
      0.949095
      0.947681
      0.922370
      0.909926
      0.913886
      ...
      1.075933
      1.070701
      1.085124
      1.087387
      1.057268
      1.029695
      1.045249
      1.050198
      1.035916
      1.076640
    
    
      2016-05-04
      D
      1.0
      1.006203
      1.005197
      1.013998
      1.044342
      1.024141
      1.028080
      1.027578
      1.036798
      1.030511
      ...
      1.126739
      1.117603
      1.128080
      1.122967
      1.117100
      1.106873
      1.108215
      1.090863
      1.098994
      1.078374
    
    
      2015-12-22
      PAYX
      1.0
      1.004924
      1.008729
      1.012086
      1.019024
      1.017234
      1.011862
      0.975604
      0.988585
      0.988362
      ...
      1.007833
      1.024843
      1.008057
      1.029096
      1.018800
      1.078559
      1.094673
      1.092435
      1.094002
      1.103402
    
    
      2016-02-03
      CSCO
      1.0
      0.988012
      0.952349
      0.939687
      0.959017
      0.991159
      1.005994
      1.000824
      1.033116
      1.043905
      ...
      1.272571
      1.285457
      1.328688
      1.329287
      1.331161
      1.337379
      1.362329
      1.364352
      1.337679
      1.339852
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      2015-03-16
      EQT
      1.0
      0.990920
      0.977329
      0.980431
      0.970223
      0.962328
      0.953982
      0.932946
      0.933284
      0.928209
      ...
      0.955335
      0.949865
      0.932269
      0.922626
      0.929450
      0.930352
      0.934243
      0.930239
      0.904861
      0.879935
    
    
      2015-12-22
      CINF
      1.0
      1.026151
      1.029562
      1.033542
      1.049460
      1.040364
      1.043775
      1.038658
      1.048323
      1.035247
      ...
      1.244457
      1.254690
      1.249574
      1.252416
      1.257533
      1.270608
      1.279704
      1.280841
      1.284252
      1.284821
    
    
      2016-05-13
      ESRX
      1.0
      0.996238
      0.985978
      0.974008
      0.966142
      0.973324
      0.972640
      0.986320
      0.998632
      0.990766
      ...
      0.844049
      0.855335
      0.849179
      0.846785
      0.846785
      0.850205
      0.847811
      0.857387
      0.865937
      0.867305
    
    
      2016-04-25
      PCG
      1.0
      0.996652
      0.997870
      0.990262
      0.992088
      1.006391
      1.009738
      0.999087
      1.000609
      1.003043
      ...
      0.946439
      0.944005
      0.949178
      0.951613
      0.947961
      0.950091
      0.956482
      0.959525
      0.951004
      0.958612
    
    
      2015-12-02
      PX
      1.0
      0.984692
      1.005156
      1.017241
      1.024009
      1.017080
      0.986304
      0.991460
      0.990171
      1.000000
      ...
      1.170641
      1.181921
      1.175959
      1.188044
      1.179826
      1.175959
      1.185466
      1.185305
      1.188205
      1.199484
    
    
      2015-08-24
      KIM
      1.0
      0.975213
      1.008527
      1.018045
      1.025580
      1.011898
      1.001785
      1.009122
      1.022011
      1.000793
      ...
      0.905215
      0.931787
      0.954987
      0.939124
      0.963514
      0.965893
      0.951616
      0.986516
      0.973230
      0.999009
    
    
      2015-11-02
      TXN
      1.0
      1.008464
      0.999637
      1.015599
      0.990447
      0.970012
      0.968440
      0.935792
      0.928900
      0.900484
      ...
      0.887304
      0.892019
      0.874123
      0.867594
      0.873640
      0.856832
      0.854051
      0.856469
      0.849335
      0.868198
    
    
      2016-02-03
      CINF
      1.0
      0.994751
      0.989501
      0.967454
      0.973753
      0.971654
      0.926509
      0.943307
      0.950131
      0.962730
      ...
      1.259843
      1.259843
      1.262467
      1.262467
      1.262467
      1.234646
      1.254593
      1.249869
      1.244094
      1.256693
    
    
      2015-09-23
      CMI
      1.0
      0.997886
      0.993894
      0.939878
      0.952560
      0.995773
      1.009394
      1.043917
      1.087600
      1.105683
      ...
      1.105683
      1.124706
      1.103335
      1.108267
      1.129638
      1.144669
      1.141616
      1.175904
      1.165336
      1.164866
    
    
      2016-03-24
      KO
      1.0
      0.997854
      1.005007
      1.017883
      1.018240
      1.026109
      1.006438
      0.986409
      1.001431
      0.987124
      ...
      1.090129
      1.091559
      1.095494
      1.108011
      1.110873
      1.119099
      1.121245
      1.129471
      1.128040
      1.124464
    
    
      2015-10-22
      AES
      1.0
      0.996197
      1.019395
      1.028713
      1.055334
      1.046967
      1.049439
      1.081765
      1.101540
      1.099639
      ...
      1.106104
      1.102491
      1.089371
      1.090892
      1.085948
      1.107245
      1.100970
      1.108766
      1.098498
      1.106484
    
    
      2015-07-24
      UNM
      1.0
      0.995569
      0.996576
      1.000503
      1.014299
      1.010069
      1.001108
      0.997181
      0.995569
      0.996677
      ...
      0.945222
      0.929816
      0.942604
      0.955292
      0.969691
      0.969691
      0.967576
      0.979458
      0.975733
      0.967375
    
    
      2016-03-15
      BA
      1.0
      0.995917
      0.990474
      0.981175
      0.978453
      0.973917
      0.982309
      0.999546
      1.004309
      1.007938
      ...
      1.058290
      1.051486
      1.046723
      1.056475
      1.049671
      1.038784
      1.050352
      1.062826
      1.064641
      1.067135
    
    
      2016-03-04
      HES
      1.0
      0.977572
      0.963278
      0.982871
      0.981392
      0.991004
      0.980776
      0.956993
      0.965619
      0.967591
      ...
      1.067406
      1.055453
      1.084042
      1.085274
      1.087492
      1.093530
      1.096365
      1.106470
      1.108811
      1.107579
    
    
      2016-04-25
      GPC
      1.0
      1.001443
      1.007215
      1.005195
      1.001732
      1.014141
      1.036797
      1.073304
      1.063925
      1.061616
      ...
      1.056999
      1.055988
      1.072727
      1.097980
      1.102453
      1.108225
      1.089755
      1.103175
      1.093939
      1.078932
    
    
      2016-06-14
      UDR
      1.0
      1.011154
      1.024362
      1.023188
      1.023775
      1.032873
      1.036983
      1.033167
      1.019959
      1.022894
      ...
      1.017317
      1.030525
      1.011741
      0.987085
      0.998826
      1.026710
      0.991195
      0.992956
      0.978574
      0.972997
    
    
      2015-03-25
      LUV
      1.0
      0.968561
      0.974607
      1.009674
      0.984281
      0.992140
      0.981258
      0.972189
      0.973398
      0.976421
      ...
      0.853688
      0.848851
      0.833736
      0.814389
      0.800484
      0.805925
      0.829504
      0.831318
      0.859129
      0.827690
    
    
      2015-10-13
      VZ
      1.0
      0.987694
      1.005274
      1.004505
      0.982529
      0.979892
      0.968025
      1.025272
      1.038347
      1.010878
      ...
      1.134161
      1.144160
      1.139985
      1.137457
      1.138117
      1.150313
      1.158334
      1.152730
      1.153170
      1.149324
    
    
      2016-05-04
      CAG
      1.0
      1.012031
      1.009143
      1.017709
      1.021174
      1.002502
      1.009143
      0.997786
      1.006256
      0.982772
      ...
      0.867950
      0.861886
      0.856497
      0.838210
      0.829836
      0.829644
      0.832435
      0.831569
      0.835707
      0.831569
    
    
      2015-08-13
      SPGI
      1.0
      1.003642
      1.009249
      1.006326
      0.998371
      0.977715
      0.947139
      0.908416
      0.897297
      0.933001
      ...
      0.919774
      0.920684
      0.928113
      0.904965
      0.919822
      0.900077
      0.901275
      0.889725
      0.894709
      0.913064
    
    
      2016-01-04
      JBHT
      1.0
      1.031456
      1.022136
      0.996505
      0.979417
      0.994951
      0.990680
      0.979029
      0.981748
      0.986408
      ...
      1.094369
      1.104466
      1.116893
      1.121553
      1.114951
      1.120777
      1.134757
      1.137087
      1.136699
      1.133981
    
    
      2016-05-13
      BK
      1.0
      1.005993
      1.000856
      1.016267
      1.004852
      1.014840
      1.017979
      1.043664
      1.069920
      1.056792
      ...
      1.025114
      1.038242
      0.992580
      0.991724
      0.981735
      0.972888
      0.965183
      0.976884
      0.994292
      0.986872
    
    
      2015-01-02
      ADM
      1.0
      0.942065
      0.948992
      0.973552
      1.001259
      1.014484
      1.018262
      0.994962
      0.941436
      0.942695
      ...
      0.782116
      0.794710
      0.794081
      0.787154
      0.782116
      0.771411
      0.767632
      0.766373
      0.761335
      0.759446
    
    
      2015-09-14
      QCOM
      1.0
      1.031346
      1.031592
      1.031223
      0.993977
      0.998156
      0.959926
      0.943823
      0.922065
      0.923540
      ...
      0.684081
      0.680025
      0.644991
      0.665519
      0.691948
      0.714567
      0.712477
      0.690227
      0.719361
      0.696128
    
    
      2015-06-15
      PCG
      1.0
      1.004349
      1.010149
      1.028271
      1.017760
      1.028634
      1.031171
      1.023559
      1.021022
      1.026459
      ...
      1.160203
      1.140631
      1.139906
      1.144618
      1.125045
      1.113447
      1.121058
      1.114534
      1.138094
      1.138094
    
    
      2015-08-13
      HRS
      1.0
      1.005307
      1.007202
      1.006539
      0.995451
      0.972707
      0.937074
      0.902957
      0.890163
      0.927123
      ...
      0.932809
      0.932714
      0.939822
      0.920205
      0.935747
      0.921721
      0.922384
      0.912055
      0.915277
      0.927028
    
    
      2015-05-14
      AVY
      1.0
      0.988512
      1.001532
      1.015573
      1.010722
      1.009957
      1.006127
      1.010722
      1.018892
      1.018381
      ...
      0.942303
      0.932857
      0.937197
      0.924687
      0.912688
      0.941026
      0.931580
      0.925453
      0.938218
      0.926219
    
    
      2015-03-05
      VNO
      1.0
      0.995117
      1.000158
      0.977788
      0.965974
      0.971251
      0.976686
      1.006065
      1.017250
      1.022290
      ...
      1.066950
      1.063327
      1.079001
      1.105072
      1.112319
      1.098299
      1.084909
      1.099795
      1.070180
      1.084121
    
    
      2015-06-15
      FITB
      1.0
      1.007701
      1.006093
      1.025812
      1.013879
      1.028182
      1.034276
      1.032583
      1.017519
      1.011341
      ...
      0.916892
      0.914946
      0.921209
      0.907160
      0.882109
      0.868060
      0.878555
      0.884648
      0.915115
      0.901405
    
    
      2016-01-13
      BF.B
      1.0
      1.022664
      0.983248
      0.969453
      0.965840
      0.980292
      1.001478
      0.951059
      0.971588
      0.957793
      ...
      1.267203
      1.248645
      1.235507
      1.229430
      1.232879
      1.231565
      1.247003
      1.255214
      1.256200
      1.255214
    
  

15673 rows × 112 columns



In [70]:

    
x.iloc[10].plot()









    Out[70]:





<matplotlib.axes._subplots.AxesSubplot at 0x7faf6199d7b8>

The datasets were successfully generated

There will be two types of test. One is retraining at every step, and the other is without retraining. The first one is good enough to confirm that there was no overfitting on the hyperparameters. The second can show how "valid" is the model for periods outside the training period. If there is no time dependence in the results, the test without retraining may be the only one perfomed.



In [2]:

    
best_params_df = pd.read_pickle('../../data/best_params_final_df.pkl')
best_params_df









    Out[2]:







  
    
      
      GOOD_DATA_RATIO
      SAMPLES_GOOD_DATA_RATIO
      ahead_days
      base_days
      model
      mre
      r2
      step_days
      train_days
      train_val_time
      x_filename
      y_filename
    
    
      ahead_days
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1.0
      0.99
      0.9
      1.0
      112.0
      linear
      0.015856
      0.986599
      7.0
      504.0
      -1.0
      x_base112_ahead1.pkl
      y_base112_ahead1.pkl
    
    
      7.0
      0.99
      0.9
      7.0
      112.0
      linear
      0.042367
      0.923348
      7.0
      756.0
      -1.0
      x_base112_ahead7.pkl
      y_base112_ahead7.pkl
    
    
      14.0
      0.99
      0.9
      14.0
      112.0
      linear
      0.060167
      0.865259
      7.0
      756.0
      -1.0
      x_base112_ahead14.pkl
      y_base112_ahead14.pkl
    
    
      28.0
      0.99
      0.9
      28.0
      112.0
      linear
      0.091966
      0.758046
      7.0
      756.0
      -1.0
      x_base112_ahead28.pkl
      y_base112_ahead28.pkl
    
    
      56.0
      0.99
      0.9
      56.0
      112.0
      linear
      0.127913
      0.590426
      7.0
      756.0
      -1.0
      x_base112_ahead56.pkl
      y_base112_ahead56.pkl

Ahead 1

Without retraining

Warning: The dates that appear on the samples are the initial dates (there are 112 days ahead after the marked date).



In [59]:

    
from predictor.linear_predictor import LinearPredictor
import utils.misc as misc
import predictor.evaluation as ev

ahead_days = 1

# Get some parameters
train_days = int(best_params_df.loc[ahead_days, 'train_days'])
GOOD_DATA_RATIO, \
train_val_time, \
base_days, \
step_days, \
ahead_days, \
SAMPLES_GOOD_DATA_RATIO, \
x_filename, \
y_filename = misc.unpack_params(best_params_df.loc[ahead_days,:])

pid = 'base{}_ahead{}'.format(base_days, ahead_days)

# Get the datasets
x_train = pd.read_pickle('../../data/x_{}.pkl'.format(pid))
y_train = pd.read_pickle('../../data/y_{}.pkl'.format(pid))
x_test = pd.read_pickle('../../data/x_{}_test.pkl'.format(pid)).sort_index()
y_test = pd.DataFrame(pd.read_pickle('../../data/y_{}_test.pkl'.format(pid))).sort_index()

# Let's cut the training set to use only the required number of samples
end_date = x_train.index.levels[0][-1]
start_date = fe.add_market_days(end_date, -train_days)
x_sub_df = x_train.loc[(slice(start_date,None),slice(None)),:]
y_sub_df = pd.DataFrame(y_train.loc[(slice(start_date,None),slice(None))])

# Create the estimator and train
estimator = LinearPredictor()
estimator.fit(x_sub_df, y_sub_df)

# Get the training and test predictions
y_train_pred = estimator.predict(x_sub_df)
y_test_pred = estimator.predict(x_test)

# Get the training and test metrics for each symbol
metrics_train = ev.get_metrics_df(y_sub_df, y_train_pred)
metrics_test = ev.get_metrics_df(y_test, y_test_pred)

# Show the mean metrics
metrics_df = pd.DataFrame(columns=['train', 'test'])
metrics_df['train'] = metrics_train.mean()
metrics_df['test'] = metrics_test.mean()
print('Mean metrics: \n{}\n{}'.format(metrics_df,'-'*70))

# Plot the metrics in time
metrics_train_time = ev.get_metrics_in_time(y_sub_df, y_train_pred, base_days + ahead_days)
metrics_test_time = ev.get_metrics_in_time(y_test, y_test_pred, base_days + ahead_days)
plt.plot(metrics_train_time[2], metrics_train_time[0], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[0], label='test', marker='.')
plt.title('$r^2$ metrics')
plt.legend()
plt.figure()
plt.plot(metrics_train_time[2], metrics_train_time[1], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[1], label='test', marker='.')
plt.title('MRE metrics')
plt.legend()









    



Mean metrics: 
        train      test
r2   0.983486  0.976241
mre  0.008762  0.013906
----------------------------------------------------------------------






    Out[59]:





<matplotlib.legend.Legend at 0x7f10dfa9a9e8>

Ahead 7



In [60]:

    
from predictor.linear_predictor import LinearPredictor
import utils.misc as misc
import predictor.evaluation as ev

ahead_days = 7

# Get some parameters
train_days = int(best_params_df.loc[ahead_days, 'train_days'])
GOOD_DATA_RATIO, \
train_val_time, \
base_days, \
step_days, \
ahead_days, \
SAMPLES_GOOD_DATA_RATIO, \
x_filename, \
y_filename = misc.unpack_params(best_params_df.loc[ahead_days,:])

pid = 'base{}_ahead{}'.format(base_days, ahead_days)

# Get the datasets
x_train = pd.read_pickle('../../data/x_{}.pkl'.format(pid))
y_train = pd.read_pickle('../../data/y_{}.pkl'.format(pid))
x_test = pd.read_pickle('../../data/x_{}_test.pkl'.format(pid)).sort_index()
y_test = pd.DataFrame(pd.read_pickle('../../data/y_{}_test.pkl'.format(pid))).sort_index()

# Let's cut the training set to use only the required number of samples
end_date = x_train.index.levels[0][-1]
start_date = fe.add_market_days(end_date, -train_days)
x_sub_df = x_train.loc[(slice(start_date,None),slice(None)),:]
y_sub_df = pd.DataFrame(y_train.loc[(slice(start_date,None),slice(None))])

# Create the estimator and train
estimator = LinearPredictor()
estimator.fit(x_sub_df, y_sub_df)

# Get the training and test predictions
y_train_pred = estimator.predict(x_sub_df)
y_test_pred = estimator.predict(x_test)

# Get the training and test metrics for each symbol
metrics_train = ev.get_metrics_df(y_sub_df, y_train_pred)
metrics_test = ev.get_metrics_df(y_test, y_test_pred)

# Show the mean metrics
metrics_df = pd.DataFrame(columns=['train', 'test'])
metrics_df['train'] = metrics_train.mean()
metrics_df['test'] = metrics_test.mean()
print('Mean metrics: \n{}\n{}'.format(metrics_df,'-'*70))

# Plot the metrics in time
metrics_train_time = ev.get_metrics_in_time(y_sub_df, y_train_pred, base_days + ahead_days)
metrics_test_time = ev.get_metrics_in_time(y_test, y_test_pred, base_days + ahead_days)
plt.plot(metrics_train_time[2], metrics_train_time[0], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[0], label='test', marker='.')
plt.title('$r^2$ metrics')
plt.legend()
plt.figure()
plt.plot(metrics_train_time[2], metrics_train_time[1], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[1], label='test', marker='.')
plt.title('MRE metrics')
plt.legend()









    



Mean metrics: 
        train      test
r2   0.906177  0.874892
mre  0.026232  0.034764
----------------------------------------------------------------------






    Out[60]:





<matplotlib.legend.Legend at 0x7f10df101160>

Ahead 14



In [61]:

    
from predictor.linear_predictor import LinearPredictor
import utils.misc as misc
import predictor.evaluation as ev

ahead_days = 14

# Get some parameters
train_days = int(best_params_df.loc[ahead_days, 'train_days'])
GOOD_DATA_RATIO, \
train_val_time, \
base_days, \
step_days, \
ahead_days, \
SAMPLES_GOOD_DATA_RATIO, \
x_filename, \
y_filename = misc.unpack_params(best_params_df.loc[ahead_days,:])

pid = 'base{}_ahead{}'.format(base_days, ahead_days)

# Get the datasets
x_train = pd.read_pickle('../../data/x_{}.pkl'.format(pid))
y_train = pd.read_pickle('../../data/y_{}.pkl'.format(pid))
x_test = pd.read_pickle('../../data/x_{}_test.pkl'.format(pid)).sort_index()
y_test = pd.DataFrame(pd.read_pickle('../../data/y_{}_test.pkl'.format(pid))).sort_index()

# Let's cut the training set to use only the required number of samples
end_date = x_train.index.levels[0][-1]
start_date = fe.add_market_days(end_date, -train_days)
x_sub_df = x_train.loc[(slice(start_date,None),slice(None)),:]
y_sub_df = pd.DataFrame(y_train.loc[(slice(start_date,None),slice(None))])

# Create the estimator and train
estimator = LinearPredictor()
estimator.fit(x_sub_df, y_sub_df)

# Get the training and test predictions
y_train_pred = estimator.predict(x_sub_df)
y_test_pred = estimator.predict(x_test)

# Get the training and test metrics for each symbol
metrics_train = ev.get_metrics_df(y_sub_df, y_train_pred)
metrics_test = ev.get_metrics_df(y_test, y_test_pred)

# Show the mean metrics
metrics_df = pd.DataFrame(columns=['train', 'test'])
metrics_df['train'] = metrics_train.mean()
metrics_df['test'] = metrics_test.mean()
print('Mean metrics: \n{}\n{}'.format(metrics_df,'-'*70))

# Plot the metrics in time
metrics_train_time = ev.get_metrics_in_time(y_sub_df, y_train_pred, base_days + ahead_days)
metrics_test_time = ev.get_metrics_in_time(y_test, y_test_pred, base_days + ahead_days)
plt.plot(metrics_train_time[2], metrics_train_time[0], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[0], label='test', marker='.')
plt.title('$r^2$ metrics')
plt.legend()
plt.figure()
plt.plot(metrics_train_time[2], metrics_train_time[1], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[1], label='test', marker='.')
plt.title('MRE metrics')
plt.legend()









    



Mean metrics: 
        train      test
r2   0.826779  0.758697
mre  0.037349  0.051755
----------------------------------------------------------------------






    Out[61]:





<matplotlib.legend.Legend at 0x7f10df0b1a90>

Ahead 28



In [62]:

    
from predictor.linear_predictor import LinearPredictor
import utils.misc as misc
import predictor.evaluation as ev

ahead_days = 28

# Get some parameters
train_days = int(best_params_df.loc[ahead_days, 'train_days'])
GOOD_DATA_RATIO, \
train_val_time, \
base_days, \
step_days, \
ahead_days, \
SAMPLES_GOOD_DATA_RATIO, \
x_filename, \
y_filename = misc.unpack_params(best_params_df.loc[ahead_days,:])

pid = 'base{}_ahead{}'.format(base_days, ahead_days)

# Get the datasets
x_train = pd.read_pickle('../../data/x_{}.pkl'.format(pid))
y_train = pd.read_pickle('../../data/y_{}.pkl'.format(pid))
x_test = pd.read_pickle('../../data/x_{}_test.pkl'.format(pid)).sort_index()
y_test = pd.DataFrame(pd.read_pickle('../../data/y_{}_test.pkl'.format(pid))).sort_index()

# Let's cut the training set to use only the required number of samples
end_date = x_train.index.levels[0][-1]
start_date = fe.add_market_days(end_date, -train_days)
x_sub_df = x_train.loc[(slice(start_date,None),slice(None)),:]
y_sub_df = pd.DataFrame(y_train.loc[(slice(start_date,None),slice(None))])

# Create the estimator and train
estimator = LinearPredictor()
estimator.fit(x_sub_df, y_sub_df)

# Get the training and test predictions
y_train_pred = estimator.predict(x_sub_df)
y_test_pred = estimator.predict(x_test)

# Get the training and test metrics for each symbol
metrics_train = ev.get_metrics_df(y_sub_df, y_train_pred)
metrics_test = ev.get_metrics_df(y_test, y_test_pred)

# Show the mean metrics
metrics_df = pd.DataFrame(columns=['train', 'test'])
metrics_df['train'] = metrics_train.mean()
metrics_df['test'] = metrics_test.mean()
print('Mean metrics: \n{}\n{}'.format(metrics_df,'-'*70))

# Plot the metrics in time
metrics_train_time = ev.get_metrics_in_time(y_sub_df, y_train_pred, base_days + ahead_days)
metrics_test_time = ev.get_metrics_in_time(y_test, y_test_pred, base_days + ahead_days)
plt.plot(metrics_train_time[2], metrics_train_time[0], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[0], label='test', marker='.')
plt.title('$r^2$ metrics')
plt.legend()
plt.figure()
plt.plot(metrics_train_time[2], metrics_train_time[1], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[1], label='test', marker='.')
plt.title('MRE metrics')
plt.legend()









    



Mean metrics: 
        train      test
r2   0.696077  0.515802
mre  0.052396  0.078545
----------------------------------------------------------------------






    Out[62]:





<matplotlib.legend.Legend at 0x7f10df4ce390>

Ahead 56



In [63]:

    
from predictor.linear_predictor import LinearPredictor
import utils.misc as misc
import predictor.evaluation as ev

ahead_days = 56

# Get some parameters
train_days = int(best_params_df.loc[ahead_days, 'train_days'])
GOOD_DATA_RATIO, \
train_val_time, \
base_days, \
step_days, \
ahead_days, \
SAMPLES_GOOD_DATA_RATIO, \
x_filename, \
y_filename = misc.unpack_params(best_params_df.loc[ahead_days,:])

pid = 'base{}_ahead{}'.format(base_days, ahead_days)

# Get the datasets
x_train = pd.read_pickle('../../data/x_{}.pkl'.format(pid))
y_train = pd.read_pickle('../../data/y_{}.pkl'.format(pid))
x_test = pd.read_pickle('../../data/x_{}_test.pkl'.format(pid)).sort_index()
y_test = pd.DataFrame(pd.read_pickle('../../data/y_{}_test.pkl'.format(pid))).sort_index()

# Let's cut the training set to use only the required number of samples
end_date = x_train.index.levels[0][-1]
start_date = fe.add_market_days(end_date, -train_days)
x_sub_df = x_train.loc[(slice(start_date,None),slice(None)),:]
y_sub_df = pd.DataFrame(y_train.loc[(slice(start_date,None),slice(None))])

# Create the estimator and train
estimator = LinearPredictor()
estimator.fit(x_sub_df, y_sub_df)

# Get the training and test predictions
y_train_pred = estimator.predict(x_sub_df)
y_test_pred = estimator.predict(x_test)

# Get the training and test metrics for each symbol
metrics_train = ev.get_metrics_df(y_sub_df, y_train_pred)
metrics_test = ev.get_metrics_df(y_test, y_test_pred)

# Show the mean metrics
metrics_df = pd.DataFrame(columns=['train', 'test'])
metrics_df['train'] = metrics_train.mean()
metrics_df['test'] = metrics_test.mean()
print('Mean metrics: \n{}\n{}'.format(metrics_df,'-'*70))

# Plot the metrics in time
metrics_train_time = ev.get_metrics_in_time(y_sub_df, y_train_pred, base_days + ahead_days)
metrics_test_time = ev.get_metrics_in_time(y_test, y_test_pred, base_days + ahead_days)
plt.plot(metrics_train_time[2], metrics_train_time[0], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[0], label='test', marker='.')
plt.title('$r^2$ metrics')
plt.legend()
plt.figure()
plt.plot(metrics_train_time[2], metrics_train_time[1], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[1], label='test', marker='.')
plt.title('MRE metrics')
plt.legend()









    



Mean metrics: 
        train      test
r2   0.494079  0.152134
mre  0.073589  0.108190
----------------------------------------------------------------------






    Out[63]:





<matplotlib.legend.Legend at 0x7f10def4ea20>

Conclusion: The metrics are a bit worse for the test set, as expected, but it seems to be reasonably close. There is no clear tendency in time, so a "time validity" for the model couldn't be found within the studied periods.

NOTE: A gap can be seen in the dates between the training and test sets. That is due to using only test samples that have test features (x) and labels (y). It would be acceptable to use features in the training set to predict labels in the test set. In this case, it was considered that, since the studied case is even more restrictive than the other one, no training features with labels in the test set were used.



In [ ]:

	GOOD_DATA_RATIO	SAMPLES_GOOD_DATA_RATIO	ahead_days	base_days	model	mre	r2	step_days	train_days	train_val_time	x_filename	y_filename
ahead_days
1.0	0.99	0.9	1.0	112.0	linear	0.015856	0.986599	7.0	504.0	-1.0	x_base112_ahead1.pkl	y_base112_ahead1.pkl
7.0	0.99	0.9	7.0	112.0	linear	0.042367	0.923348	7.0	756.0	-1.0	x_base112_ahead7.pkl	y_base112_ahead7.pkl
14.0	0.99	0.9	14.0	112.0	linear	0.060167	0.865259	7.0	756.0	-1.0	x_base112_ahead14.pkl	y_base112_ahead14.pkl
28.0	0.99	0.9	28.0	112.0	linear	0.091966	0.758046	7.0	756.0	-1.0	x_base112_ahead28.pkl	y_base112_ahead28.pkl
56.0	0.99	0.9	56.0	112.0	linear	0.127913	0.590426	7.0	756.0	-1.0	x_base112_ahead56.pkl	y_base112_ahead56.pkl

	n_estimators	max_depth	n_jobs	scores	r2	mre
0	50	5	-1	(0.983814083021, 0.0191439772135)	0.983814	0.019144
1	50	10	-1	(0.986125708821, 0.0168504555546)	0.986126	0.016850
2	100	5	-1	(0.984182203771, 0.0190912449575)	0.984182	0.019091
3	100	10	-1	(0.98606697491, 0.0168202507896)	0.986067	0.016820

	max_depth	mre	n_estimators	n_jobs	r2	scores
ahead_days
1	10.0	0.016850	50.0	-1.0	0.986126	(0.986125708821, 0.0168504555546)
7	5.0	0.042002	100.0	-1.0	0.929017	(0.929016577116, 0.0420024748489)
14	5.0	0.059515	100.0	-1.0	0.868927	(0.868927025792, 0.0595149788733)
28	5.0	0.090310	50.0	-1.0	0.765112	(0.76511197092, 0.0903104492391)
56	5.0	0.126410	50.0	-1.0	0.615412	(0.615412026805, 0.126409538141)

feature	Close										...	Volume
	SPY	MMM	ABT	ABBV	ACN	ATVI	AYI	ADBE	AMD	AAP	...	XEL	XRX	XLNX	XL	XYL	YHOO	YUM	ZBH	ZION	ZTS
date
2015-01-02	205.43	164.06	44.90	65.89	88.84	20.13	139.88	72.34	NaN	158.56	...	2535289.0	3912022.0	2402443.0	NaN	606118.0	11924473.0	1641557.0	909491.0	2299118.0	1784851.0
2015-01-05	201.72	160.36	44.91	64.65	87.34	19.85	136.52	71.98	2.66	156.47	...	3107187.0	7032861.0	2611059.0	NaN	1369903.0	14389308.0	3176619.0	2163761.0	5326879.0	3116681.0
2015-01-06	199.82	158.65	44.40	64.33	86.71	19.48	134.81	70.53	2.63	156.36	...	4749648.0	7170289.0	3430462.0	NaN	1336249.0	16204304.0	3597727.0	1782098.0	9096223.0	3987015.0
2015-01-07	202.31	159.80	44.76	66.93	88.53	19.06	137.20	71.11	2.58	159.72	...	2833770.0	4836408.0	2110610.0	NaN	1039030.0	11788031.0	3273992.0	1462026.0	2759850.0	2481935.0
2015-01-08	205.90	163.63	45.68	67.63	89.88	19.25	142.00	72.92	2.61	161.12	...	2516764.0	6229982.0	2824232.0	NaN	821836.0	14704771.0	3061324.0	1408433.0	1831484.0	3121258.0

		0	1	2	3	4	5	6	7	8	9	...	102	103	104	105	106	107	108	109	110	111
2015-05-14	AES	1.0	0.996948	0.990166	0.975246	0.976263	0.975076	0.956087	0.933876	0.940827	0.924890	...	0.802136	0.874364	0.874025	0.883011	0.870973	0.884368	0.882333	0.889115	0.881655	0.866904
2015-05-26	LH	1.0	0.996912	0.992405	0.984393	0.990486	0.984811	0.979553	0.999249	0.996077	1.005926	...	0.979720	0.981305	0.968453	0.913954	0.934318	0.982641	1.007094	1.024954	1.029461	1.024370
2015-01-13	AET	1.0	1.014385	1.020850	1.027477	1.026830	1.036690	1.035720	1.042832	1.043640	1.038468	...	0.871020	0.877808	0.871666	0.867141	0.868434	0.873768	0.886213	0.880394	0.876677	0.862130
2015-03-05	DHR	1.0	0.972875	0.954134	0.936873	0.950847	0.939832	0.948545	0.965806	0.954299	0.990301	...	0.831991	0.812428	0.795002	0.801249	0.771823	0.794016	0.762946	0.787112	0.777577	0.787112
2015-11-02	TIF	1.0	0.997919	1.005277	1.003047	0.998216	0.998216	1.011075	1.002378	0.984391	0.983574	...	1.052401	1.060428	1.070685	1.050840	1.078564	1.055820	1.057009	1.052921	1.058644	1.078118
2015-09-14	MCHP	1.0	1.015770	1.018873	1.003878	0.956050	0.942089	0.875905	0.826784	0.838935	0.830920	...	0.616856	0.605481	0.639866	0.592296	0.608583	0.654085	0.664943	0.630817	0.650465	0.630300
2015-05-14	MAS	1.0	1.004813	1.000633	1.008360	1.003293	1.005446	0.997847	0.992527	0.998480	0.998987	...	1.048132	1.051552	1.055731	1.045725	1.035592	1.036225	1.047118	1.046485	1.043065	1.050412
2015-03-05	FMC	1.0	0.976036	0.965069	0.936231	0.922015	0.939480	0.934200	0.963038	0.949228	0.965069	...	0.756702	0.758733	0.738830	0.742486	0.746141	0.741673	0.736393	0.749797	0.726645	0.728676
2015-03-25	UNM	1.0	0.996394	1.003778	1.020350	1.006354	0.995363	1.005753	1.012021	1.008844	1.011850	...	0.843294	0.822772	0.798643	0.780268	0.762494	0.786193	0.800876	0.800618	0.786622	0.770393
2015-03-05	TROW	1.0	0.984706	0.980882	0.964118	0.959412	0.974412	0.963529	0.972353	0.967647	0.987941	...	1.023529	1.021765	1.019412	1.017059	1.016765	1.007059	1.006176	1.022941	1.019118	1.000588
2015-06-15	R	1.0	1.013201	1.018557	1.035202	1.031758	1.031184	1.035393	1.027549	1.034437	1.044959	...	1.185575	1.173522	1.189593	1.183662	1.168357	1.142912	1.160895	1.158408	1.182323	1.175818
2016-06-14	BK	1.0	1.004254	1.004254	1.013370	1.025828	1.027347	1.025220	1.050744	0.973868	0.931936	...	1.046187	1.040717	1.169553	1.150106	1.130963	1.176542	1.182315	1.169857	1.180188	1.181708
2015-05-26	WEC	1.0	1.007366	1.009881	1.005390	1.006827	1.009342	1.022458	1.008624	1.017068	1.008264	...	0.944305	0.953647	0.954366	0.971254	0.983651	0.975386	0.972332	0.996227	0.989759	0.972691
2015-07-24	KMB	1.0	0.995829	1.009385	1.011470	1.016684	1.022941	1.027112	1.005214	1.018770	1.009385	...	1.093326	1.059437	1.074296	1.097497	1.102972	1.106882	1.106621	1.111575	1.102972	1.090459
2015-04-24	CTL	1.0	0.997253	0.994217	0.990458	0.972676	0.978459	0.981061	0.974122	0.980049	0.981350	...	0.906318	0.912245	0.898655	0.897788	0.899957	0.912823	0.898655	0.911811	0.917450	0.921209
2015-04-06	ORCL	1.0	0.998578	1.002642	1.003658	1.001422	0.996952	0.993700	0.995326	0.996545	0.981101	...	0.913432	0.907539	0.883154	0.904085	0.908352	0.900427	0.926235	0.921357	0.920545	0.928876
2016-03-24	MCHP	1.0	1.025679	1.026045	1.031548	0.990462	0.984593	0.946809	0.930668	0.931768	0.907924	...	1.025679	1.029714	1.041453	1.073734	1.043287	1.045488	1.012472	1.025312	1.117021	1.103081
2015-11-20	NSC	1.0	0.979500	0.969147	0.977860	0.977655	0.974375	0.971812	0.944547	0.954387	0.943624	...	0.852399	0.846966	0.936142	0.936552	0.953772	0.955617	0.942804	0.923637	0.929889	0.913694
2016-02-24	SWKS	1.0	1.007463	1.022162	1.011986	1.037540	1.037766	1.037313	1.045228	1.043193	1.031886	...	1.241067	1.231343	1.239484	1.235188	1.225011	1.221845	1.228177	1.224785	1.233605	1.224107
2016-04-05	TMO	1.0	1.006903	0.985634	0.996082	0.999813	1.021642	1.033582	1.034515	1.036567	1.041045	...	1.194216	1.204291	1.206716	1.205970	1.214179	1.213619	1.209701	1.209888	1.200933	1.207276
2015-12-11	WMT	1.0	0.956508	0.980670	0.981959	0.956830	0.948131	0.951353	0.961501	0.971649	0.986147	...	0.618073	0.591817	0.585696	0.570876	0.572970	0.591978	0.619523	0.618073	0.637242	0.651740
2015-03-05	LH	1.0	0.987184	0.982884	0.984101	0.983452	1.008112	1.003650	1.019387	1.016142	1.028634	...	1.018981	1.032528	1.033014	1.030256	1.032933	1.018008	1.011843	1.015493	1.009815	1.005435
2015-04-06	LNT	1.0	0.992061	1.000541	1.015698	1.021112	1.036810	1.037532	1.051425	1.049441	1.036268	...	1.100686	1.088777	1.059726	1.066402	1.072717	1.047997	1.068928	1.056117	1.050523	1.052508
2015-02-24	BLL	1.0	0.983608	0.979711	0.969395	0.964580	0.958161	0.960912	0.975928	0.955238	0.959652	...	1.021492	1.028198	1.028485	1.076055	1.089752	1.105857	1.115028	1.106717	1.127063	1.124427
2015-03-16	CSCO	1.0	0.997821	0.989902	0.995801	0.996226	0.978953	0.977252	0.980122	0.983736	0.989476	...	0.933298	0.921552	0.923625	0.924103	0.921020	0.924794	0.930215	0.908318	0.884773	0.863247
2015-01-23	KSS	1.0	1.006128	1.008014	1.000943	1.018032	0.997407	1.000000	1.026046	1.019564	1.074720	...	1.009193	1.023807	1.032174	1.022157	1.026164	1.037831	1.002711	1.010371	1.015203	1.017796
2016-04-25	STI	1.0	1.013857	1.022624	1.014989	0.944853	0.949095	0.947681	0.922370	0.909926	0.913886	...	1.075933	1.070701	1.085124	1.087387	1.057268	1.029695	1.045249	1.050198	1.035916	1.076640
2016-05-04	D	1.0	1.006203	1.005197	1.013998	1.044342	1.024141	1.028080	1.027578	1.036798	1.030511	...	1.126739	1.117603	1.128080	1.122967	1.117100	1.106873	1.108215	1.090863	1.098994	1.078374
2015-12-22	PAYX	1.0	1.004924	1.008729	1.012086	1.019024	1.017234	1.011862	0.975604	0.988585	0.988362	...	1.007833	1.024843	1.008057	1.029096	1.018800	1.078559	1.094673	1.092435	1.094002	1.103402
2016-02-03	CSCO	1.0	0.988012	0.952349	0.939687	0.959017	0.991159	1.005994	1.000824	1.033116	1.043905	...	1.272571	1.285457	1.328688	1.329287	1.331161	1.337379	1.362329	1.364352	1.337679	1.339852
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2015-03-16	EQT	1.0	0.990920	0.977329	0.980431	0.970223	0.962328	0.953982	0.932946	0.933284	0.928209	...	0.955335	0.949865	0.932269	0.922626	0.929450	0.930352	0.934243	0.930239	0.904861	0.879935
2015-12-22	CINF	1.0	1.026151	1.029562	1.033542	1.049460	1.040364	1.043775	1.038658	1.048323	1.035247	...	1.244457	1.254690	1.249574	1.252416	1.257533	1.270608	1.279704	1.280841	1.284252	1.284821
2016-05-13	ESRX	1.0	0.996238	0.985978	0.974008	0.966142	0.973324	0.972640	0.986320	0.998632	0.990766	...	0.844049	0.855335	0.849179	0.846785	0.846785	0.850205	0.847811	0.857387	0.865937	0.867305
2016-04-25	PCG	1.0	0.996652	0.997870	0.990262	0.992088	1.006391	1.009738	0.999087	1.000609	1.003043	...	0.946439	0.944005	0.949178	0.951613	0.947961	0.950091	0.956482	0.959525	0.951004	0.958612
2015-12-02	PX	1.0	0.984692	1.005156	1.017241	1.024009	1.017080	0.986304	0.991460	0.990171	1.000000	...	1.170641	1.181921	1.175959	1.188044	1.179826	1.175959	1.185466	1.185305	1.188205	1.199484
2015-08-24	KIM	1.0	0.975213	1.008527	1.018045	1.025580	1.011898	1.001785	1.009122	1.022011	1.000793	...	0.905215	0.931787	0.954987	0.939124	0.963514	0.965893	0.951616	0.986516	0.973230	0.999009
2015-11-02	TXN	1.0	1.008464	0.999637	1.015599	0.990447	0.970012	0.968440	0.935792	0.928900	0.900484	...	0.887304	0.892019	0.874123	0.867594	0.873640	0.856832	0.854051	0.856469	0.849335	0.868198
2016-02-03	CINF	1.0	0.994751	0.989501	0.967454	0.973753	0.971654	0.926509	0.943307	0.950131	0.962730	...	1.259843	1.259843	1.262467	1.262467	1.262467	1.234646	1.254593	1.249869	1.244094	1.256693
2015-09-23	CMI	1.0	0.997886	0.993894	0.939878	0.952560	0.995773	1.009394	1.043917	1.087600	1.105683	...	1.105683	1.124706	1.103335	1.108267	1.129638	1.144669	1.141616	1.175904	1.165336	1.164866
2016-03-24	KO	1.0	0.997854	1.005007	1.017883	1.018240	1.026109	1.006438	0.986409	1.001431	0.987124	...	1.090129	1.091559	1.095494	1.108011	1.110873	1.119099	1.121245	1.129471	1.128040	1.124464
2015-10-22	AES	1.0	0.996197	1.019395	1.028713	1.055334	1.046967	1.049439	1.081765	1.101540	1.099639	...	1.106104	1.102491	1.089371	1.090892	1.085948	1.107245	1.100970	1.108766	1.098498	1.106484
2015-07-24	UNM	1.0	0.995569	0.996576	1.000503	1.014299	1.010069	1.001108	0.997181	0.995569	0.996677	...	0.945222	0.929816	0.942604	0.955292	0.969691	0.969691	0.967576	0.979458	0.975733	0.967375
2016-03-15	BA	1.0	0.995917	0.990474	0.981175	0.978453	0.973917	0.982309	0.999546	1.004309	1.007938	...	1.058290	1.051486	1.046723	1.056475	1.049671	1.038784	1.050352	1.062826	1.064641	1.067135
2016-03-04	HES	1.0	0.977572	0.963278	0.982871	0.981392	0.991004	0.980776	0.956993	0.965619	0.967591	...	1.067406	1.055453	1.084042	1.085274	1.087492	1.093530	1.096365	1.106470	1.108811	1.107579
2016-04-25	GPC	1.0	1.001443	1.007215	1.005195	1.001732	1.014141	1.036797	1.073304	1.063925	1.061616	...	1.056999	1.055988	1.072727	1.097980	1.102453	1.108225	1.089755	1.103175	1.093939	1.078932
2016-06-14	UDR	1.0	1.011154	1.024362	1.023188	1.023775	1.032873	1.036983	1.033167	1.019959	1.022894	...	1.017317	1.030525	1.011741	0.987085	0.998826	1.026710	0.991195	0.992956	0.978574	0.972997
2015-03-25	LUV	1.0	0.968561	0.974607	1.009674	0.984281	0.992140	0.981258	0.972189	0.973398	0.976421	...	0.853688	0.848851	0.833736	0.814389	0.800484	0.805925	0.829504	0.831318	0.859129	0.827690
2015-10-13	VZ	1.0	0.987694	1.005274	1.004505	0.982529	0.979892	0.968025	1.025272	1.038347	1.010878	...	1.134161	1.144160	1.139985	1.137457	1.138117	1.150313	1.158334	1.152730	1.153170	1.149324
2016-05-04	CAG	1.0	1.012031	1.009143	1.017709	1.021174	1.002502	1.009143	0.997786	1.006256	0.982772	...	0.867950	0.861886	0.856497	0.838210	0.829836	0.829644	0.832435	0.831569	0.835707	0.831569
2015-08-13	SPGI	1.0	1.003642	1.009249	1.006326	0.998371	0.977715	0.947139	0.908416	0.897297	0.933001	...	0.919774	0.920684	0.928113	0.904965	0.919822	0.900077	0.901275	0.889725	0.894709	0.913064
2016-01-04	JBHT	1.0	1.031456	1.022136	0.996505	0.979417	0.994951	0.990680	0.979029	0.981748	0.986408	...	1.094369	1.104466	1.116893	1.121553	1.114951	1.120777	1.134757	1.137087	1.136699	1.133981
2016-05-13	BK	1.0	1.005993	1.000856	1.016267	1.004852	1.014840	1.017979	1.043664	1.069920	1.056792	...	1.025114	1.038242	0.992580	0.991724	0.981735	0.972888	0.965183	0.976884	0.994292	0.986872
2015-01-02	ADM	1.0	0.942065	0.948992	0.973552	1.001259	1.014484	1.018262	0.994962	0.941436	0.942695	...	0.782116	0.794710	0.794081	0.787154	0.782116	0.771411	0.767632	0.766373	0.761335	0.759446
2015-09-14	QCOM	1.0	1.031346	1.031592	1.031223	0.993977	0.998156	0.959926	0.943823	0.922065	0.923540	...	0.684081	0.680025	0.644991	0.665519	0.691948	0.714567	0.712477	0.690227	0.719361	0.696128
2015-06-15	PCG	1.0	1.004349	1.010149	1.028271	1.017760	1.028634	1.031171	1.023559	1.021022	1.026459	...	1.160203	1.140631	1.139906	1.144618	1.125045	1.113447	1.121058	1.114534	1.138094	1.138094
2015-08-13	HRS	1.0	1.005307	1.007202	1.006539	0.995451	0.972707	0.937074	0.902957	0.890163	0.927123	...	0.932809	0.932714	0.939822	0.920205	0.935747	0.921721	0.922384	0.912055	0.915277	0.927028
2015-05-14	AVY	1.0	0.988512	1.001532	1.015573	1.010722	1.009957	1.006127	1.010722	1.018892	1.018381	...	0.942303	0.932857	0.937197	0.924687	0.912688	0.941026	0.931580	0.925453	0.938218	0.926219
2015-03-05	VNO	1.0	0.995117	1.000158	0.977788	0.965974	0.971251	0.976686	1.006065	1.017250	1.022290	...	1.066950	1.063327	1.079001	1.105072	1.112319	1.098299	1.084909	1.099795	1.070180	1.084121
2015-06-15	FITB	1.0	1.007701	1.006093	1.025812	1.013879	1.028182	1.034276	1.032583	1.017519	1.011341	...	0.916892	0.914946	0.921209	0.907160	0.882109	0.868060	0.878555	0.884648	0.915115	0.901405
2016-01-13	BF.B	1.0	1.022664	0.983248	0.969453	0.965840	0.980292	1.001478	0.951059	0.971588	0.957793	...	1.267203	1.248645	1.235507	1.229430	1.232879	1.231565	1.247003	1.255214	1.256200	1.255214