In this notebook an estimator for the Volume will be trained. No hyperparameters will be searched for, and the ones from the 'Close' values estimator will be used instead.


In [38]:
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
from time import time
from sklearn.metrics import r2_score, median_absolute_error

%matplotlib inline

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('../../')

from sklearn.externals import joblib
import utils.preprocessing as pp
import predictor.feature_extraction as fe


Populating the interactive namespace from numpy and matplotlib
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

Let's generate the datasets


In [39]:
def generate_one_set(params):
    # print(('-'*70 + '\n {}, {} \n' + '-'*70).format(params['base_days'].values, params['ahead_days'].values))
    tic = time()
    
    train_val_time = int(params['train_val_time'])
    base_days = int(params['base_days'])
    step_days = int(params['step_days'])
    ahead_days = int(params['ahead_days'])
    
    print('Generating: base{}_ahead{}'.format(base_days, ahead_days))
    pid = 'base{}_ahead{}'.format(base_days, ahead_days)
    
    # Getting the data
    data_df = pd.read_pickle('../../data/data_train_val_df.pkl')
    today = data_df.index[-1]  # Real date
    print(pid + ') data_df loaded')

    # Drop symbols with many missing points
    data_df = pp.drop_irrelevant_symbols(data_df, params['GOOD_DATA_RATIO'])
    print(pid + ') Irrelevant symbols dropped.')
    
    # Generate the intervals for the predictor
    x, y = fe.generate_train_intervals(data_df, 
                                       train_val_time, 
                                       base_days, 
                                       step_days,
                                       ahead_days, 
                                       today, 
                                       fe.feature_volume_one_to_one,
                                       target_feature=fe.VOLUME_FEATURE)    
    print(pid + ') Intervals generated')
    
    # Drop "bad" samples and fill missing data
    x_y_df = pd.concat([x, y], axis=1)
    x_y_df = pp.drop_irrelevant_samples(x_y_df, params['SAMPLES_GOOD_DATA_RATIO'])
    x = x_y_df.iloc[:, :-1]
    y = x_y_df.iloc[:, -1]
    x = pp.fill_missing(x)
    print(pid + ') Irrelevant samples dropped and missing data filled.')
    
    # Pickle that
    x.to_pickle('../../data/x_volume_{}.pkl'.format(pid))
    y.to_pickle('../../data/y_volume_{}.pkl'.format(pid))
    
    toc = time()
    print('%s) %i intervals generated in: %i seconds.' % (pid, x.shape[0], (toc-tic)))
    
    return pid, x, y

In [40]:
best_params_df = pd.read_pickle('../../data/best_params_final_df.pkl').loc[1,:]
to_drop = [
    'model',
    'mre',
    'r2',
    'x_filename',
    'y_filename',
    'train_days'
]
best_params_df.drop(to_drop, inplace=True)
best_params_df


Out[40]:
GOOD_DATA_RATIO            0.99
SAMPLES_GOOD_DATA_RATIO     0.9
ahead_days                    1
base_days                   112
step_days                     7
train_val_time               -1
Name: 1.0, dtype: object

In [41]:
generate_one_set(best_params_df)


Generating: base112_ahead1
base112_ahead1) data_df loaded
base112_ahead1) Irrelevant symbols dropped.
base112_ahead1) Intervals generated
base112_ahead1) Irrelevant samples dropped and missing data filled.
base112_ahead1) 219281 intervals generated in: 168 seconds.
Out[41]:
('base112_ahead1',
                  0         1         2          3         4          5    \
 1993-01-29 AAPL  1.0  0.799399  1.100301   0.391059  0.706612   0.841097   
            ABT   1.0  1.737416  2.302661   2.362296  1.290798   1.972427   
            ADBE  1.0  2.796128  1.733207   2.519306  2.074187   1.665204   
            ADM   1.0  1.307959  0.440623   0.615481  1.301435   1.294688   
            ADP   1.0  0.712254  1.190919   1.558534  1.658917   1.347101   
            ADSK  1.0  0.799576  0.553022   1.047897  1.136179   1.069282   
            AEP   1.0  2.275556  1.133333   1.462222  2.337778   1.511111   
            AES   1.0  2.347038  2.316376   1.839721  2.841115   5.972125   
            AET   1.0  2.097744  1.658237   3.170882  3.940533   5.034860   
            AFL   1.0  2.545356  0.305185   3.473929  5.266079   1.714198   
            AIG   1.0  1.026964  1.941007   2.961265  1.820695   1.402409   
            AJG   1.0  2.315911  1.027314   1.097471  0.938503   0.829349   
            ALK   1.0  2.562933  2.416789   6.761893  9.916177   3.885006   
            AMAT  1.0  2.032120  1.736617   1.421842  3.229122   0.520343   
            AMD   1.0  1.087518  0.786743   2.667666  1.303023   0.936548   
            AME   1.0  1.066548  0.846488   1.859798  9.660963   2.243993   
            AMGN  1.0  0.734142  0.782556   1.522897  1.418300   2.823373   
            AN    1.0  1.060365  3.055209   2.577014  2.670462   1.811386   
            APA   1.0  1.798526  1.238329   0.838657  1.647011   0.805897   
            APC   1.0  2.659827  4.132586   1.513314  1.245628   1.656787   
            APD   1.0  2.747712  2.135011  16.065789  8.041762   5.586957   
            APH   1.0  1.474860  0.240223   3.511173  0.480447   0.511173   
            ARNC  1.0  1.354223  1.653951   8.166213  3.051771   2.929155   
            AVY   1.0  3.367521  5.145299   3.649573  2.170940   4.239316   
            AXP   1.0  0.957552  1.545388   1.289967  1.536200   2.015803   
            AZO   1.0  0.439237  0.447180   1.380461  0.530580   0.785544   
            BA    1.0  0.753015  1.812349   1.694645  3.768934  12.678244   
            BAC   1.0  1.175926  1.753704   1.672222  2.070370   0.903704   
            BAX   1.0  0.544790  0.767950   0.638758  1.001350   1.598582   
            BBBY  1.0  0.189542  0.265069   0.774873  0.329702   0.362382   
 ...              ...       ...       ...        ...       ...        ...   
 2014-07-18 TWX   1.0  0.775236  0.920909   1.067082  1.180822   1.063247   
            TXN   1.0  0.903685  1.132094   0.807083  1.893544   1.691031   
            TXT   1.0  0.979728  0.796844   1.001587  0.957226   0.843836   
            UDR   1.0  1.488939  1.616842   1.330744  1.897088   1.040619   
            UHS   1.0  0.637570  0.782777   0.560014  0.558266   0.685587   
            UNH   1.0  0.728199  0.697939   0.754300  0.748640   0.827890   
            UNM   1.0  1.222187  2.480161   1.646877  1.604697   1.024687   
            UNP   1.0  1.034173  1.122000   0.667085  1.033498   0.863975   
            USB   1.0  0.697080  2.277888   1.965622  1.958388   4.901866   
            UTX   1.0  0.861043  0.897089   1.114190  1.029418   0.732781   
            VFC   1.0  0.478337  0.479296   0.502592  0.781046   0.302908   
            VLO   1.0  1.175617  1.002927   0.910263  0.836980   0.753943   
            VMC   1.0  1.119121  1.484201   1.175821  1.738728   1.091950   
            VNO   1.0  0.586860  0.763256   0.742285  0.998913   0.640917   
            VRTX  1.0  0.497778  0.677025   0.529962  1.397894   0.893703   
            VZ    1.0  0.655098  0.832476   1.655119  2.091069   0.896350   
            WDC   1.0  0.883370  0.692727   0.698990  1.003498   0.733375   
            WEC   1.0  0.983828  0.769730   0.588174  0.641791   0.648891   
            WFC   1.0  0.764054  1.304965   0.893999  0.993471   0.731476   
            WFM   1.0  1.158300  0.567337   0.713110  0.947244   0.682447   
            WHR   1.0  0.622109  0.571671   0.479058  0.490595   0.316556   
            WMB   1.0  0.582959  0.966706   8.392179  2.581877   1.896077   
            WMT   1.0  0.392280  0.449307   0.357566  0.560103   0.342033   
            WY    1.0  0.703713  0.892094   0.812959  1.471157   0.803616   
            XEL   1.0  0.808009  0.904747   0.854875  0.967681   0.948396   
            XLNX  1.0  1.475966  1.713026   1.349345  1.371480   1.972339   
            XOM   1.0  0.576020  0.536759   0.827962  0.832970   0.630647   
            XRAY  1.0  0.871061  1.030509   0.984077  0.787750   0.857157   
            XRX   1.0  0.852697  1.016390   0.666632  0.511420   0.743556   
            ZION  1.0  0.860937  1.896578   0.915797  0.704707   3.152652   
 
                        6         7         8         9      ...           102  \
 1993-01-29 AAPL   0.788505  1.025169  0.988355  1.099925    ...      0.580766   
            ABT    1.265470  0.427060  1.345303  0.667842    ...      2.384739   
            ADBE   2.393327  1.605166  3.158544  2.404181    ...      0.980768   
            ADM    0.429101  0.249023  0.411915  0.466721    ...      0.350154   
            ADP    1.377462  0.826039  0.678063  0.439278    ...      1.276258   
            ADSK   1.146341  0.823082  0.868770  1.552050    ...      0.419318   
            AEP    0.937778  0.906667  0.706667  0.795556    ...      1.506667   
            AES    3.558885  5.029268  3.219512  4.489199    ...      7.278746   
            AET    2.576213  2.711552  2.138072  2.304853    ...      0.652085   
            AFL    1.792150  2.831077  3.921967  6.707591    ...      4.019382   
            AIG    1.098549  0.933616  1.329729  2.179031    ...      0.807282   
            AJG    0.455045  0.431965  0.499201  0.672396    ...      0.644797   
            ALK    3.499347  2.603529  2.710445  2.161036    ...      1.197572   
            AMAT   0.312634  1.361884  4.813704  0.758030    ...      0.659529   
            AMD    0.525606  0.640187  0.599800  0.894246    ...      0.993671   
            AME    2.016851  1.383316  0.602472  1.932549    ...      1.425906   
            AMGN   1.886279  2.027503  1.230541  0.747632    ...      0.603571   
            AN     1.991622  1.499463  0.918582  1.178303    ...      1.896241   
            APA    2.275184  1.040131  1.162162  1.814087    ...      1.548731   
            APC    0.828897  0.906466  0.653741  0.721168    ...      1.092525   
            APD    4.342677  3.789474  5.086957  7.761442    ...      2.484554   
            APH    0.977654  0.667598  0.731844  0.145251    ...      0.620112   
            ARNC   1.346049  2.024523  2.893733  2.544959    ...      0.803815   
            AVY    1.786325  3.025641  1.188034  2.290598    ...      2.000000   
            AXP    0.744763  0.863102  0.657295  0.670526    ...      1.350423   
            AZO    1.169976  0.924543  2.016680  0.699762    ...      0.992851   
            BA     4.681138  4.806561  4.543174  2.771346    ...      1.134588   
            BAC    1.722222  0.633333  1.279630  0.757407    ...      0.816667   
            BAX    0.727774  0.517893  0.452960  0.818479    ...      0.367207   
            BBBY   0.437908  0.309368  1.033406  0.389252    ...      0.620189   
 ...                    ...       ...       ...       ...    ...           ...   
 2014-07-18 TWX    0.725303  1.029746  0.853268  1.055154    ...      0.974789   
            TXN    1.736169  2.019143  3.112889  2.082898    ...      1.525969   
            TXT    1.057867  0.785366  1.155752  1.427635    ...      1.315725   
            UDR    1.777035  2.499819  1.804504  3.605088    ...      2.130185   
            UHS    0.800004  0.719126  0.705439  1.069736    ...      0.816538   
            UNH    0.871167  0.792160  1.338165  2.275105    ...      1.835890   
            UNM    1.270091  1.291093  1.408312  1.538039    ...      1.245264   
            UNP    0.567020  1.258420  1.842716  1.373468    ...      0.755533   
            USB    3.677410  1.902763  1.500695  1.748765    ...      0.732848   
            UTX    0.990825  1.257383  1.467129  1.408352    ...      1.232453   
            VFC    0.305436  0.224019  0.528657  0.493044    ...      0.365596   
            VLO    0.984043  1.473754  1.489158  1.639548    ...      0.839997   
            VMC    0.947637  3.253781  1.260333  2.098196    ...      1.813770   
            VNO    0.599826  0.738174  1.168563  1.257432    ...      0.661668   
            VRTX   1.099272  1.169215  1.257466  1.255074    ...      0.958279   
            VZ     0.919368  2.151405  1.523302  1.799840    ...      1.673329   
            WDC    0.787632  0.796086  1.207637  2.949817    ...      2.976357   
            WEC    0.869618  0.937009  0.738642  1.268273    ...      1.098167   
            WFC    1.016783  1.758063  3.661011  4.191226    ...      0.484308   
            WFM    1.077936  0.905906  1.217219  1.926337    ...      1.166428   
            WHR    0.756542  0.563684  0.588885  0.841606    ...      0.466000   
            WMB    1.262235  1.222985  1.328080  1.798648    ...      0.802450   
            WMT    0.586186  0.603281  0.917365  0.779255    ...      0.671835   
            WY     1.121423  1.280896  2.032363  1.960516    ...      1.634902   
            XEL    1.120560  0.981374  1.348900  2.229193    ...      2.621996   
            XLNX   2.083613  1.476547  1.890565  2.148335    ...      0.860465   
            XOM    0.603227  0.538388  0.945515  0.742207    ...      0.281240   
            XRAY   1.240143  1.285577  1.415018  1.566664    ...      2.118458   
            XRX    1.134552  1.472962  1.757815  2.219839    ...      1.372078   
            ZION  10.202257  3.781212  4.898003  2.921387    ...      1.786106   
 
                       103       104        105        106       107       108  \
 1993-01-29 AAPL  1.502254  3.806161   1.927874   2.796769  2.639745  5.151766   
            ABT   1.766592  1.795127   2.333440   4.712408  2.440846  5.756973   
            ADBE  4.448833  3.672625   3.513949   2.623441  5.208416  4.821548   
            ADM   0.287081  1.650929   1.666158   1.728794  1.025446  0.484122   
            ADP   1.379376  1.092177   1.199125   2.351751  1.195295  1.156729   
            ADSK  0.916490  0.734624   1.211205   0.452103  0.378667  0.554878   
            AEP   4.986667  3.271111   1.253333   1.426667  1.946667  1.497778   
            AES   7.183275  4.519861   2.747735   2.487805  2.187456  2.221603   
            AET   1.105263  0.908407   1.257690   1.160629  0.864662  0.936432   
            AFL   2.869988  1.772686  15.057936  25.687494  5.201162  1.266160   
            AIG   1.176020  3.317547   2.014919   1.291952  1.480564  1.895702   
            AJG   0.453936  0.589864   1.013270   0.468600  0.357859  0.464896   
            ALK   5.208425  4.503420   1.488501   2.416803  1.840344  1.870101   
            AMAT  3.256959  1.254818   0.952891   2.413276  1.580300  1.154176   
            AMD   0.778583  0.540761   0.474394   0.831959  1.187693  0.817887   
            AME   0.686774  2.270612   1.150826   1.042591  0.787926  0.989352   
            AMGN  0.690400  0.911293   0.535819   0.691928  0.258393  0.340333   
            AN    1.236305  1.163265   0.885285   1.397852  0.801504  1.095596   
            APA   2.028665  2.748567   0.873874   1.036855  1.625717  1.679771   
            APC   0.365022  1.421803   0.660334   0.795186  0.807353  3.172378   
            APD   2.356979  5.458238   4.847826   2.440503  1.323227  1.929062   
            APH   0.089385  0.259777   0.055866   0.335196  0.782123  0.055866   
            ARNC  1.435967  0.765668   1.427793   1.103542  1.803815  3.542234   
            AVY   2.632479  4.188034   2.598291   2.017094  2.068376  5.290598   
            AXP   0.810731  1.164094   1.754686   0.986218  0.351158  0.789416   
            AZO   1.171565  1.095314   1.182685   0.884829  0.424940  2.100079   
            BA    1.037144  1.922335   1.113845   1.479016  0.993247  1.833575   
            BAC   0.812037  0.497222   2.137037   1.698148  1.004630  1.927778   
            BAX   0.579226  0.866532   0.309926   0.218659  0.329845  0.599257   
            BBBY  0.767611  0.583878   0.578794   0.390704  0.709513  0.573711   
 ...                   ...       ...        ...        ...       ...       ...   
 2014-07-18 TWX   1.087740  0.981904   1.062421   0.919032  1.058277  1.277802   
            TXN   1.345164  2.280436   2.445929   2.630392  3.254262  3.054662   
            TXT   0.921984  1.255432   1.537701   1.065384  1.250150  1.392120   
            UDR   2.007327  3.165001   3.086554   2.817447  2.225871  4.892918   
            UHS   1.070097  0.969129   1.158963   0.917131  1.093273  1.490912   
            UNH   2.669333  2.735266   3.490730   2.381830  2.231478  2.942789   
            UNM   1.382166  1.084988   1.212371   1.185419  1.116609  1.854339   
            UNP   0.793017  0.937622   1.244535   1.601986  1.202715  2.052006   
            USB   2.007901  1.609199   1.439298   1.765746  1.224808  2.122626   
            UTX   1.512070  1.691523   1.513280   1.735906  1.655187  1.920690   
            VFC   0.376155  0.491196   0.469023   0.452464  0.513898  0.712792   
            VLO   1.124771  1.011446   1.430038   1.220308  1.944494  1.687013   
            VMC   2.346027  2.143014   2.186996   2.338878  2.260090  3.090988   
            VNO   0.676942  1.088367   1.075682   1.300243  1.093597  3.121243   
            VRTX  1.109188  1.173068   1.432007   1.225368  1.190475  2.166913   
            VZ    2.082797  2.094902   1.822235   1.818064  1.758582  2.626470   
            WDC   5.677609  2.808052   2.597441   4.577213  3.212802  2.692380   
            WEC   1.213431  1.604715   1.565360   1.277337  1.382076  2.336493   
            WFC   0.741768  0.886767   0.629119   0.580302  0.961370  0.825384   
            WFM   1.209162  1.747931   1.633308   1.849937  1.687606  2.493296   
            WHR   0.488568  0.610531   0.913509   0.801408  0.919314  1.007623   
            WMB   0.641483  1.119062   1.506732   3.196091  1.641704  1.784513   
            WMT   0.564901  0.519555   0.834421   0.821836  1.126930  1.324858   
            WY    1.482519  1.285832   1.513722   1.351630  1.759683  2.456335   
            XEL   2.993106  2.603893   3.212892   2.841394  2.787749  4.686587   
            XLNX  1.028636  1.067717   1.554001   0.956018  0.893221  1.785408   
            XOM   0.401707  0.502457   0.445220   0.384164  0.548983  0.820247   
            XRAY  2.053616  2.629364   2.070099   3.131564  2.763502  2.478919   
            XRX   2.138218  1.985444   1.381174   2.532877  1.712842  2.828343   
            ZION  1.984792  2.013023   2.487787   2.411735  1.583735  6.660535   
 
                       109        110        111  
 1993-01-29 AAPL  1.170173   1.052968   0.614576  
            ABT   2.494069   2.722026   2.975313  
            ADBE  3.721397   3.279440   2.370246  
            ADM   0.519792   0.785994   0.458892  
            ADP   1.640591   1.743435   1.394694  
            ADSK  0.418169   0.511400   0.820343  
            AEP   1.040000   1.511111   6.648889  
            AES   1.894077   7.043902  13.678049  
            AET   0.876965   1.515379   1.613807  
            AFL   9.473751  21.252589   1.090890  
            AIG   1.399808   1.077744   0.805502  
            AJG   0.528805   0.444818   0.327604  
            ALK   2.093377   2.523691   2.067680  
            AMAT  1.111349   6.631692   1.443255  
            AMD   0.886085   1.149138   0.726289  
            AME   0.606916   0.482686   8.897897  
            AMGN  0.215262   0.467586   0.318156  
            AN    1.258861   0.703545   4.933835  
            APA   1.298935   1.561016   1.793612  
            APC   1.024086   2.892273   0.545249  
            APD   3.754577   3.188215   2.048627  
            APH   0.796089   0.064246   0.189944  
            ARNC  1.079019   1.103542   1.607629  
            AVY   1.897436   4.487179   2.888889  
            AXP   0.698089   1.233921   0.426314  
            AZO   1.111199   1.337569   2.293487  
            BA    3.261939   2.932947   1.431259  
            BAC   1.665741   1.121296   1.491667  
            BAX   0.415147   0.659352   0.696826  
            BBBY  0.945534   2.437908   0.426289  
 ...                   ...        ...        ...  
 2014-07-18 TWX   0.530034   0.591024   0.223461  
            TXN   3.168576   2.555643   0.841740  
            TXT   0.797579   0.613077   0.318867  
            UDR   1.684727   1.042907   0.702218  
            UHS   0.801403   0.629013   0.242555  
            UNH   1.954080   1.066165   0.422193  
            UNM   0.720394   0.639780   0.240268  
            UNP   0.616161   0.780114   0.244534  
            USB   0.812164   1.163426   0.462780  
            UTX   0.998914   0.692105   0.313967  
            VFC   0.249998   0.242735   0.144441  
            VLO   1.157524   0.777383   0.541410  
            VMC   1.510328   1.209514   0.774067  
            VNO   1.068055   2.067989   0.264214  
            VRTX  1.037646   0.941286   0.653042  
            VZ    2.131411   1.336311   0.563970  
            WDC   1.570191   1.876370   0.962263  
            WEC   1.123205   0.852678   0.277451  
            WFC   0.484006   0.346320   0.226190  
            WFM   1.154334   1.223340   1.743624  
            WHR   0.648346   0.371935   0.168273  
            WMB   0.637206   0.465016   0.178788  
            WMT   0.758578   0.740589   0.275550  
            WY    0.931290   1.130115   0.507147  
            XEL   2.177277   1.669837   0.837650  
            XLNX  0.749315   0.697184   0.230843  
            XOM   0.281015   0.336757   0.108314  
            XRAY  1.688812   1.603256   0.523903  
            XRX   1.248227   1.076201   0.782831  
            ZION  1.341415   1.490635   0.795542  
 
 [219281 rows x 112 columns],
 1993-01-29  AAPL     0.641247
             ABT      1.764027
             ADBE     1.508452
             ADM      0.346670
             ADP      0.760394
             ADSK     0.365677
             AEP      2.342222
             AES      9.845296
             AET      1.326726
             AFL     13.012597
             AIG      1.119765
             AJG      0.452715
             ALK      1.393792
             AMAT     1.289079
             AMD      1.007078
             AME      0.383316
             AMGN     0.227223
             AN       2.084425
             APA      1.739558
             APC      1.680103
             APD      1.787185
             APH      0.215084
             ARNC     0.724796
             AVY      1.358974
             AXP      0.935134
             AZO      2.343129
             BA       1.957549
             BAC      0.399074
             BAX      0.310713
             BBBY     0.400145
                       ...    
 2014-07-18  TWX      0.300524
             TXN      2.001688
             TXT      0.401045
             UDR      0.668886
             UHS      0.397621
             UNH      0.512206
             UNM      0.376141
             UNP      0.518985
             USB      0.834650
             UTX      0.523019
             VFC      0.190948
             VLO      0.782874
             VMC      0.694705
             VNO      0.291869
             VRTX     0.765249
             VZ       0.644282
             WDC      0.954004
             WEC      0.376179
             WFC      0.395389
             WFM      1.403008
             WHR      0.261707
             WMB      0.316105
             WMT      0.224937
             WY       0.661314
             XEL      1.289547
             XLNX     0.381192
             XOM      0.128445
             XRAY     0.828852
             XRX      0.630608
             ZION     0.686132
 Name: 112, Length: 219281, dtype: float64)

In [42]:
x_volume = pd.read_pickle('../../data/x_volume_base112_ahead1.pkl')
print(x_volume.shape)
x_volume.head()


(219281, 112)
Out[42]:
0 1 2 3 4 5 6 7 8 9 ... 102 103 104 105 106 107 108 109 110 111
1993-01-29 AAPL 1.0 0.799399 1.100301 0.391059 0.706612 0.841097 0.788505 1.025169 0.988355 1.099925 ... 0.580766 1.502254 3.806161 1.927874 2.796769 2.639745 5.151766 1.170173 1.052968 0.614576
ABT 1.0 1.737416 2.302661 2.362296 1.290798 1.972427 1.265470 0.427060 1.345303 0.667842 ... 2.384739 1.766592 1.795127 2.333440 4.712408 2.440846 5.756973 2.494069 2.722026 2.975313
ADBE 1.0 2.796128 1.733207 2.519306 2.074187 1.665204 2.393327 1.605166 3.158544 2.404181 ... 0.980768 4.448833 3.672625 3.513949 2.623441 5.208416 4.821548 3.721397 3.279440 2.370246
ADM 1.0 1.307959 0.440623 0.615481 1.301435 1.294688 0.429101 0.249023 0.411915 0.466721 ... 0.350154 0.287081 1.650929 1.666158 1.728794 1.025446 0.484122 0.519792 0.785994 0.458892
ADP 1.0 0.712254 1.190919 1.558534 1.658917 1.347101 1.377462 0.826039 0.678063 0.439278 ... 1.276258 1.379376 1.092177 1.199125 2.351751 1.195295 1.156729 1.640591 1.743435 1.394694

5 rows × 112 columns


In [43]:
y_volume = pd.read_pickle('../../data/y_volume_base112_ahead1.pkl')
print(y_volume.shape)
y_volume.head()


(219281,)
Out[43]:
1993-01-29  AAPL    0.641247
            ABT     1.764027
            ADBE    1.508452
            ADM     0.346670
            ADP     0.760394
Name: 112, dtype: float64

Let's generate the test dataset, also


In [45]:
def generate_one_test_set(params, data_df):
    # print(('-'*70 + '\n {}, {} \n' + '-'*70).format(params['base_days'].values, params['ahead_days'].values))
    tic = time()
    
    train_val_time = int(params['train_val_time'])
    base_days = int(params['base_days'])
    step_days = int(params['step_days'])
    ahead_days = int(params['ahead_days'])
    
    print('Generating: base{}_ahead{}'.format(base_days, ahead_days))
    pid = 'base{}_ahead{}'.format(base_days, ahead_days)
    
    # Getting the data
    today = data_df.index[-1]  # Real date
    print(pid + ') data_df loaded')

    # Drop symbols with many missing points
    y_train_df = pd.read_pickle('../../data/y_volume_{}.pkl'.format(pid))
    kept_symbols = y_train_df.index.get_level_values(1).unique().tolist()
    data_df = data_df.loc[:, (slice(None), kept_symbols)]
    print(pid + ') Irrelevant symbols dropped.')
    
    # Generate the intervals for the predictor
    x, y = fe.generate_train_intervals(data_df, 
                                       train_val_time, 
                                       base_days, 
                                       step_days,
                                       ahead_days, 
                                       today, 
                                       fe.feature_volume_one_to_one,
                                       target_feature=fe.VOLUME_FEATURE)    
    print(pid + ') Intervals generated')
    
    # Drop "bad" samples and fill missing data
    x_y_df = pd.concat([x, y], axis=1)
    x_y_df = pp.drop_irrelevant_samples(x_y_df, params['SAMPLES_GOOD_DATA_RATIO'])
    x = x_y_df.iloc[:, :-1]
    y = x_y_df.iloc[:, -1]
    x = pp.fill_missing(x)
    print(pid + ') Irrelevant samples dropped and missing data filled.')
    
    # Pickle that
    x.to_pickle('../../data/x_volume_{}_test.pkl'.format(pid))
    y.to_pickle('../../data/y_volume_{}_test.pkl'.format(pid))
    
    toc = time()
    print('%s) %i intervals generated in: %i seconds.' % (pid, x.shape[0], (toc-tic)))
    
    return pid, x,

In [46]:
data_test_df = pd.read_pickle('../../data/data_test_df.pkl')
generate_one_test_set(best_params_df, data_test_df)


Generating: base112_ahead1
base112_ahead1) data_df loaded
base112_ahead1) Irrelevant symbols dropped.
base112_ahead1) Intervals generated
base112_ahead1) Irrelevant samples dropped and missing data filled.
base112_ahead1) 15957 intervals generated in: 2 seconds.
Out[46]:
('base112_ahead1',
                  0         1         2         3         4         5    \
 2015-01-02 AAPL  1.0  1.309379  0.881915  0.502535  0.741084  0.551628   
            ABT   1.0  1.020744  1.228717  1.009282  1.113661  0.734277   
            ADBE  1.0  1.876632  2.380451  1.396930  3.661908  1.810396   
            ADM   1.0  2.079629  1.857814  1.529286  1.885960  1.604243   
            ADP   1.0  2.157365  2.960348  2.136902  1.499220  1.718951   
            ADSK  1.0  1.170121  1.162677  0.832090  1.048707  0.711736   
            AEP   1.0  1.073527  1.259167  0.866142  1.414134  0.822234   
            AES   1.0  2.362325  2.152819  1.351209  1.449337  1.495302   
            AET   1.0  1.314205  2.073317  1.391448  1.229979  1.200142   
            AFL   1.0  1.034931  1.371505  0.943251  1.276478  1.014083   
            AJG   1.0  1.422405  1.966099  2.316243  1.546083  1.505282   
            ALK   1.0  1.674931  2.044477  1.283825  1.611856  2.105850   
            AMAT  1.0  1.519000  2.641575  1.029301  1.247451  0.781427   
            AMD   1.0  1.748260  1.372503  1.110396  1.793936  0.935558   
            AME   1.0  1.331017  2.177782  1.852193  2.440179  1.672840   
            AMGN  1.0  1.567243  1.947129  1.343569  1.749368  1.514921   
            AN    1.0  1.698041  1.742035  1.524749  1.425072  1.061121   
            APA   1.0  2.002836  1.521676  1.400045  1.392676  0.949827   
            APC   1.0  1.430615  1.354038  1.370842  1.087004  1.065589   
            APD   1.0  1.665231  2.118093  1.526939  2.047976  1.365097   
            APH   1.0  0.678916  1.630449  1.153657  1.470564  0.712215   
            ARNC  1.0  2.116942  4.007535  2.146717  3.307453  3.039781   
            AVY   1.0  2.247686  2.443505  2.278373  2.489746  1.619325   
            AXP   1.0  1.652595  2.848836  2.273496  2.416261  1.264385   
            AZO   1.0  1.217239  1.229300  1.619671  2.554008  4.271811   
            BA    1.0  1.052354  1.019497  0.608791  0.810926  0.555651   
            BAC   1.0  0.984574  1.639317  0.974967  0.950229  0.640009   
            BAX   1.0  1.996102  2.129876  1.406360  1.579985  1.000849   
            BBBY  1.0  1.900836  2.102104  1.420279  2.106673  2.002082   
            BBT   1.0  1.283045  1.943430  2.543561  4.389168  2.623263   
 ...              ...       ...       ...       ...       ...       ...   
 2016-07-14 TWX   1.0  0.909714  0.922579  1.052514  1.070886  0.858737   
            TXN   1.0  2.251900  1.154166  1.040112  1.512894  1.658280   
            TXT   1.0  0.962747  1.455920  0.976870  0.912868  0.843559   
            UDR   1.0  1.645542  0.930940  0.835749  0.524865  0.608243   
            UHS   1.0  0.817897  0.707206  0.556051  0.404186  0.423550   
            UNH   1.0  1.048697  0.865690  0.826834  0.679675  0.625269   
            UNM   1.0  0.857625  0.660638  1.056228  0.903143  1.157153   
            UNP   1.0  1.249276  1.388450  1.775000  1.829523  1.167174   
            USB   1.0  0.720443  0.773807  1.022645  1.000011  0.994290   
            UTX   1.0  0.755958  0.510331  0.628513  0.796116  1.249229   
            VFC   1.0  1.997235  1.452215  2.265087  1.608391  2.752920   
            VLO   1.0  0.958964  0.849966  0.816881  0.754629  0.762427   
            VMC   1.0  1.256897  0.867078  0.925360  1.082007  0.896877   
            VNO   1.0  1.322490  1.138209  1.123220  1.322277  1.800812   
            VRTX  1.0  1.838026  1.708751  1.676503  1.568943  0.922000   
            VZ    1.0  0.762170  0.771703  0.901242  0.764880  1.087305   
            WDC   1.0  0.855560  0.822979  0.776716  0.684046  0.911343   
            WEC   1.0  1.997958  1.373229  0.909236  0.942627  0.906728   
            WFC   1.0  1.374261  1.125240  1.335525  0.821102  0.796342   
            WFM   1.0  0.883729  0.741493  0.621663  0.661923  0.828346   
            WHR   1.0  1.054215  0.798190  0.778727  0.851870  0.687997   
            WMB   1.0  1.192526  1.471499  1.663737  1.320881  1.057600   
            WMT   1.0  0.865936  0.815470  1.053907  1.152948  0.867580   
            WY    1.0  0.901961  0.821093  0.721907  0.794713  0.666862   
            XEL   1.0  1.112015  1.061592  0.635049  0.794515  0.834956   
            XLNX  1.0  0.987284  0.545648  0.642176  0.861321  0.521535   
            XOM   1.0  1.663253  1.827840  1.750169  3.845671  1.274750   
            XRAY  1.0  1.081037  0.965152  0.944194  1.165436  1.474943   
            XRX   1.0  0.926467  0.877605  0.588330  0.767208  0.597233   
            ZION  1.0  0.787273  0.841850  0.646830  0.740828  0.967934   
 
                       6         7         8         9      ...          102  \
 2015-01-02 AAPL  0.393675  0.513118  0.759931  0.683728    ...     0.490166   
            ABT   0.567758  0.892810  0.817798  1.204868    ...     0.971316   
            ADBE  1.897997  2.174064  2.080942  2.087752    ...     1.638939   
            ADM   2.916509  3.623164  5.267184  2.536432    ...     1.344322   
            ADP   1.895002  2.060124  3.366209  3.947636    ...     1.285805   
            ADSK  0.827306  0.801458  0.828909  0.908657    ...     0.907499   
            AEP   0.916453  1.406367  1.297242  1.349148    ...     1.506619   
            AES   1.670273  1.762120  1.468321  1.252290    ...     3.013902   
            AET   1.199016  1.313512  1.292515  1.297105    ...     1.058216   
            AFL   1.756458  1.191076  0.894483  1.436965    ...     1.043639   
            AJG   1.320893  1.446357  2.710073  2.384031    ...     1.928542   
            ALK   1.255452  1.173873  1.512098  1.609072    ...     1.646459   
            AMAT  0.826378  0.952945  1.126522  0.859464    ...     1.372170   
            AMD   0.708281  1.027661  0.957231  0.766359    ...     0.863920   
            AME   2.303601  1.763586  1.578466  1.769249    ...     1.048180   
            AMGN  1.036040  1.753277  1.314420  1.110955    ...     1.093323   
            AN    1.174986  1.337484  1.253217  0.939827    ...     2.200300   
            APA   1.679392  1.235779  1.682862  0.934154    ...     0.632823   
            APC   1.438755  1.202554  1.338463  1.615223    ...     1.258033   
            APD   2.593475  2.349888  1.995157  1.718512    ...     2.772246   
            APH   0.649977  1.488225  1.261445  0.984633    ...     0.696905   
            ARNC  1.711586  2.395383  2.180967  3.017904    ...     2.845901   
            AVY   1.354080  1.743732  3.172775  2.675464    ...     1.392422   
            AXP   1.820718  2.086664  3.177137  3.838873    ...     1.713364   
            AZO   1.542512  1.147446  0.971196  1.600778    ...     0.869252   
            BA    0.513660  0.597992  0.623627  1.512184    ...     0.552441   
            BAC   0.627417  1.197538  1.197772  0.845683    ...     0.667984   
            BAX   2.075219  2.068944  2.648916  1.893051    ...     0.572115   
            BBBY  2.028663  1.910683  0.960977  0.871603    ...     2.015447   
            BBT   1.944492  2.548825  1.464028  1.707577    ...     1.222109   
 ...                   ...       ...       ...       ...    ...          ...   
 2016-07-14 TWX   0.844474  1.486436  2.883391  2.451845    ...     1.278672   
            TXN   1.135886  1.268350  1.510837  1.487067    ...     1.444330   
            TXT   0.735406  1.127198  1.335242  1.298180    ...     1.333617   
            UDR   0.948457  0.652131  1.138983  1.780780    ...     1.116154   
            UHS   0.412161  0.390108  0.400502  0.450081    ...     0.784591   
            UNH   0.463672  0.545566  1.048538  0.881222    ...     0.682370   
            UNM   1.062287  1.046381  1.334607  1.207831    ...     1.288378   
            UNP   0.898964  0.908317  0.889174  0.894496    ...     1.517103   
            USB   0.819061  1.088907  1.223291  5.383950    ...     8.276785   
            UTX   0.789276  0.514712  0.542020  0.660736    ...     0.925733   
            VFC   4.076905  2.254501  1.523020  1.988893    ...     1.627736   
            VLO   1.029497  1.281862  2.137143  1.531252    ...     1.070784   
            VMC   0.988708  0.977494  1.689763  1.538456    ...     1.466914   
            VNO   1.247626  1.930719  1.674133  2.321748    ...     2.594304   
            VRTX  1.625360  1.340915  0.904854  1.057631    ...     1.712049   
            VZ    0.645844  1.042509  1.319077  1.036631    ...     1.483997   
            WDC   0.359111  0.570000  0.345004  0.447628    ...     0.409431   
            WEC   0.773023  0.689605  0.917605  1.017542    ...     2.651941   
            WFC   1.403939  3.159954  1.827070  2.347811    ...     1.425058   
            WFM   0.833515  0.489713  0.728079  0.746453    ...     0.804177   
            WHR   0.657342  0.991694  1.036558  1.037470    ...     1.569378   
            WMB   0.951178  1.173560  3.108479  2.460970    ...     2.598472   
            WMT   0.917551  0.812924  1.403023  1.001051    ...     4.272631   
            WY    0.649469  0.712258  0.789643  0.869330    ...     1.048123   
            XEL   0.768249  1.167047  1.306677  1.080103    ...     1.203281   
            XLNX  0.377718  0.646707  0.704621  0.734384    ...     4.545990   
            XOM   1.114495  1.736627  1.501408  2.123204    ...     1.706351   
            XRAY  0.794543  0.738303  0.887803  1.105778    ...     1.572767   
            XRX   0.822698  0.469397  0.687939  1.032828    ...     1.433052   
            ZION  1.021068  0.826939  1.414639  2.633002    ...     0.991185   
 
                       103       104       105       106        107       108  \
 2015-01-02 AAPL  0.743715  0.886662  1.044498  0.628821   0.421408  0.687093   
            ABT   0.650973  0.836879  0.704147  0.562672   0.688186  0.891585   
            ADBE  1.171208  1.258540  2.449889  1.714750   1.326784  1.353690   
            ADM   1.820245  1.415031  1.345540  1.654624   1.692824  1.441385   
            ADP   1.338339  1.830701  1.246661  2.432781   1.424882  1.677282   
            ADSK  1.161206  0.874610  0.754384  0.748825   0.641940  0.616540   
            AEP   0.783943  0.661938  0.804946  0.788033   1.265916  3.110682   
            AES   2.137534  3.179703  3.043151  3.160738   2.973048  2.546015   
            AET   1.104412  1.320581  1.623483  1.687550   1.387921  1.422960   
            AFL   1.084495  1.419733  1.137749  1.714690   0.863867  1.749220   
            AJG   1.716100  1.766974  2.223401  1.969470   2.405547  1.842425   
            ALK   1.531179  1.162292  1.235399  1.275089   1.327137  1.049528   
            AMAT  1.222216  0.930921  1.072350  0.666572   0.905208  0.710751   
            AMD   1.086093  0.928488  0.902415  0.723140   0.818217  0.838268   
            AME   1.144184  1.132519  1.786602  1.653829   1.914811  0.877774   
            AMGN  0.957967  0.828776  1.502877  1.316446   0.799001  1.061087   
            AN    2.218631  3.116026  2.643243  2.004636   1.742433  2.458795   
            APA   0.669607  0.974809  0.510852  0.670743   0.860038  1.009563   
            APC   1.109946  0.681452  0.923985  1.052093   0.849384  1.063525   
            APD   1.869295  2.204768  1.356630  1.024508   0.959527  1.464339   
            APH   0.819324  1.094102  0.628818  0.768747   0.590878  0.907470   
            ARNC  2.663311  2.522955  1.303511  1.924036   1.743410  1.828948   
            AVY   1.579532  1.591538  1.623651  1.987663   1.316808  1.334504   
            AXP   2.308777  1.321807  2.559223  1.849814   2.185394  1.840905   
            AZO   0.394840  0.668001  0.395058  0.554487   0.528401  0.474417   
            BA    0.988967  0.752157  0.422797  1.073541   0.576859  0.839358   
            BAC   0.765025  0.658804  0.580024  0.705498   0.742461  0.924713   
            BAX   0.580392  0.787629  0.666715  1.028478   0.577361  0.696948   
            BBBY  1.764496  2.148264  2.727530  1.646108   3.729650  2.773730   
            BBT   1.993510  1.312110  1.564020  1.022638   0.795519  1.290839   
 ...                   ...       ...       ...       ...        ...       ...   
 2016-07-14 TWX   1.960651  2.525270  1.608312  1.594163   1.386166  0.910013   
            TXN   1.602180  1.396580  4.022354  0.891923   1.611158  4.142033   
            TXT   1.097432  1.056981  1.465539  1.470776   1.364459  1.159663   
            UDR   1.332316  0.657397  0.618274  1.160657   1.383365  1.199580   
            UHS   0.855549  0.706938  0.501787  0.603988   0.767997  0.623963   
            UNH   0.678409  0.783438  0.987583  0.971746   1.429747  1.642043   
            UNM   0.957890  1.036618  1.353941  1.151066   0.834745  1.091664   
            UNP   1.135583  1.335436  1.156882  1.289334   1.294925  1.473983   
            USB   9.353526  4.241497  2.371774  3.661436  11.518788  5.263343   
            UTX   0.724290  0.768687  0.775758  0.777231   0.665695  0.840202   
            VFC   2.223378  2.247950  2.256141  2.467401   2.248724  4.024276   
            VLO   0.929882  0.857652  1.133009  0.725744   0.758870  0.854703   
            VMC   1.036436  0.910164  1.458431  1.100285   1.794873  1.258146   
            VNO   1.790825  1.634471  1.748667  1.221944   3.231469  1.991508   
            VRTX  1.302011  0.920159  1.699940  1.029280   2.089041  1.415647   
            VZ    1.672523  1.240792  1.578066  1.595321   1.587301  1.057529   
            WDC   0.461011  0.480078  0.426969  0.352054   0.638083  0.649299   
            WEC   2.265765  1.156598  1.586385  1.385536   3.105546  1.783201   
            WFC   1.242240  1.252415  1.817262  1.521335   1.010848  1.373482   
            WFM   0.782704  0.710426  0.782432  0.800580   0.955876  0.656267   
            WHR   0.825200  0.758019  1.239069  1.510097   1.432990  1.546284   
            WMB   2.154583  1.743191  1.539412  1.670824   2.209782  6.003044   
            WMT   1.317290  0.856201  1.004358  0.980214   0.706317  0.774204   
            WY    1.029763  1.618417  1.366766  1.337191   1.304371  1.073708   
            XEL   0.977755  0.863154  1.672727  2.026088   1.756652  1.200320   
            XLNX  3.182349  1.824059  1.784862  3.449086   3.651118  2.385607   
            XOM   1.807545  7.007270  4.504553  2.952029   3.770322  2.544200   
            XRAY  1.279357  1.555748  1.156926  2.109645   1.215521  1.663749   
            XRX   1.381666  1.240640  0.986018  0.803058   1.118755  1.297604   
            ZION  1.622891  0.807117  0.986565  1.012151   1.375305  1.260563   
 
                       109       110       111  
 2015-01-02 AAPL  0.489222  0.486098  0.450118  
            ABT   0.753670  0.814058  0.761690  
            ADBE  1.481891  1.386150  1.122632  
            ADM   1.564289  1.477611  1.394270  
            ADP   2.096849  1.576910  1.085765  
            ADSK  0.648402  0.681021  0.557779  
            AEP   1.306764  1.188326  1.306306  
            AES   2.639939  2.195160  1.633802  
            AET   1.502647  1.240896  0.936814  
            AFL   1.125791  0.949057  0.696193  
            AJG   2.398097  1.640902  2.556784  
            ALK   1.014248  1.212392  1.185089  
            AMAT  0.573959  0.674200  1.300083  
            AMD   0.868155  0.768263  0.611322  
            AME   0.816372  1.683801  1.649667  
            AMGN  2.291228  1.393825  0.919942  
            AN    4.475812  5.914504  4.578586  
            APA   0.473995  0.412513  0.800065  
            APC   1.161302  0.890748  1.040878  
            APD   1.130471  1.190375  1.240924  
            APH   1.065276  1.169516  0.689181  
            ARNC  2.753110  3.015002  1.951408  
            AVY   1.886899  1.400147  1.149400  
            AXP   2.060932  2.400297  1.769591  
            AZO   0.681198  0.679871  0.554133  
            BA    1.597921  0.655561  0.620938  
            BAC   1.060734  0.698427  0.702154  
            BAX   0.705262  0.449595  0.513686  
            BBBY  1.990361  1.750695  2.905341  
            BBT   1.610808  1.354875  1.104143  
 ...                   ...       ...       ...  
 2016-07-14 TWX   3.046425  1.095342  1.273785  
            TXN   3.856722  1.059564  0.936854  
            TXT   2.139374  1.134609  0.942961  
            UDR   2.039415  0.739796  0.903213  
            UHS   0.905603  0.446458  0.504321  
            UNH   1.488921  0.640478  0.786685  
            UNM   2.414300  1.160345  0.681996  
            UNP   2.534513  1.577183  1.043481  
            USB   3.735727  1.747135  1.178516  
            UTX   1.239713  0.536248  0.567640  
            VFC   5.154066  2.553056  2.366960  
            VLO   1.340557  0.604560  0.462774  
            VMC   1.915799  1.207996  0.934084  
            VNO   2.302574  1.570365  1.340294  
            VRTX  2.484558  1.613962  1.094170  
            VZ    1.596910  1.453010  1.190689  
            WDC   0.872577  0.361997  0.378750  
            WEC   1.887073  0.955798  1.094826  
            WFC   4.276194  1.247334  0.821547  
            WFM   0.988080  0.542170  0.722354  
            WHR   1.951285  0.922996  1.375329  
            WMB   3.884168  1.782162  2.455847  
            WMT   1.339566  0.703908  1.183441  
            WY    2.684885  0.886093  0.919004  
            XEL   2.212358  1.040995  0.777003  
            XLNX  4.939276  1.347456  2.359739  
            XOM   1.985215  0.987633  0.860738  
            XRAY  2.864758  0.981340  1.175850  
            XRX   1.749127  0.723255  1.136066  
            ZION  1.923845  0.771420  1.122944  
 
 [15957 rows x 112 columns])

In [47]:
x_volume_test = pd.read_pickle('../../data/x_volume_base112_ahead1_test.pkl')
print(x_volume_test.shape)
x_volume_test.head()


(15957, 112)
Out[47]:
0 1 2 3 4 5 6 7 8 9 ... 102 103 104 105 106 107 108 109 110 111
2015-01-02 AAPL 1.0 1.309379 0.881915 0.502535 0.741084 0.551628 0.393675 0.513118 0.759931 0.683728 ... 0.490166 0.743715 0.886662 1.044498 0.628821 0.421408 0.687093 0.489222 0.486098 0.450118
ABT 1.0 1.020744 1.228717 1.009282 1.113661 0.734277 0.567758 0.892810 0.817798 1.204868 ... 0.971316 0.650973 0.836879 0.704147 0.562672 0.688186 0.891585 0.753670 0.814058 0.761690
ADBE 1.0 1.876632 2.380451 1.396930 3.661908 1.810396 1.897997 2.174064 2.080942 2.087752 ... 1.638939 1.171208 1.258540 2.449889 1.714750 1.326784 1.353690 1.481891 1.386150 1.122632
ADM 1.0 2.079629 1.857814 1.529286 1.885960 1.604243 2.916509 3.623164 5.267184 2.536432 ... 1.344322 1.820245 1.415031 1.345540 1.654624 1.692824 1.441385 1.564289 1.477611 1.394270
ADP 1.0 2.157365 2.960348 2.136902 1.499220 1.718951 1.895002 2.060124 3.366209 3.947636 ... 1.285805 1.338339 1.830701 1.246661 2.432781 1.424882 1.677282 2.096849 1.576910 1.085765

5 rows × 112 columns


In [48]:
y_volume_test = pd.read_pickle('../../data/y_volume_base112_ahead1_test.pkl')
print(y_volume_test.shape)
y_volume_test.head()


(15957,)
Out[48]:
2015-01-02  AAPL    0.578212
            ABT     1.043824
            ADBE    1.406943
            ADM     1.222931
            ADP     1.390231
Name: 112, dtype: float64

Let's train a predictor with the same hyperparameters as for the 'Close' one.


In [49]:
best_params_df = pd.read_pickle('../../data/best_params_final_df.pkl')

In [50]:
import predictor.feature_extraction as fe
from predictor.linear_predictor import LinearPredictor
import utils.misc as misc
import predictor.evaluation as ev

ahead_days = 1

# Get some parameters
train_days = int(best_params_df.loc[ahead_days, 'train_days'])
GOOD_DATA_RATIO, \
train_val_time, \
base_days, \
step_days, \
ahead_days, \
SAMPLES_GOOD_DATA_RATIO, \
x_filename, \
y_filename = misc.unpack_params(best_params_df.loc[ahead_days,:])

pid = 'base{}_ahead{}'.format(base_days, ahead_days)

# Get the datasets
x_train = pd.read_pickle('../../data/x_volume_{}.pkl'.format(pid))
y_train = pd.read_pickle('../../data/y_volume_{}.pkl'.format(pid))
x_test = pd.read_pickle('../../data/x_volume_{}_test.pkl'.format(pid)).sort_index()
y_test = pd.DataFrame(pd.read_pickle('../../data/y_volume_{}_test.pkl'.format(pid))).sort_index()

# Let's cut the training set to use only the required number of samples
end_date = x_train.index.levels[0][-1]
start_date = fe.add_market_days(end_date, -train_days)
x_sub_df = x_train.loc[(slice(start_date,None),slice(None)),:]
y_sub_df = pd.DataFrame(y_train.loc[(slice(start_date,None),slice(None))])

# Create the estimator and train
estimator = LinearPredictor()
estimator.fit(x_sub_df, y_sub_df)

# Get the training and test predictions
y_train_pred = estimator.predict(x_sub_df)
y_test_pred = estimator.predict(x_test)

# Get the training and test metrics for each symbol
metrics_train = ev.get_metrics_df(y_sub_df, y_train_pred)
metrics_test = ev.get_metrics_df(y_test, y_test_pred)

# Show the mean metrics
metrics_df = pd.DataFrame(columns=['train', 'test'])
metrics_df['train'] = metrics_train.mean()
metrics_df['test'] = metrics_test.mean()
print('Mean metrics: \n{}\n{}'.format(metrics_df,'-'*70))

# Plot the metrics in time
metrics_train_time = ev.get_metrics_in_time(y_sub_df, y_train_pred, base_days + ahead_days)
metrics_test_time = ev.get_metrics_in_time(y_test, y_test_pred, base_days + ahead_days)
plt.plot(metrics_train_time[2], metrics_train_time[0], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[0], label='test', marker='.')
plt.title('$r^2$ metrics')
plt.legend()
plt.figure()
plt.plot(metrics_train_time[2], metrics_train_time[1], label='train', marker='.')
plt.plot(metrics_test_time[2], metrics_test_time[1], label='test', marker='.')
plt.title('MRE metrics')
plt.legend()


Mean metrics: 
        train      test
r2   0.539703  0.480612
mre  0.277789  0.277234
----------------------------------------------------------------------
Out[50]:
<matplotlib.legend.Legend at 0x7f37fe481c18>

In [51]:
joblib.dump(estimator, '../../data/best_volume_predictor.pkl')


Out[51]:
['../../data/best_volume_predictor.pkl']

In [ ]: