notebook.community

Edit and run



In [90]:

    
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
from time import time
from sklearn.metrics import r2_score, median_absolute_error

%matplotlib inline

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('../../')

from utils import preprocessing as pp









    



Populating the interactive namespace from numpy and matplotlib
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload



In [91]:

    
%pwd









    Out[91]:





'/home/miguel/udacity/Machine Learning Nanodegree/projects/capstone/capstone/notebooks/dev'



In [92]:

    
# Getting the data
data_df = pd.read_pickle('../../data/data_train_val_df.pkl')

sys.path.append('../../')
import predictor.feature_extraction as fe

train_time = -1 # In real time days
base_days = 7 # In market days
step_days = 7 # market days
ahead_days = 1 # market days
today = data_df.index[-1] # Real date

tic = time()
x, y = fe.generate_train_intervals(data_df, 
                                   train_time, 
                                   base_days, 
                                   step_days, 
                                   ahead_days, 
                                   today, 
                                   fe.feature_close_one_to_one)
toc = time()
print('Elapsed time: %i seconds.' % (toc-tic))
print(data_df.shape)









    



Elapsed time: 186 seconds.
(5520, 2415)

Get the results of a single run



In [93]:

    
from predictor import evaluation as ev
from predictor.dummy_mean_predictor import DummyPredictor



In [94]:

    
predictor = DummyPredictor()



In [95]:

    
y_train_true_df, y_train_pred_df, y_val_true_df, y_val_pred_df = ev.run_single_val(x, y, ahead_days, predictor)









    



Multiindex = True
2014-12-16 00:00:00
23
30



In [96]:

    
print(y_train_true_df.shape)
print(y_train_pred_df.shape)
print(y_val_true_df.shape)
print(y_val_pred_df.shape)









    



(314475, 1)
(314475, 1)
(479, 1)
(479, 1)



In [97]:

    
y_train_true_df.head()



In [98]:

    
y_train_pred_df.head()



In [99]:

    
y_val_true_df.head()



In [11]:

    
y_val_pred_df.head()

Done. Let's test the reshape_by_symbol function



In [12]:

    
y_train_true_rs = ev.reshape_by_symbol(y_train_true_df)
print(y_train_true_rs.shape)
y_train_true_rs.head()









    



(787, 482)






    Out[12]:







  
    
      ticker
      A
      AAL
      AAP
      AAPL
      ABBV
      ABC
      ABT
      ACN
      ADBE
      ADI
      ...
      XLNX
      XOM
      XRAY
      XRX
      XYL
      YHOO
      YUM
      ZBH
      ZION
      ZTS
    
    
      date
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1993-01-29
      NaN
      19.75
      NaN
      NaN
      2.03
      2.44
      NaN
      NaN
      NaN
      22.75
      ...
      14.40
      10.29
      NaN
      7.25
      NaN
      NaN
      NaN
      NaN
      11.56
      NaN
    
    
      1993-02-09
      NaN
      22.12
      NaN
      NaN
      1.96
      2.50
      NaN
      NaN
      NaN
      21.38
      ...
      13.82
      10.04
      NaN
      6.75
      NaN
      NaN
      NaN
      NaN
      10.69
      NaN
    
    
      1993-02-19
      NaN
      22.25
      NaN
      NaN
      1.94
      2.44
      NaN
      NaN
      NaN
      21.62
      ...
      13.97
      10.49
      NaN
      6.42
      NaN
      NaN
      NaN
      NaN
      11.50
      NaN
    
    
      1993-03-02
      NaN
      23.75
      NaN
      NaN
      2.03
      2.44
      NaN
      NaN
      NaN
      22.69
      ...
      14.05
      10.74
      NaN
      7.50
      NaN
      NaN
      NaN
      NaN
      11.75
      NaN
    
    
      1993-03-11
      NaN
      22.25
      NaN
      NaN
      1.90
      2.39
      NaN
      NaN
      NaN
      22.00
      ...
      13.59
      10.91
      NaN
      6.80
      NaN
      NaN
      NaN
      NaN
      12.00
      NaN
    
  

5 rows × 482 columns



In [13]:

    
y_train_pred_rs = ev.reshape_by_symbol(y_train_pred_df)
print(y_train_pred_rs.shape)
y_train_pred_rs.head()









    



(787, 482)






    Out[13]:







  
    
      ticker
      A
      AAL
      AAP
      AAPL
      ABBV
      ABC
      ABT
      ACN
      ADBE
      ADI
      ...
      XLNX
      XOM
      XRAY
      XRX
      XYL
      YHOO
      YUM
      ZBH
      ZION
      ZTS
    
    
      date
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1993-01-29
      NaN
      19.730000
      NaN
      NaN
      2.111429
      2.595714
      NaN
      NaN
      NaN
      22.822857
      ...
      14.224286
      10.175714
      NaN
      7.888571
      NaN
      NaN
      NaN
      NaN
      11.374286
      NaN
    
    
      1993-02-09
      NaN
      20.447143
      NaN
      NaN
      1.954286
      2.451429
      NaN
      NaN
      NaN
      22.285714
      ...
      14.102857
      10.068571
      NaN
      7.261429
      NaN
      NaN
      NaN
      NaN
      11.155714
      NaN
    
    
      1993-02-19
      NaN
      21.767143
      NaN
      NaN
      1.934286
      2.402857
      NaN
      NaN
      NaN
      21.671429
      ...
      13.700000
      10.248571
      NaN
      6.497143
      NaN
      NaN
      NaN
      NaN
      11.205714
      NaN
    
    
      1993-03-02
      NaN
      23.177143
      NaN
      NaN
      1.984286
      2.418571
      NaN
      NaN
      NaN
      22.345714
      ...
      13.998571
      10.571429
      NaN
      6.847143
      NaN
      NaN
      NaN
      NaN
      11.282857
      NaN
    
    
      1993-03-11
      NaN
      23.410000
      NaN
      NaN
      1.991429
      2.430000
      NaN
      NaN
      NaN
      22.285714
      ...
      13.835714
      10.688571
      NaN
      7.540000
      NaN
      NaN
      NaN
      NaN
      11.615714
      NaN
    
  

5 rows × 482 columns



In [14]:

    
y_val_true_rs = ev.reshape_by_symbol(y_val_true_df)
print(y_val_true_rs.shape)
y_val_true_rs.head()









    



(1, 479)






    Out[14]:







  
    
      ticker
      A
      AAL
      AAP
      AAPL
      ABBV
      ABC
      ABT
      ACN
      ADBE
      ADI
      ...
      XLNX
      XOM
      XRAY
      XRX
      XYL
      YHOO
      YUM
      ZBH
      ZION
      ZTS
    
    
      date
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2014-12-16
      47.5
      2.65
      56.28
      167.44
      113.99
      59.97
      160.0
      66.98
      91.26
      149.12
      ...
      14.14
      20.59
      58.4
      53.84
      38.95
      50.86
      73.14
      114.17
      28.56
      44.2
    
  

1 rows × 479 columns

So, the reshape_by_symbol function seems to work with run_single_val. It could be added to it. Let's test the roll_evaluate function.



In [15]:

    
u = x.index.levels[0][0]
print(u)









    



1993-01-29 00:00:00



In [16]:

    
fe.SPY_DF.sort_index().index.unique()









    Out[16]:





DatetimeIndex(['1993-01-29', '1993-02-01', '1993-02-02', '1993-02-03',
               '1993-02-04', '1993-02-05', '1993-02-08', '1993-02-09',
               '1993-02-10', '1993-02-11',
               ...
               '2014-12-17', '2014-12-18', '2014-12-19', '2014-12-22',
               '2014-12-23', '2014-12-24', '2014-12-26', '2014-12-29',
               '2014-12-30', '2014-12-31'],
              dtype='datetime64[ns]', name='date', length=5520, freq=None)



In [17]:

    
md = fe.SPY_DF.index.unique()



In [18]:

    
u in md









    Out[18]:





True



In [19]:

    
fe.add_market_days(u,6)









    Out[19]:





Timestamp('1993-02-08 00:00:00')

Let's do some previous filtering to avoid problems



In [101]:

    
# Getting the data
GOOD_DATA_RATIO = 0.99

data_df = pd.read_pickle('../../data/data_train_val_df.pkl')

sys.path.append('../../')
import predictor.feature_extraction as fe
import utils.preprocessing as pp

data_df = pp.drop_irrelevant_symbols(data_df, GOOD_DATA_RATIO)

train_time = -1 # In real time days
base_days = 7 # In market days
step_days = 7 # market days
ahead_days = 1 # market days
today = data_df.index[-1] # Real date

tic = time()
x, y = fe.generate_train_intervals(data_df, 
                                   train_time, 
                                   base_days, 
                                   step_days, 
                                   ahead_days, 
                                   today, 
                                   fe.feature_close_one_to_one)
toc = time()
print('Elapsed time: %i seconds.' % (toc-tic))









    



Elapsed time: 126 seconds.



In [102]:

    
print(data_df.shape)
data_df.head()









    



(5520, 1425)






    Out[102]:







  
    
      feature
      Close
      ...
      Volume
    
    
      
      SPY
      MMM
      ABT
      ADBE
      AMD
      AES
      AET
      AFL
      APD
      ALK
      ...
      HCN
      WDC
      WY
      WHR
      WFM
      WMB
      XEL
      XRX
      XLNX
      ZION
    
    
      date
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1993-01-29
      43.94
      24.50
      6.88
      2.59
      18.75
      4.41
      6.42
      4.49
      21.94
      4.19
      ...
      10600.0
      1552200.0
      805900.0
      230300.0
      268800.0
      732198.0
      87800.0
      7633602.0
      1745196.0
      33600.0
    
    
      1993-02-01
      44.25
      24.69
      6.88
      2.72
      19.12
      4.53
      6.64
      4.52
      22.38
      4.19
      ...
      11900.0
      869600.0
      647900.0
      160200.0
      14400.0
      722598.0
      72400.0
      3001200.0
      3574800.0
      32000.0
    
    
      1993-02-02
      44.34
      24.72
      6.53
      2.84
      20.25
      4.53
      6.62
      4.57
      22.31
      4.16
      ...
      9700.0
      1149400.0
      1189900.0
      118100.0
      163200.0
      808398.0
      242200.0
      1388598.0
      2652396.0
      251600.0
    
    
      1993-02-03
      44.81
      25.19
      6.91
      2.70
      20.50
      4.49
      6.50
      4.65
      22.69
      4.22
      ...
      5300.0
      611000.0
      1542300.0
      246300.0
      257600.0
      3141198.0
      272200.0
      1228200.0
      5040396.0
      254800.0
    
    
      1993-02-04
      45.00
      26.06
      6.84
      2.73
      20.12
      4.49
      6.73
      4.84
      23.19
      4.38
      ...
      11700.0
      1102800.0
      1313800.0
      354500.0
      1473600.0
      1550202.0
      162800.0
      1675602.0
      7033200.0
      317200.0
    
  

5 rows × 1425 columns



In [103]:

    
SAMPLES_GOOD_DATA_RATIO = 0.9

x_y_df = pd.concat([x, y], axis=1)
x_y_df = pp.drop_irrelevant_samples(x_y_df, SAMPLES_GOOD_DATA_RATIO)
x = x_y_df.iloc[:, :-1]
y = x_y_df.iloc[:, -1]
x = pp.fill_missing(x)



In [104]:

    
x_y_df.isnull().sum()









    Out[104]:





0         0
1         0
2         0
3         0
4         0
5         0
6         0
target    0
dtype: int64



In [105]:

    
x.isnull().sum().sum()









    Out[105]:





0



In [106]:

    
y.isnull().sum()









    Out[106]:





0



In [107]:

    
x_reshaped = ev.reshape_by_symbol(x)
x_reshaped.head()









    Out[107]:







  
    
      ticker
      AAPL
      ABT
      ADBE
      ADM
      ADP
      ADSK
      AEP
      AES
      AET
      AFL
      ...
      WHR
      WMB
      WMT
      WY
      XEL
      XLNX
      XOM
      XRAY
      XRX
      ZION
    
    
      date
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1993-01-29
      21.94
      2.59
      11.24
      223.13
      13.44
      17.62
      4.19
      5.94
      33.88
      4.41
      ...
      42.88
      2.49
      4.44
      16.41
      15.72
      14.55
      38.34
      4.92
      7.32
      11.81
    
    
      1993-02-09
      22.75
      2.59
      11.03
      225.75
      13.44
      17.62
      4.34
      5.50
      34.38
      4.29
      ...
      41.88
      2.36
      4.62
      15.47
      15.47
      13.65
      41.08
      4.81
      6.88
      10.38
    
    
      1993-02-19
      21.38
      2.73
      10.71
      213.38
      13.56
      16.81
      4.00
      5.81
      35.12
      4.41
      ...
      41.88
      2.52
      4.06
      16.50
      15.97
      13.80
      41.96
      4.67
      6.36
      11.44
    
    
      1993-03-02
      21.62
      2.75
      11.51
      210.38
      13.69
      17.06
      4.06
      5.59
      35.88
      4.53
      ...
      44.62
      2.98
      4.44
      17.00
      15.97
      14.17
      41.21
      4.69
      7.50
      11.44
    
    
      1993-03-11
      22.69
      2.80
      11.24
      212.63
      14.31
      17.31
      4.19
      5.78
      36.00
      4.61
      ...
      42.25
      2.81
      2.81
      16.56
      16.16
      13.74
      43.71
      4.62
      6.88
      11.81
    
  

5 rows × 1995 columns



In [108]:

    
x_reshaped.isnull().sum().max()









    Out[108]:





6



In [109]:

    
x.shape









    Out[109]:





(224493, 7)



In [110]:

    
x_reshaped.shape









    Out[110]:





(788, 1995)



In [111]:

    
x_reshaped[x_reshaped.notnull()]









    Out[111]:







  
    
      ticker
      AAPL
      ABT
      ADBE
      ADM
      ADP
      ADSK
      AEP
      AES
      AET
      AFL
      ...
      WHR
      WMB
      WMT
      WY
      XEL
      XLNX
      XOM
      XRAY
      XRX
      ZION
    
    
      date
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      1993-01-29
      21.94
      2.59
      11.24
      223.13
      13.44
      17.62
      4.19
      5.94
      33.88
      4.41
      ...
      42.88
      2.49
      4.44
      16.41
      15.72
      14.55
      38.34
      4.92
      7.32
      11.81
    
    
      1993-02-09
      22.75
      2.59
      11.03
      225.75
      13.44
      17.62
      4.34
      5.50
      34.38
      4.29
      ...
      41.88
      2.36
      4.62
      15.47
      15.47
      13.65
      41.08
      4.81
      6.88
      10.38
    
    
      1993-02-19
      21.38
      2.73
      10.71
      213.38
      13.56
      16.81
      4.00
      5.81
      35.12
      4.41
      ...
      41.88
      2.52
      4.06
      16.50
      15.97
      13.80
      41.96
      4.67
      6.36
      11.44
    
    
      1993-03-02
      21.62
      2.75
      11.51
      210.38
      13.69
      17.06
      4.06
      5.59
      35.88
      4.53
      ...
      44.62
      2.98
      4.44
      17.00
      15.97
      14.17
      41.21
      4.69
      7.50
      11.44
    
    
      1993-03-11
      22.69
      2.80
      11.24
      212.63
      14.31
      17.31
      4.19
      5.78
      36.00
      4.61
      ...
      42.25
      2.81
      2.81
      16.56
      16.16
      13.74
      43.71
      4.62
      6.88
      11.81
    
    
      1993-03-22
      22.00
      2.70
      11.08
      210.38
      13.72
      17.25
      4.19
      5.17
      36.00
      4.70
      ...
      41.25
      2.83
      3.00
      15.56
      16.62
      13.92
      41.33
      4.77
      7.46
      12.25
    
    
      1993-03-31
      22.06
      2.69
      11.03
      193.50
      13.66
      17.50
      4.28
      5.44
      36.88
      4.98
      ...
      41.75
      2.71
      2.44
      14.44
      17.00
      13.26
      42.71
      4.69
      6.63
      12.06
    
    
      1993-04-12
      21.19
      2.66
      10.55
      185.63
      14.25
      18.38
      4.34
      5.31
      37.62
      5.10
      ...
      45.50
      2.62
      2.44
      13.19
      16.66
      13.22
      43.96
      4.59
      5.96
      11.56
    
    
      1993-04-21
      21.31
      2.91
      10.33
      197.63
      12.69
      18.75
      4.28
      5.12
      37.50
      5.02
      ...
      44.25
      2.64
      2.44
      13.38
      16.50
      12.55
      42.71
      4.67
      5.96
      10.81
    
    
      1993-04-30
      21.88
      3.58
      10.71
      198.38
      12.41
      19.00
      4.16
      5.06
      37.12
      5.10
      ...
      42.88
      2.91
      2.69
      13.06
      15.91
      12.30
      41.21
      4.75
      6.34
      10.44
    
    
      1993-05-11
      21.62
      3.72
      10.28
      193.13
      12.47
      19.50
      4.06
      5.92
      35.75
      4.90
      ...
      44.50
      3.13
      2.62
      13.94
      16.16
      12.20
      41.96
      4.70
      6.88
      10.00
    
    
      1993-05-20
      22.25
      4.19
      10.33
      204.00
      11.72
      20.31
      4.22
      6.69
      34.75
      4.74
      ...
      45.25
      2.99
      2.56
      13.94
      16.38
      12.61
      41.21
      4.94
      6.48
      10.56
    
    
      1993-06-01
      21.56
      4.22
      10.28
      205.50
      12.00
      19.88
      4.03
      6.22
      35.00
      4.90
      ...
      41.88
      2.84
      2.25
      13.31
      16.84
      12.36
      40.21
      5.05
      6.29
      9.88
    
    
      1993-06-10
      21.00
      4.22
      10.38
      202.13
      11.56
      19.19
      4.16
      6.47
      36.38
      4.96
      ...
      41.00
      2.85
      2.00
      12.56
      16.44
      12.86
      40.58
      5.05
      5.88
      9.62
    
    
      1993-06-21
      20.19
      4.01
      10.38
      202.88
      12.25
      19.25
      3.69
      6.69
      36.50
      5.02
      ...
      41.50
      3.21
      1.94
      13.12
      16.31
      13.40
      40.21
      5.28
      5.54
      10.06
    
    
      1993-06-30
      19.62
      3.86
      10.12
      210.00
      12.41
      18.50
      3.75
      7.05
      37.75
      4.90
      ...
      43.50
      3.14
      2.00
      13.12
      15.97
      12.90
      43.33
      5.31
      5.63
      9.69
    
    
      1993-07-12
      20.75
      3.67
      10.28
      211.50
      12.69
      18.69
      3.44
      5.97
      37.75
      5.02
      ...
      39.12
      3.31
      1.94
      12.88
      16.03
      12.24
      43.46
      5.20
      5.92
      10.12
    
    
      1993-07-21
      20.94
      3.29
      9.96
      210.00
      12.44
      19.06
      3.25
      5.69
      37.75
      5.06
      ...
      39.12
      3.39
      2.12
      12.88
      16.53
      12.26
      46.20
      5.58
      6.40
      10.12
    
    
      1993-07-30
      20.12
      3.09
      10.06
      213.75
      12.59
      19.69
      3.19
      5.16
      38.38
      5.14
      ...
      41.00
      3.57
      2.88
      12.75
      16.16
      12.45
      48.58
      5.58
      6.80
      10.31
    
    
      1993-08-10
      21.06
      3.37
      10.01
      217.88
      13.09
      19.38
      3.12
      5.70
      38.00
      5.10
      ...
      42.12
      3.83
      3.06
      12.75
      16.03
      12.38
      50.07
      5.48
      6.50
      10.66
    
    
      1993-08-19
      21.75
      2.95
      10.35
      223.13
      12.84
      19.88
      3.22
      5.17
      37.88
      5.06
      ...
      41.12
      3.66
      2.88
      12.94
      16.34
      12.32
      52.70
      5.52
      5.88
      10.50
    
    
      1993-08-30
      22.00
      2.64
      10.01
      227.63
      12.66
      19.50
      3.25
      6.19
      38.75
      5.14
      ...
      38.88
      3.39
      2.50
      12.06
      16.38
      12.15
      54.07
      5.41
      5.96
      10.28
    
    
      1993-09-09
      20.44
      2.38
      10.51
      216.38
      13.16
      19.94
      3.22
      5.77
      39.12
      5.14
      ...
      40.50
      3.67
      2.38
      11.94
      16.25
      11.95
      54.70
      5.39
      6.17
      10.50
    
    
      1993-09-20
      19.75
      2.14
      10.74
      206.25
      12.44
      20.00
      3.22
      5.88
      39.25
      5.22
      ...
      40.38
      4.35
      2.44
      12.25
      16.41
      12.26
      56.82
      5.25
      5.75
      10.62
    
    
      1993-09-29
      19.06
      2.33
      10.06
      200.63
      12.66
      19.44
      3.22
      5.75
      38.38
      5.22
      ...
      40.88
      3.78
      2.56
      13.38
      16.38
      12.01
      56.07
      5.59
      6.59
      10.50
    
    
      1993-10-08
      19.56
      2.41
      10.01
      203.25
      13.00
      18.69
      3.22
      5.52
      38.00
      5.32
      ...
      38.75
      3.37
      3.31
      13.38
      16.19
      12.26
      62.31
      5.83
      6.54
      10.94
    
    
      1993-10-19
      20.19
      2.69
      9.95
      206.25
      12.12
      19.19
      3.53
      5.72
      39.50
      5.30
      ...
      40.88
      3.33
      3.94
      12.94
      16.41
      11.76
      59.44
      5.88
      6.25
      10.56
    
    
      1993-10-28
      21.00
      2.64
      10.29
      202.88
      11.59
      18.81
      4.00
      5.44
      38.50
      5.26
      ...
      41.50
      3.21
      3.88
      13.62
      16.06
      13.28
      56.94
      5.80
      6.54
      9.94
    
    
      1993-11-08
      21.00
      2.53
      9.90
      204.38
      11.62
      18.38
      3.91
      5.02
      35.25
      5.06
      ...
      42.75
      3.16
      4.69
      15.00
      15.69
      13.63
      54.94
      5.72
      6.34
      9.69
    
    
      1993-11-17
      21.75
      2.59
      10.06
      206.25
      11.78
      20.38
      4.12
      5.22
      36.25
      5.26
      ...
      42.75
      3.50
      4.75
      14.94
      15.44
      13.76
      53.20
      5.69
      6.59
      9.19
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      2014-02-27
      119.34
      69.92
      39.89
      36.09
      16.49
      128.56
      43.15
      54.28
      49.80
      13.78
      ...
      29.79
      53.03
      84.25
      74.58
      94.99
      10.93
      33.91
      109.84
      46.86
      31.57
    
    
      2014-03-10
      121.10
      68.04
      42.45
      35.82
      17.47
      126.89
      44.46
      52.59
      49.21
      13.84
      ...
      29.61
      53.69
      86.32
      74.77
      94.71
      10.85
      33.06
      110.34
      45.88
      31.43
    
    
      2014-03-19
      121.04
      67.63
      42.66
      35.52
      17.44
      122.24
      46.19
      51.05
      48.94
      13.83
      ...
      28.84
      53.66
      89.70
      76.14
      96.24
      10.91
      31.67
      106.20
      45.21
      29.83
    
    
      2014-03-28
      117.30
      64.88
      43.21
      37.44
      16.98
      124.46
      45.36
      48.34
      50.00
      14.29
      ...
      29.01
      52.93
      89.60
      77.31
      96.78
      11.43
      32.00
      105.73
      46.05
      30.75
    
    
      2014-04-08
      117.12
      61.82
      43.53
      37.59
      16.44
      124.15
      45.40
      47.15
      51.41
      14.38
      ...
      27.90
      51.43
      90.28
      77.22
      99.94
      11.50
      32.28
      107.03
      44.70
      29.88
    
    
      2014-04-17
      117.94
      64.04
      45.04
      40.68
      16.15
      127.92
      46.47
      48.49
      51.73
      14.26
      ...
      29.81
      46.81
      87.53
      79.76
      101.17
      12.00
      31.78
      104.83
      44.58
      28.65
    
    
      2014-04-29
      117.13
      60.91
      43.23
      40.32
      15.24
      128.37
      46.97
      47.57
      53.88
      14.32
      ...
      30.42
      46.18
      83.35
      77.96
      103.11
      11.83
      32.12
      105.11
      45.71
      28.76
    
    
      2014-05-08
      119.04
      59.08
      44.07
      39.84
      14.93
      130.57
      47.35
      46.82
      53.06
      14.08
      ...
      30.31
      45.34
      85.91
      77.01
      100.74
      11.93
      34.07
      105.89
      46.39
      28.02
    
    
      2014-05-19
      118.23
      62.20
      43.89
      40.53
      14.67
      131.35
      48.50
      51.55
      51.02
      14.00
      ...
      31.11
      46.65
      87.20
      75.53
      101.06
      12.20
      35.16
      108.13
      47.24
      28.47
    
    
      2014-05-29
      120.22
      65.45
      44.64
      40.74
      15.15
      135.14
      49.70
      52.69
      52.79
      13.96
      ...
      31.34
      46.00
      92.16
      77.21
      101.60
      12.85
      36.04
      110.34
      48.26
      29.79
    
    
      2014-06-09
      122.95
      66.93
      45.14
      43.05
      15.84
      137.96
      49.50
      54.51
      53.49
      14.39
      ...
      30.93
      47.47
      92.32
      74.99
      102.42
      12.76
      35.51
      107.80
      47.69
      29.77
    
    
      2014-06-18
      130.72
      73.08
      44.43
      43.92
      15.65
      132.48
      47.58
      56.96
      54.12
      15.03
      ...
      32.70
      46.83
      91.89
      74.91
      102.03
      12.34
      35.15
      107.26
      47.95
      29.77
    
    
      2014-06-27
      128.61
      72.00
      43.82
      44.79
      15.33
      128.54
      47.69
      56.40
      55.33
      15.53
      ...
      33.35
      48.57
      94.36
      76.65
      102.83
      12.20
      35.10
      110.40
      47.49
      29.39
    
    
      2014-07-09
      129.28
      71.57
      46.15
      47.07
      15.60
      126.79
      48.43
      55.63
      54.71
      15.38
      ...
      32.25
      47.54
      98.15
      76.61
      102.31
      12.81
      32.77
      109.98
      46.27
      28.52
    
    
      2014-07-18
      129.98
      72.55
      48.10
      49.47
      15.49
      127.64
      48.73
      57.36
      54.39
      15.19
      ...
      32.27
      41.33
      100.81
      75.71
      104.37
      13.10
      32.46
      111.02
      47.19
      29.42
    
    
      2014-07-29
      135.86
      72.27
      47.57
      50.73
      15.34
      122.32
      46.18
      55.47
      54.09
      15.10
      ...
      31.61
      41.26
      101.88
      74.20
      98.98
      12.99
      32.33
      108.86
      46.83
      27.98
    
    
      2014-08-07
      131.16
      67.83
      48.52
      48.00
      15.12
      119.84
      42.76
      53.63
      49.84
      14.39
      ...
      32.96
      42.13
      100.62
      73.90
      99.03
      13.39
      35.86
      112.37
      47.34
      28.12
    
    
      2014-08-18
      134.14
      71.06
      49.79
      48.96
      15.45
      124.98
      47.01
      53.79
      51.47
      14.75
      ...
      34.28
      41.78
      101.95
      75.52
      99.64
      13.67
      35.56
      114.87
      48.40
      29.25
    
    
      2014-08-27
      133.00
      71.55
      49.58
      50.13
      16.20
      128.20
      46.50
      53.78
      53.15
      15.04
      ...
      33.95
      43.06
      100.92
      77.51
      99.26
      13.53
      36.15
      115.51
      47.55
      29.07
    
    
      2014-09-08
      131.99
      73.39
      50.53
      51.03
      16.35
      127.98
      47.33
      54.37
      53.53
      14.94
      ...
      33.10
      43.22
      98.10
      76.32
      97.43
      13.70
      34.75
      116.12
      46.57
      29.19
    
    
      2014-09-17
      130.92
      67.30
      51.20
      48.84
      16.77
      127.76
      46.16
      54.89
      53.32
      14.47
      ...
      31.96
      42.78
      96.19
      76.12
      94.25
      13.26
      33.74
      115.64
      45.38
      28.92
    
    
      2014-09-26
      134.77
      68.36
      50.87
      48.57
      17.03
      128.69
      44.42
      54.28
      52.25
      14.22
      ...
      32.22
      41.16
      94.70
      77.35
      94.52
      12.99
      34.11
      113.42
      46.01
      28.59
    
    
      2014-10-07
      125.85
      65.96
      49.57
      47.85
      16.88
      123.32
      42.80
      56.59
      52.96
      14.24
      ...
      32.09
      37.58
      85.10
      75.20
      90.22
      12.71
      31.77
      105.34
      45.72
      25.96
    
    
      2014-10-16
      127.71
      62.86
      43.17
      43.80
      16.08
      120.29
      43.80
      49.89
      53.94
      12.98
      ...
      33.89
      43.20
      91.68
      76.38
      94.49
      12.55
      33.35
      112.72
      46.14
      27.53
    
    
      2014-10-27
      128.55
      67.03
      44.62
      49.14
      16.59
      122.12
      51.82
      54.77
      56.76
      13.42
      ...
      34.46
      43.77
      100.00
      77.26
      94.52
      13.16
      33.33
      109.35
      50.77
      29.38
    
    
      2014-11-05
      133.97
      71.37
      50.31
      48.78
      17.34
      124.22
      53.97
      58.32
      59.74
      13.95
      ...
      33.95
      42.83
      97.83
      82.94
      94.66
      13.45
      34.99
      112.95
      51.86
      29.28
    
    
      2014-11-14
      134.71
      71.42
      51.35
      50.82
      17.14
      128.86
      55.34
      59.66
      56.06
      13.47
      ...
      34.83
      44.67
      102.76
      85.40
      95.72
      13.68
      35.75
      114.50
      54.48
      29.33
    
    
      2014-11-25
      145.35
      72.00
      52.93
      52.11
      17.10
      134.81
      55.91
      61.03
      56.58
      13.92
      ...
      35.30
      46.90
      103.72
      84.76
      94.37
      14.20
      37.63
      109.53
      55.61
      27.82
    
    
      2014-12-05
      146.73
      72.40
      53.25
      50.85
      17.68
      132.21
      57.26
      60.90
      57.76
      13.68
      ...
      35.59
      44.16
      105.40
      83.94
      86.90
      13.34
      36.50
      102.03
      53.41
      26.82
    
    
      2014-12-16
      136.82
      72.32
      49.32
      43.29
      16.72
      124.25
      55.68
      56.63
      57.83
      12.79
      ...
      36.62
      44.08
      113.11
      86.43
      93.78
      14.08
      38.71
      107.18
      53.76
      28.52
    
  

788 rows × 1995 columns



In [112]:

    
y_train_true_df, y_train_pred_df, y_val_true_df, y_val_pred_df = ev.run_single_val(x, y, ahead_days, predictor)









    



Multiindex = True
2014-12-16 00:00:00
0
0



In [113]:

    
from sklearn.metrics import r2_score

r2_score(y_train_true_df, y_train_pred_df, multioutput='raw_values')









    Out[113]:





array([ 0.99862915])



In [117]:

    
tickers = y_train_true_df.index.levels[1]
tickers









    Out[117]:





Index(['AAPL', 'ABT', 'ADBE', 'ADM', 'ADP', 'ADSK', 'AEP', 'AES', 'AET', 'AFL',
       ...
       'WHR', 'WMB', 'WMT', 'WY', 'XEL', 'XLNX', 'XOM', 'XRAY', 'XRX', 'ZION'],
      dtype='object', length=285)



In [127]:

    
y_train_true_df.loc[(slice(None), 'AAPL'),:]









    Out[127]:







  
    
      
      
      target
    
  
  
    
      1993-01-29
      AAPL
      22.75
    
    
      1993-02-09
      AAPL
      21.38
    
    
      1993-02-19
      AAPL
      21.62
    
    
      1993-03-02
      AAPL
      22.69
    
    
      1993-03-11
      AAPL
      22.00
    
    
      1993-03-22
      AAPL
      22.06
    
    
      1993-03-31
      AAPL
      21.19
    
    
      1993-04-12
      AAPL
      21.31
    
    
      1993-04-21
      AAPL
      21.88
    
    
      1993-04-30
      AAPL
      21.62
    
    
      1993-05-11
      AAPL
      22.25
    
    
      1993-05-20
      AAPL
      21.56
    
    
      1993-06-01
      AAPL
      21.00
    
    
      1993-06-10
      AAPL
      20.19
    
    
      1993-06-21
      AAPL
      19.62
    
    
      1993-06-30
      AAPL
      20.75
    
    
      1993-07-12
      AAPL
      20.94
    
    
      1993-07-21
      AAPL
      20.12
    
    
      1993-07-30
      AAPL
      21.06
    
    
      1993-08-10
      AAPL
      21.75
    
    
      1993-08-19
      AAPL
      22.00
    
    
      1993-08-30
      AAPL
      20.44
    
    
      1993-09-09
      AAPL
      19.75
    
    
      1993-09-20
      AAPL
      19.06
    
    
      1993-09-29
      AAPL
      19.56
    
    
      1993-10-08
      AAPL
      20.19
    
    
      1993-10-19
      AAPL
      21.00
    
    
      1993-10-28
      AAPL
      21.00
    
    
      1993-11-08
      AAPL
      21.75
    
    
      1993-11-17
      AAPL
      21.94
    
    
      ...
      ...
      ...
    
    
      2014-02-18
      AAPL
      119.34
    
    
      2014-02-27
      AAPL
      121.10
    
    
      2014-03-10
      AAPL
      121.04
    
    
      2014-03-19
      AAPL
      117.30
    
    
      2014-03-28
      AAPL
      117.12
    
    
      2014-04-08
      AAPL
      117.94
    
    
      2014-04-17
      AAPL
      117.13
    
    
      2014-04-29
      AAPL
      119.04
    
    
      2014-05-08
      AAPL
      118.23
    
    
      2014-05-19
      AAPL
      120.22
    
    
      2014-05-29
      AAPL
      122.95
    
    
      2014-06-09
      AAPL
      130.72
    
    
      2014-06-18
      AAPL
      128.61
    
    
      2014-06-27
      AAPL
      129.28
    
    
      2014-07-09
      AAPL
      129.98
    
    
      2014-07-18
      AAPL
      135.86
    
    
      2014-07-29
      AAPL
      131.16
    
    
      2014-08-07
      AAPL
      134.14
    
    
      2014-08-18
      AAPL
      133.00
    
    
      2014-08-27
      AAPL
      131.99
    
    
      2014-09-08
      AAPL
      130.92
    
    
      2014-09-17
      AAPL
      134.77
    
    
      2014-09-26
      AAPL
      125.85
    
    
      2014-10-07
      AAPL
      127.71
    
    
      2014-10-16
      AAPL
      128.55
    
    
      2014-10-27
      AAPL
      133.97
    
    
      2014-11-05
      AAPL
      134.71
    
    
      2014-11-14
      AAPL
      145.35
    
    
      2014-11-25
      AAPL
      146.73
    
    
      2014-12-05
      AAPL
      136.82
    
  

787 rows × 1 columns



In [128]:

    
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

r2_train_score = []
mae_train = []

for ticker in tickers:
    y_true = y_train_true_df.loc[(slice(None), 'AAPL'),:]
    y_pred = y_train_pred_df.loc[(slice(None), 'AAPL'),:]
    r2_train_score.append(r2_score(y_true, y_pred))
    mae_train.append(mean_absolute_error(y_true, y_pred))



In [132]:

    
np.mean(r2_train_score)









    Out[132]:





0.99653405959847863



In [133]:

    
np.mean(mae_train)









    Out[133]:





1.1550154292975134



In [ ]:



In [72]:

    
train_days = 252
step_eval_days = 252
r2_train_means, r2_train_stds, y_val_true_df, y_val_pred_df = ev.roll_evaluate(x, 
                                                                               y, 
                                                                               train_days, 
                                                                               step_eval_days, 
                                                                               ahead_days, 
                                                                               predictor, 
                                                                               verbose=True)









    



Evaluating approximately 20 training/evaluation pairs
Multiindex = True
1994-01-27 00:00:00
0
0
24






    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-72-0a55fab3ef2d> in <module>()
      7                                                                                ahead_days,
      8                                                                                predictor,
----> 9                                                                                verbose=True)

/home/miguel/udacity/Machine Learning Nanodegree/projects/capstone/capstone/predictor/evaluation.py in roll_evaluate(x, y, train_days, step_eval_days, ahead_days, predictor, verbose)
     97         print(y_train_true.isnull().sum().sum())
     98         # Calculate R^2 for training and append
---> 99         scores = r2_score(y_train_true, y_train_pred, multioutput='raw_values')
    100         r2_train_means.append(np.mean(scores))
    101         r2_train_stds.append(np.std(scores))

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/sklearn/metrics/regression.py in r2_score(y_true, y_pred, sample_weight, multioutput)
    453     """
    454     y_type, y_true, y_pred, multioutput = _check_reg_targets(
--> 455         y_true, y_pred, multioutput)
    456 
    457     if sample_weight is not None:

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/sklearn/metrics/regression.py in _check_reg_targets(y_true, y_pred, multioutput)
     73     """
     74     check_consistent_length(y_true, y_pred)
---> 75     y_true = check_array(y_true, ensure_2d=False)
     76     y_pred = check_array(y_pred, ensure_2d=False)
     77 

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    405                              % (array.ndim, estimator_name))
    406         if force_all_finite:
--> 407             _assert_all_finite(array)
    408 
    409     shape_repr = _shape_repr(array.shape)

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/sklearn/utils/validation.py in _assert_all_finite(X)
     56             and not np.isfinite(X).all()):
     57         raise ValueError("Input contains NaN, infinity"
---> 58                          " or a value too large for %r." % X.dtype)
     59 
     60 

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').



In [ ]:

    
print(len(r2_train_means))
print(len(r2_train_stds))
print(y_val_true_df.shape)
print(y_val_pred_df)



In [27]:

    
plt.plot(r2_train_means)









    Out[27]:





[<matplotlib.lines.Line2D at 0x7f3fa3fbd630>]



In [28]:

    
from sklearn.metrics import mean_absolute_error

mae = mean_absolute_error(y_val_true_df, y_val_pred_df)
mae









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-28-f03d5a0b0871> in <module>()
      1 from sklearn.metrics import mean_absolute_error
      2 
----> 3 mae = mean_absolute_error(y_val_true_df, y_val_pred_df)
      4 mae

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/sklearn/metrics/regression.py in mean_absolute_error(y_true, y_pred, sample_weight, multioutput)
    161     """
    162     y_type, y_true, y_pred, multioutput = _check_reg_targets(
--> 163         y_true, y_pred, multioutput)
    164     output_errors = np.average(np.abs(y_pred - y_true),
    165                                weights=sample_weight, axis=0)

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/sklearn/metrics/regression.py in _check_reg_targets(y_true, y_pred, multioutput)
     73     """
     74     check_consistent_length(y_true, y_pred)
---> 75     y_true = check_array(y_true, ensure_2d=False)
     76     y_pred = check_array(y_pred, ensure_2d=False)
     77 

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    414                              " minimum of %d is required%s."
    415                              % (n_samples, shape_repr, ensure_min_samples,
--> 416                                 context))
    417 
    418     if ensure_min_features > 0 and array.ndim == 2:

ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required.



In [ ]:

		target
1993-01-29	AAL	19.730000
	ABBV	2.111429
	ABC	2.595714
	ADI	22.822857
	ADP	15.481429

		target
2014-12-16	A	44.962857
	AAL	2.592857
	AAP	56.491429
	AAPL	165.978571
	ABBV	111.152857

ticker	A	AAL	AAP	AAPL	ABBV	ABC	ABT	ACN	ADBE	ADI	...	XLNX	XOM	XRAY	XRX	XYL	YHOO	YUM	ZBH	ZION	ZTS
date
1993-01-29	NaN	19.75	NaN	NaN	2.03	2.44	NaN	NaN	NaN	22.75	...	14.40	10.29	NaN	7.25	NaN	NaN	NaN	NaN	11.56	NaN
1993-02-09	NaN	22.12	NaN	NaN	1.96	2.50	NaN	NaN	NaN	21.38	...	13.82	10.04	NaN	6.75	NaN	NaN	NaN	NaN	10.69	NaN
1993-02-19	NaN	22.25	NaN	NaN	1.94	2.44	NaN	NaN	NaN	21.62	...	13.97	10.49	NaN	6.42	NaN	NaN	NaN	NaN	11.50	NaN
1993-03-02	NaN	23.75	NaN	NaN	2.03	2.44	NaN	NaN	NaN	22.69	...	14.05	10.74	NaN	7.50	NaN	NaN	NaN	NaN	11.75	NaN
1993-03-11	NaN	22.25	NaN	NaN	1.90	2.39	NaN	NaN	NaN	22.00	...	13.59	10.91	NaN	6.80	NaN	NaN	NaN	NaN	12.00	NaN

ticker	A	AAL	AAP	AAPL	ABBV	ABC	ABT	ACN	ADBE	ADI	...	XLNX	XOM	XRAY	XRX	XYL	YHOO	YUM	ZBH	ZION	ZTS
date
2014-12-16	47.5	2.65	56.28	167.44	113.99	59.97	160.0	66.98	91.26	149.12	...	14.14	20.59	58.4	53.84	38.95	50.86	73.14	114.17	28.56	44.2

feature	Close										...	Volume
	SPY	MMM	ABT	ADBE	AMD	AES	AET	AFL	APD	ALK	...	HCN	WDC	WY	WHR	WFM	WMB	XEL	XRX	XLNX	ZION
date
1993-01-29	43.94	24.50	6.88	2.59	18.75	4.41	6.42	4.49	21.94	4.19	...	10600.0	1552200.0	805900.0	230300.0	268800.0	732198.0	87800.0	7633602.0	1745196.0	33600.0
1993-02-01	44.25	24.69	6.88	2.72	19.12	4.53	6.64	4.52	22.38	4.19	...	11900.0	869600.0	647900.0	160200.0	14400.0	722598.0	72400.0	3001200.0	3574800.0	32000.0
1993-02-02	44.34	24.72	6.53	2.84	20.25	4.53	6.62	4.57	22.31	4.16	...	9700.0	1149400.0	1189900.0	118100.0	163200.0	808398.0	242200.0	1388598.0	2652396.0	251600.0
1993-02-03	44.81	25.19	6.91	2.70	20.50	4.49	6.50	4.65	22.69	4.22	...	5300.0	611000.0	1542300.0	246300.0	257600.0	3141198.0	272200.0	1228200.0	5040396.0	254800.0
1993-02-04	45.00	26.06	6.84	2.73	20.12	4.49	6.73	4.84	23.19	4.38	...	11700.0	1102800.0	1313800.0	354500.0	1473600.0	1550202.0	162800.0	1675602.0	7033200.0	317200.0