In [1]:
# Basic imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
from time import time
from sklearn.metrics import r2_score, median_absolute_error

%matplotlib inline

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('../../')

from utils import preprocessing as pp


Populating the interactive namespace from numpy and matplotlib

In [2]:
%pwd


Out[2]:
'/home/miguel/udacity/Machine Learning Nanodegree/projects/capstone/capstone/notebooks/dev'

In [3]:
# Getting the data
data_df = pd.read_pickle('../../data/data_train_val_df.pkl')

sys.path.append('../../')
import predictor.feature_extraction as fe

train_time = -1 # In real time days
base_days = 7 # In market days
step_days = 7 # market days
ahead_days = 1 # market days
today = data_df.index[-1] # Real date

tic = time()
x, y = fe.generate_train_intervals(data_df, 
                                   train_time, 
                                   base_days, 
                                   step_days, 
                                   ahead_days, 
                                   today, 
                                   fe.feature_close_one_to_one)
toc = time()
print('Elapsed time: %i seconds.' % (toc-tic))


Elapsed time: 194 seconds.

In [4]:
print(x.shape)
x.head()


(314954, 7)
Out[4]:
0 1 2 3 4 5 6
2012-07-05 LNC 36.98 35.50 35.17 34.74 34.00 33.75 34.23
2003-03-12 SPLS 52.75 54.32 53.57 54.49 54.32 55.00 55.17
2010-05-14 DD 53.04 51.62 51.42 51.12 48.14 50.30 48.68
2002-07-22 HIG 29.55 28.50 30.75 30.75 30.65 33.12 33.03
2014-03-10 TGNA 179.85 179.64 180.44 181.11 179.53 181.50 182.35

In [5]:
y.shape


Out[5]:
(314954, 1)

In [6]:
x_y = pd.concat([x, y], axis=1)
x_y.shape


Out[6]:
(314954, 8)

In [7]:
x_y_sorted = x_y.sort_index()
x_y_sorted.head()


Out[7]:
0 1 2 3 4 5 6 target
1993-01-29 AAL 18.75 19.12 20.25 20.50 20.12 19.62 19.75 19.75
ABBV 2.12 2.19 2.15 2.14 2.12 2.04 2.02 2.03
ABC 2.62 2.62 2.56 2.62 2.69 2.62 2.44 2.44
ADI 21.94 22.38 22.31 22.69 23.19 24.00 23.25 22.75
ADP 15.25 15.25 15.06 15.56 15.69 15.81 15.75 15.69

In [8]:
x_y_sorted


Out[8]:
0 1 2 3 4 5 6 target
1993-01-29 AAL 18.75 19.12 20.25 20.50 20.12 19.62 19.75 19.75
ABBV 2.12 2.19 2.15 2.14 2.12 2.04 2.02 2.03
ABC 2.62 2.62 2.56 2.62 2.69 2.62 2.44 2.44
ADI 21.94 22.38 22.31 22.69 23.19 24.00 23.25 22.75
ADP 15.25 15.25 15.06 15.56 15.69 15.81 15.75 15.69
ADS 7.00 7.06 7.25 7.25 8.00 8.12 8.19 8.62
ADSK 13.44 13.50 13.66 13.72 14.00 13.75 13.59 13.44
AEE 1.09 1.13 1.19 1.20 1.21 1.17 1.17 1.14
AEP 2.41 2.41 2.41 2.41 2.41 2.39 2.37 2.43
AES 11.24 11.03 11.35 11.24 11.24 11.24 11.08 11.03
AET 11.53 11.79 11.87 12.21 12.35 12.46 12.49 12.35
AFL 5.94 5.97 5.89 5.94 5.80 5.45 5.66 5.50
AIG 7.75 7.50 7.38 7.16 7.47 6.73 6.22 6.55
AIZ 17.56 17.12 17.19 17.88 18.12 18.06 17.62 17.94
AKAM 4.41 4.53 4.53 4.49 4.49 4.37 4.45 4.29
ALB 4.49 4.52 4.57 4.65 4.84 4.80 4.82 4.79
ALK 6.42 6.64 6.62 6.50 6.73 6.66 6.67 6.67
ALXN 250.30 252.42 257.96 265.88 268.26 264.83 261.92 265.62
AMAT 13.44 13.56 13.38 13.50 13.69 13.94 13.75 13.81
AMD 2.59 2.72 2.84 2.70 2.73 2.60 2.62 2.59
AN 2.05 2.12 2.14 2.12 2.11 2.08 2.08 1.98
ANTM 1.94 2.00 2.03 2.09 2.09 1.97 1.97 1.94
APA 223.13 219.38 221.25 222.00 228.00 232.88 229.50 225.75
APC 7.50 7.47 7.53 7.56 7.53 7.75 8.00 7.91
APD 33.88 34.12 34.25 34.88 34.50 34.50 34.62 34.38
APH 9.25 9.36 9.36 9.41 9.36 9.52 9.46 9.57
ARNC 6.77 7.18 7.18 7.25 7.25 7.28 7.36 7.25
AVB 1.50 1.53 1.51 1.53 1.70 1.72 1.70 1.68
AVY 16.69 16.62 16.62 16.94 17.38 17.19 16.94 16.62
AYI 6.88 6.88 6.53 6.91 6.84 6.88 6.66 6.84
... ... ... ... ... ... ... ... ... ...
2014-12-16 VRSN 62.12 63.99 65.43 66.78 66.09 66.29 66.27 66.35
VRTX 55.54 56.26 57.30 58.29 58.49 58.46 58.28 58.58
VTR 47.03 47.79 47.07 48.88 48.84 49.36 49.79 50.29
VZ 62.77 63.75 64.85 64.70 65.15 64.92 64.88 64.76
WAT 47.88 48.56 48.96 48.80 49.07 49.07 48.67 48.40
WBA 104.49 107.95 112.36 113.88 114.28 113.30 113.11 113.09
WDC 41.84 43.63 44.80 46.00 45.64 45.73 45.35 45.63
WEC 174.43 188.93 190.52 189.73 190.29 191.13 191.15 192.45
WFC 114.99 118.22 118.38 119.20 119.28 120.05 119.95 119.89
WFM 81.48 82.90 84.31 84.76 84.71 85.16 85.68 86.32
WHR 34.74 35.01 35.30 36.19 36.49 36.58 36.62 36.63
WLTW 100.94 106.82 108.67 113.62 111.77 113.62 111.95 110.88
WM 52.77 53.79 55.21 54.45 54.97 55.47 55.34 55.28
WMB 140.52 138.97 145.10 150.11 147.48 151.69 150.90 150.37
WU 82.96 84.23 85.94 85.16 86.38 86.66 86.43 86.91
WY 16.95 17.47 17.78 18.00 18.05 18.12 18.07 18.09
WYN 34.22 34.75 35.35 35.40 35.65 35.43 36.23 36.58
WYNN 43.70 43.42 43.71 43.00 43.87 43.91 44.08 43.92
XEC 34.97 35.75 36.43 36.49 37.00 37.20 36.97 37.12
XEL 86.41 89.02 91.16 93.64 93.33 94.59 93.78 93.21
XLNX 13.35 13.46 13.89 13.96 13.97 14.07 14.08 14.14
XOM 19.31 19.60 20.09 20.07 20.15 20.58 20.59 20.59
XRAY 55.91 57.08 57.72 57.37 58.63 58.51 58.42 58.40
XRX 53.00 53.19 54.10 53.70 54.09 54.00 53.76 53.84
XYL 36.27 36.91 38.10 38.59 38.74 38.72 38.76 38.95
YHOO 48.85 50.12 50.91 50.88 51.15 50.02 50.65 50.86
YUM 69.02 70.60 71.74 71.17 72.46 72.68 72.79 73.14
ZBH 108.79 111.92 114.94 114.91 115.05 113.47 114.11 114.17
ZION 26.74 27.45 28.20 28.39 28.18 28.53 28.52 28.56
ZTS 40.85 41.13 43.15 43.51 43.41 42.97 43.84 44.20

314954 rows × 8 columns


In [9]:
x_y_sorted.loc[x_y_sorted.index.levels[0][-1]:]


Out[9]:
0 1 2 3 4 5 6 target
2014-12-16 A 42.67 44.06 45.01 45.12 45.41 45.63 46.84 47.50
AAL 2.50 2.55 2.55 2.57 2.66 2.67 2.65 2.65
AAP 54.78 55.99 57.06 56.75 57.49 56.66 56.71 56.28
AAPL 161.09 164.90 165.89 165.60 169.20 168.22 166.95 167.44
ABBV 106.74 109.41 112.65 111.78 112.94 112.54 112.01 113.99
ABC 55.92 57.07 57.75 58.56 58.63 59.41 59.74 59.97
ABT 157.01 159.55 160.74 159.85 159.44 161.19 160.23 160.00
ACN 65.25 66.57 67.92 67.71 66.97 64.35 66.21 66.98
ADBE 82.87 85.30 89.74 90.51 91.18 91.42 91.32 91.26
ADI 136.82 140.96 144.45 147.04 147.83 148.58 148.58 149.12
ADM 51.42 51.76 52.95 52.36 52.49 53.17 53.76 54.02
ADP 70.73 72.27 74.07 74.48 74.70 74.49 74.45 74.56
ADS 46.28 46.91 47.43 47.42 47.59 47.48 47.45 47.48
ADSK 16.72 17.26 17.53 17.62 17.71 17.93 17.98 17.98
AEE 23.85 24.46 25.01 24.89 25.40 25.40 25.25 25.19
AEP 48.73 50.11 52.29 52.36 53.14 53.79 53.98 53.98
AES 49.32 49.64 51.16 52.24 52.72 53.17 53.11 53.42
AET 82.91 84.68 85.68 84.71 85.70 85.17 85.24 85.20
AFL 56.63 57.01 59.46 59.90 60.19 60.65 60.99 61.03
AGN 65.07 66.19 67.71 68.65 68.71 69.23 69.17 69.06
AIG 159.44 163.48 168.48 170.17 164.53 157.52 160.67 163.46
AIV 19.38 19.67 20.04 20.25 20.30 20.23 20.35 20.42
AIZ 597.93 602.46 615.48 618.60 616.64 621.30 617.43 618.76
AJG 128.36 132.98 137.17 137.38 140.05 139.48 139.94 139.66
AKAM 12.79 13.13 13.21 13.50 13.66 13.65 13.81 14.22
ALB 57.81 59.07 60.32 61.18 61.66 61.94 61.94 61.94
ALK 85.64 87.18 90.13 90.84 90.41 89.45 89.59 89.84
ALL 54.97 57.20 58.84 58.69 60.00 60.82 60.48 60.89
ALLE 35.66 36.12 36.80 37.51 38.32 38.13 37.80 38.02
ALXN 52.88 54.02 55.01 55.78 56.13 56.44 56.28 56.42
... ... ... ... ... ... ... ... ...
VRSN 62.12 63.99 65.43 66.78 66.09 66.29 66.27 66.35
VRTX 55.54 56.26 57.30 58.29 58.49 58.46 58.28 58.58
VTR 47.03 47.79 47.07 48.88 48.84 49.36 49.79 50.29
VZ 62.77 63.75 64.85 64.70 65.15 64.92 64.88 64.76
WAT 47.88 48.56 48.96 48.80 49.07 49.07 48.67 48.40
WBA 104.49 107.95 112.36 113.88 114.28 113.30 113.11 113.09
WDC 41.84 43.63 44.80 46.00 45.64 45.73 45.35 45.63
WEC 174.43 188.93 190.52 189.73 190.29 191.13 191.15 192.45
WFC 114.99 118.22 118.38 119.20 119.28 120.05 119.95 119.89
WFM 81.48 82.90 84.31 84.76 84.71 85.16 85.68 86.32
WHR 34.74 35.01 35.30 36.19 36.49 36.58 36.62 36.63
WLTW 100.94 106.82 108.67 113.62 111.77 113.62 111.95 110.88
WM 52.77 53.79 55.21 54.45 54.97 55.47 55.34 55.28
WMB 140.52 138.97 145.10 150.11 147.48 151.69 150.90 150.37
WU 82.96 84.23 85.94 85.16 86.38 86.66 86.43 86.91
WY 16.95 17.47 17.78 18.00 18.05 18.12 18.07 18.09
WYN 34.22 34.75 35.35 35.40 35.65 35.43 36.23 36.58
WYNN 43.70 43.42 43.71 43.00 43.87 43.91 44.08 43.92
XEC 34.97 35.75 36.43 36.49 37.00 37.20 36.97 37.12
XEL 86.41 89.02 91.16 93.64 93.33 94.59 93.78 93.21
XLNX 13.35 13.46 13.89 13.96 13.97 14.07 14.08 14.14
XOM 19.31 19.60 20.09 20.07 20.15 20.58 20.59 20.59
XRAY 55.91 57.08 57.72 57.37 58.63 58.51 58.42 58.40
XRX 53.00 53.19 54.10 53.70 54.09 54.00 53.76 53.84
XYL 36.27 36.91 38.10 38.59 38.74 38.72 38.76 38.95
YHOO 48.85 50.12 50.91 50.88 51.15 50.02 50.65 50.86
YUM 69.02 70.60 71.74 71.17 72.46 72.68 72.79 73.14
ZBH 108.79 111.92 114.94 114.91 115.05 113.47 114.11 114.17
ZION 26.74 27.45 28.20 28.39 28.18 28.53 28.52 28.56
ZTS 40.85 41.13 43.15 43.51 43.41 42.97 43.84 44.20

479 rows × 8 columns


In [10]:
x_y_sorted.index.levels[0][-1]


Out[10]:
Timestamp('2014-12-16 00:00:00')

In [11]:
fe.add_market_days(x_y_sorted.index.levels[0][-1], -ahead_days)


Out[11]:
Timestamp('2014-12-15 00:00:00')

In [12]:
x_train = x_y_sorted.loc[:fe.add_market_days(x_y_sorted.index.levels[0][-1], -ahead_days)]

In [13]:
x_train.shape


Out[13]:
(314475, 8)

In [14]:
x_y_val = x_y_sorted.loc[x_y_sorted.index[-1]]
pd.DataFrame(x_y_val).set_index(x_y_val.name, append=True)


Out[14]:
(2014-12-16 00:00:00, ZTS)
0 40.85
1 41.13
2 43.15
3 43.51
4 43.41
5 42.97
6 43.84
target 44.20

In [15]:
def reshape_by_symbol(y):
    # TODO: Fix multi/single level
    
    multiindex = y.index.nlevels > 1
    
    if multiindex:
        DATE_LEVEL_NAME = 'level_0'
    else:
        DATE_LEVEL_NAME = 'index'
    grouped_df = y.reset_index() \
        .groupby('level_0') \
        .apply(lambda x: x.reset_index(drop=True)) \
        .drop('level_0', axis=1)
    grouped_df.index = grouped_df.index.droplevel(level=1)
    grouped_df.rename(columns={'level_1':'ticker'}, inplace=True)
    reshaped_df = grouped_df.set_index('ticker', append=True).unstack()
    reshaped_df.columns = reshaped_df.columns.droplevel(level=0)
    reshaped_df.index.name = 'date'
    return reshaped_df

In [16]:
def run_single_val(x, y, ahead_days, estimator):
    x_y = pd.concat([x, y], axis=1)
    x_y_sorted = x_y.sort_index()
    x_y_train = x_y_sorted.loc[:fe.add_market_days(x_y_sorted.index.levels[0][-1], -ahead_days)]
    x_y_val = x_y_sorted.loc[x_y_sorted.index.levels[0][-1]:]
    
    x_train = x_y_train.iloc[:,:-1]
    x_val = x_y_val.iloc[:,:-1]
    y_train_true = x_y_train.iloc[:,-1]
    y_val_true = x_y_val.iloc[:,-1]
    
    estimator.fit(x_train)
    y_train_pred = estimator.predict(x_train)
    y_val_pred = estimator.predict(x_val)
    
    y_train_true_df = pd.DataFrame(y_train_true)
    y_train_pred_df = pd.DataFrame(y_train_pred)
    y_val_true_df = pd.DataFrame(y_val_true)
    y_val_pred_df = pd.DataFrame(y_val_pred)
    
    return y_train_true_df, y_train_pred_df, y_val_true_df, y_val_pred_df

In [17]:
from predictor import dummy_mean_predictor as dmp

predictor = dmp.DummyPredictor()

In [18]:
y_train_true, y_train_pred, y_val_true, y_val_pred = run_single_val(x, y, 1, predictor)
print(y_train_true.shape)
print(y_train_pred.shape)
print(y_val_true.shape)
print(y_val_pred.shape)


(314475, 1)
(314475, 1)
(479, 1)
(479, 1)

In [19]:
y_val_true


Out[19]:
target
2014-12-16 A 47.50
AAL 2.65
AAP 56.28
AAPL 167.44
ABBV 113.99
ABC 59.97
ABT 160.00
ACN 66.98
ADBE 91.26
ADI 149.12
ADM 54.02
ADP 74.56
ADS 47.48
ADSK 17.98
AEE 25.19
AEP 53.98
AES 53.42
AET 85.20
AFL 61.03
AGN 69.06
AIG 163.46
AIV 20.42
AIZ 618.76
AJG 139.66
AKAM 14.22
ALB 61.94
ALK 89.84
ALL 60.89
ALLE 38.02
ALXN 56.42
... ...
VRSN 66.35
VRTX 58.58
VTR 50.29
VZ 64.76
WAT 48.40
WBA 113.09
WDC 45.63
WEC 192.45
WFC 119.89
WFM 86.32
WHR 36.63
WLTW 110.88
WM 55.28
WMB 150.37
WU 86.91
WY 18.09
WYN 36.58
WYNN 43.92
XEC 37.12
XEL 93.21
XLNX 14.14
XOM 20.59
XRAY 58.40
XRX 53.84
XYL 38.95
YHOO 50.86
YUM 73.14
ZBH 114.17
ZION 28.56
ZTS 44.20

479 rows × 1 columns


In [20]:
pd.DataFrame(y_train_pred).reset_index()


Out[20]:
level_0 level_1 0
0 1993-01-29 AAL 19.730000
1 1993-01-29 ABBV 2.111429
2 1993-01-29 ABC 2.595714
3 1993-01-29 ADI 22.822857
4 1993-01-29 ADP 15.481429
5 1993-01-29 ADS 7.552857
6 1993-01-29 ADSK 13.665714
7 1993-01-29 AEE 1.165714
8 1993-01-29 AEP 2.401429
9 1993-01-29 AES 11.202857
10 1993-01-29 AET 12.100000
11 1993-01-29 AFL 5.807143
12 1993-01-29 AIG 7.172857
13 1993-01-29 AIZ 17.650000
14 1993-01-29 AKAM 4.467143
15 1993-01-29 ALB 4.670000
16 1993-01-29 ALK 6.605714
17 1993-01-29 ALXN 260.224286
18 1993-01-29 AMAT 13.608571
19 1993-01-29 AMD 2.685714
20 1993-01-29 AN 2.100000
21 1993-01-29 ANTM 2.012857
22 1993-01-29 APA 225.162857
23 1993-01-29 APC 7.620000
24 1993-01-29 APD 34.392857
25 1993-01-29 APH 9.388571
26 1993-01-29 ARNC 7.181429
27 1993-01-29 AVB 1.598571
28 1993-01-29 AVY 16.911429
29 1993-01-29 AYI 6.797143
... ... ... ...
314445 2014-12-05 VRSN 64.008571
314446 2014-12-05 VRTX 57.330000
314447 2014-12-05 VTR 47.474286
314448 2014-12-05 VZ 62.974286
314449 2014-12-05 WAT 48.060000
314450 2014-12-05 WBA 106.317143
314451 2014-12-05 WDC 45.385714
314452 2014-12-05 WEC 184.938571
314453 2014-12-05 WFC 115.628571
314454 2014-12-05 WFM 83.567143
314455 2014-12-05 WHR 35.907143
314456 2014-12-05 WLTW 100.397143
314457 2014-12-05 WM 54.315714
314458 2014-12-05 WMB 152.341429
314459 2014-12-05 WU 83.781429
314460 2014-12-05 WY 17.835714
314461 2014-12-05 WYN 34.711429
314462 2014-12-05 WYNN 46.145714
314463 2014-12-05 XEC 34.422857
314464 2014-12-05 XEL 89.752857
314465 2014-12-05 XLNX 13.784286
314466 2014-12-05 XOM 20.178571
314467 2014-12-05 XRAY 57.842857
314468 2014-12-05 XRX 54.732857
314469 2014-12-05 XYL 37.255714
314470 2014-12-05 YHOO 50.045714
314471 2014-12-05 YUM 73.715714
314472 2014-12-05 ZBH 112.841429
314473 2014-12-05 ZION 27.728571
314474 2014-12-05 ZTS 43.008571

314475 rows × 3 columns


In [21]:
grouped_df = pd.DataFrame(y_train_pred).reset_index().groupby('level_0').apply(lambda x: x.reset_index(drop=True)).drop('level_0', axis=1)
grouped_df.index = grouped_df.index.droplevel(level=1)
grouped_df.rename(columns={'level_1':'ticker'}, inplace=True)
grouped_df


Out[21]:
ticker 0
level_0
1993-01-29 AAL 19.730000
1993-01-29 ABBV 2.111429
1993-01-29 ABC 2.595714
1993-01-29 ADI 22.822857
1993-01-29 ADP 15.481429
1993-01-29 ADS 7.552857
1993-01-29 ADSK 13.665714
1993-01-29 AEE 1.165714
1993-01-29 AEP 2.401429
1993-01-29 AES 11.202857
1993-01-29 AET 12.100000
1993-01-29 AFL 5.807143
1993-01-29 AIG 7.172857
1993-01-29 AIZ 17.650000
1993-01-29 AKAM 4.467143
1993-01-29 ALB 4.670000
1993-01-29 ALK 6.605714
1993-01-29 ALXN 260.224286
1993-01-29 AMAT 13.608571
1993-01-29 AMD 2.685714
1993-01-29 AN 2.100000
1993-01-29 ANTM 2.012857
1993-01-29 APA 225.162857
1993-01-29 APC 7.620000
1993-01-29 APD 34.392857
1993-01-29 APH 9.388571
1993-01-29 ARNC 7.181429
1993-01-29 AVB 1.598571
1993-01-29 AVY 16.911429
1993-01-29 AYI 6.797143
... ... ...
2014-12-05 VRSN 64.008571
2014-12-05 VRTX 57.330000
2014-12-05 VTR 47.474286
2014-12-05 VZ 62.974286
2014-12-05 WAT 48.060000
2014-12-05 WBA 106.317143
2014-12-05 WDC 45.385714
2014-12-05 WEC 184.938571
2014-12-05 WFC 115.628571
2014-12-05 WFM 83.567143
2014-12-05 WHR 35.907143
2014-12-05 WLTW 100.397143
2014-12-05 WM 54.315714
2014-12-05 WMB 152.341429
2014-12-05 WU 83.781429
2014-12-05 WY 17.835714
2014-12-05 WYN 34.711429
2014-12-05 WYNN 46.145714
2014-12-05 XEC 34.422857
2014-12-05 XEL 89.752857
2014-12-05 XLNX 13.784286
2014-12-05 XOM 20.178571
2014-12-05 XRAY 57.842857
2014-12-05 XRX 54.732857
2014-12-05 XYL 37.255714
2014-12-05 YHOO 50.045714
2014-12-05 YUM 73.715714
2014-12-05 ZBH 112.841429
2014-12-05 ZION 27.728571
2014-12-05 ZTS 43.008571

314475 rows × 2 columns


In [22]:
grouped_df.set_index('ticker', append=True).unstack()


Out[22]:
0
ticker A AAL AAP AAPL ABBV ABC ABT ACN ADBE ADI ... XLNX XOM XRAY XRX XYL YHOO YUM ZBH ZION ZTS
level_0
1993-01-29 NaN 19.730000 NaN NaN 2.111429 2.595714 NaN NaN NaN 22.822857 ... 14.224286 10.175714 NaN 7.888571 NaN NaN NaN NaN 11.374286 NaN
1993-02-09 NaN 20.447143 NaN NaN 1.954286 2.451429 NaN NaN NaN 22.285714 ... 14.102857 10.068571 NaN 7.261429 NaN NaN NaN NaN 11.155714 NaN
1993-02-19 NaN 21.767143 NaN NaN 1.934286 2.402857 NaN NaN NaN 21.671429 ... 13.700000 10.248571 NaN 6.497143 NaN NaN NaN NaN 11.205714 NaN
1993-03-02 NaN 23.177143 NaN NaN 1.984286 2.418571 NaN NaN NaN 22.345714 ... 13.998571 10.571429 NaN 6.847143 NaN NaN NaN NaN 11.282857 NaN
1993-03-11 NaN 23.410000 NaN NaN 1.991429 2.430000 NaN NaN NaN 22.285714 ... 13.835714 10.688571 NaN 7.540000 NaN NaN NaN NaN 11.615714 NaN
1993-03-22 NaN 22.607143 NaN NaN 1.892857 2.398571 NaN NaN NaN 22.221429 ... 13.804286 10.842857 NaN 7.067143 NaN NaN NaN NaN 12.142857 NaN
1993-03-31 NaN 23.250000 NaN NaN 1.798571 2.071429 NaN NaN NaN 21.311429 ... 13.500000 11.067143 NaN 7.010000 NaN NaN NaN NaN 12.070000 NaN
1993-04-12 NaN 25.607143 NaN NaN 1.741429 1.971429 NaN NaN NaN 21.500000 ... 13.337143 11.362857 NaN 6.032857 NaN NaN NaN NaN 11.794286 NaN
1993-04-21 NaN 29.784286 NaN NaN 1.785714 1.862857 NaN NaN NaN 21.721429 ... 12.881429 10.978571 NaN 6.230000 NaN NaN NaN NaN 10.938571 NaN
1993-04-30 NaN 29.285714 NaN NaN 1.911429 1.815714 NaN NaN NaN 21.777143 ... 12.591429 10.865714 NaN 6.210000 NaN NaN NaN NaN 10.317143 NaN
1993-05-11 NaN 28.714286 NaN NaN 1.974286 1.832857 NaN NaN NaN 21.712857 ... 12.342857 10.470000 NaN 6.547143 NaN NaN NaN NaN 10.077143 NaN
1993-05-20 NaN 29.232857 NaN NaN 2.050000 1.947143 NaN NaN NaN 21.741429 ... 12.420000 10.625714 NaN 6.742857 NaN NaN NaN NaN 10.374286 NaN
1993-06-01 NaN 26.142857 NaN NaN 1.887143 2.015714 NaN NaN NaN 21.375714 ... 12.535714 10.300000 NaN 6.334286 NaN NaN NaN NaN 10.174286 NaN
1993-06-10 NaN 21.267143 NaN NaN 1.525714 1.858571 NaN NaN NaN 20.624286 ... 12.705714 10.528571 NaN 6.001429 NaN NaN NaN NaN 9.620000 NaN
1993-06-21 NaN 21.804286 NaN NaN 1.441429 1.857143 NaN NaN NaN 19.935714 ... 13.162857 10.604286 NaN 5.505714 NaN NaN NaN NaN 9.928571 NaN
1993-06-30 NaN 24.161429 NaN NaN 1.344286 1.817143 NaN NaN NaN 19.954286 ... 12.888571 10.497143 NaN 5.541429 NaN NaN NaN NaN 9.772857 NaN
1993-07-12 NaN 24.195714 NaN NaN 1.165714 1.912857 NaN NaN NaN 20.927143 ... 12.511429 10.650000 NaN 5.848571 NaN NaN NaN NaN 9.820000 NaN
1993-07-21 NaN 24.395714 NaN NaN 0.952857 1.908571 NaN NaN NaN 20.855714 ... 12.125714 10.468571 NaN 6.010000 NaN NaN NaN NaN 10.114286 NaN
1993-07-30 NaN 26.178571 NaN NaN 1.040000 1.858571 NaN NaN NaN 20.377143 ... 12.155714 10.504286 NaN 6.635714 NaN NaN NaN NaN 10.222857 NaN
1993-08-10 NaN 30.411429 NaN NaN 0.991429 1.941429 NaN NaN NaN 21.125714 ... 12.295714 10.585714 NaN 6.741429 NaN NaN NaN NaN 10.590000 NaN
1993-08-19 NaN 30.910000 NaN NaN 0.981429 2.015714 NaN NaN NaN 21.805714 ... 12.354286 10.490000 NaN 6.185714 NaN NaN NaN NaN 10.567143 NaN
1993-08-30 NaN 30.071429 NaN NaN 0.935714 1.975714 NaN NaN NaN 21.304286 ... 12.174286 10.678571 NaN 6.070000 NaN NaN NaN NaN 10.635714 NaN
1993-09-09 NaN 28.322857 NaN NaN 0.900000 1.858571 NaN NaN NaN 20.321429 ... 11.944286 10.628571 NaN 6.140000 NaN NaN NaN NaN 10.538571 NaN
1993-09-20 NaN 29.715714 NaN NaN 0.887143 1.837143 NaN NaN NaN 19.312857 ... 11.964286 10.187143 NaN 6.002857 NaN NaN NaN NaN 10.611429 NaN
1993-09-29 NaN 24.411429 NaN NaN 0.828571 1.737143 NaN NaN NaN 19.277143 ... 12.082857 10.561429 NaN 6.278571 NaN NaN NaN NaN 10.528571 NaN
1993-10-08 NaN 20.195714 NaN NaN 0.892857 1.695714 NaN NaN NaN 19.892857 ... 12.095714 10.575714 NaN 6.577143 NaN NaN NaN NaN 10.952857 NaN
1993-10-19 NaN 19.610000 NaN NaN 1.057143 1.478571 NaN NaN NaN 20.115714 ... 12.130000 10.318571 NaN 6.357143 NaN NaN NaN NaN 10.498571 NaN
1993-10-28 NaN 19.697143 NaN NaN 1.131429 1.868571 NaN NaN NaN 21.232857 ... 13.140000 10.361429 NaN 6.487143 NaN NaN NaN NaN 10.445714 NaN
1993-11-08 NaN 19.552857 NaN NaN 1.125714 1.890000 NaN NaN NaN 21.588571 ... 13.415714 10.138571 NaN 6.482857 NaN NaN NaN NaN 9.795714 NaN
1993-11-17 NaN 18.660000 NaN NaN 1.181429 1.728571 NaN NaN NaN 21.600000 ... 13.617143 9.972857 NaN 6.412857 NaN NaN NaN NaN 9.368571 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2014-02-18 39.811429 3.700000 51.131429 128.897143 75.648571 51.401429 126.571429 51.122857 83.398571 118.138571 ... 10.725714 21.518571 50.154286 45.201429 38.738571 37.641429 73.075714 95.324286 30.265714 29.975714
2014-02-27 40.371429 3.740000 51.181429 128.662857 75.641429 53.125714 126.317143 51.242857 83.575714 121.341429 ... 10.980000 22.004286 49.230000 45.822857 38.792857 38.982857 75.054286 96.287143 31.237143 30.814286
2014-03-10 40.530000 3.852857 51.101429 129.200000 75.860000 53.158571 125.945714 51.808571 82.998571 120.450000 ... 10.734286 22.555714 49.708571 45.717143 37.202857 38.071429 76.134286 95.874286 31.430000 29.665714
2014-03-19 40.418571 4.018571 52.898571 129.768571 76.617143 52.610000 124.357143 52.351429 82.258571 120.628571 ... 10.967143 23.158571 50.204286 45.522857 36.144286 36.852857 74.342857 93.347143 31.020000 29.452857
2014-03-28 40.582857 3.987143 53.448571 131.940000 76.568571 53.601429 124.487143 52.020000 79.204286 119.304286 ... 11.425714 22.920000 51.232857 46.648571 36.898571 35.431429 75.695714 95.665714 31.134286 29.265714
2014-04-08 40.800000 3.818571 52.947143 133.602857 74.601429 53.267143 118.550000 48.050000 78.077143 116.677143 ... 11.358571 21.928571 49.244286 45.118571 35.668571 34.138571 75.777143 93.042857 29.827143 28.718571
2014-04-17 41.041429 4.092857 52.635714 134.560000 78.497143 53.692857 121.487143 49.637143 79.031429 117.960000 ... 11.740000 20.872857 49.531429 44.894286 35.540000 35.438571 76.970000 95.284286 29.525714 29.622857
2014-04-29 40.857143 4.072857 51.061429 137.342857 84.780000 53.301429 120.680000 51.614286 79.291429 118.335714 ... 11.977143 20.511429 49.537143 45.132857 36.967143 36.090000 76.334286 98.491429 28.724286 30.544286
2014-05-08 39.290000 3.965714 51.385714 139.785714 84.497143 54.602857 123.531429 52.495714 79.034286 118.874286 ... 11.947143 20.498571 50.031429 46.192857 37.481429 33.987143 75.772857 100.461429 28.794286 30.608571
2014-05-19 38.640000 4.044286 52.127143 140.192857 87.482857 55.990000 122.254286 53.794286 79.464286 118.217143 ... 12.035714 20.411429 49.234286 46.847143 38.010000 34.534286 75.240000 102.030000 28.388571 30.457143
2014-05-29 39.252857 4.017143 52.461429 142.290000 91.272857 57.302857 125.497143 54.551429 82.224286 121.488571 ... 12.468571 20.998571 50.395714 47.650000 37.258571 34.951429 78.371429 105.714286 29.000000 31.187143
2014-06-09 38.535714 4.291429 55.570000 139.524286 92.808571 56.112857 125.837143 54.022857 83.045714 121.578571 ... 12.724286 21.410000 50.031429 47.934286 38.231429 35.991429 79.334286 105.690000 29.730000 32.241429
2014-06-18 39.918571 4.114286 54.348571 141.692857 91.045714 58.140000 130.152857 54.415714 82.654286 129.841429 ... 12.484286 21.474286 47.830000 47.768571 39.348571 33.957143 80.780000 105.260000 29.818571 32.502857
2014-06-27 40.117143 4.225714 54.347143 142.522857 93.894286 60.154286 134.055714 57.077143 81.172857 128.861429 ... 12.361429 21.452857 46.170000 47.575714 38.641429 35.257143 81.950000 104.418571 29.554286 32.550000
2014-07-09 39.710000 4.470000 53.945714 145.041429 95.041429 60.162857 132.737143 54.460000 79.711429 129.305714 ... 12.654286 21.307143 44.418571 47.224286 37.278571 34.788571 82.034286 102.964286 29.368571 32.472857
2014-07-18 40.117143 3.770000 51.997143 147.490000 96.285714 56.515714 126.204286 54.062857 80.305714 133.401429 ... 12.981429 20.794286 44.568571 46.818571 37.220000 34.730000 74.734286 101.484286 29.227143 32.831429
2014-07-29 38.398571 3.955714 50.058571 149.414286 96.275714 53.961429 122.964286 52.875714 79.240000 133.795714 ... 13.117143 20.188571 46.952857 46.800000 35.781429 35.962857 70.920000 99.668571 28.530000 32.795714
2014-08-07 38.171429 4.108571 50.147143 150.678571 96.271429 53.951429 126.130000 53.298571 78.738571 132.231429 ... 13.328571 19.652857 46.780000 46.782857 36.474286 35.985714 70.395714 96.772857 27.921429 32.684286
2014-08-18 39.367143 4.231429 52.057143 153.967143 100.655714 54.187143 134.967143 55.205714 80.637143 133.541429 ... 13.564286 20.215714 47.478571 47.907143 37.652857 37.694286 72.578571 100.055714 28.848571 34.721429
2014-08-27 39.724286 4.160000 50.704286 154.108571 100.887143 53.882857 137.640000 55.520000 81.375714 132.812857 ... 13.760000 20.381429 47.595714 47.722857 37.644286 38.845714 72.000000 99.981429 29.214286 35.557143
2014-09-08 38.805714 4.007143 49.494286 149.344286 100.418571 52.721429 135.181429 57.424286 81.008571 130.112857 ... 13.534286 20.462857 48.418571 47.027143 37.372857 41.875714 72.128571 103.958571 29.190000 36.272857
2014-09-17 38.327143 3.755714 49.877143 143.851429 101.092857 50.518571 131.227143 58.778571 79.724286 133.538571 ... 13.690000 20.697143 50.737143 46.184286 37.364286 40.304286 72.301429 103.315714 29.507143 36.561429
2014-09-26 38.504286 3.437143 48.584286 141.720000 99.990000 51.331429 132.011429 57.981429 80.104286 129.887143 ... 13.111429 19.998571 51.490000 45.735714 35.551429 40.757143 71.860000 101.744286 28.854286 37.008571
2014-10-07 39.841429 2.885714 44.918571 146.684286 99.628571 49.950000 133.688571 55.580000 77.427143 123.590000 ... 12.718571 19.410000 48.735714 45.161429 33.224286 39.554286 68.778571 100.248571 27.274286 36.380000
2014-10-16 40.448571 2.687143 45.838571 150.434286 101.314286 50.568571 138.630000 56.240000 77.290000 129.722857 ... 12.647143 18.574286 49.491429 45.484286 34.417143 40.590000 69.025714 100.577143 26.858571 35.788571
2014-10-27 42.095714 2.771429 48.101429 154.794286 107.452857 55.550000 146.305714 61.707143 80.271429 132.060000 ... 13.045714 19.694286 51.722857 49.424286 35.908571 45.871429 70.698571 108.767143 28.704286 37.360000
2014-11-05 42.981429 2.742857 50.217143 157.877143 109.881429 57.678571 145.600000 63.111429 83.694286 134.865714 ... 13.397143 20.242857 54.568571 51.681429 37.167143 49.071429 73.591429 110.334286 29.594286 41.281429
2014-11-14 42.801429 2.692857 51.085714 157.472857 115.672857 57.897143 145.411429 65.845714 84.240000 140.591429 ... 13.540000 20.165714 55.787143 53.290000 37.858571 51.510000 74.955714 110.504286 29.030000 43.794286
2014-11-25 43.177143 2.748571 54.842857 160.830000 116.664286 59.174286 148.440000 68.915714 85.948571 144.781429 ... 13.955714 20.077143 56.978571 55.030000 38.455714 50.978571 77.342857 112.644286 28.062857 44.500000
2014-12-05 43.271429 2.541429 56.160000 162.948571 111.862857 58.547143 157.681429 67.681429 83.961429 142.535714 ... 13.784286 20.178571 57.842857 54.732857 37.255714 50.045714 73.715714 112.841429 27.728571 43.008571

787 rows × 482 columns


In [23]:
compare_ready_df = grouped_df.set_index('ticker', append=True).unstack()
compare_ready_df.columns = compare_ready_df.columns.droplevel(level=0)
compare_ready_df.index.name = 'date'
compare_ready_df


Out[23]:
ticker A AAL AAP AAPL ABBV ABC ABT ACN ADBE ADI ... XLNX XOM XRAY XRX XYL YHOO YUM ZBH ZION ZTS
date
1993-01-29 NaN 19.730000 NaN NaN 2.111429 2.595714 NaN NaN NaN 22.822857 ... 14.224286 10.175714 NaN 7.888571 NaN NaN NaN NaN 11.374286 NaN
1993-02-09 NaN 20.447143 NaN NaN 1.954286 2.451429 NaN NaN NaN 22.285714 ... 14.102857 10.068571 NaN 7.261429 NaN NaN NaN NaN 11.155714 NaN
1993-02-19 NaN 21.767143 NaN NaN 1.934286 2.402857 NaN NaN NaN 21.671429 ... 13.700000 10.248571 NaN 6.497143 NaN NaN NaN NaN 11.205714 NaN
1993-03-02 NaN 23.177143 NaN NaN 1.984286 2.418571 NaN NaN NaN 22.345714 ... 13.998571 10.571429 NaN 6.847143 NaN NaN NaN NaN 11.282857 NaN
1993-03-11 NaN 23.410000 NaN NaN 1.991429 2.430000 NaN NaN NaN 22.285714 ... 13.835714 10.688571 NaN 7.540000 NaN NaN NaN NaN 11.615714 NaN
1993-03-22 NaN 22.607143 NaN NaN 1.892857 2.398571 NaN NaN NaN 22.221429 ... 13.804286 10.842857 NaN 7.067143 NaN NaN NaN NaN 12.142857 NaN
1993-03-31 NaN 23.250000 NaN NaN 1.798571 2.071429 NaN NaN NaN 21.311429 ... 13.500000 11.067143 NaN 7.010000 NaN NaN NaN NaN 12.070000 NaN
1993-04-12 NaN 25.607143 NaN NaN 1.741429 1.971429 NaN NaN NaN 21.500000 ... 13.337143 11.362857 NaN 6.032857 NaN NaN NaN NaN 11.794286 NaN
1993-04-21 NaN 29.784286 NaN NaN 1.785714 1.862857 NaN NaN NaN 21.721429 ... 12.881429 10.978571 NaN 6.230000 NaN NaN NaN NaN 10.938571 NaN
1993-04-30 NaN 29.285714 NaN NaN 1.911429 1.815714 NaN NaN NaN 21.777143 ... 12.591429 10.865714 NaN 6.210000 NaN NaN NaN NaN 10.317143 NaN
1993-05-11 NaN 28.714286 NaN NaN 1.974286 1.832857 NaN NaN NaN 21.712857 ... 12.342857 10.470000 NaN 6.547143 NaN NaN NaN NaN 10.077143 NaN
1993-05-20 NaN 29.232857 NaN NaN 2.050000 1.947143 NaN NaN NaN 21.741429 ... 12.420000 10.625714 NaN 6.742857 NaN NaN NaN NaN 10.374286 NaN
1993-06-01 NaN 26.142857 NaN NaN 1.887143 2.015714 NaN NaN NaN 21.375714 ... 12.535714 10.300000 NaN 6.334286 NaN NaN NaN NaN 10.174286 NaN
1993-06-10 NaN 21.267143 NaN NaN 1.525714 1.858571 NaN NaN NaN 20.624286 ... 12.705714 10.528571 NaN 6.001429 NaN NaN NaN NaN 9.620000 NaN
1993-06-21 NaN 21.804286 NaN NaN 1.441429 1.857143 NaN NaN NaN 19.935714 ... 13.162857 10.604286 NaN 5.505714 NaN NaN NaN NaN 9.928571 NaN
1993-06-30 NaN 24.161429 NaN NaN 1.344286 1.817143 NaN NaN NaN 19.954286 ... 12.888571 10.497143 NaN 5.541429 NaN NaN NaN NaN 9.772857 NaN
1993-07-12 NaN 24.195714 NaN NaN 1.165714 1.912857 NaN NaN NaN 20.927143 ... 12.511429 10.650000 NaN 5.848571 NaN NaN NaN NaN 9.820000 NaN
1993-07-21 NaN 24.395714 NaN NaN 0.952857 1.908571 NaN NaN NaN 20.855714 ... 12.125714 10.468571 NaN 6.010000 NaN NaN NaN NaN 10.114286 NaN
1993-07-30 NaN 26.178571 NaN NaN 1.040000 1.858571 NaN NaN NaN 20.377143 ... 12.155714 10.504286 NaN 6.635714 NaN NaN NaN NaN 10.222857 NaN
1993-08-10 NaN 30.411429 NaN NaN 0.991429 1.941429 NaN NaN NaN 21.125714 ... 12.295714 10.585714 NaN 6.741429 NaN NaN NaN NaN 10.590000 NaN
1993-08-19 NaN 30.910000 NaN NaN 0.981429 2.015714 NaN NaN NaN 21.805714 ... 12.354286 10.490000 NaN 6.185714 NaN NaN NaN NaN 10.567143 NaN
1993-08-30 NaN 30.071429 NaN NaN 0.935714 1.975714 NaN NaN NaN 21.304286 ... 12.174286 10.678571 NaN 6.070000 NaN NaN NaN NaN 10.635714 NaN
1993-09-09 NaN 28.322857 NaN NaN 0.900000 1.858571 NaN NaN NaN 20.321429 ... 11.944286 10.628571 NaN 6.140000 NaN NaN NaN NaN 10.538571 NaN
1993-09-20 NaN 29.715714 NaN NaN 0.887143 1.837143 NaN NaN NaN 19.312857 ... 11.964286 10.187143 NaN 6.002857 NaN NaN NaN NaN 10.611429 NaN
1993-09-29 NaN 24.411429 NaN NaN 0.828571 1.737143 NaN NaN NaN 19.277143 ... 12.082857 10.561429 NaN 6.278571 NaN NaN NaN NaN 10.528571 NaN
1993-10-08 NaN 20.195714 NaN NaN 0.892857 1.695714 NaN NaN NaN 19.892857 ... 12.095714 10.575714 NaN 6.577143 NaN NaN NaN NaN 10.952857 NaN
1993-10-19 NaN 19.610000 NaN NaN 1.057143 1.478571 NaN NaN NaN 20.115714 ... 12.130000 10.318571 NaN 6.357143 NaN NaN NaN NaN 10.498571 NaN
1993-10-28 NaN 19.697143 NaN NaN 1.131429 1.868571 NaN NaN NaN 21.232857 ... 13.140000 10.361429 NaN 6.487143 NaN NaN NaN NaN 10.445714 NaN
1993-11-08 NaN 19.552857 NaN NaN 1.125714 1.890000 NaN NaN NaN 21.588571 ... 13.415714 10.138571 NaN 6.482857 NaN NaN NaN NaN 9.795714 NaN
1993-11-17 NaN 18.660000 NaN NaN 1.181429 1.728571 NaN NaN NaN 21.600000 ... 13.617143 9.972857 NaN 6.412857 NaN NaN NaN NaN 9.368571 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2014-02-18 39.811429 3.700000 51.131429 128.897143 75.648571 51.401429 126.571429 51.122857 83.398571 118.138571 ... 10.725714 21.518571 50.154286 45.201429 38.738571 37.641429 73.075714 95.324286 30.265714 29.975714
2014-02-27 40.371429 3.740000 51.181429 128.662857 75.641429 53.125714 126.317143 51.242857 83.575714 121.341429 ... 10.980000 22.004286 49.230000 45.822857 38.792857 38.982857 75.054286 96.287143 31.237143 30.814286
2014-03-10 40.530000 3.852857 51.101429 129.200000 75.860000 53.158571 125.945714 51.808571 82.998571 120.450000 ... 10.734286 22.555714 49.708571 45.717143 37.202857 38.071429 76.134286 95.874286 31.430000 29.665714
2014-03-19 40.418571 4.018571 52.898571 129.768571 76.617143 52.610000 124.357143 52.351429 82.258571 120.628571 ... 10.967143 23.158571 50.204286 45.522857 36.144286 36.852857 74.342857 93.347143 31.020000 29.452857
2014-03-28 40.582857 3.987143 53.448571 131.940000 76.568571 53.601429 124.487143 52.020000 79.204286 119.304286 ... 11.425714 22.920000 51.232857 46.648571 36.898571 35.431429 75.695714 95.665714 31.134286 29.265714
2014-04-08 40.800000 3.818571 52.947143 133.602857 74.601429 53.267143 118.550000 48.050000 78.077143 116.677143 ... 11.358571 21.928571 49.244286 45.118571 35.668571 34.138571 75.777143 93.042857 29.827143 28.718571
2014-04-17 41.041429 4.092857 52.635714 134.560000 78.497143 53.692857 121.487143 49.637143 79.031429 117.960000 ... 11.740000 20.872857 49.531429 44.894286 35.540000 35.438571 76.970000 95.284286 29.525714 29.622857
2014-04-29 40.857143 4.072857 51.061429 137.342857 84.780000 53.301429 120.680000 51.614286 79.291429 118.335714 ... 11.977143 20.511429 49.537143 45.132857 36.967143 36.090000 76.334286 98.491429 28.724286 30.544286
2014-05-08 39.290000 3.965714 51.385714 139.785714 84.497143 54.602857 123.531429 52.495714 79.034286 118.874286 ... 11.947143 20.498571 50.031429 46.192857 37.481429 33.987143 75.772857 100.461429 28.794286 30.608571
2014-05-19 38.640000 4.044286 52.127143 140.192857 87.482857 55.990000 122.254286 53.794286 79.464286 118.217143 ... 12.035714 20.411429 49.234286 46.847143 38.010000 34.534286 75.240000 102.030000 28.388571 30.457143
2014-05-29 39.252857 4.017143 52.461429 142.290000 91.272857 57.302857 125.497143 54.551429 82.224286 121.488571 ... 12.468571 20.998571 50.395714 47.650000 37.258571 34.951429 78.371429 105.714286 29.000000 31.187143
2014-06-09 38.535714 4.291429 55.570000 139.524286 92.808571 56.112857 125.837143 54.022857 83.045714 121.578571 ... 12.724286 21.410000 50.031429 47.934286 38.231429 35.991429 79.334286 105.690000 29.730000 32.241429
2014-06-18 39.918571 4.114286 54.348571 141.692857 91.045714 58.140000 130.152857 54.415714 82.654286 129.841429 ... 12.484286 21.474286 47.830000 47.768571 39.348571 33.957143 80.780000 105.260000 29.818571 32.502857
2014-06-27 40.117143 4.225714 54.347143 142.522857 93.894286 60.154286 134.055714 57.077143 81.172857 128.861429 ... 12.361429 21.452857 46.170000 47.575714 38.641429 35.257143 81.950000 104.418571 29.554286 32.550000
2014-07-09 39.710000 4.470000 53.945714 145.041429 95.041429 60.162857 132.737143 54.460000 79.711429 129.305714 ... 12.654286 21.307143 44.418571 47.224286 37.278571 34.788571 82.034286 102.964286 29.368571 32.472857
2014-07-18 40.117143 3.770000 51.997143 147.490000 96.285714 56.515714 126.204286 54.062857 80.305714 133.401429 ... 12.981429 20.794286 44.568571 46.818571 37.220000 34.730000 74.734286 101.484286 29.227143 32.831429
2014-07-29 38.398571 3.955714 50.058571 149.414286 96.275714 53.961429 122.964286 52.875714 79.240000 133.795714 ... 13.117143 20.188571 46.952857 46.800000 35.781429 35.962857 70.920000 99.668571 28.530000 32.795714
2014-08-07 38.171429 4.108571 50.147143 150.678571 96.271429 53.951429 126.130000 53.298571 78.738571 132.231429 ... 13.328571 19.652857 46.780000 46.782857 36.474286 35.985714 70.395714 96.772857 27.921429 32.684286
2014-08-18 39.367143 4.231429 52.057143 153.967143 100.655714 54.187143 134.967143 55.205714 80.637143 133.541429 ... 13.564286 20.215714 47.478571 47.907143 37.652857 37.694286 72.578571 100.055714 28.848571 34.721429
2014-08-27 39.724286 4.160000 50.704286 154.108571 100.887143 53.882857 137.640000 55.520000 81.375714 132.812857 ... 13.760000 20.381429 47.595714 47.722857 37.644286 38.845714 72.000000 99.981429 29.214286 35.557143
2014-09-08 38.805714 4.007143 49.494286 149.344286 100.418571 52.721429 135.181429 57.424286 81.008571 130.112857 ... 13.534286 20.462857 48.418571 47.027143 37.372857 41.875714 72.128571 103.958571 29.190000 36.272857
2014-09-17 38.327143 3.755714 49.877143 143.851429 101.092857 50.518571 131.227143 58.778571 79.724286 133.538571 ... 13.690000 20.697143 50.737143 46.184286 37.364286 40.304286 72.301429 103.315714 29.507143 36.561429
2014-09-26 38.504286 3.437143 48.584286 141.720000 99.990000 51.331429 132.011429 57.981429 80.104286 129.887143 ... 13.111429 19.998571 51.490000 45.735714 35.551429 40.757143 71.860000 101.744286 28.854286 37.008571
2014-10-07 39.841429 2.885714 44.918571 146.684286 99.628571 49.950000 133.688571 55.580000 77.427143 123.590000 ... 12.718571 19.410000 48.735714 45.161429 33.224286 39.554286 68.778571 100.248571 27.274286 36.380000
2014-10-16 40.448571 2.687143 45.838571 150.434286 101.314286 50.568571 138.630000 56.240000 77.290000 129.722857 ... 12.647143 18.574286 49.491429 45.484286 34.417143 40.590000 69.025714 100.577143 26.858571 35.788571
2014-10-27 42.095714 2.771429 48.101429 154.794286 107.452857 55.550000 146.305714 61.707143 80.271429 132.060000 ... 13.045714 19.694286 51.722857 49.424286 35.908571 45.871429 70.698571 108.767143 28.704286 37.360000
2014-11-05 42.981429 2.742857 50.217143 157.877143 109.881429 57.678571 145.600000 63.111429 83.694286 134.865714 ... 13.397143 20.242857 54.568571 51.681429 37.167143 49.071429 73.591429 110.334286 29.594286 41.281429
2014-11-14 42.801429 2.692857 51.085714 157.472857 115.672857 57.897143 145.411429 65.845714 84.240000 140.591429 ... 13.540000 20.165714 55.787143 53.290000 37.858571 51.510000 74.955714 110.504286 29.030000 43.794286
2014-11-25 43.177143 2.748571 54.842857 160.830000 116.664286 59.174286 148.440000 68.915714 85.948571 144.781429 ... 13.955714 20.077143 56.978571 55.030000 38.455714 50.978571 77.342857 112.644286 28.062857 44.500000
2014-12-05 43.271429 2.541429 56.160000 162.948571 111.862857 58.547143 157.681429 67.681429 83.961429 142.535714 ... 13.784286 20.178571 57.842857 54.732857 37.255714 50.045714 73.715714 112.841429 27.728571 43.008571

787 rows × 482 columns


In [24]:
reshape_by_symbol(y_train_pred)


Out[24]:
ticker A AAL AAP AAPL ABBV ABC ABT ACN ADBE ADI ... XLNX XOM XRAY XRX XYL YHOO YUM ZBH ZION ZTS
date
1993-01-29 NaN 19.730000 NaN NaN 2.111429 2.595714 NaN NaN NaN 22.822857 ... 14.224286 10.175714 NaN 7.888571 NaN NaN NaN NaN 11.374286 NaN
1993-02-09 NaN 20.447143 NaN NaN 1.954286 2.451429 NaN NaN NaN 22.285714 ... 14.102857 10.068571 NaN 7.261429 NaN NaN NaN NaN 11.155714 NaN
1993-02-19 NaN 21.767143 NaN NaN 1.934286 2.402857 NaN NaN NaN 21.671429 ... 13.700000 10.248571 NaN 6.497143 NaN NaN NaN NaN 11.205714 NaN
1993-03-02 NaN 23.177143 NaN NaN 1.984286 2.418571 NaN NaN NaN 22.345714 ... 13.998571 10.571429 NaN 6.847143 NaN NaN NaN NaN 11.282857 NaN
1993-03-11 NaN 23.410000 NaN NaN 1.991429 2.430000 NaN NaN NaN 22.285714 ... 13.835714 10.688571 NaN 7.540000 NaN NaN NaN NaN 11.615714 NaN
1993-03-22 NaN 22.607143 NaN NaN 1.892857 2.398571 NaN NaN NaN 22.221429 ... 13.804286 10.842857 NaN 7.067143 NaN NaN NaN NaN 12.142857 NaN
1993-03-31 NaN 23.250000 NaN NaN 1.798571 2.071429 NaN NaN NaN 21.311429 ... 13.500000 11.067143 NaN 7.010000 NaN NaN NaN NaN 12.070000 NaN
1993-04-12 NaN 25.607143 NaN NaN 1.741429 1.971429 NaN NaN NaN 21.500000 ... 13.337143 11.362857 NaN 6.032857 NaN NaN NaN NaN 11.794286 NaN
1993-04-21 NaN 29.784286 NaN NaN 1.785714 1.862857 NaN NaN NaN 21.721429 ... 12.881429 10.978571 NaN 6.230000 NaN NaN NaN NaN 10.938571 NaN
1993-04-30 NaN 29.285714 NaN NaN 1.911429 1.815714 NaN NaN NaN 21.777143 ... 12.591429 10.865714 NaN 6.210000 NaN NaN NaN NaN 10.317143 NaN
1993-05-11 NaN 28.714286 NaN NaN 1.974286 1.832857 NaN NaN NaN 21.712857 ... 12.342857 10.470000 NaN 6.547143 NaN NaN NaN NaN 10.077143 NaN
1993-05-20 NaN 29.232857 NaN NaN 2.050000 1.947143 NaN NaN NaN 21.741429 ... 12.420000 10.625714 NaN 6.742857 NaN NaN NaN NaN 10.374286 NaN
1993-06-01 NaN 26.142857 NaN NaN 1.887143 2.015714 NaN NaN NaN 21.375714 ... 12.535714 10.300000 NaN 6.334286 NaN NaN NaN NaN 10.174286 NaN
1993-06-10 NaN 21.267143 NaN NaN 1.525714 1.858571 NaN NaN NaN 20.624286 ... 12.705714 10.528571 NaN 6.001429 NaN NaN NaN NaN 9.620000 NaN
1993-06-21 NaN 21.804286 NaN NaN 1.441429 1.857143 NaN NaN NaN 19.935714 ... 13.162857 10.604286 NaN 5.505714 NaN NaN NaN NaN 9.928571 NaN
1993-06-30 NaN 24.161429 NaN NaN 1.344286 1.817143 NaN NaN NaN 19.954286 ... 12.888571 10.497143 NaN 5.541429 NaN NaN NaN NaN 9.772857 NaN
1993-07-12 NaN 24.195714 NaN NaN 1.165714 1.912857 NaN NaN NaN 20.927143 ... 12.511429 10.650000 NaN 5.848571 NaN NaN NaN NaN 9.820000 NaN
1993-07-21 NaN 24.395714 NaN NaN 0.952857 1.908571 NaN NaN NaN 20.855714 ... 12.125714 10.468571 NaN 6.010000 NaN NaN NaN NaN 10.114286 NaN
1993-07-30 NaN 26.178571 NaN NaN 1.040000 1.858571 NaN NaN NaN 20.377143 ... 12.155714 10.504286 NaN 6.635714 NaN NaN NaN NaN 10.222857 NaN
1993-08-10 NaN 30.411429 NaN NaN 0.991429 1.941429 NaN NaN NaN 21.125714 ... 12.295714 10.585714 NaN 6.741429 NaN NaN NaN NaN 10.590000 NaN
1993-08-19 NaN 30.910000 NaN NaN 0.981429 2.015714 NaN NaN NaN 21.805714 ... 12.354286 10.490000 NaN 6.185714 NaN NaN NaN NaN 10.567143 NaN
1993-08-30 NaN 30.071429 NaN NaN 0.935714 1.975714 NaN NaN NaN 21.304286 ... 12.174286 10.678571 NaN 6.070000 NaN NaN NaN NaN 10.635714 NaN
1993-09-09 NaN 28.322857 NaN NaN 0.900000 1.858571 NaN NaN NaN 20.321429 ... 11.944286 10.628571 NaN 6.140000 NaN NaN NaN NaN 10.538571 NaN
1993-09-20 NaN 29.715714 NaN NaN 0.887143 1.837143 NaN NaN NaN 19.312857 ... 11.964286 10.187143 NaN 6.002857 NaN NaN NaN NaN 10.611429 NaN
1993-09-29 NaN 24.411429 NaN NaN 0.828571 1.737143 NaN NaN NaN 19.277143 ... 12.082857 10.561429 NaN 6.278571 NaN NaN NaN NaN 10.528571 NaN
1993-10-08 NaN 20.195714 NaN NaN 0.892857 1.695714 NaN NaN NaN 19.892857 ... 12.095714 10.575714 NaN 6.577143 NaN NaN NaN NaN 10.952857 NaN
1993-10-19 NaN 19.610000 NaN NaN 1.057143 1.478571 NaN NaN NaN 20.115714 ... 12.130000 10.318571 NaN 6.357143 NaN NaN NaN NaN 10.498571 NaN
1993-10-28 NaN 19.697143 NaN NaN 1.131429 1.868571 NaN NaN NaN 21.232857 ... 13.140000 10.361429 NaN 6.487143 NaN NaN NaN NaN 10.445714 NaN
1993-11-08 NaN 19.552857 NaN NaN 1.125714 1.890000 NaN NaN NaN 21.588571 ... 13.415714 10.138571 NaN 6.482857 NaN NaN NaN NaN 9.795714 NaN
1993-11-17 NaN 18.660000 NaN NaN 1.181429 1.728571 NaN NaN NaN 21.600000 ... 13.617143 9.972857 NaN 6.412857 NaN NaN NaN NaN 9.368571 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2014-02-18 39.811429 3.700000 51.131429 128.897143 75.648571 51.401429 126.571429 51.122857 83.398571 118.138571 ... 10.725714 21.518571 50.154286 45.201429 38.738571 37.641429 73.075714 95.324286 30.265714 29.975714
2014-02-27 40.371429 3.740000 51.181429 128.662857 75.641429 53.125714 126.317143 51.242857 83.575714 121.341429 ... 10.980000 22.004286 49.230000 45.822857 38.792857 38.982857 75.054286 96.287143 31.237143 30.814286
2014-03-10 40.530000 3.852857 51.101429 129.200000 75.860000 53.158571 125.945714 51.808571 82.998571 120.450000 ... 10.734286 22.555714 49.708571 45.717143 37.202857 38.071429 76.134286 95.874286 31.430000 29.665714
2014-03-19 40.418571 4.018571 52.898571 129.768571 76.617143 52.610000 124.357143 52.351429 82.258571 120.628571 ... 10.967143 23.158571 50.204286 45.522857 36.144286 36.852857 74.342857 93.347143 31.020000 29.452857
2014-03-28 40.582857 3.987143 53.448571 131.940000 76.568571 53.601429 124.487143 52.020000 79.204286 119.304286 ... 11.425714 22.920000 51.232857 46.648571 36.898571 35.431429 75.695714 95.665714 31.134286 29.265714
2014-04-08 40.800000 3.818571 52.947143 133.602857 74.601429 53.267143 118.550000 48.050000 78.077143 116.677143 ... 11.358571 21.928571 49.244286 45.118571 35.668571 34.138571 75.777143 93.042857 29.827143 28.718571
2014-04-17 41.041429 4.092857 52.635714 134.560000 78.497143 53.692857 121.487143 49.637143 79.031429 117.960000 ... 11.740000 20.872857 49.531429 44.894286 35.540000 35.438571 76.970000 95.284286 29.525714 29.622857
2014-04-29 40.857143 4.072857 51.061429 137.342857 84.780000 53.301429 120.680000 51.614286 79.291429 118.335714 ... 11.977143 20.511429 49.537143 45.132857 36.967143 36.090000 76.334286 98.491429 28.724286 30.544286
2014-05-08 39.290000 3.965714 51.385714 139.785714 84.497143 54.602857 123.531429 52.495714 79.034286 118.874286 ... 11.947143 20.498571 50.031429 46.192857 37.481429 33.987143 75.772857 100.461429 28.794286 30.608571
2014-05-19 38.640000 4.044286 52.127143 140.192857 87.482857 55.990000 122.254286 53.794286 79.464286 118.217143 ... 12.035714 20.411429 49.234286 46.847143 38.010000 34.534286 75.240000 102.030000 28.388571 30.457143
2014-05-29 39.252857 4.017143 52.461429 142.290000 91.272857 57.302857 125.497143 54.551429 82.224286 121.488571 ... 12.468571 20.998571 50.395714 47.650000 37.258571 34.951429 78.371429 105.714286 29.000000 31.187143
2014-06-09 38.535714 4.291429 55.570000 139.524286 92.808571 56.112857 125.837143 54.022857 83.045714 121.578571 ... 12.724286 21.410000 50.031429 47.934286 38.231429 35.991429 79.334286 105.690000 29.730000 32.241429
2014-06-18 39.918571 4.114286 54.348571 141.692857 91.045714 58.140000 130.152857 54.415714 82.654286 129.841429 ... 12.484286 21.474286 47.830000 47.768571 39.348571 33.957143 80.780000 105.260000 29.818571 32.502857
2014-06-27 40.117143 4.225714 54.347143 142.522857 93.894286 60.154286 134.055714 57.077143 81.172857 128.861429 ... 12.361429 21.452857 46.170000 47.575714 38.641429 35.257143 81.950000 104.418571 29.554286 32.550000
2014-07-09 39.710000 4.470000 53.945714 145.041429 95.041429 60.162857 132.737143 54.460000 79.711429 129.305714 ... 12.654286 21.307143 44.418571 47.224286 37.278571 34.788571 82.034286 102.964286 29.368571 32.472857
2014-07-18 40.117143 3.770000 51.997143 147.490000 96.285714 56.515714 126.204286 54.062857 80.305714 133.401429 ... 12.981429 20.794286 44.568571 46.818571 37.220000 34.730000 74.734286 101.484286 29.227143 32.831429
2014-07-29 38.398571 3.955714 50.058571 149.414286 96.275714 53.961429 122.964286 52.875714 79.240000 133.795714 ... 13.117143 20.188571 46.952857 46.800000 35.781429 35.962857 70.920000 99.668571 28.530000 32.795714
2014-08-07 38.171429 4.108571 50.147143 150.678571 96.271429 53.951429 126.130000 53.298571 78.738571 132.231429 ... 13.328571 19.652857 46.780000 46.782857 36.474286 35.985714 70.395714 96.772857 27.921429 32.684286
2014-08-18 39.367143 4.231429 52.057143 153.967143 100.655714 54.187143 134.967143 55.205714 80.637143 133.541429 ... 13.564286 20.215714 47.478571 47.907143 37.652857 37.694286 72.578571 100.055714 28.848571 34.721429
2014-08-27 39.724286 4.160000 50.704286 154.108571 100.887143 53.882857 137.640000 55.520000 81.375714 132.812857 ... 13.760000 20.381429 47.595714 47.722857 37.644286 38.845714 72.000000 99.981429 29.214286 35.557143
2014-09-08 38.805714 4.007143 49.494286 149.344286 100.418571 52.721429 135.181429 57.424286 81.008571 130.112857 ... 13.534286 20.462857 48.418571 47.027143 37.372857 41.875714 72.128571 103.958571 29.190000 36.272857
2014-09-17 38.327143 3.755714 49.877143 143.851429 101.092857 50.518571 131.227143 58.778571 79.724286 133.538571 ... 13.690000 20.697143 50.737143 46.184286 37.364286 40.304286 72.301429 103.315714 29.507143 36.561429
2014-09-26 38.504286 3.437143 48.584286 141.720000 99.990000 51.331429 132.011429 57.981429 80.104286 129.887143 ... 13.111429 19.998571 51.490000 45.735714 35.551429 40.757143 71.860000 101.744286 28.854286 37.008571
2014-10-07 39.841429 2.885714 44.918571 146.684286 99.628571 49.950000 133.688571 55.580000 77.427143 123.590000 ... 12.718571 19.410000 48.735714 45.161429 33.224286 39.554286 68.778571 100.248571 27.274286 36.380000
2014-10-16 40.448571 2.687143 45.838571 150.434286 101.314286 50.568571 138.630000 56.240000 77.290000 129.722857 ... 12.647143 18.574286 49.491429 45.484286 34.417143 40.590000 69.025714 100.577143 26.858571 35.788571
2014-10-27 42.095714 2.771429 48.101429 154.794286 107.452857 55.550000 146.305714 61.707143 80.271429 132.060000 ... 13.045714 19.694286 51.722857 49.424286 35.908571 45.871429 70.698571 108.767143 28.704286 37.360000
2014-11-05 42.981429 2.742857 50.217143 157.877143 109.881429 57.678571 145.600000 63.111429 83.694286 134.865714 ... 13.397143 20.242857 54.568571 51.681429 37.167143 49.071429 73.591429 110.334286 29.594286 41.281429
2014-11-14 42.801429 2.692857 51.085714 157.472857 115.672857 57.897143 145.411429 65.845714 84.240000 140.591429 ... 13.540000 20.165714 55.787143 53.290000 37.858571 51.510000 74.955714 110.504286 29.030000 43.794286
2014-11-25 43.177143 2.748571 54.842857 160.830000 116.664286 59.174286 148.440000 68.915714 85.948571 144.781429 ... 13.955714 20.077143 56.978571 55.030000 38.455714 50.978571 77.342857 112.644286 28.062857 44.500000
2014-12-05 43.271429 2.541429 56.160000 162.948571 111.862857 58.547143 157.681429 67.681429 83.961429 142.535714 ... 13.784286 20.178571 57.842857 54.732857 37.255714 50.045714 73.715714 112.841429 27.728571 43.008571

787 rows × 482 columns


In [25]:
reshape_by_symbol(y_train_true)


Out[25]:
ticker A AAL AAP AAPL ABBV ABC ABT ACN ADBE ADI ... XLNX XOM XRAY XRX XYL YHOO YUM ZBH ZION ZTS
date
1993-01-29 NaN 19.75 NaN NaN 2.03 2.44 NaN NaN NaN 22.75 ... 14.40 10.29 NaN 7.25 NaN NaN NaN NaN 11.56 NaN
1993-02-09 NaN 22.12 NaN NaN 1.96 2.50 NaN NaN NaN 21.38 ... 13.82 10.04 NaN 6.75 NaN NaN NaN NaN 10.69 NaN
1993-02-19 NaN 22.25 NaN NaN 1.94 2.44 NaN NaN NaN 21.62 ... 13.97 10.49 NaN 6.42 NaN NaN NaN NaN 11.50 NaN
1993-03-02 NaN 23.75 NaN NaN 2.03 2.44 NaN NaN NaN 22.69 ... 14.05 10.74 NaN 7.50 NaN NaN NaN NaN 11.75 NaN
1993-03-11 NaN 22.25 NaN NaN 1.90 2.39 NaN NaN NaN 22.00 ... 13.59 10.91 NaN 6.80 NaN NaN NaN NaN 12.00 NaN
1993-03-22 NaN 23.00 NaN NaN 1.84 2.38 NaN NaN NaN 22.06 ... 13.88 10.84 NaN 7.42 NaN NaN NaN NaN 12.00 NaN
1993-03-31 NaN 25.88 NaN NaN 1.79 2.06 NaN NaN NaN 21.19 ... 13.32 11.38 NaN 6.34 NaN NaN NaN NaN 12.06 NaN
1993-04-12 NaN 29.00 NaN NaN 1.77 1.94 NaN NaN NaN 21.31 ... 13.20 11.18 NaN 6.19 NaN NaN NaN NaN 11.44 NaN
1993-04-21 NaN 28.12 NaN NaN 1.83 1.88 NaN NaN NaN 21.88 ... 12.84 10.94 NaN 6.04 NaN NaN NaN NaN 10.56 NaN
1993-04-30 NaN 29.12 NaN NaN 1.95 1.88 NaN NaN NaN 21.62 ... 12.24 10.74 NaN 6.29 NaN NaN NaN NaN 10.12 NaN
1993-05-11 NaN 28.75 NaN NaN 2.10 1.94 NaN NaN NaN 22.25 ... 12.26 10.54 NaN 6.92 NaN NaN NaN NaN 10.00 NaN
1993-05-20 NaN 28.25 NaN NaN 2.04 2.06 NaN NaN NaN 21.56 ... 12.67 10.44 NaN 6.46 NaN NaN NaN NaN 10.50 NaN
1993-06-01 NaN 22.25 NaN NaN 1.59 1.81 NaN NaN NaN 21.00 ... 12.36 10.29 NaN 6.21 NaN NaN NaN NaN 9.62 NaN
1993-06-10 NaN 21.38 NaN NaN 1.42 1.84 NaN NaN NaN 20.19 ... 12.86 10.34 NaN 5.79 NaN NaN NaN NaN 10.00 NaN
1993-06-21 NaN 24.12 NaN NaN 1.41 1.81 NaN NaN NaN 19.62 ... 13.30 10.54 NaN 5.50 NaN NaN NaN NaN 10.00 NaN
1993-06-30 NaN 24.62 NaN NaN 1.36 1.75 NaN NaN NaN 20.75 ... 12.57 10.56 NaN 5.67 NaN NaN NaN NaN 9.62 NaN
1993-07-12 NaN 24.88 NaN NaN 0.94 1.94 NaN NaN NaN 20.94 ... 12.20 10.44 NaN 5.92 NaN NaN NaN NaN 10.28 NaN
1993-07-21 NaN 23.88 NaN NaN 0.99 1.88 NaN NaN NaN 20.12 ... 12.15 10.54 NaN 6.63 NaN NaN NaN NaN 10.22 NaN
1993-07-30 NaN 28.38 NaN NaN 1.02 1.88 NaN NaN NaN 21.06 ... 12.36 10.49 NaN 6.88 NaN NaN NaN NaN 10.47 NaN
1993-08-10 NaN 30.00 NaN NaN 0.98 2.00 NaN NaN NaN 21.75 ... 12.49 10.54 NaN 6.59 NaN NaN NaN NaN 10.59 NaN
1993-08-19 NaN 31.50 NaN NaN 0.93 2.00 NaN NaN NaN 22.00 ... 12.28 10.44 NaN 5.75 NaN NaN NaN NaN 10.56 NaN
1993-08-30 NaN 27.75 NaN NaN 0.93 1.91 NaN NaN NaN 20.44 ... 12.01 10.81 NaN 5.96 NaN NaN NaN NaN 10.25 NaN
1993-09-09 NaN 29.75 NaN NaN 0.89 1.84 NaN NaN NaN 19.75 ... 11.86 10.17 NaN 6.25 NaN NaN NaN NaN 10.41 NaN
1993-09-20 NaN 28.12 NaN NaN 0.85 1.75 NaN NaN NaN 19.06 ... 12.05 10.44 NaN 5.88 NaN NaN NaN NaN 10.75 NaN
1993-09-29 NaN 20.50 NaN NaN 0.81 1.78 NaN NaN NaN 19.56 ... 12.05 10.54 NaN 6.63 NaN NaN NaN NaN 10.66 NaN
1993-10-08 NaN 19.50 NaN NaN 0.99 1.50 NaN NaN NaN 20.19 ... 12.28 10.19 NaN 6.50 NaN NaN NaN NaN 10.56 NaN
1993-10-19 NaN 19.75 NaN NaN 1.11 1.69 NaN NaN NaN 21.00 ... 12.72 10.39 NaN 6.27 NaN NaN NaN NaN 10.59 NaN
1993-10-28 NaN 19.00 NaN NaN 1.10 1.94 NaN NaN NaN 21.00 ... 13.26 10.12 NaN 6.50 NaN NaN NaN NaN 9.75 NaN
1993-11-08 NaN 19.00 NaN NaN 1.20 1.75 NaN NaN NaN 21.75 ... 13.72 10.04 NaN 6.42 NaN NaN NaN NaN 9.69 NaN
1993-11-17 NaN 18.88 NaN NaN 1.13 1.75 NaN NaN NaN 21.94 ... 13.72 10.00 NaN 6.67 NaN NaN NaN NaN 9.19 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2014-02-18 40.15 3.71 51.03 127.30 75.38 52.55 125.03 50.91 83.36 119.34 ... 11.00 21.72 49.80 45.39 39.37 38.47 73.79 95.25 30.79 30.72
2014-02-27 39.65 3.81 51.80 127.55 75.85 53.65 126.78 51.66 84.04 121.10 ... 10.85 22.70 50.08 46.44 38.05 38.05 77.40 98.35 31.52 30.35
2014-03-10 40.49 3.98 51.60 129.04 75.89 53.10 124.52 53.11 82.63 121.04 ... 10.72 22.83 49.08 45.78 36.44 38.61 74.20 94.73 31.97 29.91
2014-03-19 40.80 3.88 52.84 130.13 76.69 52.08 124.65 50.98 78.81 117.30 ... 11.06 22.72 50.62 45.49 36.00 35.90 74.20 93.71 30.29 29.01
2014-03-28 40.28 3.99 53.96 133.82 74.78 52.36 119.16 49.10 78.29 117.12 ... 11.49 22.45 49.67 46.11 36.04 33.83 76.31 93.85 30.55 29.15
2014-04-08 40.99 3.69 52.85 133.64 74.99 54.99 120.84 48.55 78.90 117.94 ... 11.43 20.95 48.47 45.19 35.30 36.38 76.34 90.99 30.22 28.60
2014-04-17 41.27 4.03 51.19 136.54 84.62 52.65 119.83 51.37 79.68 117.13 ... 12.11 20.48 50.21 44.53 36.44 35.83 76.98 97.91 28.91 30.53
2014-04-29 40.30 3.93 51.31 139.65 84.00 53.78 122.46 52.15 78.83 119.04 ... 11.92 20.60 49.38 45.67 37.30 33.92 75.94 99.40 28.97 30.48
2014-05-08 38.54 4.09 51.98 140.86 86.37 54.76 122.80 54.11 79.56 118.23 ... 12.17 20.20 48.98 46.67 37.90 33.89 74.60 102.00 28.30 30.46
2014-05-19 38.91 4.03 52.26 140.36 90.77 56.90 122.31 54.03 80.42 120.22 ... 12.30 20.55 49.88 47.31 36.75 34.90 76.72 103.86 28.60 30.72
2014-05-29 39.00 4.07 55.31 139.92 93.70 57.05 127.71 53.84 84.26 122.95 ... 12.92 21.64 50.91 48.21 38.11 36.04 79.82 106.81 30.06 32.01
2014-06-09 39.64 4.40 55.13 140.75 92.18 56.26 129.72 54.57 82.69 130.72 ... 12.72 21.49 49.72 47.81 38.92 34.94 79.68 106.03 29.81 32.89
2014-06-18 40.61 4.11 53.50 142.35 91.98 59.13 133.48 56.79 81.35 128.61 ... 12.51 21.26 46.66 47.47 39.25 34.25 81.91 104.35 29.64 32.38
2014-06-27 39.75 4.23 54.74 143.51 95.39 61.23 133.58 55.01 80.64 129.28 ... 12.47 21.52 45.63 47.58 37.54 34.85 83.23 103.79 29.74 32.46
2014-07-09 40.04 3.83 53.48 147.32 94.43 57.43 130.47 54.91 79.74 129.98 ... 12.98 20.45 44.38 46.62 37.45 33.33 77.42 102.44 28.78 32.69
2014-07-18 39.98 3.79 50.52 149.64 98.38 54.74 122.96 53.43 80.57 135.86 ... 12.98 20.74 46.88 47.21 37.20 35.68 73.81 100.93 29.33 33.03
2014-07-29 37.37 4.10 48.93 149.07 94.48 53.06 121.85 52.43 77.50 131.16 ... 12.93 19.52 46.79 45.87 36.04 35.66 69.72 95.31 27.55 31.79
2014-08-07 38.93 4.21 51.34 153.28 99.16 54.20 133.33 54.31 79.81 134.14 ... 13.49 20.02 47.21 47.58 37.40 37.38 72.14 99.31 28.47 33.93
2014-08-18 39.24 4.16 51.02 153.20 102.13 53.96 137.44 55.70 80.98 133.00 ... 13.71 20.29 47.45 48.06 37.43 38.18 72.32 99.78 29.34 35.34
2014-08-27 39.52 4.13 50.07 156.64 98.36 54.00 136.80 55.57 81.73 131.99 ... 13.39 20.35 48.03 47.63 37.67 41.81 72.34 100.92 28.97 36.28
2014-09-08 38.70 3.82 49.63 145.35 101.58 51.88 134.37 59.62 79.68 130.92 ... 13.90 20.74 49.79 46.57 37.40 42.59 72.42 103.97 29.53 36.65
2014-09-17 37.98 3.60 50.02 141.03 100.75 50.45 130.10 59.19 79.29 134.77 ... 13.21 20.29 51.90 45.70 36.60 40.66 72.34 101.66 29.23 36.63
2014-09-26 39.29 3.28 46.21 142.23 98.75 51.36 133.81 56.65 78.81 125.85 ... 12.76 19.66 50.07 45.28 34.15 40.93 69.73 101.40 28.01 36.71
2014-10-07 39.58 2.64 44.60 148.53 96.26 48.22 132.43 52.90 76.15 127.71 ... 12.43 17.74 48.36 44.79 34.04 38.12 67.62 95.95 26.20 35.07
2014-10-16 41.23 2.66 46.62 152.46 105.11 51.99 144.46 60.49 79.02 128.55 ... 12.82 19.15 51.42 46.73 35.02 44.70 69.64 104.98 27.65 36.57
2014-10-27 43.40 2.80 50.03 158.11 108.86 56.80 149.37 62.54 82.48 133.97 ... 13.16 20.18 52.69 51.62 36.39 47.46 72.44 110.99 29.50 39.27
2014-11-05 42.21 2.61 50.62 156.75 114.18 57.38 144.81 64.00 84.64 134.71 ... 13.43 20.16 55.90 52.17 37.62 51.75 74.78 110.12 29.14 43.14
2014-11-14 42.39 2.83 51.71 157.13 117.60 58.80 145.45 68.06 85.32 145.35 ... 13.76 20.22 54.99 54.43 38.39 51.72 76.29 111.08 29.02 43.91
2014-11-25 43.29 2.66 57.34 160.62 115.00 59.79 154.18 69.71 86.19 146.73 ... 14.32 20.63 58.52 56.02 38.59 50.99 78.30 114.96 28.60 43.98
2014-12-05 42.67 2.50 54.78 161.09 106.74 55.92 157.01 65.25 82.87 136.82 ... 13.35 19.31 55.91 53.00 36.27 48.85 69.02 108.79 26.74 40.85

787 rows × 482 columns


In [26]:
reshape_by_symbol(y_val_pred)


Out[26]:
ticker A AAL AAP AAPL ABBV ABC ABT ACN ADBE ADI ... XLNX XOM XRAY XRX XYL YHOO YUM ZBH ZION ZTS
date
2014-12-16 44.962857 2.592857 56.491429 165.978571 111.152857 58.154286 159.715714 66.425714 88.905714 144.894286 ... 13.825714 20.055714 57.662857 53.691429 38.012857 50.368571 71.494286 113.312857 28.001429 42.694286

1 rows × 479 columns


In [27]:
reshape_by_symbol(y_val_true)


Out[27]:
ticker A AAL AAP AAPL ABBV ABC ABT ACN ADBE ADI ... XLNX XOM XRAY XRX XYL YHOO YUM ZBH ZION ZTS
date
2014-12-16 47.5 2.65 56.28 167.44 113.99 59.97 160.0 66.98 91.26 149.12 ... 14.14 20.59 58.4 53.84 38.95 50.86 73.14 114.17 28.56 44.2

1 rows × 479 columns


In [28]:
r2_score(y_val_true, y_val_pred) # Of course this is not what we want to calculate for evaluation or validation...


Out[28]:
0.99867098911463936

In [29]:
pp.fill_missing(reshape_by_symbol(y_train_pred)).head()


Out[29]:
ticker A AAL AAP AAPL ABBV ABC ABT ACN ADBE ADI ... XLNX XOM XRAY XRX XYL YHOO YUM ZBH ZION ZTS
date
1993-01-29 42.81 19.730000 39.49 20.924 2.111429 2.595714 14.08 34.37 15.01 22.822857 ... 14.224286 10.175714 6.907143 7.888571 24.916667 1.306667 7.474 28.781429 11.374286 31.023333
1993-02-09 42.81 20.447143 39.49 20.924 1.954286 2.451429 14.08 34.37 15.01 22.285714 ... 14.102857 10.068571 6.907143 7.261429 24.916667 1.306667 7.474 28.781429 11.155714 31.023333
1993-02-19 42.81 21.767143 39.49 20.924 1.934286 2.402857 14.08 34.37 15.01 21.671429 ... 13.700000 10.248571 6.907143 6.497143 24.916667 1.306667 7.474 28.781429 11.205714 31.023333
1993-03-02 42.81 23.177143 39.49 20.924 1.984286 2.418571 14.08 34.37 15.01 22.345714 ... 13.998571 10.571429 6.907143 6.847143 24.916667 1.306667 7.474 28.781429 11.282857 31.023333
1993-03-11 42.81 23.410000 39.49 20.924 1.991429 2.430000 14.08 34.37 15.01 22.285714 ... 13.835714 10.688571 6.907143 7.540000 24.916667 1.306667 7.474 28.781429 11.615714 31.023333

5 rows × 482 columns


In [30]:
filled_df = pp.fill_missing(reshape_by_symbol(y_train_pred.dropna(how='all', axis=0)))
filled_df.isnull().sum().sum()


Out[30]:
0

In [31]:
filled_df.head()


Out[31]:
ticker A AAL AAP AAPL ABBV ABC ABT ACN ADBE ADI ... XLNX XOM XRAY XRX XYL YHOO YUM ZBH ZION ZTS
date
1993-01-29 42.81 19.730000 39.49 20.924 2.111429 2.595714 14.08 34.37 15.01 22.822857 ... 14.224286 10.175714 6.907143 7.888571 24.916667 1.306667 7.474 28.781429 11.374286 31.023333
1993-02-09 42.81 20.447143 39.49 20.924 1.954286 2.451429 14.08 34.37 15.01 22.285714 ... 14.102857 10.068571 6.907143 7.261429 24.916667 1.306667 7.474 28.781429 11.155714 31.023333
1993-02-19 42.81 21.767143 39.49 20.924 1.934286 2.402857 14.08 34.37 15.01 21.671429 ... 13.700000 10.248571 6.907143 6.497143 24.916667 1.306667 7.474 28.781429 11.205714 31.023333
1993-03-02 42.81 23.177143 39.49 20.924 1.984286 2.418571 14.08 34.37 15.01 22.345714 ... 13.998571 10.571429 6.907143 6.847143 24.916667 1.306667 7.474 28.781429 11.282857 31.023333
1993-03-11 42.81 23.410000 39.49 20.924 1.991429 2.430000 14.08 34.37 15.01 22.285714 ... 13.835714 10.688571 6.907143 7.540000 24.916667 1.306667 7.474 28.781429 11.615714 31.023333

5 rows × 481 columns

That looks like the best way to represent the data if we want to calculate the $R^2$ distance on a per-symbol basis. I could add it to the single val function.


In [45]:
def run_single_val(x, y, ahead_days, estimator):
    multiindex = x.index.nlevels > 1
    
    x_y = pd.concat([x, y], axis=1)
    x_y_sorted = x_y.sort_index()
    if multiindex:
        x_y_train = x_y_sorted.loc[:fe.add_market_days(x_y_sorted.index.levels[0][-1], -ahead_days)]
        x_y_val = x_y_sorted.loc[x_y_sorted.index.levels[0][-1]:]
    else:
        x_y_train = x_y_sorted.loc[:fe.add_market_days(x_y_sorted.index[-1], -ahead_days)]
        x_y_val = x_y_sorted.loc[x_y_sorted.index[-1]:]
    
    x_train = x_y_train.iloc[:,:-1]
    x_val = x_y_val.iloc[:,:-1]
    y_train_true = x_y_train.iloc[:,-1]
    y_val_true = x_y_val.iloc[:,-1]
    
    estimator.fit(x_train)
    y_train_pred = estimator.predict(x_train)
    y_val_pred = estimator.predict(x_val)
    
    y_train_true_df = pd.DataFrame(y_train_true)
    y_train_pred_df = pd.DataFrame(y_train_pred)
    y_val_true_df = pd.DataFrame(y_val_true)
    y_val_pred_df = pd.DataFrame(y_val_pred)
    
    return reshape_by_symbol(y_train_true), \
           reshape_by_symbol(y_train_pred), \
           reshape_by_symbol(y_val_true), \
           reshape_by_symbol(y_val_pred)

Now, let's implement the rolling validation.


In [33]:
train_eval_days = -1 # In market days
base_days = 7 # In market days
step_days = 7 # market days
ahead_days = 1 # market days
today = data_df.index[-1] # Real date

train_days = 252 # market days per training period
step_eval_days =  30 # market days between training periods beginings

filled_data_df = pp.fill_missing(data_df)

tic = time()
x, y = fe.generate_train_intervals(filled_data_df, 
                                   train_eval_days, 
                                   base_days, 
                                   step_days, 
                                   ahead_days, 
                                   today, 
                                   fe.feature_close_one_to_one)
toc = time()
print('Elapsed time: %i seconds.' % (toc-tic))


Elapsed time: 158 seconds.

In [34]:
x_y_sorted = pd.concat([x, y], axis=1).sort_index()
x_y_sorted


Out[34]:
0 1 2 3 4 5 6 target
1993-01-29 A 42.81 42.81 42.81 42.81 42.81 42.81 42.81 42.81
AAL 18.75 19.12 20.25 20.50 20.12 19.62 19.75 19.75
AAP 39.90 39.90 39.90 39.90 39.90 39.90 39.90 39.90
AAPL 20.88 20.88 20.88 20.88 20.88 20.88 20.88 20.88
ABBV 2.12 2.19 2.15 2.14 2.12 2.04 2.02 2.03
ABC 2.62 2.62 2.56 2.62 2.69 2.62 2.44 2.44
ABT 13.91 13.91 13.91 13.91 13.91 13.91 13.91 13.91
ACN 35.00 35.00 35.00 35.00 35.00 35.00 35.00 35.00
ADBE 15.17 15.17 15.17 15.17 15.17 15.17 15.17 15.17
ADI 21.94 22.38 22.31 22.69 23.19 24.00 23.25 22.75
ADM 20.60 20.60 20.60 20.60 20.60 20.60 20.60 20.60
ADP 15.25 15.25 15.06 15.56 15.69 15.81 15.75 15.69
ADS 7.00 7.06 7.25 7.25 8.00 8.12 8.19 8.62
ADSK 13.44 13.50 13.66 13.72 14.00 13.75 13.59 13.44
AEE 1.09 1.13 1.19 1.20 1.21 1.17 1.17 1.14
AEP 2.41 2.41 2.41 2.41 2.41 2.39 2.37 2.43
AES 11.24 11.03 11.35 11.24 11.24 11.24 11.08 11.03
AET 11.53 11.79 11.87 12.21 12.35 12.46 12.49 12.35
AFL 5.94 5.97 5.89 5.94 5.80 5.45 5.66 5.50
AGN 24.70 24.70 24.70 24.70 24.70 24.70 24.70 24.70
AIG 7.75 7.50 7.38 7.16 7.47 6.73 6.22 6.55
AIV 0.94 0.94 0.94 0.94 0.94 0.94 0.94 0.94
AIZ 17.56 17.12 17.19 17.88 18.12 18.06 17.62 17.94
AJG 10.33 10.33 10.33 10.33 10.33 10.33 10.33 10.33
AKAM 4.41 4.53 4.53 4.49 4.49 4.37 4.45 4.29
ALB 4.49 4.52 4.57 4.65 4.84 4.80 4.82 4.79
ALK 6.42 6.64 6.62 6.50 6.73 6.66 6.67 6.67
ALL 7.25 7.25 7.25 7.25 7.25 7.25 7.25 7.25
ALLE 18.50 18.50 18.50 18.50 18.50 18.50 18.50 18.50
ALXN 250.30 252.42 257.96 265.88 268.26 264.83 261.92 265.62
... ... ... ... ... ... ... ... ... ...
2014-12-16 VRTX 55.54 56.26 57.30 58.29 58.49 58.46 58.28 58.58
VTR 47.03 47.79 47.07 48.88 48.84 49.36 49.79 50.29
VZ 62.77 63.75 64.85 64.70 65.15 64.92 64.88 64.76
WAT 47.88 48.56 48.96 48.80 49.07 49.07 48.67 48.40
WBA 104.49 107.95 112.36 113.88 114.28 113.30 113.11 113.09
WDC 41.84 43.63 44.80 46.00 45.64 45.73 45.35 45.63
WEC 174.43 188.93 190.52 189.73 190.29 191.13 191.15 192.45
WFC 114.99 118.22 118.38 119.20 119.28 120.05 119.95 119.89
WFM 81.48 82.90 84.31 84.76 84.71 85.16 85.68 86.32
WHR 34.74 35.01 35.30 36.19 36.49 36.58 36.62 36.63
WLTW 100.94 106.82 108.67 113.62 111.77 113.62 111.95 110.88
WM 52.77 53.79 55.21 54.45 54.97 55.47 55.34 55.28
WMB 140.52 138.97 145.10 150.11 147.48 151.69 150.90 150.37
WMT 76.20 76.20 76.20 76.20 76.20 76.20 76.20 76.20
WU 82.96 84.23 85.94 85.16 86.38 86.66 86.43 86.91
WY 16.95 17.47 17.78 18.00 18.05 18.12 18.07 18.09
WYN 34.22 34.75 35.35 35.40 35.65 35.43 36.23 36.58
WYNN 43.70 43.42 43.71 43.00 43.87 43.91 44.08 43.92
XEC 34.97 35.75 36.43 36.49 37.00 37.20 36.97 37.12
XEL 86.41 89.02 91.16 93.64 93.33 94.59 93.78 93.21
XLNX 13.35 13.46 13.89 13.96 13.97 14.07 14.08 14.14
XOM 19.31 19.60 20.09 20.07 20.15 20.58 20.59 20.59
XRAY 55.91 57.08 57.72 57.37 58.63 58.51 58.42 58.40
XRX 53.00 53.19 54.10 53.70 54.09 54.00 53.76 53.84
XYL 36.27 36.91 38.10 38.59 38.74 38.72 38.76 38.95
YHOO 48.85 50.12 50.91 50.88 51.15 50.02 50.65 50.86
YUM 69.02 70.60 71.74 71.17 72.46 72.68 72.79 73.14
ZBH 108.79 111.92 114.94 114.91 115.05 113.47 114.11 114.17
ZION 26.74 27.45 28.20 28.39 28.18 28.53 28.52 28.56
ZTS 40.85 41.13 43.15 43.51 43.41 42.97 43.84 44.20

380604 rows × 8 columns


In [37]:
start_date = x_y_sorted.index.levels[0][0]
start_date


Out[37]:
Timestamp('1993-01-29 00:00:00')

In [ ]:


In [39]:
end_date = fe.add_market_days(start_date, 252)
end_date


Out[39]:
Timestamp('1994-01-27 00:00:00')

In [41]:
end_date = fe.add_index_days(start_date, 252, x_y_sorted)
end_date


Out[41]:
Timestamp('2000-01-25 00:00:00')

So, I could use a training period based in an amount of market days, or in an amount of sample base periods. The first approach would be taking into consideration the temporal correlation of the data, the second would consider that the amount of samples should be large enough. Not to lose sight of the real problem at hand, I will use the market days approach, and then check that the amount of samples is big enough.


In [43]:
end_date = fe.add_market_days(start_date, 252)
x_i = x_y_sorted.loc[start_date:end_date].iloc[:,:-1]
y_i = x_y_sorted.loc[start_date:end_date].iloc[:,-1]
print(x_i.shape)
print(x_i.head())
print(y_i.shape)
print(y_i.head())


(17871, 7)
                     0      1      2      3      4      5      6
1993-01-29 A     42.81  42.81  42.81  42.81  42.81  42.81  42.81
           AAL   18.75  19.12  20.25  20.50  20.12  19.62  19.75
           AAP   39.90  39.90  39.90  39.90  39.90  39.90  39.90
           AAPL  20.88  20.88  20.88  20.88  20.88  20.88  20.88
           ABBV   2.12   2.19   2.15   2.14   2.12   2.04   2.02
(17871,)
1993-01-29  A       42.81
            AAL     19.75
            AAP     39.90
            AAPL    20.88
            ABBV     2.03
Name: target, dtype: float64

In [44]:
predictor = dmp.DummyPredictor()

y_train_true, y_train_pred, y_val_true, y_val_pred = run_single_val(x_i, y_i, ahead_days, predictor)
print(y_train_true.shape)
print(y_train_pred.shape)
print(y_val_true.shape)
print(y_val_pred.shape)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-44-6b026be090fd> in <module>()
      1 predictor = dmp.DummyPredictor()
      2 
----> 3 y_train_true, y_train_pred, y_val_true, y_val_pred = run_single_val(x_i, y_i, ahead_days, predictor)
      4 print(y_train_true.shape)
      5 print(y_train_pred.shape)

<ipython-input-32-42bfc81c868d> in run_single_val(x, y, ahead_days, estimator)
     19     y_val_pred_df = pd.DataFrame(y_val_pred)
     20 
---> 21     return reshape_by_symbol(y_train_true),            reshape_by_symbol(y_train_pred),            reshape_by_symbol(y_val_true),            reshape_by_symbol(y_val_pred)

<ipython-input-15-693722713596> in reshape_by_symbol(y)
      1 def reshape_by_symbol(y):
----> 2     grouped_df = y.reset_index()         .groupby('level_0')         .apply(lambda x: x.reset_index(drop=True))         .drop('level_0', axis=1)
      3     grouped_df.index = grouped_df.index.droplevel(level=1)
      4     grouped_df.rename(columns={'level_1':'ticker'}, inplace=True)
      5     reshaped_df = grouped_df.set_index('ticker', append=True).unstack()

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/generic.py in drop(self, labels, axis, level, inplace, errors)
   2048                 new_axis = axis.drop(labels, level=level, errors=errors)
   2049             else:
-> 2050                 new_axis = axis.drop(labels, errors=errors)
   2051             dropped = self.reindex(**{axis_name: new_axis})
   2052             try:

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/base.py in drop(self, labels, errors)
   3573             if errors != 'ignore':
   3574                 raise ValueError('labels %s not contained in axis' %
-> 3575                                  labels[mask])
   3576             indexer = indexer[~mask]
   3577         return self.delete(indexer)

ValueError: labels ['level_0'] not contained in axis

In [ ]:
y_train_pred.head()

In [ ]:
y_train_pred.dropna(axis=1, how='all').shape

In [ ]:
scores = r2_score(pp.fill_missing(y_train_pred), pp.fill_missing(y_train_true), multioutput='raw_values')
print('R^2 score = %f +/- %f' % (np.mean(scores), 2*np.std(scores)))

In [ ]:
scores = r2_score(y_train_pred, y_train_true, multioutput='raw_values')
print('R^2 score = %f +/- %f' % (np.mean(scores), np.std(scores)))

In [ ]:
len(scores)

In [ ]:
y_val_true_df = pd.DataFrame()

In [ ]:
y_val_true

In [ ]:
y_val_true_df.append(y_val_true)

A lot of attention should be paid to the effect of filling the missing data. It may change the whole results.


In [ ]:
x.index.min()

In [ ]:
x.index.max()

In [ ]:
x.index.max() - x.index.min()

In [ ]:
(x.index.max() - fe.add_market_days(x.index.min(), train_days)).days // step_days

That last number is an approximation of the number of train/evaluation sets that are being considered.


In [ ]:
def roll_evaluate(x, y, train_days, step_eval_days, ahead_days, verbose=False):
    """Warning: The final date of the period should be no larger than the final date of the SPY_DF"""
    
    # calculate start and end date
    # sort by date
    x_y_sorted = pd.concat([x, y], axis=1).sort_index()
    start_date = x_y_sorted.index[0]
    end_date = fe.add_market_days(start_date, train_days)
    final_date = x_y_sorted.index[-1]
    
    # loop: run_single_val(x,y, ahead_days, estimator)
    r2_train_means = []
    r2_train_stds = []
    y_val_true_df = pd.DataFrame()
    y_val_pred_df = pd.DataFrame()
    num_training_sets = (252/365) * (x.index.max() - fe.add_market_days(x.index.min(), train_days)).days // step_eval_days
    set_index = 0
    if verbose:
        print('Evaluating approximately %i training/evaluation pairs' % num_training_sets)
    
    while end_date < final_date:
        x = x_y_sorted.loc[start_date:end_date].iloc[:,:-1]
        y = x_y_sorted.loc[start_date:end_date].iloc[:,-1]
        y_train_true, y_train_pred, y_val_true, y_val_pred = run_single_val(x, y, ahead_days, predictor)
        # Calculate R^2 for training and append
        scores = r2_score(y_train_true, y_train_pred, multioutput='raw_values')
        r2_train_means.append(np.mean(scores))
        r2_train_stds.append(np.std(scores))
        # Append validation results
        y_val_true_df = y_val_true_df.append(y_val_true)
        y_val_pred_df = y_val_pred_df.append(y_val_pred)
        
        # Update the dates
        start_date = fe.add_market_days(start_date, step_eval_days)
        end_date = fe.add_market_days(end_date, step_eval_days)
        
        set_index += 1
        if verbose:
            sys.stdout.write('\rApproximately %2.1f percent complete.    ' % (100.0 * set_index / num_training_sets))
            sys.stdout.flush()
    
    return r2_train_means, r2_train_stds, y_val_true_df, y_val_pred_df

Let's test the whole process


In [ ]:
train_eval_days = -1 # In market days
base_days = 14 # In market days
step_days = 30 # market days
ahead_days = 1 # market days
today = data_df.index[-1] # Real date

filled_data_df = pp.fill_missing(data_df)

tic = time()
x, y = fe.generate_train_intervals(filled_data_df, 
                                   train_eval_days, 
                                   base_days, 
                                   step_days, 
                                   ahead_days, 
                                   today, 
                                   fe.feature_close_one_to_one)
toc = time()
print('Elapsed time: %i seconds.' % (toc-tic))

In [ ]:
train_days = 252 # market days per training period
step_eval_days =  10 # market days between training periods beginings

tic = time()
r2_train_means, r2_train_stds, y_val_true_df, y_val_pred_df = roll_evaluate(x, y, train_days, step_eval_days, ahead_days, verbose=True)
toc = time()
print('Elapsed time: %i seconds.' % (toc-tic))

In [ ]:
y_val_true_df.head()

In [ ]:
pd.DataFrame(r2_train_means).describe()

In [ ]:
scores = r2_score(y_val_true_df.T, y_val_pred_df.T, multioutput='raw_values')
print('R^2 score = %f +/- %f' % (np.mean(scores), np.std(scores)))

In [ ]:
pd.DataFrame(scores).describe()

In [ ]:
plt.plot(y_val_true_df.index, r2_train_means, label='r2_train_means')
plt.plot(y_val_true_df.index, scores, label='r2 validation scores')
plt.legend(loc='lower left')

In [ ]:
scores_val = r2_score(y_val_true_df, y_val_pred_df, multioutput='raw_values')
print('R^2 score = %f +/- %f' % (np.mean(scores_val), np.std(scores_val)))

In [ ]:
plt.plot(scores_val, label='r2 validation scores')

In [ ]:
sorted_means = x.sort_index().mean(axis=1)

In [ ]:
sorted_means.head()

In [ ]:
sorted_means.plot()

In [ ]:
sub_period = sorted_means['2009-03-01':]
plt.scatter(sub_period.index, sub_period)

It seems like the weird point, in which the model is predicting terribly may be the 2008 financial crisis. And the big unpredictability is limited to one symbol. I should implement a way to trace the symbols...

What about the mean absolute error?


In [ ]:
from sklearn.metrics import mean_absolute_error

In [ ]:
scores = mean_absolute_error(y_val_true_df.T, y_val_pred_df.T, multioutput='raw_values')
print('MAE score = %f +/- %f' % (np.mean(scores), np.std(scores)))

In [ ]:
plt.plot(y_val_true_df.index, scores, label='MAE validation scores')
plt.legend(loc='lower left')

In [ ]:
pd.DataFrame(scores).describe()

In [ ]:
scores = mean_absolute_error(y_val_true_df, y_val_pred_df, multioutput='raw_values')
print('MAE score = %f +/- %f' % (np.mean(scores), np.std(scores)))

In [ ]:
plt.plot(scores, label='MAE validation scores')
plt.legend(loc='lower left')

In [ ]:
pd.DataFrame(scores).describe()

In [ ]:


In [ ]: