In [3]:
# System libraries
import sys, os, gc
import datetime

# Math libraries
import math, random
import pandas as pd, numpy as np
import scipy
from scipy import stats
from datetime import timedelta
from datetime import datetime
import itertools

# Data storage libraries
import pickle, sqlite3, simpledbf, boto3

# Custom financial data libraries
import utils.findata_utils as fd
import utils.ml_utils as ml_utils

# Plotting libraries
import matplotlib.pyplot as plt
from matplotlib import rcParams

import warnings
#if not sys.warnoptions:
#    warnings.simplefilter("ignore")

from importlib import reload
fd = reload(fd)

import sklearn as sk
import tensorflow as tf
import xgboost as xgb
import keras

from imblearn.over_sampling import RandomOverSampler

from sklearn import svm
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import ElasticNet, LogisticRegression
from sklearn.metrics import explained_variance_score, mean_squared_error, confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.externals import joblib 

from keras.models import Sequential
from keras.optimizers import SGD
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasRegressor

from yellowbrick.regressor import ResidualsPlot, PredictionError

# Connect to databases
db = 'C:\\Datasets\\thesis.db'
overleaf = ['C:','Users','bryce','OneDrive','Documents','Overleaf','Thesis','assets','exports']
conn = sqlite3.connect(db)
c = conn.cursor()

hdf_path = 'C:\\Datasets\\thesis.h5'
hdf = pd.HDFStore(hdf_path)

Obtain expected returns; compute abnormal returns


In [4]:
df = pd.read_sql('''
    SELECT *--ticker, date, rebal_date, action, [index], r_daily,
        --er_daily, ar_daily
    FROM daily_abnormal_returns''', conn)

bounds = [-1,10]
bounds_corr = [-5,5]

for corr in ['riskfree_return','market_excess_return','sector_excess_return','industry_excess_return',
             'riskfree_return_corr','market_excess_return_corr','sector_excess_return_corr','industry_excess_return_corr']:
    if '_corr' in corr:
        df[corr] = df[corr].clip(upper=max(bounds_corr)).clip(lower=min(bounds_corr))
    else:
        df[corr] = df[corr].clip(upper=max(bounds)).clip(lower=min(bounds))

df['riskfree_return_corr'] = df['riskfree_return_corr'].clip(lower=-1).clip(upper=1)
df['er_daily'] = df['riskfree_return']*df['riskfree_return_corr'] + df['market_excess_return']*df['market_excess_return_corr'] + df['sector_excess_return']*df['sector_excess_return_corr'] + df['industry_excess_return']*df['industry_excess_return_corr']
df['ar_daily'] = df['r_daily']-df['er_daily']

display(df.iloc[:5,:])

# This is now done in the sector_coherence.py script!


ticker date rebal_date action index r_daily riskfree_return market_excess_return sector_excess_return industry_excess_return pre_event riskfree_return_corr market_excess_return_corr sector_excess_return_corr industry_excess_return_corr residual er_daily ar_daily
0 SMU.UN 2013-11-11 2019-03-18 00:00:00 Add S&P/TSX Composite Index 0.014363 0.0 0.0012 0.044602 -0.042225 1 0.478216 0.770259 0.774332 0.780162 0.000054 0.002518 0.011845
1 SMU.UN 2013-11-12 2019-03-18 00:00:00 Add S&P/TSX Composite Index -0.017890 0.0 -0.0022 0.066005 -0.070088 1 0.478216 0.770259 0.774332 0.780162 0.000054 -0.005265 -0.012625
2 SMU.UN 2013-11-13 2019-03-18 00:00:00 Add S&P/TSX Composite Index 0.003621 0.0 0.0088 -0.006116 0.006031 1 0.478216 0.770259 0.774332 0.780162 0.000054 0.006747 -0.003126
3 SMU.UN 2013-11-14 2019-03-18 00:00:00 Add S&P/TSX Composite Index -0.026320 0.0 0.0043 0.010275 -0.036000 1 0.478216 0.770259 0.774332 0.780162 0.000054 -0.016817 -0.009503
4 SMU.UN 2013-11-15 2019-03-18 00:00:00 Add S&P/TSX Composite Index 0.008816 0.0 0.0044 0.032696 -0.039214 1 0.478216 0.770259 0.774332 0.780162 0.000054 -0.001887 0.010703

Compute cumulative abnormal returns


In [16]:
# Compute number of days until the rebalance day
if 'd' not in df.columns:
    for field in ['rebal_date', 'date']:
        df[field] = df[field].apply(lambda date: datetime.strptime(date[:10], '%Y-%m-%d') if type(date)==str else date)
    df['d'] = (df['date']-df['rebal_date']).apply(lambda d: d.days)
    
# Deal with infinities
df = df.replace(-np.inf,min(bounds))
df = df.replace(np.inf,max(bounds))
for column in ['r_daily','er_daily','ar_daily']:
    df[column] = df[column].clip(lower=min(bounds))
    df[column] = df[column].clip(upper=max(bounds))

# Compute the abnormal returns, per security
abnormal_returns = df.pivot_table(index=['d'], columns=['ticker'], values=['r_daily','ar_daily','er_daily'])

# Compute cumulative returns, per security
cars = (abnormal_returns+1).cumprod()-1

# Now, index all culm returns to day 0
cars_indexed = (cars.subtract(cars.loc[cars.index==0].iloc[0,:],axis=1))
ars_indexed = (abnormal_returns.subtract(abnormal_returns.loc[cars.index==0].iloc[0,:],axis=1))
#cars_indexed.clip

# Fill NAs
cars_indexed.loc[cars_indexed.index==0] = cars_indexed.loc[cars_indexed.index==0].fillna(0)
cars_indexed.loc[cars_indexed.index>=0] = cars_indexed.loc[cars_indexed.index>=0].fillna(method='ffill')
cars_indexed.loc[cars_indexed.index<=0] = cars_indexed.loc[cars_indexed.index<=0].fillna(method='bfill')

ars_indexed.loc[ars_indexed.index==0] = ars_indexed.loc[ars_indexed.index==0].fillna(0)
ars_indexed.loc[ars_indexed.index>=0] = ars_indexed.loc[ars_indexed.index>=0].fillna(method='ffill')
ars_indexed.loc[ars_indexed.index<=0] = ars_indexed.loc[ars_indexed.index<=0].fillna(method='bfill')

display(cars_indexed.iloc[:5,:])
print('AR')
display(ars_indexed.iloc[:5,:])


ar_daily ... r_daily
ticker AAV ABX AC ACB ACO.X ACQ AD AEM AEZS AFN ... WJX WN WPK WPM WPRT WSP WTE X Y YRI
d
-3360 0.0 0.0 -20.674735 0.998254 0.0 1.01318 1.006076 0.0 0.0 0.0 ... 0.454992 0.0 0.989011 0.0 0.406888 -0.005701 0.0 0.37336 -2.005691 0.0
-3359 0.0 0.0 -20.674735 0.998254 0.0 1.01318 1.006076 0.0 0.0 0.0 ... 0.454992 0.0 0.989011 0.0 0.406888 -0.005701 0.0 0.37336 -2.005691 0.0
-3358 0.0 0.0 -20.674735 0.998254 0.0 1.01318 1.006076 0.0 0.0 0.0 ... 0.454992 0.0 0.989011 0.0 0.406888 -0.005701 0.0 0.37336 -2.005691 0.0
-3357 0.0 0.0 -20.674735 0.998254 0.0 1.01318 1.006076 0.0 0.0 0.0 ... 0.454992 0.0 0.989011 0.0 0.406888 -0.005701 0.0 0.37336 -2.005691 0.0
-3356 0.0 0.0 -20.674735 0.998254 0.0 1.01318 1.006076 0.0 0.0 0.0 ... 0.454992 0.0 0.989011 0.0 0.406888 -0.005701 0.0 0.37336 -2.005691 0.0

5 rows × 1008 columns

AR
ar_daily ... r_daily
ticker AAV ABX AC ACB ACO.X ACQ AD AEM AEZS AFN ... WJX WN WPK WPM WPRT WSP WTE X Y YRI
d
-3360 0.0 0.0 -0.046101 0.00592 0.0 -0.027773 0.004714 0.0 0.0 0.0 ... 0.023833 0.0 -0.005329 0.0 0.097702 -0.024907 0.0 -0.040313 -0.085086 0.0
-3359 0.0 0.0 -0.046101 0.00592 0.0 -0.027773 0.004714 0.0 0.0 0.0 ... 0.023833 0.0 -0.005329 0.0 0.097702 -0.024907 0.0 -0.040313 -0.085086 0.0
-3358 0.0 0.0 -0.046101 0.00592 0.0 -0.027773 0.004714 0.0 0.0 0.0 ... 0.023833 0.0 -0.005329 0.0 0.097702 -0.024907 0.0 -0.040313 -0.085086 0.0
-3357 0.0 0.0 -0.046101 0.00592 0.0 -0.027773 0.004714 0.0 0.0 0.0 ... 0.023833 0.0 -0.005329 0.0 0.097702 -0.024907 0.0 -0.040313 -0.085086 0.0
-3356 0.0 0.0 -0.046101 0.00592 0.0 -0.027773 0.004714 0.0 0.0 0.0 ... 0.023833 0.0 -0.005329 0.0 0.097702 -0.024907 0.0 -0.040313 -0.085086 0.0

5 rows × 1008 columns

Compute summary statistics for CARs


In [17]:
actions = df[['ticker','action','index']].drop_duplicates()
del_cars = cars_indexed.loc[:,cars_indexed.columns.get_level_values(1).isin(actions.loc[actions['action']=='Delete']['ticker'].values)]
add_cars = cars_indexed.loc[:,cars_indexed.columns.get_level_values(1).isin(actions.loc[actions['action']=='Add']['ticker'].values)]

del_ars = ars_indexed.loc[:,ars_indexed.columns.get_level_values(1).isin(actions.loc[actions['action']=='Delete']['ticker'].values)]
add_ars = ars_indexed.loc[:,ars_indexed.columns.get_level_values(1).isin(actions.loc[actions['action']=='Add']['ticker'].values)]

In [18]:
avg_rw_end = -df['rebal_date'].apply(lambda day: day.day).mean()
avg_rw_start = avg_rw_end - 13

AR


In [27]:
plt.style.use('classic')

font = {'family' : 'Arial',
        'weight' : 'normal',
        'size'   : 12}

plt.rc('font', **font)

fig = plt.figure(figsize=(10,5))
fig.patch.set_facecolor('white')

ax = fig.add_subplot(1, 1, 1)

ax.spines['left'].set_visible(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.grid(True,axis='both',linestyle=':')

stds = 0.25

plt.plot(ars_indexed.index, add_ars['ar_daily'].mean(axis=1), color='blue', label='Adds')
ax.fill_between(ars_indexed.index,
                add_ars['ar_daily'].mean(axis=1)-add_ars['ar_daily'].std(axis=1)*stds,
                add_ars['ar_daily'].mean(axis=1)+add_ars['ar_daily'].std(axis=1)*stds,
                color='blue', alpha=0.1)

plt.plot(ars_indexed.index, del_ars['ar_daily'].mean(axis=1), color='red', label='Deletes')
ax.fill_between(cars_indexed.index,
                del_ars['ar_daily'].mean(axis=1)-add_ars['ar_daily'].std(axis=1)*stds,
                del_ars['ar_daily'].mean(axis=1)+add_ars['ar_daily'].std(axis=1)*stds,
                color='red', alpha=0.1)

ax.fill_between([avg_rw_start,avg_rw_end], [1,1], color='grey', alpha=0.2)
ax.fill_between([avg_rw_start,avg_rw_end], [-1,-1], color='grey', alpha=0.2)

plt.legend(frameon=False, loc='best')
plt.title('Abnormal Returns for Adds/Deletes to the %s' % ('S&P/TSX Composite Index'))
plt.xlabel('Days since (to) rebalance')
plt.ylabel('Abnormal return (daily)')
xlim = [-90,90]
plt.xlim(xlim)
ylim = [-0.04,0.04]
plt.ylim(ylim)
plt.xticks(np.linspace(-90,90,7))

plt.text(avg_rw_start + 2, min(ylim) + 0.001, 'Ranking Period', rotation=90,
         horizontalalignment='left',verticalalignment='bottom', fontsize=12)

plt.show()

fig.savefig('\\'.join(overleaf+['ar.png']))


CAR


In [34]:
plt.style.use('classic')

font = {'family' : 'Arial',
        'weight' : 'normal',
        'size'   : 12}

plt.rc('font', **font)

fig = plt.figure(figsize=(10,5))
fig.patch.set_facecolor('white')

ax = fig.add_subplot(1, 1, 1)

ax.spines['left'].set_visible(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.grid(True,axis='both',linestyle=':')

stds = 0.05

plt.plot(cars_indexed.index, add_cars['ar_daily'].mean(axis=1), color='blue')
ax.fill_between(cars_indexed.index,
                add_cars['ar_daily'].mean(axis=1)-add_cars['ar_daily'].std(axis=1)*stds,
                add_cars['ar_daily'].mean(axis=1)+add_cars['ar_daily'].std(axis=1)*stds,
                color='blue', alpha=0.1)

plt.plot(cars_indexed.index, del_cars['ar_daily'].mean(axis=1), color='red')
ax.fill_between(cars_indexed.index,
                del_cars['ar_daily'].mean(axis=1)-add_cars['ar_daily'].std(axis=1)*stds,
                del_cars['ar_daily'].mean(axis=1)+add_cars['ar_daily'].std(axis=1)*stds,
                color='red', alpha=0.1)

ax.fill_between([avg_rw_start,avg_rw_end], [1,1], color='grey', alpha=0.2)
ax.fill_between([avg_rw_start,avg_rw_end], [-1,-1], color='grey', alpha=0.2)

plt.legend(['Adds','Deletes'], frameon=False, loc='best')
plt.title('CAR for Adds/Deletes to the %s' % ('S&P/TSX Composite Index'))
plt.xlabel('Days since (to) rebalance')
plt.ylabel('Cumulative abnormal return\n(indexed to day-0)')
xlim = [-90,90]
plt.xlim(xlim)
ylim = [-0.04,0.02]
plt.ylim(ylim)
plt.xticks(np.linspace(-90,90,7))

plt.text(avg_rw_start + 2, min(ylim) + 0.001, 'Ranking Period', rotation=90,
         horizontalalignment='left',verticalalignment='bottom', fontsize=12)

plt.show()

fig.savefig('\\'.join(overleaf+['car.png']))



In [9]:
df['sector_return'] = df['riskfree_return']+df['market_excess_return']+df['sector_excess_return']
df['industry_return'] = df['sector_return']+df['industry_excess_return']

In [10]:
plt.style.use('classic')
fig = plt.figure(figsize=(5,5))
fig.patch.set_facecolor('white')
ax = fig.add_subplot(1, 1, 1)
ax.spines['left'].set_visible(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.grid(True,axis='both',linestyle=':')

coherence = df.loc[df['ticker'].isin(actions.loc[actions['action']=='Delete']['ticker'].values)][['d','sector_return','r_daily']]
ax.scatter(coherence['d'], coherence['r_daily']-coherence['sector_return'], linewidths=0, alpha=0.15)

plt.xlabel('Sector Return')
plt.ylabel('Security Return')
plt.xlim([-100,100])
plt.ylim([-1,1])
plt.show()


vs. STD


In [55]:
plt.style.use('classic')

font = {'family' : 'Arial',
        'weight' : 'normal',
        'size'   : 12}

plt.rc('font', **font)

fig = plt.figure(figsize=(10,5))
fig.patch.set_facecolor('white')

ax = fig.add_subplot(1, 1, 1)

ax.spines['left'].set_visible(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.grid(True,axis='both',linestyle=':')

stds = 0.05

plt.plot(cars_indexed.index, (add_cars['ar_daily'].mean(axis=1)/add_cars['ar_daily'].std(axis=0).mean()).fillna(0), color='blue', label='Adds')
#ax.fill_between(cars_indexed.index,
#                add_cars['ar_daily'].mean(axis=1)-add_cars['ar_daily'].std(axis=1)*stds,
#                add_cars['ar_daily'].mean(axis=1)+add_cars['ar_daily'].std(axis=1)*stds,
#                color='blue', alpha=0.1)

plt.plot(cars_indexed.index, (del_cars['ar_daily'].mean(axis=1)/del_cars['ar_daily'].std(axis=0).mean()).fillna(0), color='red', label='Deletes')
#ax.fill_between(cars_indexed.index,
#                del_cars['ar_daily'].mean(axis=1)-add_cars['ar_daily'].std(axis=1)*stds,
#                del_cars['ar_daily'].mean(axis=1)+add_cars['ar_daily'].std(axis=1)*stds,
#                color='red', alpha=0.1)

ax.fill_between([avg_rw_start,avg_rw_end], [1,1], color='grey', alpha=0.2)
ax.fill_between([avg_rw_start,avg_rw_end], [-1,-1], color='grey', alpha=0.2)

plt.legend(frameon=False, loc='best')
plt.title('CAR per $\sigma$ for Adds/Deletes to the %s' % ('S&P/TSX Composite Index'))
plt.xlabel('Days since (to) rebalance')
plt.ylabel('Cumulative abnormal return (indexed to day-0) over $\sigma$')
xlim = [-30,30]
plt.xlim(xlim)
ylim = [-.2,.1]
plt.ylim(ylim)
plt.xticks(np.linspace(min(xlim),max(xlim),7))

plt.text(avg_rw_start + 2, max(ylim) - 0.025, 'Ranking Period', #rotation=90,
         horizontalalignment='left',verticalalignment='bottom', fontsize=12)

plt.show()

fig.savefig('\\'.join(overleaf+['car-per-s.png']))



In [91]:
sr = pd.read_sql('''SELECT * FROM sector_returns''', conn)
ir = pd.read_sql('''SELECT * FROM industry_returns''', conn)

sr = sr.pivot_table(index=['date'],columns=['sector'], values=['r_intraday_mean'], aggfunc=np.mean).reset_index()
ir = ir.pivot_table(index=['date'],columns=['industry'], values=['r_intraday_mean'], aggfunc=np.mean).reset_index()

#sr.index
#sr['date'].iloc[0]
dates = sr.loc[(sr['date']>='2017-09-01') & (sr['date']<='2017-09-31')]['date']
sr_sample = (sr.loc[(sr['date']>='2017-09-01') & (sr['date']<='2017-09-31')].drop('date',axis=1)+1).cumprod()-1
ir_sample = (ir.loc[(sr['date']>='2017-09-01') & (sr['date']<='2017-09-31')].drop('date',axis=1)+1).cumprod()-1


C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py:3802: PerformanceWarning: dropping on a non-lexsorted multi-index without a level parameter may impact performance.
  new_axis = axis.drop(labels, errors=errors)

In [ ]:


In [218]:
d = -3330
cars_indexed['ar_daily'].iloc[cars_indexed.index<d,:].var(axis=0).mean()


Out[218]:
1.9842537412291653e-31

In [219]:
add_var = []

for d in cars_indexed.index:
    var = pd.Series(name=d)
    var['var']=cars_indexed['ar_daily'].iloc[cars_indexed.index<=d,:].var(axis=0).mean()
    add_var.append(pd.DataFrame(var).T)
add_var = pd.concat(add_var)
print(add_var)


                var
-3360           NaN
-3359  0.000000e+00
-3358  7.703720e-34
-3357  0.000000e+00
-3356  9.314393e-34
-3353  3.293340e-33
-3352  4.972644e-33
-3351  4.768969e-33
-3350  4.988502e-33
-3349  5.084608e-32
-3346  5.326824e-32
-3345  5.446130e-32
-3344  5.912591e-32
-3343  5.901309e-32
-3342  6.211615e-32
-3339  6.183030e-32
-3338  6.049741e-32
-3337  5.978859e-32
-3336  6.305277e-32
-3335  6.474129e-32
-3332  6.767056e-32
-3331  1.984254e-31
-3330  2.011321e-31
-3329  2.043495e-31
-3328  9.537521e-32
-3325  1.001299e-31
-3324  6.578347e-32
-3323  7.175932e-32
-3322  1.160840e-31
-3321  1.171224e-31
...             ...
 5861  2.870970e-01
 5865  2.870962e-01
 5866  2.870954e-01
 5867  2.870946e-01
 5868  2.870938e-01
 5871  2.870930e-01
 5872  2.870923e-01
 5873  2.870917e-01
 5874  2.870913e-01
 5875  2.870909e-01
 5878  2.870903e-01
 5879  2.870896e-01
 5880  2.870891e-01
 5881  2.870882e-01
 5882  2.870873e-01
 5885  2.870863e-01
 5886  2.870852e-01
 5887  2.870842e-01
 5888  2.870833e-01
 5889  2.870822e-01
 5892  2.870811e-01
 5893  2.870799e-01
 5894  2.870788e-01
 5895  2.870775e-01
 5896  2.870763e-01
 5899  2.870751e-01
 5900  2.870739e-01
 5901  2.870728e-01
 5902  2.870718e-01
 5903  2.870710e-01

[8905 rows x 1 columns]

In [200]:
plt.style.use('classic')

font = {'family' : 'Arial',
        'weight' : 'normal',
        'size'   : 12}

plt.rc('font', **font)

fig = plt.figure(figsize=(10,5))
fig.patch.set_facecolor('white')

ax = fig.add_subplot(1, 1, 1)

ax.spines['left'].set_visible(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.grid(True,axis='both',linestyle=':')

stds = 0.05

plt.plot(add_var.index, add_var['var'].(axis=1), color='blue', label='Adds')
#ax.fill_between(cars_indexed.index,
#                add_cars['er_daily'].mean(axis=1)-add_cars['er_daily'].std(axis=1)*stds,
#                add_cars['er_daily'].mean(axis=1)+add_cars['er_daily'].std(axis=1)*stds,
#                color='blue', alpha=0.1)

#plt.plot(cars_indexed.index, del_cars['ar_daily'].mean(axis=1), color='red')

#ax.fill_between(cars_indexed.index,
#                del_cars['er_daily'].mean(axis=1)-add_cars['er_daily'].std(axis=1)*stds,
#                del_cars['er_daily'].mean(axis=1)+add_cars['er_daily'].std(axis=1)*stds,
#                color='red', alpha=0.1)

#ax.fill_between([avg_rw_start,avg_rw_end], [1,1], color='grey', alpha=0.2)
#ax.fill_between([avg_rw_start,avg_rw_end], [-1,-1], color='grey', alpha=0.2)

plt.legend(frameon=False, loc='best')
plt.title('Induced Variance for Adds/Deletes to the %s' % ('S&P/TSX Composite Index'))
plt.xlabel('Days since (to) rebalance')
plt.ylabel('Daily variance\n(indexed to day-0)')
xlim = [-90,90]
plt.xlim(xlim)
#ylim = [-1,1]
plt.ylim(ylim)
plt.xticks(np.linspace(-90,90,7))

#plt.text(avg_rw_start + 2, min(ylim) + 0.001, 'Ranking Period', rotation=90,
#         horizontalalignment='left',verticalalignment='bottom', fontsize=12)

plt.show()

fig.savefig('\\'.join(overleaf+['er.png']))



In [131]:
plt.style.use('classic')

fig = plt.figure(figsize=(10,5))
fig.patch.set_facecolor('white')

ax = fig.add_subplot(1, 1, 1)

ax.spines['left'].set_visible(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.grid(True,axis='both',linestyle=':')

for col in sr_sample.columns.get_level_values(1):
    plt.plot(dates, sr_sample['r_intraday_mean',col], label=col)
    plt.text(max(dates), sr_sample['r_intraday_mean',col].iloc[-1], col,
         horizontalalignment='left',verticalalignment='center', fontsize=12)
plt.xticks(dates[::2],dates[::2], rotation=45)
#plt.legend()
#plt.show()

plt.title('Sector-specific returns for September 2017')
plt.xlabel('Date')
plt.ylabel('Cumulative sector-specific return')

#plt.xticks(np.linspace(min(xlim),max(xlim),7))

#plt.text(avg_rw_start + 2, max(ylim) - 0.025, 'Ranking Period', #rotation=90,
#         horizontalalignment='left',verticalalignment='bottom', fontsize=12)

plt.show()
fig.savefig('\\'.join(overleaf+['sector-returns.png']))



In [189]:
factors = pd.read_sql('''SELECT fc.*, sc.sector, sc.industry FROM factor_coefficients fc
    INNER JOIN (SELECT ticker, sector, industry FROM sectors) sc
        ON sc.ticker=fc.ticker''', conn).drop(['pre_event','action'],axis=1).drop_duplicates()
#factors.drop()
#factors
table = factors.loc[factors.industry=='Copper'].groupby(by=['ticker'])[['riskfree_return','market_excess_return','sector_excess_return','industry_excess_return','residual']].mean().reset_index()

latex = latexify(table).sp


    \begin{table}[h!]
        \centering
        \begin{tabular}{ r  r  r  r  r  r }
            ticker & riskfree_return & market_excess_return & sector_excess_return & industry_excess_return & residual \\ 
ERO & -190.88 & 0.42 & 0.06 & 0.03 & -0.00 \\ 
FM & -33.78 & -0.24 & 0.37 & 0.17 & -0.01 \\ 
LUN & -2.06 & -0.42 & 0.26 & 0.10 & -0.01 \\ 
NSU & -65.98 & -0.15 & 0.26 & 0.09 & -0.01 \\ 
TKO & 6.28 & 0.06 & 0.42 & 0.11 & -0.01
        \end{tabular}
        \caption{?}
        \label{tab:?}
    \end{table}
    

In [243]:
#print(latexify(pd.read_sql('''SELECT * FROM factor_coefficients''',conn)))

prestd = add_cars.loc[add_cars.index<0].std(axis=0).mean()
poststd = add_cars.loc[add_cars.index>0].std(axis=0).mean()

cartab = pd.DataFrame(add_cars['ar_daily'].mean(axis=1))
cartab.columns = ['Mean']
cartab['Mean'] = cartab['Mean']*100
cartab['winow_std'] = [prestd if idx<0 else poststd for idx in cartab.index]

cartab['test'] = cartab['Mean']/cartab['winow_std']

print(latexify(cartab.loc[(cartab.index>-30) & (cartab.index<30)]))


    \begin{table}[h!]
        \centering
        \begin{tabular}{ r  r  r  r  r  r }
            Mean & winow_std & test \\ 
-0.32 & 44481333.14 & -0.00 \\ 
-0.32 & 44481333.14 & -0.00 \\ 
-0.37 & 44481333.14 & -0.00 \\ 
-0.42 & 44481333.14 & -0.00 \\ 
-0.40 & 44481333.14 & -0.00 \\ 
-0.32 & 44481333.14 & -0.00 \\ 
-0.49 & 44481333.14 & -0.00 \\ 
-0.49 & 44481333.14 & -0.00 \\ 
-0.49 & 44481333.14 & -0.00 \\ 
-0.44 & 44481333.14 & -0.00 \\ 
-0.53 & 44481333.14 & -0.00 \\ 
-0.66 & 44481333.14 & -0.00 \\ 
-0.61 & 44481333.14 & -0.00 \\ 
-0.79 & 44481333.14 & -0.00 \\ 
-0.79 & 44481333.14 & -0.00 \\ 
-0.79 & 44481333.14 & -0.00 \\ 
-0.70 & 44481333.14 & -0.00 \\ 
-0.71 & 44481333.14 & -0.00 \\ 
-0.49 & 44481333.14 & -0.00 \\ 
-0.61 & 44481333.14 & -0.00 \\ 
-0.76 & 44481333.14 & -0.00 \\ 
-0.76 & 44481333.14 & -0.00 \\ 
-0.75 & 44481333.14 & -0.00 \\ 
-0.70 & 44481333.14 & -0.00 \\ 
-0.63 & 44481333.14 & -0.00 \\ 
-0.36 & 44481333.14 & -0.00 \\ 
-0.25 & 44481333.14 & -0.00 \\ 
0.01 & 44481333.14 & 0.00 \\ 
0.01 & 44481333.14 & 0.00 \\ 
0.00 & 120308505745126.23 & 0.00 \\ 
-0.05 & 120308505745126.23 & -0.00 \\ 
-0.38 & 120308505745126.23 & -0.00 \\ 
-0.70 & 120308505745126.23 & -0.00 \\ 
-0.74 & 120308505745126.23 & -0.00 \\ 
-0.76 & 120308505745126.23 & -0.00 \\ 
-0.75 & 120308505745126.23 & -0.00 \\ 
-0.94 & 120308505745126.23 & -0.00 \\ 
-0.74 & 120308505745126.23 & -0.00 \\ 
-1.04 & 120308505745126.23 & -0.00 \\ 
-0.97 & 120308505745126.23 & -0.00 \\ 
-1.31 & 120308505745126.23 & -0.00 \\ 
-1.30 & 120308505745126.23 & -0.00 \\ 
-1.30 & 120308505745126.23 & -0.00 \\ 
-1.97 & 120308505745126.23 & -0.00 \\ 
-2.35 & 120308505745126.23 & -0.00 \\ 
-2.23 & 120308505745126.23 & -0.00 \\ 
-2.44 & 120308505745126.23 & -0.00 \\ 
-2.42 & 120308505745126.23 & -0.00 \\ 
-2.41 & 120308505745126.23 & -0.00 \\ 
-2.42 & 120308505745126.23 & -0.00 \\ 
-2.47 & 120308505745126.23 & -0.00 \\ 
-2.52 & 120308505745126.23 & -0.00 \\ 
-2.41 & 120308505745126.23 & -0.00 \\ 
-2.46 & 120308505745126.23 & -0.00 \\ 
-2.36 & 120308505745126.23 & -0.00 \\ 
-2.38 & 120308505745126.23 & -0.00 \\ 
-2.39 & 120308505745126.23 & -0.00 \\ 
-2.26 & 120308505745126.23 & -0.00 \\ 
-2.21 & 120308505745126.23 & -0.00
        \end{tabular}
        \caption{?}
        \label{tab:?}
    \end{table}
    

In [241]:



    \begin{table}[h!]
        \centering
        \begin{tabular}{ r  r  r  r  r  r }
            Mean & winow_std & test \\ 
-0.00 & 44481333.14 & -0.00 \\ 
-0.00 & 44481333.14 & -0.00 \\ 
-0.00 & 44481333.14 & -0.00 \\ 
-0.00 & 44481333.14 & -0.00 \\ 
-0.00 & 44481333.14 & -0.00 \\ 
-0.00 & 44481333.14 & -0.00 \\ 
-0.00 & 44481333.14 & -0.00 \\ 
-0.00 & 44481333.14 & -0.00 \\ 
-0.00 & 44481333.14 & -0.00 \\ 
-0.00 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.00 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.01 & 44481333.14 & -0.00 \\ 
-0.00 & 44481333.14 & -0.00 \\ 
-0.00 & 44481333.14 & -0.00 \\ 
0.00 & 44481333.14 & 0.00 \\ 
0.00 & 44481333.14 & 0.00 \\ 
0.00 & 120308505745126.23 & 0.00 \\ 
-0.00 & 120308505745126.23 & -0.00 \\ 
-0.00 & 120308505745126.23 & -0.00 \\ 
-0.01 & 120308505745126.23 & -0.00 \\ 
-0.01 & 120308505745126.23 & -0.00 \\ 
-0.01 & 120308505745126.23 & -0.00 \\ 
-0.01 & 120308505745126.23 & -0.00 \\ 
-0.01 & 120308505745126.23 & -0.00 \\ 
-0.01 & 120308505745126.23 & -0.00 \\ 
-0.01 & 120308505745126.23 & -0.00 \\ 
-0.01 & 120308505745126.23 & -0.00 \\ 
-0.01 & 120308505745126.23 & -0.00 \\ 
-0.01 & 120308505745126.23 & -0.00 \\ 
-0.01 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.03 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00 \\ 
-0.02 & 120308505745126.23 & -0.00
        \end{tabular}
        \caption{?}
        \label{tab:?}
    \end{table}
    

In [239]:
def latexify(df):
    latex = r'''
    \begin{table}[h!]
        \centering
        \begin{tabular}{%s}
            %s
        \end{tabular}
        \caption{?}
        \label{tab:?}
    \end{table}
    '''

    rows = []


    line = ''
    for key in df.columns:
        cell = key
        line += r' & ' + cell
    line = line[3:]
    rows.append(line)

    for i, row in df.iterrows():
        line = ''
        for key in row.index:
            cell = ''
            if type(row[key])==float:
                cell = '%.2f' % row[key]
            elif type(row[key])==int:
                cell = '%d' % row[key]
            else:
                try:
                    cell = '%.2f' % float(row[key])
                except:
                    cell = row[key]
            line += r' & ' + cell
        line = line[3:]
        rows.append(line)

    latex = latex % ( ' r '*len(table.columns), ' \\\ \n'.join(rows) )

    return latex