In [3]:
# System libraries
import sys, os, gc
import datetime
# Math libraries
import math, random
import pandas as pd, numpy as np
import scipy
from scipy import stats
from datetime import timedelta
from datetime import datetime
import itertools
# Data storage libraries
import pickle, sqlite3, simpledbf, boto3
# Custom financial data libraries
import utils.findata_utils as fd
import utils.ml_utils as ml_utils
# Plotting libraries
import matplotlib.pyplot as plt
from matplotlib import rcParams
import warnings
#if not sys.warnoptions:
# warnings.simplefilter("ignore")
from importlib import reload
fd = reload(fd)
import sklearn as sk
import tensorflow as tf
import xgboost as xgb
import keras
from imblearn.over_sampling import RandomOverSampler
from sklearn import svm
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import ElasticNet, LogisticRegression
from sklearn.metrics import explained_variance_score, mean_squared_error, confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import cross_val_score, KFold, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.externals import joblib
from keras.models import Sequential
from keras.optimizers import SGD
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasRegressor
from yellowbrick.regressor import ResidualsPlot, PredictionError
# Connect to databases
db = 'C:\\Datasets\\thesis.db'
overleaf = ['C:','Users','bryce','OneDrive','Documents','Overleaf','Thesis','assets','exports']
conn = sqlite3.connect(db)
c = conn.cursor()
hdf_path = 'C:\\Datasets\\thesis.h5'
hdf = pd.HDFStore(hdf_path)
In [4]:
df = pd.read_sql('''
SELECT *--ticker, date, rebal_date, action, [index], r_daily,
--er_daily, ar_daily
FROM daily_abnormal_returns''', conn)
bounds = [-1,10]
bounds_corr = [-5,5]
for corr in ['riskfree_return','market_excess_return','sector_excess_return','industry_excess_return',
'riskfree_return_corr','market_excess_return_corr','sector_excess_return_corr','industry_excess_return_corr']:
if '_corr' in corr:
df[corr] = df[corr].clip(upper=max(bounds_corr)).clip(lower=min(bounds_corr))
else:
df[corr] = df[corr].clip(upper=max(bounds)).clip(lower=min(bounds))
df['riskfree_return_corr'] = df['riskfree_return_corr'].clip(lower=-1).clip(upper=1)
df['er_daily'] = df['riskfree_return']*df['riskfree_return_corr'] + df['market_excess_return']*df['market_excess_return_corr'] + df['sector_excess_return']*df['sector_excess_return_corr'] + df['industry_excess_return']*df['industry_excess_return_corr']
df['ar_daily'] = df['r_daily']-df['er_daily']
display(df.iloc[:5,:])
# This is now done in the sector_coherence.py script!
In [16]:
# Compute number of days until the rebalance day
if 'd' not in df.columns:
for field in ['rebal_date', 'date']:
df[field] = df[field].apply(lambda date: datetime.strptime(date[:10], '%Y-%m-%d') if type(date)==str else date)
df['d'] = (df['date']-df['rebal_date']).apply(lambda d: d.days)
# Deal with infinities
df = df.replace(-np.inf,min(bounds))
df = df.replace(np.inf,max(bounds))
for column in ['r_daily','er_daily','ar_daily']:
df[column] = df[column].clip(lower=min(bounds))
df[column] = df[column].clip(upper=max(bounds))
# Compute the abnormal returns, per security
abnormal_returns = df.pivot_table(index=['d'], columns=['ticker'], values=['r_daily','ar_daily','er_daily'])
# Compute cumulative returns, per security
cars = (abnormal_returns+1).cumprod()-1
# Now, index all culm returns to day 0
cars_indexed = (cars.subtract(cars.loc[cars.index==0].iloc[0,:],axis=1))
ars_indexed = (abnormal_returns.subtract(abnormal_returns.loc[cars.index==0].iloc[0,:],axis=1))
#cars_indexed.clip
# Fill NAs
cars_indexed.loc[cars_indexed.index==0] = cars_indexed.loc[cars_indexed.index==0].fillna(0)
cars_indexed.loc[cars_indexed.index>=0] = cars_indexed.loc[cars_indexed.index>=0].fillna(method='ffill')
cars_indexed.loc[cars_indexed.index<=0] = cars_indexed.loc[cars_indexed.index<=0].fillna(method='bfill')
ars_indexed.loc[ars_indexed.index==0] = ars_indexed.loc[ars_indexed.index==0].fillna(0)
ars_indexed.loc[ars_indexed.index>=0] = ars_indexed.loc[ars_indexed.index>=0].fillna(method='ffill')
ars_indexed.loc[ars_indexed.index<=0] = ars_indexed.loc[ars_indexed.index<=0].fillna(method='bfill')
display(cars_indexed.iloc[:5,:])
print('AR')
display(ars_indexed.iloc[:5,:])
In [17]:
actions = df[['ticker','action','index']].drop_duplicates()
del_cars = cars_indexed.loc[:,cars_indexed.columns.get_level_values(1).isin(actions.loc[actions['action']=='Delete']['ticker'].values)]
add_cars = cars_indexed.loc[:,cars_indexed.columns.get_level_values(1).isin(actions.loc[actions['action']=='Add']['ticker'].values)]
del_ars = ars_indexed.loc[:,ars_indexed.columns.get_level_values(1).isin(actions.loc[actions['action']=='Delete']['ticker'].values)]
add_ars = ars_indexed.loc[:,ars_indexed.columns.get_level_values(1).isin(actions.loc[actions['action']=='Add']['ticker'].values)]
In [18]:
avg_rw_end = -df['rebal_date'].apply(lambda day: day.day).mean()
avg_rw_start = avg_rw_end - 13
In [27]:
plt.style.use('classic')
font = {'family' : 'Arial',
'weight' : 'normal',
'size' : 12}
plt.rc('font', **font)
fig = plt.figure(figsize=(10,5))
fig.patch.set_facecolor('white')
ax = fig.add_subplot(1, 1, 1)
ax.spines['left'].set_visible(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.grid(True,axis='both',linestyle=':')
stds = 0.25
plt.plot(ars_indexed.index, add_ars['ar_daily'].mean(axis=1), color='blue', label='Adds')
ax.fill_between(ars_indexed.index,
add_ars['ar_daily'].mean(axis=1)-add_ars['ar_daily'].std(axis=1)*stds,
add_ars['ar_daily'].mean(axis=1)+add_ars['ar_daily'].std(axis=1)*stds,
color='blue', alpha=0.1)
plt.plot(ars_indexed.index, del_ars['ar_daily'].mean(axis=1), color='red', label='Deletes')
ax.fill_between(cars_indexed.index,
del_ars['ar_daily'].mean(axis=1)-add_ars['ar_daily'].std(axis=1)*stds,
del_ars['ar_daily'].mean(axis=1)+add_ars['ar_daily'].std(axis=1)*stds,
color='red', alpha=0.1)
ax.fill_between([avg_rw_start,avg_rw_end], [1,1], color='grey', alpha=0.2)
ax.fill_between([avg_rw_start,avg_rw_end], [-1,-1], color='grey', alpha=0.2)
plt.legend(frameon=False, loc='best')
plt.title('Abnormal Returns for Adds/Deletes to the %s' % ('S&P/TSX Composite Index'))
plt.xlabel('Days since (to) rebalance')
plt.ylabel('Abnormal return (daily)')
xlim = [-90,90]
plt.xlim(xlim)
ylim = [-0.04,0.04]
plt.ylim(ylim)
plt.xticks(np.linspace(-90,90,7))
plt.text(avg_rw_start + 2, min(ylim) + 0.001, 'Ranking Period', rotation=90,
horizontalalignment='left',verticalalignment='bottom', fontsize=12)
plt.show()
fig.savefig('\\'.join(overleaf+['ar.png']))
In [34]:
plt.style.use('classic')
font = {'family' : 'Arial',
'weight' : 'normal',
'size' : 12}
plt.rc('font', **font)
fig = plt.figure(figsize=(10,5))
fig.patch.set_facecolor('white')
ax = fig.add_subplot(1, 1, 1)
ax.spines['left'].set_visible(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.grid(True,axis='both',linestyle=':')
stds = 0.05
plt.plot(cars_indexed.index, add_cars['ar_daily'].mean(axis=1), color='blue')
ax.fill_between(cars_indexed.index,
add_cars['ar_daily'].mean(axis=1)-add_cars['ar_daily'].std(axis=1)*stds,
add_cars['ar_daily'].mean(axis=1)+add_cars['ar_daily'].std(axis=1)*stds,
color='blue', alpha=0.1)
plt.plot(cars_indexed.index, del_cars['ar_daily'].mean(axis=1), color='red')
ax.fill_between(cars_indexed.index,
del_cars['ar_daily'].mean(axis=1)-add_cars['ar_daily'].std(axis=1)*stds,
del_cars['ar_daily'].mean(axis=1)+add_cars['ar_daily'].std(axis=1)*stds,
color='red', alpha=0.1)
ax.fill_between([avg_rw_start,avg_rw_end], [1,1], color='grey', alpha=0.2)
ax.fill_between([avg_rw_start,avg_rw_end], [-1,-1], color='grey', alpha=0.2)
plt.legend(['Adds','Deletes'], frameon=False, loc='best')
plt.title('CAR for Adds/Deletes to the %s' % ('S&P/TSX Composite Index'))
plt.xlabel('Days since (to) rebalance')
plt.ylabel('Cumulative abnormal return\n(indexed to day-0)')
xlim = [-90,90]
plt.xlim(xlim)
ylim = [-0.04,0.02]
plt.ylim(ylim)
plt.xticks(np.linspace(-90,90,7))
plt.text(avg_rw_start + 2, min(ylim) + 0.001, 'Ranking Period', rotation=90,
horizontalalignment='left',verticalalignment='bottom', fontsize=12)
plt.show()
fig.savefig('\\'.join(overleaf+['car.png']))
In [9]:
df['sector_return'] = df['riskfree_return']+df['market_excess_return']+df['sector_excess_return']
df['industry_return'] = df['sector_return']+df['industry_excess_return']
In [10]:
plt.style.use('classic')
fig = plt.figure(figsize=(5,5))
fig.patch.set_facecolor('white')
ax = fig.add_subplot(1, 1, 1)
ax.spines['left'].set_visible(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.grid(True,axis='both',linestyle=':')
coherence = df.loc[df['ticker'].isin(actions.loc[actions['action']=='Delete']['ticker'].values)][['d','sector_return','r_daily']]
ax.scatter(coherence['d'], coherence['r_daily']-coherence['sector_return'], linewidths=0, alpha=0.15)
plt.xlabel('Sector Return')
plt.ylabel('Security Return')
plt.xlim([-100,100])
plt.ylim([-1,1])
plt.show()
In [55]:
plt.style.use('classic')
font = {'family' : 'Arial',
'weight' : 'normal',
'size' : 12}
plt.rc('font', **font)
fig = plt.figure(figsize=(10,5))
fig.patch.set_facecolor('white')
ax = fig.add_subplot(1, 1, 1)
ax.spines['left'].set_visible(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.grid(True,axis='both',linestyle=':')
stds = 0.05
plt.plot(cars_indexed.index, (add_cars['ar_daily'].mean(axis=1)/add_cars['ar_daily'].std(axis=0).mean()).fillna(0), color='blue', label='Adds')
#ax.fill_between(cars_indexed.index,
# add_cars['ar_daily'].mean(axis=1)-add_cars['ar_daily'].std(axis=1)*stds,
# add_cars['ar_daily'].mean(axis=1)+add_cars['ar_daily'].std(axis=1)*stds,
# color='blue', alpha=0.1)
plt.plot(cars_indexed.index, (del_cars['ar_daily'].mean(axis=1)/del_cars['ar_daily'].std(axis=0).mean()).fillna(0), color='red', label='Deletes')
#ax.fill_between(cars_indexed.index,
# del_cars['ar_daily'].mean(axis=1)-add_cars['ar_daily'].std(axis=1)*stds,
# del_cars['ar_daily'].mean(axis=1)+add_cars['ar_daily'].std(axis=1)*stds,
# color='red', alpha=0.1)
ax.fill_between([avg_rw_start,avg_rw_end], [1,1], color='grey', alpha=0.2)
ax.fill_between([avg_rw_start,avg_rw_end], [-1,-1], color='grey', alpha=0.2)
plt.legend(frameon=False, loc='best')
plt.title('CAR per $\sigma$ for Adds/Deletes to the %s' % ('S&P/TSX Composite Index'))
plt.xlabel('Days since (to) rebalance')
plt.ylabel('Cumulative abnormal return (indexed to day-0) over $\sigma$')
xlim = [-30,30]
plt.xlim(xlim)
ylim = [-.2,.1]
plt.ylim(ylim)
plt.xticks(np.linspace(min(xlim),max(xlim),7))
plt.text(avg_rw_start + 2, max(ylim) - 0.025, 'Ranking Period', #rotation=90,
horizontalalignment='left',verticalalignment='bottom', fontsize=12)
plt.show()
fig.savefig('\\'.join(overleaf+['car-per-s.png']))
In [91]:
sr = pd.read_sql('''SELECT * FROM sector_returns''', conn)
ir = pd.read_sql('''SELECT * FROM industry_returns''', conn)
sr = sr.pivot_table(index=['date'],columns=['sector'], values=['r_intraday_mean'], aggfunc=np.mean).reset_index()
ir = ir.pivot_table(index=['date'],columns=['industry'], values=['r_intraday_mean'], aggfunc=np.mean).reset_index()
#sr.index
#sr['date'].iloc[0]
dates = sr.loc[(sr['date']>='2017-09-01') & (sr['date']<='2017-09-31')]['date']
sr_sample = (sr.loc[(sr['date']>='2017-09-01') & (sr['date']<='2017-09-31')].drop('date',axis=1)+1).cumprod()-1
ir_sample = (ir.loc[(sr['date']>='2017-09-01') & (sr['date']<='2017-09-31')].drop('date',axis=1)+1).cumprod()-1
In [ ]:
In [218]:
d = -3330
cars_indexed['ar_daily'].iloc[cars_indexed.index<d,:].var(axis=0).mean()
Out[218]:
In [219]:
add_var = []
for d in cars_indexed.index:
var = pd.Series(name=d)
var['var']=cars_indexed['ar_daily'].iloc[cars_indexed.index<=d,:].var(axis=0).mean()
add_var.append(pd.DataFrame(var).T)
add_var = pd.concat(add_var)
print(add_var)
In [200]:
plt.style.use('classic')
font = {'family' : 'Arial',
'weight' : 'normal',
'size' : 12}
plt.rc('font', **font)
fig = plt.figure(figsize=(10,5))
fig.patch.set_facecolor('white')
ax = fig.add_subplot(1, 1, 1)
ax.spines['left'].set_visible(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.grid(True,axis='both',linestyle=':')
stds = 0.05
plt.plot(add_var.index, add_var['var'].(axis=1), color='blue', label='Adds')
#ax.fill_between(cars_indexed.index,
# add_cars['er_daily'].mean(axis=1)-add_cars['er_daily'].std(axis=1)*stds,
# add_cars['er_daily'].mean(axis=1)+add_cars['er_daily'].std(axis=1)*stds,
# color='blue', alpha=0.1)
#plt.plot(cars_indexed.index, del_cars['ar_daily'].mean(axis=1), color='red')
#ax.fill_between(cars_indexed.index,
# del_cars['er_daily'].mean(axis=1)-add_cars['er_daily'].std(axis=1)*stds,
# del_cars['er_daily'].mean(axis=1)+add_cars['er_daily'].std(axis=1)*stds,
# color='red', alpha=0.1)
#ax.fill_between([avg_rw_start,avg_rw_end], [1,1], color='grey', alpha=0.2)
#ax.fill_between([avg_rw_start,avg_rw_end], [-1,-1], color='grey', alpha=0.2)
plt.legend(frameon=False, loc='best')
plt.title('Induced Variance for Adds/Deletes to the %s' % ('S&P/TSX Composite Index'))
plt.xlabel('Days since (to) rebalance')
plt.ylabel('Daily variance\n(indexed to day-0)')
xlim = [-90,90]
plt.xlim(xlim)
#ylim = [-1,1]
plt.ylim(ylim)
plt.xticks(np.linspace(-90,90,7))
#plt.text(avg_rw_start + 2, min(ylim) + 0.001, 'Ranking Period', rotation=90,
# horizontalalignment='left',verticalalignment='bottom', fontsize=12)
plt.show()
fig.savefig('\\'.join(overleaf+['er.png']))
In [131]:
plt.style.use('classic')
fig = plt.figure(figsize=(10,5))
fig.patch.set_facecolor('white')
ax = fig.add_subplot(1, 1, 1)
ax.spines['left'].set_visible(True)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.grid(True,axis='both',linestyle=':')
for col in sr_sample.columns.get_level_values(1):
plt.plot(dates, sr_sample['r_intraday_mean',col], label=col)
plt.text(max(dates), sr_sample['r_intraday_mean',col].iloc[-1], col,
horizontalalignment='left',verticalalignment='center', fontsize=12)
plt.xticks(dates[::2],dates[::2], rotation=45)
#plt.legend()
#plt.show()
plt.title('Sector-specific returns for September 2017')
plt.xlabel('Date')
plt.ylabel('Cumulative sector-specific return')
#plt.xticks(np.linspace(min(xlim),max(xlim),7))
#plt.text(avg_rw_start + 2, max(ylim) - 0.025, 'Ranking Period', #rotation=90,
# horizontalalignment='left',verticalalignment='bottom', fontsize=12)
plt.show()
fig.savefig('\\'.join(overleaf+['sector-returns.png']))
In [189]:
factors = pd.read_sql('''SELECT fc.*, sc.sector, sc.industry FROM factor_coefficients fc
INNER JOIN (SELECT ticker, sector, industry FROM sectors) sc
ON sc.ticker=fc.ticker''', conn).drop(['pre_event','action'],axis=1).drop_duplicates()
#factors.drop()
#factors
table = factors.loc[factors.industry=='Copper'].groupby(by=['ticker'])[['riskfree_return','market_excess_return','sector_excess_return','industry_excess_return','residual']].mean().reset_index()
latex = latexify(table).sp
In [243]:
#print(latexify(pd.read_sql('''SELECT * FROM factor_coefficients''',conn)))
prestd = add_cars.loc[add_cars.index<0].std(axis=0).mean()
poststd = add_cars.loc[add_cars.index>0].std(axis=0).mean()
cartab = pd.DataFrame(add_cars['ar_daily'].mean(axis=1))
cartab.columns = ['Mean']
cartab['Mean'] = cartab['Mean']*100
cartab['winow_std'] = [prestd if idx<0 else poststd for idx in cartab.index]
cartab['test'] = cartab['Mean']/cartab['winow_std']
print(latexify(cartab.loc[(cartab.index>-30) & (cartab.index<30)]))
In [241]:
In [239]:
def latexify(df):
latex = r'''
\begin{table}[h!]
\centering
\begin{tabular}{%s}
%s
\end{tabular}
\caption{?}
\label{tab:?}
\end{table}
'''
rows = []
line = ''
for key in df.columns:
cell = key
line += r' & ' + cell
line = line[3:]
rows.append(line)
for i, row in df.iterrows():
line = ''
for key in row.index:
cell = ''
if type(row[key])==float:
cell = '%.2f' % row[key]
elif type(row[key])==int:
cell = '%d' % row[key]
else:
try:
cell = '%.2f' % float(row[key])
except:
cell = row[key]
line += r' & ' + cell
line = line[3:]
rows.append(line)
latex = latex % ( ' r '*len(table.columns), ' \\\ \n'.join(rows) )
return latex