In [51]:
# System libraries
import sys, os, gc
import datetime
import itertools
# Math libraries
import math, random
import pandas as pd, numpy as np
import scipy
from scipy import stats
import datetime
from datetime import timedelta
#from datetime import datetime
# Data storage libraries
import pickle, sqlite3, simpledbf, boto3
# Custom financial data libraries
import utils.findata_utils as fd
# Plotting libraries
import matplotlib.pyplot as plt
from matplotlib import rcParams
# Export folder for generated figures, kept as individual path components;
# the cell that saves figures joins them with a backslash.
overleaf = [
    'C:', 'Users', 'bryce', 'OneDrive', 'Documents',
    'Overleaf', 'Thesis', 'assets', 'exports',
]

# SQLite database backing this notebook: one shared connection (handed to
# pd.read_sql by the loading cells) and a cursor opened on it.
db = 'C:\\Datasets\\thesis.db'
conn = sqlite3.connect(db)
c = conn.cursor()
In [9]:
# Read the complete daily_abnormal_returns table (all rows, all columns)
# from the open SQLite connection into a DataFrame.
returns = pd.read_sql(
    sql='''SELECT * FROM daily_abnormal_returns''',
    con=conn,
)
In [12]:
# Signed number of calendar days from each row's rebalance date to its
# observation date: `date` minus `rebal_date`, so rows dated before the
# rebalance get a negative value.
#
# Both columns are parsed in one vectorised pass instead of calling
# datetime.strptime twice per row through DataFrame.apply(axis=1). The result
# is the same integer day count, but the computation no longer loops in
# Python and also works on an empty frame (where a row-wise apply hands back
# a DataFrame instead of a Series).
#
# As in the original, only the first 10 characters of str(rebal_date) are
# parsed (the YYYY-MM-DD prefix); `date` must already be exactly YYYY-MM-DD.
# NOTE(review): a missing date now yields NaN in `d` rather than raising.
returns['d'] = (
    pd.to_datetime(returns['date'], format='%Y-%m-%d')
    - pd.to_datetime(returns['rebal_date'].astype(str).str[:10], format='%Y-%m-%d')
).dt.days
In [16]:
# Chart of the mean daily abnormal return for each day relative to the
# rebalance date, restricted to the window -30 <= d <= 30.
plt.style.use('classic')
fig = plt.figure(figsize=(10,5))
fig.patch.set_facecolor('white')
ax = fig.add_subplot(1, 1, 1)

# Show only the left and bottom axes lines.
for side, visible in (('left', True), ('top', False), ('right', False), ('bottom', True)):
    ax.spines[side].set_visible(visible)
ax.grid(True,axis='both',linestyle=':')

# Alias kept from the original cell in case other cells refer to it.
agg = returns

# Series.between is inclusive on both ends by default, i.e. the same
# (d >= -30) & (d <= 30) filter the original spelled out for the y values.
# (The original x filter called a non-existent `isbetween` method.)
in_window = returns['d'].between(-30, 30)

# Average within each relative day so there is exactly one y value per x
# value. The original took a single scalar mean over the whole window, which
# cannot be plotted against a vector of days. groupby returns the days in
# ascending order, so the line is drawn left to right.
mean_ar_by_day = returns.loc[in_window].groupby('d')['ar_daily'].mean()

ax.plot(mean_ar_by_day.index, mean_ar_by_day.values, label='Abnormal returns')
ax.legend(frameon=False, loc='best')
ax.set_title('Portfolio returns')
# NOTE(review): the plotted series is the per-day mean, not a running sum;
# confirm whether a cumulative sum was intended or the label should change.
ax.set_ylabel('Cumulative returns')
ax.set_xlabel('Days Since (To) Rebalance')

# The original defined `xlim` but passed an undefined `time_range` to plt.xlim.
# NOTE(review): the data shown spans only +/-30 days while the axis spans
# +/-365; confirm the intended range.
xlim = [-365,365]
ax.set_xlim(xlim)
ax.set_ylim(-0.5,0.5)
plt.show()
In [20]:
# Read the complete model_inputs table from the open SQLite connection and
# show its first five rows as a quick preview.
model_inputs = pd.read_sql(
    sql='''SELECT * FROM model_inputs''',
    con=conn,
)
display(model_inputs.head(n=5))
In [52]:
# For every column of model_inputs, draw two overlaid normalised histograms:
# rows where add == 1 ('Add') and rows where add == 0 ('n/c'). Each figure is
# written to the export folder as distribution_<column>.png and then shown.
#
# NOTE(review): the loop covers every column, including `add` itself and any
# column that is not numeric or does not fall in [0, 0.02]; confirm whether
# it should be limited to the actual factor columns.

# Loop-invariant pieces, computed once instead of on every iteration.
hist_bins = np.linspace(0,0.02,10)
added_rows = model_inputs.loc[model_inputs['add']==1]
unchanged_rows = model_inputs.loc[model_inputs['add']==0]

for factor in model_inputs.columns:
    fig = plt.figure(figsize=(6,6))
    fig.patch.set_facecolor('white')
    ax = fig.add_subplot(1, 1, 1)

    # Show only the left and bottom axes lines.
    for side, visible in (('left', True), ('top', False), ('right', False), ('bottom', True)):
        ax.spines[side].set_visible(visible)
    ax.grid(True,axis='both',linestyle=':')

    ax.hist(added_rows[factor], density=True, bins=hist_bins, alpha=0.7,
            cumulative=False, histtype='stepfilled', label='Add',
            color=['xkcd:navy blue'])
    ax.hist(unchanged_rows[factor], density=True, bins=hist_bins, alpha=0.4,
            cumulative=False, histtype='stepfilled', label='n/c',
            color=['xkcd:forest green'])

    ax.set_title('Distribution of {factor}'.format(factor=factor))
    ax.set_ylabel('Frequency')
    ax.set_xlabel('{factor}'.format(factor=factor))
    # 'top right' is not a matplotlib legend location; the valid spelling is
    # 'upper right'.
    ax.legend(frameon=False, loc='upper right')
    ax.set_xlim(0,0.02)

    # Save before showing so the file is written from the fully drawn figure
    # regardless of how the active backend treats figures after show().
    # The path is joined with a literal backslash, exactly as before.
    fig.savefig('\\'.join(overleaf+['distribution_{factor}.png'.format(factor=factor)]))
    plt.show()
    # Release the figure; otherwise one open figure per column piles up.
    plt.close(fig)