In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
#import matplotlib.finance as mf
from matplotlib.widgets import MultiCursor
import statsmodels.tsa.stattools as stt
#import scipy.signal as sgn
import statsmodels.api as sm
#from statsmodels.sandbox.regression.predstd import wls_prediction_std
#from matplotlib.mlab import PCA
In [186]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
In [3]:
# Style. 1
# Seaborn 'paper' context combined with the built-in 'darkgrid' theme.
sns.set_context('paper')
sns.set_style("darkgrid")
In [241]:
# Style. 2
# Dark theme overrides: black axes on a grey figure, dashed red grid lines.
dark_theme_rc = {
    'axes.facecolor': 'black',
    'grid.color': 'red',
    'grid.linestyle': '--',
    'figure.facecolor': 'grey',
}
sns.set_context('paper')
sns.set_style("dark", rc=dark_theme_rc)
In [6]:
# Load the raw data set from an HDF5 file located in the notebook's working directory.
hft = pd.read_hdf('HFT_SR_RM_MA_TA.hdf')
In [178]:
# Cross-section of `hft` along its minor axis for the label 'TA0001'.
# NOTE(review): minor_xs is a pandas Panel method, so this presumably needs a
# pandas release that still ships Panel -- confirm the pinned version.
ta = hft.minor_xs('TA0001')
In [7]:
#------------------------------------------------
'''Some time length'''
# Length of each session in rows: 4 * 3600 rows per hour times the session
# duration in hours (2.5 h night, 2.25 h morning, 1.5 h afternoon).
night_len, mor_len, aftn_len = (
    int(4 * 3600 * hours) for hours in (2.5, 2.25, 1.5)
)
# One full day is the three sessions plus 3 extra rows.
day_len = sum((night_len, mor_len, aftn_len)) + 3
In [62]:
#-----------------------------------------------
# add columns
def AddCol(df):
    '''
    Return a copy of `df` with three derived columns appended.

    Columns added, in this order:
      vol_diff      row-to-row change of 'volume', plus 1
      openInt_diff  row-to-row change of 'openInterest', plus 1
      midPrc        mean of 'askPrc_0' and 'bidPrc_0'

    The +1 offset is for the convenience of log-y-scale plots (an unchanged
    value maps to 1 instead of 0). The first row of both *_diff columns is
    NaN because it has no predecessor.

    `df` itself is not modified. Columns are written by plain assignment
    rather than `join`, so calling the function on a frame that already has
    these columns recomputes them instead of raising on the name overlap.
    '''
    out = df.copy()
    out['vol_diff'] = df['volume'].diff() + 1
    out['openInt_diff'] = df['openInterest'].diff() + 1
    out['midPrc'] = (df['askPrc_0'] + df['bidPrc_0']) / 2.
    return out
In [179]:
# Augment the TA0001 frame with the derived columns vol_diff, openInt_diff and midPrc.
ta = AddCol(ta)
In [180]:
#------------------------------------------------
# training dataset and outsample dataset
# The split is by row position: the first day_len * 10 + 10 rows form the
# training set and the remainder is the out-of-sample set. `.iloc` states the
# positional intent explicitly instead of relying on `.ix` falling back to
# positions when the index is not integer-typed.
ta_10day = ta.iloc[:day_len*10 + 10, :]
ta_out = ta.iloc[day_len*10 + 10:, :]
In [181]:
# -------------------------------------------------
def ForwardDiff(df, n=1):
    '''
    Forward-looking counterpart of pandas' `diff()`.

    Each row holds value[t + n] - value[t]. Rows that end up containing NaN
    (in particular the trailing `n` rows, which have no value `n` steps
    ahead) are dropped from the result.
    '''
    backward = df.diff(periods=n)
    return backward.shift(periods=-n).dropna()
def ForwardPricemove(df, n=1):
    '''
    Forward move of the 'last' price over the next `n` rows.

    Parameters
    ----------
    df : DataFrame with a DatetimeIndex and a 'last' column.
    n : int, look-ahead horizon in rows.

    Returns
    -------
    Series named 'price move' holding last[t + n] - last[t].

    Rows stamped too close to 15:00 are removed: every row within the final
    n // 4 + 1 seconds before 15:00:00, and every row stamped in the 15:00
    hour (the last tick). The cutoff is computed on seconds-of-day, so it
    keeps working when n // 4 + 1 exceeds 60 seconds and reaches back past
    14:59.
    '''
    ret = ForwardDiff(df.loc[:, 'last'], n)
    stamps = ret.index
    second_of_day = (np.asarray(stamps.hour) * 3600
                     + np.asarray(stamps.minute) * 60
                     + np.asarray(stamps.second))
    close = 15 * 3600
    # n // 4 turns rows into seconds (presumably 4 rows per second -- TODO
    # confirm against the feed); one extra second is kept as a margin.
    cutoff = close - int(n // 4) - 1
    near_close = np.logical_and(second_of_day >= cutoff,
                                second_of_day < close + 3600)
    ret = ret[np.logical_not(near_close)]
    return ret.rename('price move')
In [182]:
# Look-ahead horizon, in rows, for the price-move target; the same horizon is
# applied to the training and the out-of-sample frames.
forward_ticks = 40
ta_10day_pm = ForwardPricemove(ta_10day, forward_ticks)
ta_out_pm = ForwardPricemove(ta_out, forward_ticks)
In [187]:
# Quick visual check of the training-sample forward price move.
ta_10day_pm.plot()
Out[187]:
In [50]:
# Quick visual check of the out-of-sample forward price move.
ta_out_pm.plot()
Out[50]:
In [82]:
#---------------------------
def UpDownCalc(df):
    '''
    Sign of the last trade price relative to the mid price.

    Returns an integer Series named 'updown', elementwise:
      +1 where last > midPrc
      -1 where last < midPrc
       0 otherwise (equal prices, or a NaN on either side, since both
         comparisons are then False)
    '''
    last = df['last']
    mid = df['midPrc']
    updown = (last > mid).astype(int) - (last < mid).astype(int)
    return updown.rename('updown')
In [188]:
ta_updown = UpDownCalc(ta)
# The histogram is drawn for the training sample. Compute its up/down series
# here so the cell does not depend on a variable left over in the kernel.
ta_10day_updown = UpDownCalc(ta_10day)
plt.figure()
plt.hist(ta_10day_updown, bins=50)
Out[188]:
In [200]:
def rolling_mean(df, n=3):
    '''
    Rolling mean over `n` rows, with the rows right after 21:00:00 removed.

    Parameters
    ----------
    df : Series or DataFrame with a DatetimeIndex.
    n : int, window length in rows.

    Rows stamped from 21:00:00 up to and including 21:00:00 + (n // 4 + 1)
    seconds are dropped. The cutoff is computed on seconds-of-day, so it
    keeps working when n // 4 + 1 exceeds 59 seconds.

    NaN values produced by incomplete windows are NOT removed here; callers
    are expected to drop them.
    '''
    ret = df.rolling(window=n).mean()
    stamps = ret.index
    second_of_day = (np.asarray(stamps.hour) * 3600
                     + np.asarray(stamps.minute) * 60
                     + np.asarray(stamps.second))
    session_open = 21 * 3600
    # n // 4 turns rows into seconds (presumably 4 rows per second -- TODO
    # confirm against the feed); one extra second is kept as a margin.
    warmup = int(n // 4) + 1
    in_warmup = np.logical_and(second_of_day >= session_open,
                               second_of_day <= session_open + warmup)
    return ret[np.logical_not(in_warmup)]
In [228]:
#------------------------------------------
'''rolling mean'''
# Smooth the up/down sign over `mywindow` rows to obtain the last-vs-mid
# indicator, then give the Series an explicit name.
mywindow = 12
lastmid_indicator = rolling_mean(ta_updown, n=mywindow)
lastmid_indicator = lastmid_indicator.rename('lastmidIndicator')
In [202]:
# Distribution of the indicator on the out-of-sample timestamps. `reindex`
# tolerates timestamps that rolling_mean removed (they become NaN and are
# dropped) instead of failing on the missing labels.
plt.figure()
plt.hist(lastmid_indicator.reindex(ta_out_pm.index).dropna(), bins=20)
# Distribution of the out-of-sample price move with unit-wide bins centred on
# the integers -5..5. np.arange excludes its stop value, so the stop must be
# 6.5 for the last edge to land on 5.5.
plt.figure()
plt.hist(ta_out_pm, bins=np.arange(-5.5, 6.5, 1))
Out[202]:
In [90]:
def myols(df, pm, norm=False):
    '''
    Fit an OLS regression, with intercept, of the price move on the indicator.

    Parameters
    ----------
    df : indicator Series or DataFrame, indexed by the same kind of
         timestamps as `pm`.
    pm : price-move Series (the regression target).
    norm : if True, standardise the indicator (subtract the mean, divide by
           the standard deviation) before fitting.

    The indicator is aligned to `pm` automatically: it is re-indexed on
    pm's index, rows with missing values are dropped, and `pm` is then
    restricted to the surviving rows.

    Returns the fitted statsmodels results object.
    '''
    # reindex aligns on the ROW index for both Series and DataFrame, and turns
    # labels missing from `df` into NaN rows that dropna() then removes.
    indicator = df.reindex(pm.index).dropna()
    if norm:
        indicator = (indicator - indicator.mean()) / indicator.std()
    X = sm.add_constant(indicator)
    Y = pm.loc[indicator.index]
    return sm.OLS(Y, X).fit()
In [215]:
def Rsquare(y, yhat):
    '''
    Coefficient of determination of the predictions `yhat` against `y`.

        R^2 = 1 - SS_res / SS_tot

    where SS_res is the sum of squared prediction errors and SS_tot is the
    sum of squared deviations of `y` from its own mean.

    The ratio SS_reg / SS_tot equals this only for an in-sample least-squares
    fit with an intercept. For predictions on new data it ignores the
    prediction error altogether, so the residual form is used here. The
    result is 1 for perfect predictions, 0 when predicting the mean of `y`,
    and negative for predictions worse than that.
    '''
    ybar = y.mean()
    ss_tot = ((y - ybar) ** 2).sum()
    ss_res = ((y - yhat) ** 2).sum()
    return 1. - ss_res / ss_tot
def PredictedRsquare(result, xnew, pm):
    '''
    Apply an in-sample fit to new data and score the predictions.

    Parameters
    ----------
    result : in-sample regression results (from statsmodels' model.fit()).
    xnew : indicator Series or DataFrame covering the out-of-sample period.
    pm : out-of-sample price-move Series.

    Returns
    -------
    (ypredict, rsq) : the predicted price moves and their R-square against
    `pm`, as computed by `Rsquare`.
    '''
    # Align the indicator with the out-of-sample target. reindex works on the
    # row index for both Series and DataFrame and turns labels missing from
    # `xnew` into NaN rows, which are then dropped.
    xnew = xnew.reindex(pm.index).dropna()
    pm = pm.loc[xnew.index]
    exog = sm.add_constant(xnew)
    ypredict = result.predict(exog)
    rsq = Rsquare(pm, ypredict)
    return ypredict, rsq
In [203]:
# Count the rows whose vol_diff equals 0. AddCol stores the raw volume change
# plus 1, so these are rows where the raw change was -1.
(ta.ix[:, 'vol_diff'] == 0).sum()
Out[203]:
In [229]:
#------------------------------------------
# rolling mean
# Indicator = smoothed up/down sign multiplied by the smoothed log of vol_diff.
mywindow = 12
lastmid_indicator = rolling_mean(ta_updown, n=mywindow).rename('lastmidIndicator')
# Plain column selection replaces the deprecated `.ix` indexer.
vol_roll = rolling_mean(np.log(ta['vol_diff']), n=mywindow).rename('vol_roll')
lastmid_indicator1 = lastmid_indicator * vol_roll
In [231]:
# Regress the training-sample price move on the combined indicator and print
# the regression summary.
res = myols(lastmid_indicator1, ta_10day_pm)
print(res.summary())
In [232]:
# Out-of-sample check: predict with the training-sample fit and display the
# (predictions, R-square) pair.
PredictedRsquare(res, lastmid_indicator1, ta_out_pm)
Out[232]:
In [138]:
# Indicator values on the out-of-sample timestamps; timestamps the indicator
# does not have become NaN and are dropped.
temp1 = lastmid_indicator1.reindex(ta_out_pm.index).dropna()
print(type(temp1))
plt.figure(figsize=(20,10))
# Thin the data: every 500th of the first 300000 indicator rows. The price
# move is then looked up by timestamp, so each x is paired with the y of the
# same tick even though dropna() made temp1 shorter than ta_out_pm.
sample_x = temp1.iloc[:300000:500]
sns.swarmplot(x=sample_x, y=ta_out_pm.loc[sample_x.index])
Out[138]:
In [130]:
# --------------------------------------------
'''plot fit'''
# Fitted values against the regressor named exog_names[1].
# NOTE(review): index 1 is presumably the indicator, with the constant added
# by myols sitting at index 0 -- confirm.
fig, ax = plt.subplots()
fig = sm.graphics.plot_fit(res, res.model.exog_names[1], ax=ax)
In [125]:
#--------------------------------------------------
# find good window parameter
# Sweep the rolling window from 1 to 59 rows and report the training-sample
# R-square of each fit. Loop-local names are used so the sweep does not
# overwrite the notebook-level `mywindow`, `lastmid_indicator` and `res`
# that other cells rely on.
for window in range(1, 60):
    candidate = ta_updown.rolling(window=window).mean().rename('lastmid_indicator')
    fit = myols(candidate, ta_10day_pm)
    # print() with a single argument behaves the same on Python 2 and 3.
    print('\n--------------------')
    print('window = %d, Rsquare = %f. ' % (window, fit.rsquared))
In [233]:
# Let matplotlib pick a backend automatically for the cells below, which use
# the MultiCursor widget and thefig.show().
# NOTE(review): presumably chosen to get interactive figure windows -- confirm.
%matplotlib auto
In [221]:
def prc_total(df, t1, t2, fs=(15,10)):
    '''
    Draw four stacked panels, sharing one time axis, for rows t1..t2 of `df`.

    Panels, top to bottom:
      1. 'last' price with best ask ('askPrc_0') and best bid ('bidPrc_0')
      2. 'vol_diff' / 2 on a log scale, with a reference level at 100
      3. 'openInt_diff'
      4. 'TotalBidLot' and 'TotalAskLot'

    Parameters
    ----------
    df : DataFrame with a DatetimeIndex and the columns listed above.
    t1, t2 : start and end labels of the window (inclusive label slice).
    fs : figure size passed to plt.figure.

    Returns the matplotlib Figure.
    '''
    # Every panel reads the window given by the t1/t2 arguments; nothing is
    # taken from notebook-level variables.
    window = df.loc[t1:t2]
    fig = plt.figure(figsize=fs)
    ax1 = fig.add_subplot(411)
    ax1.plot(window['last'], color='#f5f112', marker='*')
    ax1.plot(window['askPrc_0'], color='lightgreen')
    ax1.plot(window['bidPrc_0'], color='lightcoral')
    ax2 = fig.add_subplot(412, sharex=ax1)
    # The reference level is a Series on the window's own index so that it is
    # drawn on the shared time axis like the other curves.
    ax2.semilogy(pd.Series(100., index=window.index), color='orange')
    ax2.semilogy(window['vol_diff'] / 2., color='orange', marker='*')
    ax3 = fig.add_subplot(413, sharex=ax1)
    ax3.plot(window['openInt_diff'], color='white', lw=0.4, marker='*')
    ax4 = fig.add_subplot(414, sharex=ax1)
    ax4.plot(window['TotalBidLot'], color='red')
    ax4.plot(window['TotalAskLot'], color='green')
    return fig
In [244]:
# Window to inspect, as label bounds on the datetime index.
t11, t22 = '2015-11-19 21:00:01','2015-11-28 15:00:00'
# Keep only the training rows that have a price-move target; `.loc` replaces
# the deprecated `.ix` for this label-based selection.
temp = ta_10day.loc[ta_10day_pm.index, :]
thefig = prc_total(temp, t11, t22, (15,10))
# Keep a reference to the cursor object for as long as the figure is in use.
multi = MultiCursor(thefig.canvas, thefig.axes, color='c', lw=1)
thefig.show()
In [237]:
# Replace the third panel with the indicator over the same t11..t22 window as
# the rest of the figure. The former extra hard-coded date slice did not
# overlap that window and left the panel empty.
indicator_ax = thefig.axes[2]
indicator_ax.cla()
indicator_ax.plot(lastmid_indicator1.reindex(ta_10day_pm.index).loc[t11:t22])
Out[237]:
In [ ]:
# Label every quote with its size on the price panel, and every volume point
# with its value on the volume panel. Labels are anchored at each row's own
# timestamp, because the panels are plotted against the datetime index.
window = temp.loc[t11:t22, :]
price_ax, volume_ax = thefig.axes[0], thefig.axes[1]
# (quantity column, price column, vertical offset of the label from the price)
quote_labels = [
    ('askQty_0', 'askPrc_0', .3),
    ('askQty_1', 'askPrc_1', .7),
    ('bidQty_0', 'bidPrc_0', -.6),
    ('bidQty_1', 'bidPrc_1', -1.),
]
for qty_col, prc_col, offset in quote_labels:
    for stamp, qty, prc in zip(window.index, window[qty_col], window[prc_col]):
        price_ax.annotate(qty, (stamp, prc + offset), color='white', size=10)
# The label text is vol_diff / 2; it is placed just above the raw vol_diff.
for stamp, vol in zip(window.index, window['vol_diff']):
    volume_ax.annotate(vol / 2., (stamp, vol + .3), color='white', size=10)