In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.finance as mf
from matplotlib.widgets import MultiCursor
In [2]:
import statsmodels.tsa.stattools as stt
import scipy.signal as sgn
In [3]:
import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std
In [4]:
from matplotlib.mlab import PCA
In [55]:
%matplotlib auto
In [5]:
sns.set_context('paper')
sns.set_style("darkgrid")
In [68]:
sns.set_context('paper')
sns.set_style("dark", rc={'axes.facecolor': 'black', 'grid.color': 'red',
'grid.linestyle': '--',
'figure.facecolor': 'grey'})
In [7]:
hft = pd.read_hdf('HFT_SR_RM_MA_TA.hdf')
In [8]:
ta = hft.minor_xs('TA0001')
In [197]:
sr = hft.minor_xs('SR0001')
In [9]:
rm = hft.minor_xs('RM0001')
type(rm)
Out[9]:
In [10]:
night_len = int(4*3600*2.5)
mor_len = int(4*3600*2.25)
aftn_len = int(4*3600*1.5)
day_len = night_len + mor_len + aftn_len + 3
In [11]:
dates1 = pd.date_range('2015-11-19 21:01:01', '2015-12-31 21:01:01', freq='D')
In [12]:
dates2 = pd.date_range('2015-11-20 14:59:59', '2015-12-31 14:59:59', freq='D')
In [14]:
type(dates1)
Out[14]:
In [17]:
dates1.weekday
Out[17]:
In [18]:
trade_day1 = dates1[dates1.weekday != 5]
trade_day2 = dates2[np.logical_and(dates2.weekday != 5, dates2.weekday != 6)]
trade_day1
Out[18]:
In [94]:
rm.ix[trade_day2, ['high', 'highLimit', 'low', 'lowLimit']]
Out[94]:
In [100]:
temp = rm.ix[trade_day2, 'high'] - rm.ix[trade_day2, 'highLimit'] >-3
temp
Out[100]:
In [102]:
rm.ix[trade_day2,:].ix[temp, 'last']
Out[102]:
In [136]:
temp[0]
Out[136]:
In [167]:
def _show_limit_hits(ticks, daily, hit_mask, price_col, limit_col, line_color,
                     lookback=pd.Timedelta(18, unit='h')):
    """Print the flagged end-of-day rows and plot `last` leading up to the first one.

    ticks      : full tick DataFrame of one instrument (needs a 'last' column).
    daily      : rows of `ticks` sampled at the end-of-day timestamps.
    hit_mask   : boolean Series over `daily` marking the flagged days.
    price_col  : column holding the day's extreme price ('high' or 'low').
    limit_col  : column holding the matching price limit.
    line_color : colour of the horizontal limit line(s).
    lookback   : length of the plotted window that ends at the first flagged row.

    Does nothing when no row is flagged.
    """
    if not hit_mask.any():
        return
    hits = daily.ix[hit_mask, :]
    print(hits[[price_col, limit_col]])
    first_hit = hits.index[0]
    start = first_hit - lookback
    ticks.ix[start: first_hit, 'last'].plot(figsize=(15,10))
    plt.hlines(hits[limit_col], start, first_hit, colors=line_color, linestyles='-')
    plt.show()


# For every instrument, flag the days whose high came within 2 of highLimit or whose
# low came within 2 of lowLimit, and plot the 18 hours before the first such day.
for pinzhong in hft.minor_axis:
    print('\n\n#-------------------------------------')
    print(pinzhong)
    xx = hft.minor_xs(pinzhong)
    daily = xx.ix[trade_day2, :]
    toohigh = daily['high'] - daily['highLimit'] > -2
    toolow = daily['low'] - daily['lowLimit'] < 2
    print('too high: \n')
    _show_limit_hits(xx, daily, toohigh, 'high', 'highLimit', 'r')
    print('too low: \n')
    _show_limit_hits(xx, daily, toolow, 'low', 'lowLimit', 'g')
We can see that SR has no limit-up (zhangting) or limit-down (dieting) days.
In [19]:
ta.index[day_len*10 + 9]
Out[19]:
In [21]:
#------------------ ta_10day is my training dataset
ta_10day = ta.ix[:day_len*10 + 10, :]
In [24]:
def Letitforward(df, forwardnum):
    """Return the change in `df` over the next `forwardnum` rows.

    Row i of the result is ``df[i + forwardnum] - df[i]``. Rows whose difference
    is NaN are dropped, which includes the trailing rows that have no value
    `forwardnum` steps ahead. The input is not modified.
    """
    forward_change = df.shift(-forwardnum) - df
    return forward_change.dropna()
In [25]:
forward_ticks = 40
In [43]:
# Forward price move: last[t + forward_ticks] - last[t].
# `letitlag` is not defined in this notebook; the helper is called `Letitforward`.
ta_10day_pm = Letitforward(ta_10day.ix[:, 'last'], forward_ticks)
In [30]:
plt.plot(ta_10day_pm)
Out[30]:
In [31]:
#----------------------------flag the ticks from 14:59:49 up to the 15:00 close
# The hour must be exactly 14. With `hour >= 14` the mask also matched xx:59:49-xx:59:59
# in every later hour present in the index (e.g. 21:59 and 22:59 if night-session ticks
# are included), silently dropping ordinary mid-session ticks.
# NOTE(review): only the afternoon close is handled here; confirm whether the ends of
# the other sessions need the same treatment.
last_44_boolean = np.logical_and.reduce((ta_10day_pm.index.hour == 14,
                                         ta_10day_pm.index.minute >= 59,
                                         ta_10day_pm.index.second >= 49))
# Anything stamped in hour 15 (at or after the close) is flagged as well.
last_boolean = ta_10day_pm.index.hour == 15
In [32]:
ta_10day_pm.ix[np.logical_or(last_44_boolean, last_boolean)]
Out[32]:
In [464]:
plt.figure()
ta_10day_pm.hist(bins=100)
Out[464]:
In [465]:
outlier_boolean = abs(ta_10day_pm) > 10
In [468]:
ta_10day_pm_no_outlier = ta_10day_pm.ix[np.logical_not(outlier_boolean)]
In [470]:
plt.figure()
plt.plot(ta_10day_pm_no_outlier)
Out[470]:
ta_10day_pm excludes last 36 ticks before each ending
In [44]:
ta_10day_pm = ta_10day_pm.ix[np.logical_not(np.logical_or(last_44_boolean, last_boolean))]
ta_10day_pm.plot(figsize=(18,10))
ta_10day_pm = ta_10day_pm.rename('price move')
Out[44]:
In [46]:
pm_index = ta_10day_pm.index
In [202]:
# Box plot of the resting quantity at each of the 5 ask and 5 bid levels.
qty_cols = ['askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0',
            'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4']
plt.figure(figsize=(15,10))
sns.boxplot(ta_10day.ix[:, qty_cols].values)
# seaborn places the boxes at x = 0..9, one per column. The old call passed 11 labels
# (a leading '0') for 10 ticks, which shifted every label one box to the right.
plt.xticks(range(len(qty_cols)), qty_cols)
plt.ylim((-10,1600))
Out[202]:
In [34]:
ta_10day.ix[:, ['askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0',
'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4']].mean()
Out[34]:
We can see that the mean quantity still decreases monotonically, but is slightly larger than the median.
Next, verify that this distribution does not vary much across different instruments (pinzhong).
In [ ]:
# Same box plot for SR, to compare the per-level quantity distribution across instruments.
qty_cols = ['askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0',
            'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4']
plt.figure(figsize=(15,10))
plt.boxplot(sr.ix[:day_len*14, qty_cols].values)
# plt.boxplot places the boxes at x = 1..10. The old ticks were 0..9 with 11 labels,
# so the last box (bidQty_4) never got its label.
plt.xticks(range(1, len(qty_cols) + 1), qty_cols)
plt.ylim((-10,700))
In [205]:
bidaskqty_cov = ta_10day.ix[:, ['askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0',
'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4']].cov()
In [206]:
bidaskqty_cov
Out[206]:
In [112]:
ta_bidaskqty_corr = ta_10day.ix[:, ['TotalAskLot', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0',
'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4', 'TotalBidLot']].corr()
ta_bidaskqty_corr
Out[112]:
In [113]:
w, v = np.linalg.eig(ta_bidaskqty_corr)
np.sqrt(w.max() / w.min())
Out[113]:
In [114]:
fig, ax = plt.subplots()
heatmap = ax.pcolor(ta_bidaskqty_corr.abs(), cmap=plt.cm.Blues, alpha=1)
fig = plt.gcf()
fig.set_size_inches(9, 9)
ax.set_frame_on(False)
ax.set_yticks(np.arange(ta_bidaskqty_corr.shape[0]) + 0.5, minor=False)
ax.set_xticks(np.arange(ta_bidaskqty_corr.shape[1]) + 0.5, minor=False)
ax.invert_yaxis()
ax.xaxis.tick_top()
labels = ['TotalAskLot', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0',
'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4', 'TotalBidLot']
ax.set_xticklabels(labels, minor=False)
ax.set_yticklabels(labels, minor=False)
Out[114]:
In [49]:
bidqty_5 = ta_10day.ix[:, ['bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4', 'TotalBidLot']].values
pcares1 = PCA(bidqty_5)
#------------------------- Wt is eigenvectors, Wt[0] is the first eigenvector
#------------------------- Y is original data projected to PC axes
print pcares1.fracs
print pcares1.Wt
print pcares1.Y
In [50]:
askqty_5 = ta_10day.ix[:, ['TotalAskLot', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0']].values
pcares2 = PCA(askqty_5)
print pcares2.fracs
print pcares2.Wt
print pcares2.Y[10000:10020, :]
In [51]:
bid_pc = pd.DataFrame(data=pcares1.Y[:, :], index=ta_10day.index)
ask_pc = pd.DataFrame(data=pcares2.Y[:, :], index=ta_10day.index)
In [52]:
# bid_pc = bid_pc.ix[lag_index, :]
# ask_pc = ask_pc.ix[lag_index, :]
bid_pc.columns = ('bid_pc1', 'bid_pc2', 'bid_pc3', 'bid_pc4', 'bid_pc5', 'bid_pc6')
ask_pc.columns = ('ask_pc1', 'ask_pc2', 'ask_pc3', 'ask_pc4', 'ask_pc5', 'ask_pc6')
In [94]:
(bidask_pc- bidask_pc.mean()) / bidask_pc.std()
Out[94]:
In [93]:
bidask_pc = pd.concat([bid_pc, ask_pc], axis=1)
bidask_pc
Out[93]:
In [270]:
aaa.ix['2015-11-24 10:30:31' : '2015-11-24 10:30:32']
Out[270]:
In [288]:
ta_10day.ix['2015-12-03 11:08:27.500', 'vol_diff']
Out[288]:
In [282]:
ta_10day_vol = ta_10day_vol.rename('vol_diff')
In [287]:
ta_10day = ta_10day.join(ta_10day_vol)
In [276]:
ta_10day_vol = ta_10day.ix[:, 'volume'].diff()
ta_10day_vol = ta_10day_vol.ix[pm_index]
In [277]:
ta_10day_vol = ta_10day_vol.ix[ta_10day_vol >= 0]
ta_10day_vol = ta_10day_vol + 1
In [304]:
def prc_total(df, t1, t2, fs=(15,10)):
    """Draw a four-panel overview of the rows of `df` between `t1` and `t2`.

    Panels, top to bottom, all sharing the x axis:
      1. 'last' with 'askPrc_0' and 'bidPrc_0'
      2. 'vol_diff' / 2 on a log scale, with a reference line at 100
      3. 'openInterest'
      4. 'TotalBidLot' (red) and 'TotalAskLot' (green)

    df     : DataFrame containing the columns listed above.
    t1, t2 : slice bounds handed to `df.ix[t1: t2]`.
    fs     : figure size in inches.

    Returns the matplotlib Figure.
    """
    # `.ix` is kept (not `.loc`) because the notebook calls this with both timestamp
    # strings and integer positions as bounds.
    window = df.ix[t1: t2, :]
    fig = plt.figure(figsize=fs)

    ax1 = fig.add_subplot(411)
    ax1.plot(window['last'], color='#f5f112', marker='*')
    ax1.plot(window['askPrc_0'], color='lightgreen')
    ax1.plot(window['bidPrc_0'], color='lightcoral')

    # Bug fix: this panel used the notebook globals t11/t22 instead of the t1/t2
    # arguments, so it could show a different window from the other three panels.
    ax2 = fig.add_subplot(412, sharex=ax1)
    ax2.semilogy(100 * np.ones_like(window.values), color='orange')
    ax2.semilogy(window['vol_diff']/2., color='orange', marker='*')

    ax3 = fig.add_subplot(413, sharex=ax1)
    ax3.plot(window['openInterest'], color='white', lw=0.4, marker='*')

    ax4 = fig.add_subplot(414, sharex=ax1)
    ax4.plot(window['TotalBidLot'], color='red')
    ax4.plot(window['TotalAskLot'], color='green')
    return fig
In [402]:
t11, t22 = '2015-12-01 21:00:01', '2015-12-05 15:00:00'
temp = ta_10day.ix[pm_index, :]
thefig = prc_total(temp, t11, t22, (15,10))
multi = MultiCursor(thefig.canvas, thefig.axes, color='c', lw=1)
thefig.show()
look at scenario 3.
In [361]:
t11, t22 = 37180, 37280
temp = ta_10day.ix[pm_index, :].ix['2015-12-01 21:00:01': '2015-12-05 15:00:00', :]
thefig = prc_total(temp, t11, t22, (15,10))
multi = MultiCursor(thefig.canvas, thefig.axes, color='c', lw=1)
thefig.show()
In [362]:
# Label the price panel with the quoted sizes at the two best levels, and the volume
# panel with vol_diff / 2. x positions are the row positions 0..n-1 inside the window.
window = temp.ix[t11:t22, :]  # slice once instead of once per annotated point
n = len(window)
price_labels = (('askQty_0', 'askPrc_0', .3),
                ('askQty_1', 'askPrc_1', .7),
                ('bidQty_0', 'bidPrc_0', -.6),
                ('bidQty_1', 'bidPrc_1', -1.))
for qty_col, prc_col, offset in price_labels:
    prices = window[prc_col].values
    for i, txt in enumerate(window[qty_col]):
        thefig.axes[0].annotate(txt, (i, prices[i] + offset), color='white', size=10)
vol_diff = window['vol_diff'].values
for i, txt in enumerate(window['vol_diff']/2.):
    thefig.axes[1].annotate(txt, (i, vol_diff[i] + .3), color='white', size=10)
look at scenario 1.
In [376]:
t11, t22 = 152960, 153140
temp = ta_10day.ix[pm_index, :].ix['2015-12-01 21:00:01': '2015-12-05 15:00:00',:]
thefig = prc_total(temp, t11, t22, (15,10))
multi = MultiCursor(thefig.canvas, thefig.axes, color='c', lw=1)
thefig.show()
In [377]:
thefig.axes[2].cla()
thefig.axes[2].plot(lastmid_indicator.ix[pm_index].ix['2015-12-01 21:00:01': '2015-12-05 15:00:00'].ix[t11:t22])
In [366]:
# Label the price panel with the quoted sizes at the two best levels, and the volume
# panel with vol_diff / 2. x positions are the row positions 0..n-1 inside the window.
window = temp.ix[t11:t22, :]  # slice once instead of once per annotated point
n = len(window)
price_labels = (('askQty_0', 'askPrc_0', .3),
                ('askQty_1', 'askPrc_1', .7),
                ('bidQty_0', 'bidPrc_0', -.6),
                ('bidQty_1', 'bidPrc_1', -1.))
for qty_col, prc_col, offset in price_labels:
    prices = window[prc_col].values
    for i, txt in enumerate(window[qty_col]):
        thefig.axes[0].annotate(txt, (i, prices[i] + offset), color='white', size=10)
vol_diff = window['vol_diff'].values
for i, txt in enumerate(window['vol_diff']/2.):
    thefig.axes[1].annotate(txt, (i, vol_diff[i] + .3), color='white', size=10)
In [252]:
ta_10day.ix[pm_index, :].ix['2015-12-01 21:00:01': '2015-12-05 15:00:00', :].ix[t11: t22,
['volume', 'TotalAskLot',
#'askQty_4', 'askQty_3',
'askQty_2', 'askQty_1', 'askPrc_1', 'askQty_0', 'askPrc_0',
'last',
'bidPrc_0', 'bidQty_0', 'bidPrc_1', 'bidQty_1', 'bidQty_2',
#'bidQty_3', 'bidQty_4',
'TotalBidLot']]
Out[252]:
look at scenario 2.
In [233]:
t11, t22 = '2015-11-25 10:48:01.000', '2015-11-25 10:49'
temp = ta_10day.ix[pm_index, :]
# prc_total takes (df, t1, t2, fs). The extra ta_10day_vol argument that used to be
# passed here raised a TypeError; the volume panel reads the 'vol_diff' column of `temp`.
thefig = prc_total(temp, t11, t22, (15,10))
# Keep a reference to the cursor so it is not garbage-collected while the figure is open.
multi = MultiCursor(thefig.canvas, thefig.axes, color='c', lw=1)
thefig.show()
In [234]:
# Label the price panel with the quoted size at the best ask and best bid.
# x positions are the row positions 0..n-1 inside the window.
window = temp.ix[t11:t22, :]  # slice once instead of once per annotated point
n = len(window)
price_labels = (('askQty_0', 'askPrc_0', .3),
                ('bidQty_0', 'bidPrc_0', -.6))
for qty_col, prc_col, offset in price_labels:
    prices = window[prc_col].values
    for i, txt in enumerate(window[qty_col]):
        thefig.axes[0].annotate(txt, (i, prices[i] + offset), color='white', size=10)
In [240]:
ta_10day.ix[pm_index, :].ix[t11: t22,:].ix[20:100,
['TotalAskLot',
#'askQty_4', 'askQty_3',
'askQty_2', 'askQty_1', 'askPrc_1', 'askQty_0', 'askPrc_0',
'last',
'bidPrc_0', 'bidQty_0', 'bidPrc_1', 'bidQty_1', 'bidQty_2',
#'bidQty_3', 'bidQty_4',
'TotalBidLot']]
Out[240]:
In [471]:
def myols_no_outlier(df, norm=False, target=None):
    """Fit an OLS regression (with intercept) of the outlier-free price move on `df`.

    df     : Series or DataFrame of regressors sharing the tick index.
    norm   : if True, z-score each regressor (subtract mean, divide by std) first.
    target : optional dependent Series; defaults to the notebook-level
             `ta_10day_pm_no_outlier`.

    Returns the fitted statsmodels results object.
    """
    if target is None:
        target = ta_10day_pm_no_outlier
    # Align on rows. The previous `df[target.index]` only did that for a Series; on a
    # DataFrame it is a column lookup and raised KeyError. reindex needs a unique
    # index on `df`. Rows that are missing or contain NaN are dropped.
    df = df.reindex(target.index).dropna()
    if norm:
        df = (df - df.mean()) / df.std()
    X = sm.add_constant(df)
    Y = target.loc[df.index]
    return sm.OLS(Y, X).fit()
In [437]:
def myols(df, norm=False, target=None):
    """Fit an OLS regression (with intercept) of the forward price move on `df`.

    df     : Series or DataFrame of regressors sharing the tick index.
    norm   : if True, z-score each regressor (subtract mean, divide by std) first.
    target : optional dependent Series; defaults to the notebook-level `ta_10day_pm`.

    Returns the fitted statsmodels results object.
    """
    if target is None:
        target = ta_10day_pm
    # Align on rows. The previous `df[target.index]` only did that for a Series; on a
    # DataFrame it is a column lookup and raised KeyError. reindex needs a unique
    # index on `df`. Rows that are missing or contain NaN are dropped.
    df = df.reindex(target.index).dropna()
    if norm:
        df = (df - df.mean()) / df.std()
    X = sm.add_constant(df)
    Y = target.loc[df.index]
    return sm.OLS(Y, X).fit()
In [105]:
nonzero_bool = ta_10day_pm != 0
ta_10day_pm.ix[nonzero_bool]
Out[105]:
In [134]:
temp = 0
sns.swarmplot(x=ta_10day_pm.ix[temp: temp + 100000:100], y=bidask_pc.ix[pm_index, 'bid_pc1'].ix[temp: temp + 100000:100])
Out[134]:
In [135]:
temp = 0
plt.figure()
plt.scatter(ta_10day_pm.ix[temp: temp + 100000:100], bidask_pc.ix[pm_index, 'bid_pc1'].ix[temp: temp + 100000:100])
Out[135]:
In [438]:
res = myols(bidask_pc.ix[:, 'bid_pc1'])
print(res.summary())
In [110]:
res = myols(bidask_pc.ix[pm_index,
['bid_pc1', 'bid_pc2', 'bid_pc3', 'bid_pc4', 'ask_pc1', 'ask_pc2', 'ask_pc3', 'ask_pc4']
], norm=False)
print(res.summary())
In [114]:
temp = ta_10day.ix[pm_index, ['TotalAskLot', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0',
'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4', 'TotalBidLot']]
res = myols(temp)
print(res.summary())
In [115]:
# Book-wide pressure: (TotalBidLot - TotalAskLot) / (TotalBidLot + TotalAskLot).
# The result was assigned to `Toalpressure_index` while the next line read
# `pressure_index`, so the regression used a stale or undefined series.
total_pressure_index = ((ta_10day.ix[:, 'TotalBidLot'] - ta_10day.ix[:, 'TotalAskLot']) /
                        (ta_10day.ix[:, 'TotalBidLot'] + ta_10day.ix[:, 'TotalAskLot']))
temp = total_pressure_index.ix[pm_index]
res = myols(temp)
print(res.summary())
In [121]:
pressure_index = ((ta_10day.ix[:, 'bidQty_0'] * ta_10day.ix[:, 'bidPrc_0'] - ta_10day.ix[:, 'askQty_0'] * ta_10day.ix[:, 'askPrc_0']) /
(ta_10day.ix[:, 'bidQty_0'] * ta_10day.ix[:, 'bidPrc_0'] + ta_10day.ix[:, 'askQty_0'] * ta_10day.ix[:, 'askPrc_0']))
temp = pressure_index.ix[pm_index]
res = myols(temp)
print(res.summary())
Extreme Points Summary:
In sample ta_10day,
there are 12070 points where spread > 1 * minimum_change
there are 4445 points where last are out of spread
The length of sample: 900040
In [331]:
temp1 = ta_10day.ix[:, 'last'] != ta_10day.ix[:, 'bidPrc_0']
temp2 = ta_10day.ix[:, 'last'] != ta_10day.ix[:, 'askPrc_0']
temp3 = np.logical_and(temp1, temp2)
ta_10day.ix[temp3, ['last', 'askPrc_0', 'bidPrc_0']]
Out[331]:
In [325]:
ta_10day.ix[temp3, ['last', 'askPrc_0', 'bidPrc_0']].plot(marker='*')
Out[325]:
In [326]:
len(ta_10day)
Out[326]:
In [328]:
temp1 = ta_10day.ix[:, 'last'] < ta_10day.ix[:, 'bidPrc_0']
temp2 = ta_10day.ix[:, 'last'] > ta_10day.ix[:, 'askPrc_0']
temp3 = np.logical_or(temp1, temp2)
ta_10day.ix[temp3, ['last', 'askPrc_0', 'bidPrc_0']]
Out[328]:
In [329]:
ta_10day.ix[temp3, ['last', 'askPrc_0', 'bidPrc_0']].plot(marker='*')
Out[329]:
In [333]:
ta_10day.ix[:, ['askPrc_0', 'bidPrc_0']]
Out[333]:
In [335]:
temp = (ta_10day.ix[:, 'askPrc_0'] + ta_10day.ix[:, 'bidPrc_0']) / 2.
temp = temp.rename('mid')
temp
ta_10day = ta_10day.join(temp)
Out[335]:
In [336]:
In [339]:
ta_10day.ix[:5, 30:]
Out[339]:
map up as +1, down as -1
In [ ]:
# Flag ticks whose last price is above / below the mid price.
# NOTE(review): this used `rm_10day`, which is not defined anywhere in the visible
# notebook. `mid` was joined onto ta_10day and the regressions that consume `updown`
# align on ta_10day_pm's index, so ta_10day is used here. Confirm this is intended.
up = ta_10day.ix[:, 'last'] > ta_10day.ix[:, 'mid']
down = ta_10day.ix[:, 'last'] < ta_10day.ix[:, 'mid']
In [350]:
up *= 1
In [351]:
down
Out[351]:
In [352]:
down *= -1
down
Out[352]:
In [478]:
updown = updown.rename('updown')
In [359]:
updown = up + down
updown
Out[359]:
In [458]:
plt.figure()
plt.hist(updown, bins=50)
Out[458]:
In [459]:
mywindow = 40
r = updown.rolling(window=mywindow)
r
Out[459]:
In [460]:
lastmid_indicator = (r.mean())
lastmid_indicator = lastmid_indicator.rename('lastmid_indicator')
In [401]:
plt.figure()
plt.hist(lastmid_indicator.ix[pm_index].dropna(), bins=20)
Out[401]:
In [388]:
plt.figure()
plt.hist(ta_10day_pm, bins=np.arange(-5.5, 5.5, 1))
Out[388]:
In [483]:
res = myols(updown)
print(res.summary())
In [461]:
res = myols(lastmid_indicator)
print(res.summary())
Using the sample with extreme values removed, R-squared does not increase much (about 0.003).
In [472]:
res = myols_no_outlier(lastmid_indicator)
print(res.summary())
In [474]:
# R-squared of the rolling last-vs-mid indicator for window lengths 1..59.
# Loop-local names are used so the sweep no longer overwrites the notebook-level
# `mywindow`, `r`, `lastmid_indicator` and `res`, which other cells read.
window_rsquared = {}
for sweep_window in range(1, 60):
    sweep_indicator = updown.rolling(window=sweep_window).mean().rename('lastmid_indicator')
    sweep_res = myols(sweep_indicator)
    window_rsquared[sweep_window] = sweep_res.rsquared
    print('\n--------------------')
    print('window = %d, Rsquare = %f. ' % (sweep_window, sweep_res.rsquared))
plot fit
In [482]:
fig, ax = plt.subplots()
fig = sm.graphics.plot_fit(res, 'updown', ax=ax)
In [136]:
ta_10day.ix[0:55, ['last', 'volume']]
Out[136]:
In [137]:
ta_10day_vol
Out[137]:
yes it's ok
In [340]:
prstd, iv_l, iv_u = wls_prediction_std(res2)
fig, ax = plt.subplots(figsize=(8,6))
ax.plot(x, y, 'o', label="Data")
ax.plot(x, y_true, 'b-', label="True")
ax.plot(x, res2.fittedvalues, 'r--.', label="Predicted")
ax.plot(x, iv_u, 'r--')
ax.plot(x, iv_l, 'r--')
legend = ax.legend(loc="best")
In [ ]:
In [12]:
from matplotlib.mlab import PCA
In [18]:
rm_totalbidask = rm.ix[:, ['TotalBidLot', 'TotalAskLot']]
In [32]:
mydata = rm_totalbidask.ix[:day_len,:].values
mydata
Out[32]:
In [33]:
bidaskpca = PCA(mydata)
In [34]:
bidaskpca.fracs
Out[34]:
In [35]:
yy = bidaskpca.Y
plt.scatter(yy[:,0], yy[:,1])
plt.show()
In [188]:
plt.figure(figsize=(18,10))
temp = day_len * 11 + 1000
sns.swarmplot(x=rm_last_diff2.ix[temp:temp+60000:20], y=rm_bidask_pressure_index2.ix[temp:temp+60000:20])
Out[188]: