In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.finance as mf
from matplotlib.widgets import MultiCursor

In [2]:
import statsmodels.tsa.stattools as stt
import scipy.signal as sgn

In [3]:
import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std

In [4]:
from matplotlib.mlab import PCA

In [55]:
%matplotlib auto


Using matplotlib backend: TkAgg

In [5]:
sns.set_context('paper')
sns.set_style("darkgrid")

In [68]:
sns.set_context('paper')
sns.set_style("dark", rc={'axes.facecolor': 'black', 'grid.color': 'red', 
                              'grid.linestyle': '--',
                              'figure.facecolor': 'grey'})

In [7]:
hft = pd.read_hdf('HFT_SR_RM_MA_TA.hdf')

In [8]:
ta = hft.minor_xs('TA0001')

In [197]:
sr = hft.minor_xs('SR0001')

In [9]:
rm = hft.minor_xs('RM0001')
type(rm)


Out[9]:
pandas.core.frame.DataFrame

In [10]:
night_len = int(4*3600*2.5)
mor_len = int(4*3600*2.25)
aftn_len = int(4*3600*1.5)
day_len = night_len + mor_len + aftn_len + 3

high low limit


In [11]:
dates1 = pd.date_range('2015-11-19 21:01:01', '2015-12-31  21:01:01', freq='D')

In [12]:
dates2 = pd.date_range('2015-11-20 14:59:59', '2015-12-31  14:59:59', freq='D')

In [14]:
type(dates1)


Out[14]:
pandas.tseries.index.DatetimeIndex

In [17]:
dates1.weekday


Out[17]:
array([3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4,
       5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3], dtype=int32)

In [18]:
trade_day1 = dates1[dates1.weekday != 5]
trade_day2 = dates2[np.logical_and(dates2.weekday != 5, dates2.weekday != 6)]
trade_day1


Out[18]:
DatetimeIndex(['2015-11-19 21:01:01', '2015-11-20 21:01:01',
               '2015-11-22 21:01:01', '2015-11-23 21:01:01',
               '2015-11-24 21:01:01', '2015-11-25 21:01:01',
               '2015-11-26 21:01:01', '2015-11-27 21:01:01',
               '2015-11-29 21:01:01', '2015-11-30 21:01:01',
               '2015-12-01 21:01:01', '2015-12-02 21:01:01',
               '2015-12-03 21:01:01', '2015-12-04 21:01:01',
               '2015-12-06 21:01:01', '2015-12-07 21:01:01',
               '2015-12-08 21:01:01', '2015-12-09 21:01:01',
               '2015-12-10 21:01:01', '2015-12-11 21:01:01',
               '2015-12-13 21:01:01', '2015-12-14 21:01:01',
               '2015-12-15 21:01:01', '2015-12-16 21:01:01',
               '2015-12-17 21:01:01', '2015-12-18 21:01:01',
               '2015-12-20 21:01:01', '2015-12-21 21:01:01',
               '2015-12-22 21:01:01', '2015-12-23 21:01:01',
               '2015-12-24 21:01:01', '2015-12-25 21:01:01',
               '2015-12-27 21:01:01', '2015-12-28 21:01:01',
               '2015-12-29 21:01:01', '2015-12-30 21:01:01',
               '2015-12-31 21:01:01'],
              dtype='datetime64[ns]', freq=None)

5 ningt pan per week

a complete week starts from 11-23


In [94]:
rm.ix[trade_day2, ['high', 'highLimit', 'low', 'lowLimit']]


Out[94]:
high highLimit low lowLimit
2015-11-20 14:59:59 1798.0 1866.0 1765.0 1722.0
2015-11-23 14:59:59 1796.0 1853.0 1709.0 1709.0
2015-11-24 14:59:59 1773.0 1826.0 1699.0 1684.0
2015-11-25 14:59:59 1787.0 1801.0 1751.0 1661.0
2015-11-26 14:59:59 1840.0 1842.0 1771.0 1700.0
2015-11-27 14:59:59 1821.0 1878.0 1789.0 1732.0
2015-11-30 14:59:59 1807.0 1878.0 1755.0 1732.0
2015-12-01 14:59:59 1834.0 1852.0 1790.0 1708.0
2015-12-02 14:59:59 1927.0 1957.0 1883.0 1805.0
2015-12-03 14:59:59 1899.0 1971.0 1851.0 1819.0
2015-12-04 14:59:59 1872.0 1944.0 1838.0 1794.0
2015-12-07 14:59:59 1886.0 1928.0 1857.0 1778.0
2015-12-08 14:59:59 1879.0 1948.0 1819.0 1798.0
2015-12-09 14:59:59 1852.0 1917.0 1817.0 1769.0
2015-12-10 14:59:59 1862.0 1911.0 1835.0 1763.0
2015-12-11 14:59:59 1892.0 1919.0 1838.0 1771.0
2015-12-14 14:59:59 1890.0 1938.0 1867.0 1788.0
2015-12-15 14:59:59 1921.0 1956.0 1873.0 1804.0
2015-12-16 14:59:59 1908.0 1976.0 1878.0 1824.0
2015-12-17 14:59:59 1917.0 1970.0 1864.0 1818.0
2015-12-18 14:59:59 1907.0 1967.0 1859.0 1815.0
2015-12-21 14:59:59 1941.0 1961.0 1899.0 1809.0
2015-12-22 14:59:59 1947.0 1994.0 1923.0 1840.0
2015-12-23 14:59:59 1934.0 2012.0 1909.0 1856.0
2015-12-24 14:59:59 1927.0 1998.0 1904.0 1844.0
2015-12-25 14:59:59 1920.0 1994.0 1896.0 1840.0
2015-12-28 14:59:59 1904.0 1984.0 1868.0 1830.0
2015-12-29 14:59:59 1899.0 1961.0 1868.0 1809.0
2015-12-30 14:59:59 1915.0 1960.0 1893.0 1808.0
2015-12-31 14:59:59 1924.0 1981.0 1903.0 1827.0

In [100]:
temp = rm.ix[trade_day2, 'high'] - rm.ix[trade_day2, 'highLimit'] >-3
temp


Out[100]:
2015-11-20 14:59:59    False
2015-11-23 14:59:59    False
2015-11-24 14:59:59    False
2015-11-25 14:59:59    False
2015-11-26 14:59:59     True
2015-11-27 14:59:59    False
2015-11-30 14:59:59    False
2015-12-01 14:59:59    False
2015-12-02 14:59:59    False
2015-12-03 14:59:59    False
2015-12-04 14:59:59    False
2015-12-07 14:59:59    False
2015-12-08 14:59:59    False
2015-12-09 14:59:59    False
2015-12-10 14:59:59    False
2015-12-11 14:59:59    False
2015-12-14 14:59:59    False
2015-12-15 14:59:59    False
2015-12-16 14:59:59    False
2015-12-17 14:59:59    False
2015-12-18 14:59:59    False
2015-12-21 14:59:59    False
2015-12-22 14:59:59    False
2015-12-23 14:59:59    False
2015-12-24 14:59:59    False
2015-12-25 14:59:59    False
2015-12-28 14:59:59    False
2015-12-29 14:59:59    False
2015-12-30 14:59:59    False
2015-12-31 14:59:59    False
dtype: bool

In [102]:
rm.ix[trade_day2,:].ix[temp, 'last']


Out[102]:
2015-11-26 14:59:59    1821.0
Name: last, dtype: float64

In [136]:
temp[0]


Out[136]:
Timestamp('2015-11-26 14:59:59')

In [167]:
for pinzhong in hft.minor_axis:
    print '\n\n#-------------------------------------'
    print pinzhong
    xx = hft.minor_xs(pinzhong)
    toohigh = xx.ix[trade_day2, 'high'] - xx.ix[trade_day2, 'highLimit'] > -2
    toolow = xx.ix[trade_day2, 'low'] - xx.ix[trade_day2, 'lowLimit'] < 2
    print 'too high: \n'
    if toohigh.any() == True:
        print xx.ix[trade_day2,:].ix[toohigh, ['high', 'highLimit']]
        temp = (xx.ix[trade_day2,:].ix[toohigh, :].index)[0]
        high = xx.ix[trade_day2,:].ix[toohigh, 'highLimit']
        #fig1 = plt.figure(figsize=(15,10))
        #ax1 = fig1.add_subplot(111)
        xx.ix[temp - pd.Timedelta(18, unit='h'): temp, 'last'].plot(figsize=(15,10))
        plt.hlines(high, temp - pd.Timedelta(18, unit='h'), temp, colors='r', linestyles='-')
        plt.show()
    print 'too low: \n'
    if toolow.any() == True:
        print xx.ix[trade_day2,:].ix[toolow, ['low', 'lowLimit']]
        temp = (xx.ix[trade_day2,:].ix[toolow, :].index)[0]
        low = xx.ix[trade_day2,:].ix[toolow, 'lowLimit']
        #fig2 = plt.figure(figsize=(15,10))
        #ax2 = fig2.add_subplot(111)
        xx.ix[temp - pd.Timedelta(18, unit='h'): temp, 'last'].plot(figsize=(15,10))
        plt.hlines(low, temp - pd.Timedelta(18, unit='h'), temp, colors='g', linestyles='-')
        plt.show()
plt.show()



#-------------------------------------
MA0001
too high: 

too low: 

                        low  lowLimit
2015-11-23 14:59:59  1731.0    1731.0

#-------------------------------------
RM0001
too high: 

too low: 

                        low  lowLimit
2015-11-23 14:59:59  1709.0    1709.0

#-------------------------------------
SR0001
too high: 

too low: 



#-------------------------------------
TA0001
too high: 

too low: 

                        low  lowLimit
2015-12-08 14:59:59  4296.0    4296.0

we can see that SR has no zhangting or dieting

Training dataset


In [19]:
ta.index[day_len*10 + 9]


Out[19]:
Timestamp('2015-12-03 15:00:00')

In [21]:
#------------------   ta_10day is my training dataset
ta_10day = ta.ix[:day_len*10 + 10, :]

pice move


In [24]:
def Letitforward(df, forwardnum):
    df2 = df.shift(-forwardnum) - df
    df2.dropna(inplace=True)
    return df2

In [25]:
forward_ticks = 40
, 'volume', 'openInterest', 'TotalAskLot', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0', 'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4', 'TotalBidLot']

In [43]:
ta_10day_pm =letitlag(ta_10day.ix[:, 'last'], forward_ticks)

In [30]:
plt.plot(ta_10day_pm)


Out[30]:
[<matplotlib.lines.Line2D at 0x7f558f982bd0>]

In [31]:
#----------------------------exclude last 36 ticks before ending
last_44_boolean = np.logical_and.reduce((ta_10day_pm.index.hour >= 14,
                              ta_10day_pm.index.minute >= 59,
                              ta_10day_pm.index.second >= 49))
last_boolean = ta_10day_pm.index.hour == 15

In [32]:
ta_10day_pm.ix[np.logical_or(last_44_boolean, last_boolean)]


Out[32]:
2015-11-19 21:59:49.000    0.0
2015-11-19 21:59:49.250    0.0
2015-11-19 21:59:49.500    0.0
2015-11-19 21:59:49.750   -2.0
2015-11-19 21:59:50.000   -2.0
2015-11-19 21:59:50.250   -2.0
2015-11-19 21:59:50.500   -2.0
2015-11-19 21:59:50.750    0.0
2015-11-19 21:59:51.000   -2.0
2015-11-19 21:59:51.250    0.0
2015-11-19 21:59:51.500    0.0
2015-11-19 21:59:51.750    0.0
2015-11-19 21:59:52.000    0.0
2015-11-19 21:59:52.250    0.0
2015-11-19 21:59:52.500   -2.0
2015-11-19 21:59:52.750    0.0
2015-11-19 21:59:53.000   -2.0
2015-11-19 21:59:53.250   -2.0
2015-11-19 21:59:53.500   -2.0
2015-11-19 21:59:53.750   -2.0
2015-11-19 21:59:54.000   -2.0
2015-11-19 21:59:54.250   -2.0
2015-11-19 21:59:54.500    0.0
2015-11-19 21:59:54.750    0.0
2015-11-19 21:59:55.000    0.0
2015-11-19 21:59:55.250    0.0
2015-11-19 21:59:55.500    0.0
2015-11-19 21:59:55.750    0.0
2015-11-19 21:59:56.000    0.0
2015-11-19 21:59:56.250   -2.0
                          ... 
2015-12-02 22:59:53.750    2.0
2015-12-02 22:59:54.000    2.0
2015-12-02 22:59:54.250    2.0
2015-12-02 22:59:54.500    2.0
2015-12-02 22:59:54.750    2.0
2015-12-02 22:59:55.000    2.0
2015-12-02 22:59:55.250    2.0
2015-12-02 22:59:55.500    2.0
2015-12-02 22:59:55.750    2.0
2015-12-02 22:59:56.000    2.0
2015-12-02 22:59:56.250    2.0
2015-12-02 22:59:56.500    2.0
2015-12-02 22:59:56.750    2.0
2015-12-02 22:59:57.000    2.0
2015-12-02 22:59:57.250    2.0
2015-12-02 22:59:57.500    2.0
2015-12-02 22:59:57.750    0.0
2015-12-02 22:59:58.000   -2.0
2015-12-02 22:59:58.250    0.0
2015-12-02 22:59:58.500    0.0
2015-12-02 22:59:58.750    0.0
2015-12-02 22:59:59.000    0.0
2015-12-02 22:59:59.250    0.0
2015-12-02 22:59:59.500    0.0
2015-12-02 22:59:59.750    0.0
2015-12-03 14:59:49.000    2.0
2015-12-03 14:59:49.250    0.0
2015-12-03 14:59:49.500    4.0
2015-12-03 14:59:49.750    2.0
2015-12-03 14:59:50.000    2.0
Name: last, dtype: float64

In [464]:
plt.figure()
ta_10day_pm.hist(bins=100)


Out[464]:
<matplotlib.axes.AxesSubplot at 0x7f5579830c50>

In [465]:
outlier_boolean = abs(ta_10day_pm) > 10

In [468]:
ta_10day_pm_no_outlier = ta_10day_pm.ix[np.logical_not(outlier_boolean)]

In [470]:
plt.figure()
plt.plot(ta_10day_pm_no_outlier)


Out[470]:
[<matplotlib.lines.Line2D at 0x7f557cdfb210>]

ta_10day_pm excludes last 36 ticks before each ending


In [44]:
ta_10day_pm = ta_10day_pm.ix[np.logical_not(np.logical_or(last_44_boolean, last_boolean))]
ta_10day_pm.plot(figsize=(18,10))
ta_10day_pm = ta_10day_pm.rename('price move')


Out[44]:
<matplotlib.axes.AxesSubplot at 0x7f558f07ed90>

In [46]:
pm_index = ta_10day_pm.index

search for index of TA0001

Statistical Character of bid ask qty

box plot


In [202]:
plt.figure(figsize=(15,10))
sns.boxplot(ta_10day.ix[:, ['askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0', 
                            'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4']].values)
plt.xticks(xrange(10), ('0', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0', 
                            'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4'))
plt.ylim((-10,1600))


Out[202]:
(-10, 1600)

In [34]:
ta_10day.ix[:, ['askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0', 
                            'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4']].mean()


Out[34]:
askQty_4    595.281823
askQty_3    570.372536
askQty_2    553.325677
askQty_1    487.992940
askQty_0    302.964337
bidQty_0    353.971766
bidQty_1    613.328817
bidQty_2    720.346558
bidQty_3    723.259685
bidQty_4    808.972382
dtype: float64

we can see that mean of qty still monoly dicrease but slightly larger than median

verify that this distribution do not vary much with different pinzhong


In [ ]:
plt.figure(figsize=(15,10))
plt.boxplot(sr.ix[:day_len*14, ['askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0', 
                            'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4']].values)
plt.xticks(xrange(10), ('0', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0', 
                            'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4'))
plt.ylim((-10,700))

covar


In [205]:
bidaskqty_cov = ta_10day.ix[:, ['askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0', 
                            'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4']].cov()

In [206]:
bidaskqty_cov


Out[206]:
askQty_4 askQty_3 askQty_2 askQty_1 askQty_0 bidQty_0 bidQty_1 bidQty_2 bidQty_3 bidQty_4
askQty_4 584897.597529 78488.500648 21008.736337 36412.750987 10821.094215 8057.135771 -1.410949e+04 -7.678221e+03 -6.221610e+03 3.607272e+04
askQty_3 78488.500648 486116.056358 69981.380024 29039.510207 17291.438331 -2154.936713 4.371028e+03 -2.152167e+04 -3.262025e+03 3.910460e+03
askQty_2 21008.736337 69981.380024 525038.654940 69177.475345 21871.684487 -5677.934937 -4.971645e+03 4.811053e+04 -1.844889e+04 -7.787346e+03
askQty_1 36412.750987 29039.510207 69177.475345 362767.537094 37996.362947 7859.096554 -1.269958e+02 1.047858e+04 6.470127e+03 2.456642e+03
askQty_0 10821.094215 17291.438331 21871.684487 37996.362947 146069.368244 4577.873688 1.224944e+04 1.034594e+04 4.968088e+03 7.116108e+03
bidQty_0 8057.135771 -2154.936713 -5677.934937 7859.096554 4577.873688 490575.507797 1.291189e+05 1.153921e+05 1.853658e+04 1.091052e+04
bidQty_1 -14109.486560 4371.028167 -4971.645017 -126.995792 12249.439151 129118.875362 1.192734e+06 2.738996e+05 1.413018e+05 2.790319e+04
bidQty_2 -7678.221126 -21521.670957 48110.531961 10478.579904 10345.942494 115392.096285 2.738996e+05 1.610194e+06 2.842899e+05 2.233978e+05
bidQty_3 -6221.610016 -3262.025436 -18448.890263 6470.126846 4968.088256 18536.584050 1.413018e+05 2.842899e+05 1.472302e+06 3.517650e+05
bidQty_4 36072.720708 3910.460240 -7787.346453 2456.642066 7116.108323 10910.521772 2.790319e+04 2.233978e+05 3.517650e+05 1.808190e+06

In [112]:
ta_bidaskqty_corr = ta_10day.ix[:, ['TotalAskLot', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0', 
                            'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4', 'TotalBidLot']].corr()
ta_bidaskqty_corr


Out[112]:
TotalAskLot askQty_4 askQty_3 askQty_2 askQty_1 askQty_0 bidQty_0 bidQty_1 bidQty_2 bidQty_3 bidQty_4 TotalBidLot
TotalAskLot 1.000000 0.018334 0.017270 -0.013292 -0.012548 -0.000716 0.040750 0.041691 0.044338 0.056331 0.063765 0.403196
askQty_4 0.018334 1.000000 0.147196 0.037911 0.079050 0.037021 0.015041 -0.016893 -0.007912 -0.006704 0.035077 0.166828
askQty_3 0.017270 0.147196 1.000000 0.138521 0.069152 0.064891 -0.004413 0.005740 -0.024326 -0.003856 0.004171 0.159901
askQty_2 -0.013292 0.037911 0.138521 1.000000 0.158509 0.078978 -0.011188 -0.006283 0.052325 -0.020983 -0.007992 0.153927
askQty_1 -0.012548 0.079050 0.069152 0.158509 1.000000 0.165062 0.018630 -0.000193 0.013710 0.008853 0.003033 0.149736
askQty_0 -0.000716 0.037021 0.064891 0.078978 0.165062 1.000000 0.017101 0.029347 0.021333 0.010713 0.013847 0.118023
bidQty_0 0.040750 0.015041 -0.004413 -0.011188 0.018630 0.017101 1.000000 0.168797 0.129833 0.021811 0.011584 0.111092
bidQty_1 0.041691 -0.016893 0.005740 -0.006283 -0.000193 0.029347 0.168797 1.000000 0.197643 0.106629 0.019000 0.151330
bidQty_2 0.044338 -0.007912 -0.024326 0.052325 0.013710 0.021333 0.129833 0.197643 1.000000 0.184639 0.130924 0.151849
bidQty_3 0.056331 -0.006704 -0.003856 -0.020983 0.008853 0.010713 0.021811 0.106629 0.184639 1.000000 0.215592 0.157875
bidQty_4 0.063765 0.035077 0.004171 -0.007992 0.003033 0.013847 0.011584 0.019000 0.130924 0.215592 1.000000 0.167037
TotalBidLot 0.403196 0.166828 0.159901 0.153927 0.149736 0.118023 0.111092 0.151330 0.151849 0.157875 0.167037 1.000000

compute eigenvalues to see if there exists colinear relationship


In [113]:
w, v = np.linalg.eig(ta_bidaskqty_corr)
np.sqrt(w.max() / w.min())


Out[113]:
1.9466995313896642

In [114]:
fig, ax = plt.subplots()
heatmap = ax.pcolor(ta_bidaskqty_corr.abs(), cmap=plt.cm.Blues, alpha=1)
fig = plt.gcf()
fig.set_size_inches(9, 9)
ax.set_frame_on(False)
ax.set_yticks(np.arange(ta_bidaskqty_corr.shape[0]) + 0.5, minor=False)
ax.set_xticks(np.arange(ta_bidaskqty_corr.shape[1]) + 0.5, minor=False)
ax.invert_yaxis()
ax.xaxis.tick_top()
labels = ['TotalAskLot', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0', 
                            'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4', 'TotalBidLot']
ax.set_xticklabels(labels, minor=False)
ax.set_yticklabels(labels, minor=False)


Out[114]:
[<matplotlib.text.Text at 0x7fb5fcf3f750>,
 <matplotlib.text.Text at 0x7fb5fcf21490>,
 <matplotlib.text.Text at 0x7fb5fcdc8e10>,
 <matplotlib.text.Text at 0x7fb5fcdbb590>,
 <matplotlib.text.Text at 0x7fb5fcdbbcd0>,
 <matplotlib.text.Text at 0x7fb5fcdb0450>,
 <matplotlib.text.Text at 0x7fb5fcdb0b90>,
 <matplotlib.text.Text at 0x7fb5fcda7310>,
 <matplotlib.text.Text at 0x7fb5fcda7a50>,
 <matplotlib.text.Text at 0x7fb5fcd9d1d0>,
 <matplotlib.text.Text at 0x7fb5fcd9d910>,
 <matplotlib.text.Text at 0x7fb5fcd93090>]

In [49]:
bidqty_5 = ta_10day.ix[:, ['bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4', 'TotalBidLot']].values
pcares1 = PCA(bidqty_5)
#-------------------------  Wt is eigenvectors, Wt[0] is the first eigenvector
#------------------------- Y is original data projected to PC axes
print pcares1.fracs
print pcares1.Wt
print pcares1.Y


[ 0.27656635  0.18674114  0.14559871  0.13814842  0.12848492  0.12446046]
[[ 0.29305986  0.40174599  0.47630782  0.43553142  0.36732859  0.44858559]
 [ 0.57030926  0.45607028  0.09702365 -0.39385526 -0.54711395 -0.05364716]
 [-0.45566205  0.32800415  0.45639621  0.31020202 -0.29059893 -0.54388864]
 [ 0.55878447 -0.39892403  0.25983634  0.07924042  0.29827897 -0.60485968]
 [-0.2353721  -0.24896492  0.6659125  -0.62509241  0.09576495  0.19815532]
 [ 0.1165987  -0.54944465  0.21086819  0.4024246  -0.61898871  0.30815149]]
[[-2.01264894  0.08114616  1.13734071  1.1071202  -0.29763862 -0.44880739]
 [-2.02927098  0.21125848  1.07062806  1.14766522 -0.34765277 -0.4728874 ]
 [-1.77170083 -0.27121572  0.86797375  1.38652728 -0.27100911 -0.93417883]
 ..., 
 [-0.65482406 -0.11722565 -0.33819779 -0.48946345  0.12500907  0.04289447]
 [-0.65368722 -0.11848343 -0.33521574 -0.4901658   0.12834786  0.04345895]
 [-0.65554056 -0.12212164 -0.33150601 -0.49434593  0.1297431   0.04203478]]

In [50]:
askqty_5 = ta_10day.ix[:, ['TotalAskLot', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0']].values
pcares2 = PCA(askqty_5)
print pcares2.fracs
print pcares2.Wt
print pcares2.Y[10000:10020, :]


[ 0.23295669  0.17498043  0.16476918  0.15544008  0.14297337  0.12888024]
[[ 0.00319068  0.35519131  0.45565483  0.47954075  0.51331369  0.41564089]
 [ 0.48290404  0.55099863  0.43778135 -0.14694109 -0.3353495  -0.37081089]
 [ 0.85020746 -0.23159095 -0.24563655 -0.09121861  0.17140534  0.35422445]
 [ 0.19493507 -0.51143427  0.20439757  0.67828862 -0.11348563 -0.43093212]
 [ 0.07687551  0.35619367 -0.53723953  0.15521727  0.51805676 -0.53489691]
 [ 0.00559071  0.35800326 -0.4582403   0.50592519 -0.5598287   0.30405554]]
[[ 0.61562387  0.05398372  0.09798823 -0.42819189  0.54159361  0.05871186]
 [ 0.56123989  0.10131934  0.04957687 -0.37228981  0.61138432  0.01892019]
 [ 0.59387028  0.07291797  0.07862369 -0.40583106  0.5695099   0.04279519]
 [ 0.59387059  0.07296499  0.07870649 -0.40581207  0.56951738  0.04279574]
 [ 0.58516915  0.08053869  0.07096067 -0.39686774  0.5806839   0.03642907]
 [ 0.58516931  0.08056221  0.07100207 -0.39685825  0.58068764  0.03642934]
 [ 0.58602234  0.080123    0.07149365 -0.39699921  0.58156649  0.03550122]
 [ 0.58602234  0.080123    0.07149365 -0.39699921  0.58156649  0.03550122]
 [ 0.58602234  0.080123    0.07149365 -0.39699921  0.58156649  0.03550122]
 [ 0.5881977   0.07822957  0.0734301  -0.39923529  0.57877486  0.03709289]
 [ 0.60994709  0.05866044  0.09167687 -0.42185241  0.55075751  0.05300221]
 [ 0.25318822  0.36918213 -0.22590164 -0.05513474  1.00858454 -0.20803113]
 [ 0.25318822  0.36918213 -0.22590164 -0.05513474  1.00858454 -0.20803113]
 [ 0.25318822  0.36918213 -0.22590164 -0.05513474  1.00858454 -0.20803113]
 [ 0.25427589  0.36823541 -0.22493341 -0.05625278  1.00718873 -0.2072353 ]
 [ 0.25427589  0.36823541 -0.22493341 -0.05625278  1.00718873 -0.2072353 ]
 [ 0.25427589  0.36823541 -0.22493341 -0.05625278  1.00718873 -0.2072353 ]
 [ 0.25427589  0.36823541 -0.22493341 -0.05625278  1.00718873 -0.2072353 ]
 [ 0.25096609  0.3691318  -0.22451096 -0.06098071  1.00609895 -0.21072775]
 [ 0.25096609  0.3691318  -0.22451096 -0.06098071  1.00609895 -0.21072775]]

In [51]:
bid_pc = pd.DataFrame(data=pcares1.Y[:, :], index=ta_10day.index)
ask_pc = pd.DataFrame(data=pcares2.Y[:, :], index=ta_10day.index)

In [52]:
# bid_pc = bid_pc.ix[lag_index, :]
# ask_pc = ask_pc.ix[lag_index, :]
bid_pc.columns = ('bid_pc1', 'bid_pc2', 'bid_pc3', 'bid_pc4', 'bid_pc5', 'bid_pc6')
ask_pc.columns = ('ask_pc1', 'ask_pc2', 'ask_pc3', 'ask_pc4', 'ask_pc5', 'ask_pc6')

In [94]:
(bidask_pc- bidask_pc.mean()) / bidask_pc.std()


Out[94]:
bid_pc1 bid_pc2 bid_pc3 bid_pc4 bid_pc5 bid_pc6 ask_pc1 ask_pc2 ask_pc3 ask_pc4 ask_pc5 ask_pc6
2015-11-19 21:00:00.000 -1.562401 0.076661 1.216847 1.216035 -0.338990 -0.519360 -0.936487 -0.372957 -0.207183 -0.376735 -0.413840 0.121001
2015-11-19 21:00:00.250 -1.575304 0.199581 1.145471 1.260568 -0.395953 -0.547225 -1.104472 -0.371004 -0.329893 -0.091072 0.330191 -0.254148
2015-11-19 21:00:00.500 -1.375355 -0.256224 0.928650 1.522929 -0.308661 -1.081032 -0.832380 -0.360818 -0.302462 0.221224 -0.313547 0.034237
2015-11-19 21:00:00.750 -1.386918 -0.309665 1.001332 1.451591 -0.269374 -1.103129 -0.545970 -0.512996 0.030099 -0.490522 -0.131462 -0.370727
2015-11-19 21:00:01.000 -1.093258 -0.536956 1.271527 1.522311 -0.865495 -0.740830 -0.088487 -0.814544 -0.011625 0.259957 1.287787 -0.664263
2015-11-19 21:00:01.250 -0.699487 0.180020 2.072320 1.198833 -0.584471 -0.818154 -0.003687 -0.334204 -0.297129 1.131841 -0.459856 0.471152
2015-11-19 21:00:01.500 -0.694733 1.055437 1.412572 1.801192 -0.559216 -0.749921 -0.205611 0.512626 -0.491729 -0.386269 0.601374 0.652812
2015-11-19 21:00:01.750 -0.805642 0.812744 1.646494 1.538568 -0.443037 -0.819123 -0.241981 0.465059 -0.488597 -0.232358 0.602422 0.605646
2015-11-19 21:00:02.000 -0.938641 1.234262 1.050033 1.811150 -0.846458 -0.897129 -0.829416 0.262710 -0.401032 0.269994 -0.295887 -0.138420
2015-11-19 21:00:02.250 -0.961033 1.185264 1.097260 1.758128 -0.823002 -0.911101 -0.052147 0.197239 -0.396613 0.406135 -0.368449 -1.196893
2015-11-19 21:00:02.500 -0.940739 -0.058257 1.429477 1.772386 -0.484799 -0.140317 -0.067102 0.208576 -0.402303 0.398757 -0.382853 -1.193764
2015-11-19 21:00:02.750 -0.958335 -0.109401 1.496396 1.700733 -0.454327 -0.166077 -0.143696 0.284482 -0.472782 0.477352 -0.268521 -1.271626
2015-11-19 21:00:03.000 -0.968582 -0.133694 1.521038 1.674753 -0.440758 -0.172409 -0.111258 0.255179 -0.445728 0.452228 -0.309247 -1.239294
2015-11-19 21:00:03.250 -1.029809 -0.267673 1.650175 1.529772 -0.376621 -0.210613 -0.112458 0.258726 -0.464059 0.500876 -0.243122 -1.283663
2015-11-19 21:00:03.500 -0.946492 1.096597 1.089208 1.571816 -0.750695 -0.881375 -0.463608 0.684149 -0.662137 -0.051432 0.506659 0.815478
2015-11-19 21:00:03.750 -1.006941 0.300733 1.901054 1.080792 -0.156613 -0.773400 -0.103203 0.255758 -0.460537 0.496892 -0.244953 -1.279979
2015-11-19 21:00:04.000 -1.075037 -0.323665 1.662787 1.484122 -0.386309 -0.221104 -0.076637 0.227596 -0.428668 0.454742 -0.288008 -1.262610
2015-11-19 21:00:04.250 -0.999543 0.315987 1.885626 1.096597 -0.163683 -0.768623 -0.082698 0.258048 -0.454238 0.484939 -0.273265 -1.295915
2015-11-19 21:00:04.500 -1.205890 -0.515264 1.122399 1.492756 -0.713652 -0.745998 0.110016 -1.025148 0.292162 0.514179 -0.515764 1.050095
2015-11-19 21:00:04.750 -1.191634 -0.492445 1.119434 1.492161 -0.726208 -0.758862 0.107256 -1.022376 0.289241 0.517652 -0.511243 1.047380
2015-11-19 21:00:05.000 -1.166135 -0.500205 1.166911 1.493174 -0.657526 -0.740700 0.088706 -1.001687 0.265295 0.548225 -0.459715 1.011473
2015-11-19 21:00:05.250 -1.168430 -0.506672 1.176425 1.481816 -0.654712 -0.745884 0.073986 -0.986950 0.249631 0.566728 -0.435610 0.996992
2015-11-19 21:00:05.500 -0.919175 -0.029990 1.483810 1.661519 -0.535608 -0.243706 -0.147754 0.297000 -0.492032 0.490869 -0.303793 -1.265340
2015-11-19 21:00:05.750 -1.040070 -0.298963 1.140434 1.489663 -0.766864 -0.852586 0.047445 -0.990111 0.229890 0.602705 -0.419342 0.967492
2015-11-19 21:00:06.000 -1.068060 -0.360211 1.199467 1.423385 -0.737545 -0.870051 0.052767 -0.994999 0.235545 0.596840 -0.425900 0.970964
2015-11-19 21:00:06.250 -0.948914 -0.095065 1.546533 1.591100 -0.504456 -0.262262 -0.078054 -0.079051 -0.187484 0.138579 -0.464293 -0.845672
2015-11-19 21:00:06.500 -0.955911 -0.110377 1.561291 1.574530 -0.497126 -0.266628 -0.083574 -0.073530 -0.193368 0.145515 -0.455255 -0.851102
2015-11-19 21:00:06.750 -1.004518 -0.216798 1.663821 1.459325 -0.446167 -0.296947 -0.129078 -0.050171 -0.225053 0.178870 -0.403328 -0.857559
2015-11-19 21:00:07.000 -1.128559 -0.418333 1.073067 1.601579 -0.694784 -0.706222 -0.143512 -0.906569 0.203361 0.387451 -0.416312 0.712099
2015-11-19 21:00:07.250 -1.125845 -0.418171 1.086035 1.584313 -0.695987 -0.719535 -0.152712 -0.897353 0.193582 0.399018 -0.401246 0.703049
... ... ... ... ... ... ... ... ... ... ... ... ...
2015-12-03 14:59:52.750 -0.376786 -0.127376 -0.426905 -0.237152 0.001088 0.337652 0.145075 0.031514 1.037404 0.158505 0.240080 -0.172318
2015-12-03 14:59:53.000 -0.376786 -0.127376 -0.426905 -0.237152 0.001088 0.337652 0.140475 0.036134 1.032536 0.164294 0.247616 -0.176843
2015-12-03 14:59:53.250 -0.379934 -0.134266 -0.420264 -0.244608 0.004387 0.335687 0.151515 0.025046 1.044221 0.150401 0.229531 -0.165983
2015-12-03 14:59:53.500 -0.410048 -0.200103 -0.356760 -0.315808 0.035889 0.316887 0.151515 0.025046 1.044221 0.150401 0.229531 -0.165983
2015-12-03 14:59:53.750 -0.410748 -0.201635 -0.355285 -0.317465 0.036622 0.316450 0.141392 0.034751 1.032677 0.162940 0.246028 -0.175945
2015-12-03 14:59:54.000 -0.410231 -0.191549 -0.385347 -0.279520 0.033539 0.340033 0.130352 0.045838 1.020991 0.176832 0.264112 -0.186805
2015-12-03 14:59:54.250 -0.410231 -0.191549 -0.385347 -0.279520 0.033539 0.340033 0.222352 -0.046557 1.118370 0.061061 0.113408 -0.096304
2015-12-03 14:59:54.500 -0.411981 -0.195377 -0.381658 -0.283662 0.035371 0.338942 0.190152 -0.014219 1.084287 0.101581 0.166155 -0.127979
2015-12-03 14:59:54.750 -0.408482 -0.187721 -0.389037 -0.275378 0.031706 0.341125 0.196671 -0.016892 1.084411 0.110215 0.167001 -0.119131
2015-12-03 14:59:55.000 -0.409531 -0.190018 -0.386823 -0.277863 0.032806 0.340470 0.195751 -0.015968 1.083437 0.111372 0.168508 -0.120036
2015-12-03 14:59:55.250 -0.409881 -0.190784 -0.386085 -0.278692 0.033172 0.340252 0.234792 -0.063119 1.141737 0.034085 0.022119 -0.012760
2015-12-03 14:59:55.500 -0.411941 -0.195003 -0.382675 -0.282385 0.035248 0.339717 0.247597 -0.074961 1.153115 0.021582 0.011712 -0.008993
2015-12-03 14:59:55.750 -0.412291 -0.195768 -0.381937 -0.283213 0.035615 0.339498 0.245397 -0.072364 1.150108 0.026034 0.016468 -0.010914
2015-12-03 14:59:56.000 -0.412948 -0.194318 -0.389897 -0.272924 0.035483 0.346592 0.229757 -0.056657 1.133554 0.045715 0.042088 -0.026299
2015-12-03 14:59:56.250 -0.413998 -0.196615 -0.387683 -0.275409 0.036583 0.345937 0.231920 -0.058218 1.134537 0.045159 0.044886 -0.029469
2015-12-03 14:59:56.500 -0.414491 -0.198501 -0.383851 -0.280022 0.037302 0.343368 0.147280 0.026785 1.044949 0.151669 0.183533 -0.112730
2015-12-03 14:59:56.750 -0.416240 -0.202329 -0.380162 -0.284165 0.039135 0.342277 0.055280 0.119180 0.947571 0.267440 0.334237 -0.203231
2015-12-03 14:59:57.000 -0.416240 -0.202329 -0.380162 -0.284165 0.039135 0.342277 0.057120 0.117309 0.949477 0.265115 0.331219 -0.201421
2015-12-03 14:59:57.250 -0.415540 -0.200797 -0.381638 -0.282508 0.038402 0.342713 0.057120 0.117309 0.949477 0.265115 0.331219 -0.201421
2015-12-03 14:59:57.500 -0.416640 -0.203087 -0.379340 -0.284897 0.039469 0.342007 -0.012800 0.187529 0.875469 0.353101 0.445754 -0.270202
2015-12-03 14:59:57.750 -0.416640 -0.203087 -0.379340 -0.284897 0.039469 0.342007 -0.013353 0.186893 0.875782 0.352787 0.446582 -0.269455
2015-12-03 14:59:58.000 -0.417690 -0.205384 -0.377126 -0.287383 0.040568 0.341352 -0.048313 0.222003 0.838778 0.396780 0.503849 -0.303845
2015-12-03 14:59:58.250 -0.380657 -0.110700 -0.467444 -0.189959 0.005309 0.361380 -0.034513 0.208144 0.853384 0.379415 0.481244 -0.290270
2015-12-03 14:59:58.500 -0.380657 -0.110700 -0.467444 -0.189959 0.005309 0.361380 -0.075913 0.249722 0.809564 0.431512 0.549060 -0.330995
2015-12-03 14:59:58.750 -0.380657 -0.110700 -0.467444 -0.189959 0.005309 0.361380 -0.187233 0.361519 0.691736 0.571595 0.731412 -0.440501
2015-12-03 14:59:59.000 -0.502478 -0.099882 -0.372445 -0.525140 0.135739 0.053731 0.519367 0.260082 1.119318 -0.809796 -0.116590 0.545218
2015-12-03 14:59:59.250 -0.508684 -0.111511 -0.361102 -0.538444 0.142743 0.049419 0.518188 0.257903 1.120107 -0.807748 -0.118111 0.543620
2015-12-03 14:59:59.500 -0.508334 -0.110746 -0.361840 -0.537615 0.142377 0.049637 0.517268 0.258827 1.119133 -0.806590 -0.116604 0.542715
2015-12-03 14:59:59.750 -0.507451 -0.111934 -0.358649 -0.538387 0.146180 0.050291 0.430748 0.321281 1.079790 -0.784357 -0.228529 0.669517
2015-12-03 15:00:00.000 -0.508890 -0.115371 -0.354680 -0.542978 0.147769 0.048643 0.420827 0.331041 1.069725 -0.772595 -0.214391 0.661524

900040 rows × 12 columns


In [93]:
bidask_pc = pd.concat([bid_pc, ask_pc], axis=1)
bidask_pc


Out[93]:
bid_pc1 bid_pc2 bid_pc3 bid_pc4 bid_pc5 bid_pc6 ask_pc1 ask_pc2 ask_pc3 ask_pc4 ask_pc5 ask_pc6
2015-11-19 21:00:00.000 -2.012649 0.081146 1.137341 1.107120 -0.297639 -0.448807 -1.107173 -0.382146 -0.206001 -0.363826 -0.383297 0.106404
2015-11-19 21:00:00.250 -2.029271 0.211258 1.070628 1.147665 -0.347653 -0.472887 -1.305774 -0.380145 -0.328010 -0.087951 0.305822 -0.223489
2015-11-19 21:00:00.500 -1.771701 -0.271216 0.867974 1.386527 -0.271009 -0.934179 -0.984091 -0.369708 -0.300735 0.213644 -0.290406 0.030107
2015-11-19 21:00:00.750 -1.786595 -0.327784 0.935907 1.321579 -0.236515 -0.953274 -0.645479 -0.525635 0.029927 -0.473713 -0.121760 -0.326004
2015-11-19 21:00:01.000 -1.408309 -0.568374 1.188448 1.385965 -0.759918 -0.640192 -0.104615 -0.834613 -0.011559 0.251049 1.192745 -0.584130
2015-11-19 21:00:01.250 -0.901063 0.190553 1.936919 1.091459 -0.513174 -0.707011 -0.004359 -0.342438 -0.295433 1.093057 -0.425917 0.414315
2015-11-19 21:00:01.500 -0.894938 1.117193 1.320278 1.639868 -0.491000 -0.648048 -0.243086 0.525257 -0.488922 -0.373033 0.556991 0.574060
2015-11-19 21:00:01.750 -1.037809 0.860300 1.538915 1.400766 -0.388993 -0.707849 -0.286085 0.476517 -0.485808 -0.224396 0.557961 0.532584
2015-11-19 21:00:02.000 -1.209136 1.306481 0.981426 1.648934 -0.743203 -0.775259 -0.980587 0.269182 -0.398743 0.260743 -0.274050 -0.121721
2015-11-19 21:00:02.250 -1.237981 1.254616 1.025567 1.600661 -0.722609 -0.787332 -0.061651 0.202099 -0.394349 0.392218 -0.341256 -1.052506
2015-11-19 21:00:02.500 -1.211838 -0.061665 1.336078 1.613641 -0.425661 -0.121255 -0.079332 0.213715 -0.400006 0.385093 -0.354597 -1.049754
2015-11-19 21:00:02.750 -1.234506 -0.115802 1.398625 1.548406 -0.398906 -0.143517 -0.169886 0.291491 -0.470083 0.460995 -0.248703 -1.118223
2015-11-19 21:00:03.000 -1.247705 -0.141517 1.421657 1.524754 -0.386992 -0.148988 -0.131536 0.261466 -0.443184 0.436732 -0.286424 -1.089791
2015-11-19 21:00:03.250 -1.326577 -0.283335 1.542356 1.392757 -0.330679 -0.182002 -0.132955 0.265101 -0.461410 0.483713 -0.225179 -1.128808
2015-11-19 21:00:03.500 -1.219250 1.160761 1.018042 1.431035 -0.659122 -0.761644 -0.548106 0.701005 -0.658357 -0.049670 0.469266 0.717102
2015-11-19 21:00:03.750 -1.297118 0.318330 1.776843 0.983991 -0.137508 -0.668338 -0.122013 0.262060 -0.457908 0.479865 -0.226875 -1.125568
2015-11-19 21:00:04.000 -1.384838 -0.342603 1.554144 1.351196 -0.339185 -0.191068 -0.090604 0.233203 -0.426221 0.439160 -0.266752 -1.110295
2015-11-19 21:00:04.250 -1.287588 0.334476 1.762423 0.998380 -0.143716 -0.664210 -0.097770 0.264406 -0.451645 0.468322 -0.253097 -1.139582
2015-11-19 21:00:04.500 -1.553400 -0.545413 1.049064 1.359057 -0.626597 -0.644658 0.130068 -1.050406 0.290494 0.496560 -0.477699 0.923416
2015-11-19 21:00:04.750 -1.535035 -0.521259 1.046293 1.358515 -0.637621 -0.655775 0.126805 -1.047566 0.287590 0.499914 -0.473512 0.921029
2015-11-19 21:00:05.000 -1.502188 -0.529473 1.090667 1.359438 -0.577318 -0.640080 0.104874 -1.026367 0.263781 0.529439 -0.425786 0.889454
2015-11-19 21:00:05.250 -1.505145 -0.536319 1.099560 1.349097 -0.574847 -0.644560 0.087471 -1.011267 0.248206 0.547309 -0.403461 0.876720
2015-11-19 21:00:05.500 -1.184060 -0.031744 1.386861 1.512705 -0.470272 -0.210600 -0.174684 0.304317 -0.489223 0.474048 -0.281373 -1.112695
2015-11-19 21:00:05.750 -1.339794 -0.316457 1.065920 1.356241 -0.673318 -0.736767 0.056092 -1.014506 0.228578 0.582053 -0.388394 0.850779
2015-11-19 21:00:06.000 -1.375850 -0.381288 1.121097 1.295900 -0.647575 -0.751859 0.062385 -1.019514 0.234200 0.576388 -0.394468 0.853832
2015-11-19 21:00:06.250 -1.222369 -0.100627 1.445486 1.448592 -0.442920 -0.226635 -0.092280 -0.080999 -0.186413 0.133830 -0.430027 -0.743654
2015-11-19 21:00:06.500 -1.231383 -0.116835 1.459280 1.433507 -0.436484 -0.230408 -0.098807 -0.075342 -0.192264 0.140529 -0.421656 -0.748429
2015-11-19 21:00:06.750 -1.293997 -0.229483 1.555110 1.328621 -0.391742 -0.256608 -0.152604 -0.051407 -0.223768 0.172741 -0.373562 -0.754107
2015-11-19 21:00:07.000 -1.453783 -0.442810 1.002955 1.458133 -0.610031 -0.610285 -0.169669 -0.928906 0.202200 0.374174 -0.385587 0.626195
2015-11-19 21:00:07.250 -1.450287 -0.442639 1.015076 1.442414 -0.611087 -0.621790 -0.180546 -0.919462 0.192477 0.385345 -0.371633 0.618236
... ... ... ... ... ... ... ... ... ... ... ... ...
2015-12-03 14:59:52.750 -0.485366 -0.134829 -0.399012 -0.215911 0.000955 0.291783 0.171516 0.032290 1.031483 0.153074 0.222362 -0.151531
2015-12-03 14:59:53.000 -0.485366 -0.134829 -0.399012 -0.215911 0.000955 0.291783 0.166078 0.037024 1.026642 0.158664 0.229341 -0.155510
2015-12-03 14:59:53.250 -0.489423 -0.142122 -0.392804 -0.222700 0.003851 0.290085 0.179130 0.025663 1.038260 0.145248 0.212591 -0.145960
2015-12-03 14:59:53.500 -0.528215 -0.211812 -0.333450 -0.287523 0.031511 0.273839 0.179130 0.025663 1.038260 0.145248 0.212591 -0.145960
2015-12-03 14:59:53.750 -0.529116 -0.213433 -0.332071 -0.289031 0.032155 0.273462 0.167162 0.035607 1.026782 0.157356 0.227870 -0.154719
2015-12-03 14:59:54.000 -0.528450 -0.202757 -0.360170 -0.254485 0.029448 0.293841 0.154110 0.046967 1.015163 0.170773 0.244620 -0.164269
2015-12-03 14:59:54.250 -0.528450 -0.202757 -0.360170 -0.254485 0.029448 0.293841 0.262878 -0.047704 1.111986 0.058969 0.105039 -0.084686
2015-12-03 14:59:54.500 -0.530704 -0.206809 -0.356721 -0.258256 0.031056 0.292898 0.224809 -0.014569 1.078098 0.098100 0.153892 -0.112540
2015-12-03 14:59:54.750 -0.526197 -0.198705 -0.363618 -0.250713 0.027839 0.294785 0.232517 -0.017308 1.078221 0.106438 0.154676 -0.104760
2015-12-03 14:59:55.000 -0.527549 -0.201137 -0.361549 -0.252976 0.028804 0.294219 0.231429 -0.016362 1.077253 0.107556 0.156072 -0.105555
2015-12-03 14:59:55.250 -0.528000 -0.201947 -0.360859 -0.253730 0.029126 0.294030 0.277586 -0.064674 1.135220 0.032917 0.020486 -0.011220
2015-12-03 14:59:55.500 -0.530653 -0.206413 -0.357672 -0.257093 0.030948 0.293568 0.292725 -0.076808 1.146532 0.020842 0.010847 -0.007908
2015-12-03 14:59:55.750 -0.531104 -0.207223 -0.356982 -0.257847 0.031270 0.293379 0.290123 -0.074147 1.143543 0.025142 0.015253 -0.009597
2015-12-03 14:59:56.000 -0.531951 -0.205688 -0.364422 -0.248479 0.031155 0.299509 0.271633 -0.058053 1.127083 0.044148 0.038982 -0.023126
2015-12-03 14:59:56.250 -0.533303 -0.208119 -0.362353 -0.250742 0.032120 0.298943 0.274190 -0.059653 1.128061 0.043612 0.041573 -0.025914
2015-12-03 14:59:56.500 -0.533937 -0.210115 -0.358771 -0.254942 0.032752 0.296723 0.174124 0.027445 1.038984 0.146472 0.169988 -0.099131
2015-12-03 14:59:56.750 -0.536191 -0.214167 -0.355323 -0.258713 0.034361 0.295780 0.065356 0.122116 0.942162 0.258276 0.309570 -0.178714
2015-12-03 14:59:57.000 -0.536191 -0.214167 -0.355323 -0.258713 0.034361 0.295780 0.067531 0.120199 0.944057 0.256030 0.306774 -0.177122
2015-12-03 14:59:57.250 -0.535289 -0.212547 -0.356702 -0.257205 0.033717 0.296157 0.067531 0.120199 0.944057 0.256030 0.306774 -0.177122
2015-12-03 14:59:57.500 -0.536706 -0.214970 -0.354555 -0.259381 0.034654 0.295547 -0.015133 0.192149 0.870471 0.341001 0.412856 -0.237606
2015-12-03 14:59:57.750 -0.536706 -0.214970 -0.354555 -0.259381 0.034654 0.295547 -0.015786 0.191498 0.870782 0.340699 0.413623 -0.236949
2015-12-03 14:59:58.000 -0.538058 -0.217401 -0.352486 -0.261643 0.035619 0.294981 -0.057118 0.227473 0.833990 0.383184 0.466664 -0.267191
2015-12-03 14:59:58.250 -0.490353 -0.117177 -0.436902 -0.172945 0.004661 0.312288 -0.040803 0.213272 0.848513 0.366414 0.445727 -0.255253
2015-12-03 14:59:58.500 -0.490353 -0.117177 -0.436902 -0.172945 0.004661 0.312288 -0.089749 0.255875 0.804943 0.416725 0.508538 -0.291066
2015-12-03 14:59:58.750 -0.490353 -0.117177 -0.436902 -0.172945 0.004661 0.312288 -0.221358 0.370427 0.687787 0.552009 0.677432 -0.387361
2015-12-03 14:59:59.000 -0.647281 -0.105727 -0.348110 -0.478106 0.119180 0.046432 0.614027 0.266490 1.112929 -0.782047 -0.107985 0.479446
2015-12-03 14:59:59.250 -0.655275 -0.118036 -0.337508 -0.490218 0.125331 0.042706 0.612633 0.264258 1.113713 -0.780069 -0.109394 0.478041
2015-12-03 14:59:59.500 -0.654824 -0.117226 -0.338198 -0.489463 0.125009 0.042894 0.611546 0.265204 1.112745 -0.778951 -0.107998 0.477245
2015-12-03 14:59:59.750 -0.653687 -0.118483 -0.335216 -0.490166 0.128348 0.043459 0.509257 0.329196 1.073627 -0.757480 -0.211663 0.588750
2015-12-03 15:00:00.000 -0.655541 -0.122122 -0.331506 -0.494346 0.129743 0.042035 0.497527 0.339197 1.063618 -0.746121 -0.198568 0.581721

900040 rows × 12 columns

find feature through plots (Advanced tricks)


In [270]:
aaa.ix['2015-11-24 10:30:31' : '2015-11-24 10:30:32']


Out[270]:
2015-11-24 10:30:31.000     3.0
2015-11-24 10:30:31.250     9.0
2015-11-24 10:30:31.500    11.0
2015-11-24 10:30:31.750    13.0
2015-11-24 10:30:32.000     1.0
2015-11-24 10:30:32.250     3.0
2015-11-24 10:30:32.500    41.0
2015-11-24 10:30:32.750     1.0
Name: volume, dtype: float64

In [288]:
ta_10day.ix['2015-12-03 11:08:27.500', 'vol_diff']


Out[288]:
7.0

In [282]:
ta_10day_vol = ta_10day_vol.rename('vol_diff')

In [287]:
ta_10day = ta_10day.join(ta_10day_vol)

In [276]:
ta_10day_vol = ta_10day.ix[:, 'volume'].diff()
ta_10day_vol = ta_10day_vol.ix[pm_index]

In [277]:
ta_10day_vol = ta_10day_vol.ix[ta_10day_vol >= 0]
ta_10day_vol = ta_10day_vol + 1

In [304]:
def prc_total(df, t1, t2, fs=(15,10)):
    fig = plt.figure(figsize=fs)
    ax1 = fig.add_subplot(411)
    
    ax1.plot(df.ix[t1: t2, 'last'], color='#f5f112', marker='*')
    ax1.plot(df.ix[t1: t2, 'askPrc_0'], color='lightgreen')
    ax1.plot(df.ix[t1: t2, 'bidPrc_0'], color='lightcoral')

    ax2 = fig.add_subplot(412, sharex=ax1)
    ax2.semilogy(100 * np.ones_like(df.ix[t11: t22].values), color='orange')
    ax2.semilogy(df.ix[t11: t22, 'vol_diff']/2., color='orange', marker='*')

    ax3 = fig.add_subplot(413, sharex=ax1)
    ax3.plot(df.ix[t1: t2, 'openInterest'], color='white', lw=0.4, marker='*')
    
    ax4 = fig.add_subplot(414, sharex=ax1)
    ax4.plot(df.ix[t1: t2, 'TotalBidLot'], 
             color='red')
    ax4.plot(df.ix[t1: t2, 'TotalAskLot'], 
             color='green')
    return fig

this is for macroly searching for patterns


In [402]:
t11, t22 = '2015-12-01 21:00:01', '2015-12-05 15:00:00'
temp = ta_10day.ix[pm_index, :]
thefig = prc_total(temp, t11, t22, (15,10))
multi = MultiCursor(thefig.canvas, thefig.axes, color='c', lw=1)
thefig.show()

these two cells below are for microly looking at pattern and Qty

look at scenario 3.


In [361]:
t11, t22 = 37180, 37280
temp = ta_10day.ix[pm_index, :].ix['2015-12-01 21:00:01': '2015-12-05 15:00:00', :]

thefig = prc_total(temp, t11, t22, (15,10))
multi = MultiCursor(thefig.canvas, thefig.axes, color='c', lw=1)
thefig.show()

In [362]:
n = len(temp.ix[t11:t22, :])
for i, txt in enumerate(temp.ix[t11:t22, 'askQty_0']):
    (thefig.axes[0]).annotate(txt, ((xrange(n))[i], (temp.ix[t11: t22, 'askPrc_0'])[i] + .3), color='white', size=10)
for i, txt in enumerate(temp.ix[t11:t22, 'askQty_1']):
    (thefig.axes[0]).annotate(txt, ((xrange(n))[i], (temp.ix[t11: t22, 'askPrc_1'])[i] +.7), color='white', size=10)
for i, txt in enumerate(temp.ix[t11:t22, 'bidQty_0']):
    (thefig.axes[0]).annotate(txt, ((xrange(n))[i], (temp.ix[t11: t22, 'bidPrc_0'])[i] - .6), color='white', size=10)
for i, txt in enumerate(temp.ix[t11:t22, 'bidQty_1']):
    (thefig.axes[0]).annotate(txt, ((xrange(n))[i], (temp.ix[t11: t22, 'bidPrc_1'])[i] - 1.), color='white', size=10)
for i, txt in enumerate(temp.ix[t11:t22, 'vol_diff']/2.):
    (thefig.axes[1]).annotate(txt, ((xrange(n))[i], (temp.ix[t11: t22, 'vol_diff'])[i] + .3), color='white', size=10)

look at scenario 1.


In [376]:
t11, t22 = 152960, 153140
temp = ta_10day.ix[pm_index, :].ix['2015-12-01 21:00:01': '2015-12-05 15:00:00',:]

thefig = prc_total(temp, t11, t22, (15,10))
multi = MultiCursor(thefig.canvas, thefig.axes, color='c', lw=1)
thefig.show()

In [377]:
thefig.axes[2].cla()
thefig.axes[2].plot(lastmid_indicator.ix[pm_index].ix['2015-12-01 21:00:01': '2015-12-05 15:00:00'].ix[t11:t22])

In [366]:
n = len(temp.ix[t11:t22, :])
for i, txt in enumerate(temp.ix[t11:t22, 'askQty_0']):
    (thefig.axes[0]).annotate(txt, ((xrange(n))[i], (temp.ix[t11: t22, 'askPrc_0'])[i] + .3), color='white', size=10)
for i, txt in enumerate(temp.ix[t11:t22, 'askQty_1']):
    (thefig.axes[0]).annotate(txt, ((xrange(n))[i], (temp.ix[t11: t22, 'askPrc_1'])[i] +.7), color='white', size=10)
for i, txt in enumerate(temp.ix[t11:t22, 'bidQty_0']):
    (thefig.axes[0]).annotate(txt, ((xrange(n))[i], (temp.ix[t11: t22, 'bidPrc_0'])[i] - .6), color='white', size=10)
for i, txt in enumerate(temp.ix[t11:t22, 'bidQty_1']):
    (thefig.axes[0]).annotate(txt, ((xrange(n))[i], (temp.ix[t11: t22, 'bidPrc_1'])[i] - 1.), color='white', size=10)
for i, txt in enumerate(temp.ix[t11:t22, 'vol_diff']/2.):
    (thefig.axes[1]).annotate(txt, ((xrange(n))[i], (temp.ix[t11: t22, 'vol_diff'])[i] + .3), color='white', size=10)

In [252]:
ta_10day.ix[pm_index, :].ix['2015-12-01 21:00:01': '2015-12-05 15:00:00', :].ix[t11: t22, 
                                           ['volume', 'TotalAskLot', 
                                            #'askQty_4', 'askQty_3', 
                                            'askQty_2', 'askQty_1', 'askPrc_1', 'askQty_0', 'askPrc_0',
                                            'last',
                           'bidPrc_0', 'bidQty_0', 'bidPrc_1', 'bidQty_1', 'bidQty_2', 
                           #'bidQty_3', 'bidQty_4', 
                            'TotalBidLot']]


Out[252]:
volume TotalAskLot askQty_2 askQty_1 askPrc_1 askQty_0 askPrc_0 last bidPrc_0 bidQty_0 bidPrc_1 bidQty_1 bidQty_2 TotalBidLot
2015-12-03 11:08:27.250 787902.0 32647.0 341.0 492.0 4484.0 67.0 4482.0 4482.0 4480.0 5212.0 4478.0 1739.0 706.0 55692.0
2015-12-03 11:08:27.500 787908.0 32705.0 341.0 493.0 4484.0 124.0 4482.0 4482.0 4480.0 5212.0 4478.0 1739.0 706.0 55692.0
2015-12-03 11:08:27.750 788074.0 32707.0 342.0 493.0 4484.0 127.0 4482.0 4480.0 4480.0 5131.0 4478.0 1739.0 706.0 55611.0
2015-12-03 11:08:28.000 791248.0 32695.0 342.0 477.0 4484.0 130.0 4482.0 4482.0 4480.0 3559.0 4478.0 1739.0 706.0 54039.0
2015-12-03 11:08:28.250 791306.0 32695.0 342.0 477.0 4484.0 130.0 4482.0 4480.0 4480.0 3558.0 4478.0 1739.0 708.0 54040.0
2015-12-03 11:08:28.500 792320.0 32679.0 342.0 477.0 4484.0 114.0 4482.0 4480.0 4480.0 3157.0 4478.0 1739.0 708.0 53639.0
2015-12-03 11:08:28.750 795844.0 32772.0 342.0 479.0 4484.0 174.0 4482.0 4480.0 4480.0 1396.0 4478.0 1739.0 706.0 51876.0
2015-12-03 11:08:29.000 798750.0 33068.0 479.0 189.0 4482.0 275.0 4480.0 4478.0 4478.0 1713.0 4476.0 706.0 535.0 50454.0
2015-12-03 11:08:29.250 801536.0 34796.0 485.0 220.0 4482.0 1966.0 4480.0 4478.0 4478.0 322.0 4476.0 706.0 535.0 49063.0
2015-12-03 11:08:29.500 804716.0 36480.0 2164.0 1291.0 4478.0 248.0 4476.0 4474.0 4474.0 427.0 4472.0 1179.0 6066.0 47937.0
2015-12-03 11:08:29.750 805466.0 35766.0 1284.0 1367.0 4478.0 333.0 4476.0 4474.0 4474.0 94.0 4472.0 1179.0 6066.0 47604.0
2015-12-03 11:08:30.000 806456.0 37151.0 1533.0 427.0 4476.0 992.0 4474.0 4472.0 4472.0 803.0 4470.0 6066.0 1656.0 47134.0
2015-12-03 11:08:30.250 807506.0 36765.0 1504.0 430.0 4476.0 1005.0 4474.0 4472.0 4472.0 292.0 4470.0 6066.0 1656.0 46623.0
2015-12-03 11:08:30.500 807858.0 36611.0 1392.0 462.0 4476.0 1004.0 4474.0 4472.0 4472.0 229.0 4470.0 6066.0 1656.0 46560.0
2015-12-03 11:08:30.750 808018.0 36246.0 1023.0 536.0 4476.0 1001.0 4474.0 4474.0 4472.0 719.0 4470.0 6066.0 1656.0 47050.0
2015-12-03 11:08:31.000 808394.0 36080.0 1020.0 529.0 4476.0 851.0 4474.0 4472.0 4472.0 804.0 4470.0 6066.0 1656.0 47134.0
2015-12-03 11:08:31.250 808870.0 35892.0 1018.0 530.0 4476.0 803.0 4474.0 4472.0 4472.0 712.0 4470.0 6066.0 1656.0 47042.0
2015-12-03 11:08:31.500 809580.0 35504.0 1019.0 533.0 4476.0 468.0 4474.0 4474.0 4472.0 728.0 4470.0 6065.0 1656.0 47063.0
2015-12-03 11:08:31.750 810150.0 35302.0 1019.0 533.0 4476.0 255.0 4474.0 4472.0 4472.0 608.0 4470.0 6065.0 1656.0 46943.0
2015-12-03 11:08:32.000 810234.0 35217.0 1015.0 538.0 4476.0 213.0 4474.0 4474.0 4472.0 608.0 4470.0 6065.0 1656.0 46943.0

look at scenario 2.


In [233]:
t11, t22 = '2015-11-25 10:48:01.000', '2015-11-25 10:49'
temp = ta_10day.ix[pm_index, :]

thefig = prc_total(temp, ta_10day_vol, t11, t22, (15,10))
multi = MultiCursor(thefig.canvas, thefig.axes, color='c', lw=1)
thefig.show()

In [234]:
n = len(temp.ix[t11:t22, :])
for i, txt in enumerate(temp.ix[t11:t22, 'askQty_0']):
    (thefig.axes[0]).annotate(txt, ((xrange(n))[i], (temp.ix[t11: t22, 'askPrc_0'])[i] + .3), color='white', size=10)
for i, txt in enumerate(temp.ix[t11:t22, 'bidQty_0']):
    (thefig.axes[0]).annotate(txt, ((xrange(n))[i], (temp.ix[t11: t22, 'bidPrc_0'])[i] - .6), color='white', size=10)

In [240]:
ta_10day.ix[pm_index, :].ix[t11: t22,:].ix[20:100, 
                                           ['TotalAskLot', 
                                            #'askQty_4', 'askQty_3', 
                                            'askQty_2', 'askQty_1', 'askPrc_1', 'askQty_0', 'askPrc_0',
                                            'last',
                           'bidPrc_0', 'bidQty_0', 'bidPrc_1', 'bidQty_1', 'bidQty_2', 
                           #'bidQty_3', 'bidQty_4', 
                            'TotalBidLot']]


Out[240]:
TotalAskLot askQty_2 askQty_1 askPrc_1 askQty_0 askPrc_0 last bidPrc_0 bidQty_0 bidPrc_1 bidQty_1 bidQty_2 TotalBidLot
2015-11-25 10:48:06.000 22478.0 582.0 188.0 4546.0 19.0 4544.0 4544.0 4542.0 26.0 4540.0 127.0 47.0 36056.0
2015-11-25 10:48:06.250 22518.0 582.0 188.0 4546.0 59.0 4544.0 4544.0 4542.0 26.0 4540.0 127.0 47.0 36056.0
2015-11-25 10:48:06.500 22518.0 582.0 188.0 4546.0 59.0 4544.0 4544.0 4542.0 26.0 4540.0 127.0 47.0 36056.0
2015-11-25 10:48:06.750 22518.0 582.0 188.0 4546.0 59.0 4544.0 4544.0 4542.0 26.0 4540.0 127.0 47.0 36056.0
2015-11-25 10:48:07.000 22517.0 582.0 187.0 4546.0 59.0 4544.0 4544.0 4542.0 26.0 4540.0 127.0 47.0 36056.0
2015-11-25 10:48:07.250 22517.0 582.0 187.0 4546.0 59.0 4544.0 4544.0 4542.0 30.0 4540.0 127.0 47.0 36060.0
2015-11-25 10:48:07.500 22517.0 582.0 187.0 4546.0 59.0 4544.0 4544.0 4542.0 30.0 4540.0 127.0 47.0 36060.0
2015-11-25 10:48:07.750 22487.0 582.0 187.0 4546.0 29.0 4544.0 4544.0 4542.0 45.0 4540.0 127.0 47.0 36075.0
2015-11-25 10:48:08.000 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4544.0 4544.0 171.0 4542.0 45.0 127.0 36246.0
2015-11-25 10:48:08.250 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4544.0 4544.0 171.0 4542.0 45.0 127.0 36246.0
2015-11-25 10:48:08.500 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4544.0 4544.0 171.0 4542.0 45.0 127.0 36246.0
2015-11-25 10:48:08.750 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4544.0 4544.0 171.0 4542.0 45.0 127.0 36246.0
2015-11-25 10:48:09.000 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4544.0 4544.0 171.0 4542.0 45.0 127.0 36246.0
2015-11-25 10:48:09.250 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4544.0 4544.0 171.0 4542.0 45.0 127.0 36246.0
2015-11-25 10:48:09.500 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4544.0 4544.0 171.0 4542.0 45.0 127.0 36246.0
2015-11-25 10:48:09.750 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4544.0 4544.0 171.0 4542.0 46.0 127.0 36247.0
2015-11-25 10:48:10.000 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4544.0 4544.0 171.0 4542.0 46.0 127.0 36247.0
2015-11-25 10:48:10.250 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4544.0 4544.0 171.0 4542.0 46.0 127.0 36246.0
2015-11-25 10:48:10.500 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4544.0 4544.0 170.0 4542.0 46.0 126.0 36244.0
2015-11-25 10:48:10.750 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4546.0 4544.0 170.0 4542.0 46.0 126.0 36245.0
2015-11-25 10:48:11.000 22458.0 4549.0 582.0 4548.0 187.0 4546.0 4546.0 4544.0 169.0 4542.0 46.0 126.0 36244.0
2015-11-25 10:48:11.250 22457.0 4549.0 582.0 4548.0 186.0 4546.0 4546.0 4544.0 169.0 4542.0 47.0 126.0 36245.0
2015-11-25 10:48:11.500 22408.0 4549.0 582.0 4548.0 137.0 4546.0 4546.0 4544.0 169.0 4542.0 47.0 126.0 36245.0
2015-11-25 10:48:11.750 22340.0 4549.0 582.0 4548.0 69.0 4546.0 4546.0 4544.0 61.0 4542.0 47.0 126.0 36137.0
2015-11-25 10:48:12.000 22328.0 4549.0 581.0 4548.0 58.0 4546.0 4546.0 4544.0 61.0 4542.0 47.0 126.0 36137.0
2015-11-25 10:48:12.250 22272.0 162.0 4549.0 4550.0 583.0 4548.0 4546.0 4546.0 8.0 4544.0 61.0 47.0 36145.0
2015-11-25 10:48:12.500 22272.0 162.0 4549.0 4550.0 583.0 4548.0 4546.0 4546.0 18.0 4544.0 61.0 47.0 36155.0
2015-11-25 10:48:12.750 22260.0 162.0 4549.0 4550.0 571.0 4548.0 4548.0 4546.0 18.0 4544.0 67.0 47.0 36161.0
2015-11-25 10:48:13.000 22224.0 162.0 4549.0 4550.0 535.0 4548.0 4546.0 4546.0 18.0 4544.0 67.0 47.0 36111.0
2015-11-25 10:48:13.250 22220.0 162.0 4549.0 4550.0 531.0 4548.0 4546.0 4546.0 19.0 4544.0 67.0 47.0 36112.0
... ... ... ... ... ... ... ... ... ... ... ... ... ...
2015-11-25 10:48:18.500 22154.0 162.0 4549.0 4550.0 463.0 4548.0 4548.0 4546.0 28.0 4544.0 71.0 61.0 36139.0
2015-11-25 10:48:18.750 22154.0 162.0 4549.0 4550.0 463.0 4548.0 4548.0 4546.0 28.0 4544.0 71.0 61.0 36139.0
2015-11-25 10:48:19.000 22153.0 162.0 4549.0 4550.0 462.0 4548.0 4548.0 4546.0 27.0 4544.0 71.0 61.0 36138.0
2015-11-25 10:48:19.250 22153.0 162.0 4549.0 4550.0 462.0 4548.0 4548.0 4546.0 27.0 4544.0 71.0 61.0 36138.0
2015-11-25 10:48:19.500 22097.0 162.0 4549.0 4550.0 406.0 4548.0 4548.0 4546.0 27.0 4544.0 71.0 61.0 36138.0
2015-11-25 10:48:19.750 22099.0 162.0 4550.0 4550.0 407.0 4548.0 4548.0 4546.0 27.0 4544.0 71.0 61.0 35938.0
2015-11-25 10:48:20.000 22079.0 162.0 4550.0 4550.0 387.0 4548.0 4548.0 4546.0 27.0 4544.0 71.0 61.0 35938.0
2015-11-25 10:48:20.250 22036.0 162.0 4551.0 4550.0 342.0 4548.0 4548.0 4546.0 28.0 4544.0 72.0 61.0 35940.0
2015-11-25 10:48:20.500 22043.0 162.0 4552.0 4550.0 338.0 4548.0 4548.0 4546.0 28.0 4544.0 72.0 61.0 35940.0
2015-11-25 10:48:20.750 22044.0 162.0 4553.0 4550.0 338.0 4548.0 4548.0 4546.0 28.0 4544.0 72.0 61.0 35940.0
2015-11-25 10:48:21.000 22044.0 162.0 4553.0 4550.0 338.0 4548.0 4548.0 4546.0 29.0 4544.0 72.0 61.0 35951.0
2015-11-25 10:48:21.250 22043.0 162.0 4553.0 4550.0 337.0 4548.0 4548.0 4546.0 29.0 4544.0 73.0 61.0 35952.0
2015-11-25 10:48:21.500 22043.0 162.0 4553.0 4550.0 337.0 4548.0 4548.0 4546.0 39.0 4544.0 73.0 61.0 35962.0
2015-11-25 10:48:21.750 22044.0 162.0 4553.0 4550.0 338.0 4548.0 4548.0 4546.0 41.0 4544.0 73.0 61.0 35964.0
2015-11-25 10:48:22.000 21945.0 163.0 4553.0 4550.0 338.0 4548.0 4548.0 4546.0 41.0 4544.0 73.0 61.0 35765.0
2015-11-25 10:48:22.250 21942.0 163.0 4553.0 4550.0 335.0 4548.0 4548.0 4546.0 36.0 4544.0 73.0 61.0 35760.0
2015-11-25 10:48:22.500 21942.0 163.0 4553.0 4550.0 335.0 4548.0 4548.0 4546.0 36.0 4544.0 73.0 61.0 35760.0
2015-11-25 10:48:22.750 21942.0 163.0 4553.0 4550.0 335.0 4548.0 4548.0 4546.0 40.0 4544.0 74.0 61.0 35765.0
2015-11-25 10:48:23.000 21942.0 163.0 4553.0 4550.0 335.0 4548.0 4548.0 4546.0 41.0 4544.0 74.0 64.0 35768.0
2015-11-25 10:48:23.250 21942.0 163.0 4553.0 4550.0 335.0 4548.0 4548.0 4546.0 41.0 4544.0 74.0 64.0 35768.0
2015-11-25 10:48:23.500 21942.0 163.0 4553.0 4550.0 335.0 4548.0 4548.0 4546.0 41.0 4544.0 94.0 64.0 35787.0
2015-11-25 10:48:23.750 21943.0 163.0 4553.0 4550.0 335.0 4548.0 4548.0 4546.0 41.0 4544.0 94.0 64.0 35788.0
2015-11-25 10:48:24.000 21943.0 163.0 4553.0 4550.0 335.0 4548.0 4546.0 4546.0 40.0 4544.0 94.0 64.0 35787.0
2015-11-25 10:48:24.250 21923.0 163.0 4533.0 4550.0 335.0 4548.0 4546.0 4546.0 40.0 4544.0 94.0 64.0 35787.0
2015-11-25 10:48:24.500 21897.0 163.0 4533.0 4550.0 309.0 4548.0 4548.0 4546.0 45.0 4544.0 94.0 64.0 35792.0
2015-11-25 10:48:24.750 21902.0 163.0 4533.0 4550.0 313.0 4548.0 4548.0 4546.0 45.0 4544.0 94.0 64.0 35792.0
2015-11-25 10:48:25.000 21901.0 163.0 4533.0 4550.0 312.0 4548.0 4548.0 4546.0 45.0 4544.0 94.0 64.0 35791.0
2015-11-25 10:48:25.250 21902.0 163.0 4534.0 4550.0 312.0 4548.0 4548.0 4546.0 35.0 4544.0 94.0 64.0 35781.0
2015-11-25 10:48:25.500 21902.0 163.0 4534.0 4550.0 312.0 4548.0 4546.0 4546.0 11.0 4544.0 94.0 64.0 35757.0
2015-11-25 10:48:25.750 21939.0 4534.0 311.0 4548.0 38.0 4546.0 4546.0 4544.0 94.0 4542.0 64.0 127.0 35746.0

80 rows × 13 columns

OLS FUNC


In [471]:
def myols_no_outlier(df, norm=False):
    global ta_10day_pm_no_outlier, sm
    df = df[ta_10day_pm_no_outlier.index]
    df.dropna(inplace=True)
    if norm:
        df = (df - df.mean()) / df.std()
    X = sm.add_constant(df)
    Y = ta_10day_pm_no_outlier[df.index]
    model = sm.OLS(Y, X)
    ret = model.fit()
    return ret

In [437]:
def myols(df, norm=False):
    global ta_10day_pm, sm
    df = df[ta_10day_pm.index]
    df.dropna(inplace=True)
    if norm:
        df = (df - df.mean()) / df.std()
    X = sm.add_constant(df)
    Y = ta_10day_pm[df.index]
    model = sm.OLS(Y, X)
    ret = model.fit()
    return ret

In [105]:
nonzero_bool = ta_10day_pm != 0
ta_10day_pm.ix[nonzero_bool]


Out[105]:
2015-11-19 21:00:00.000   -20.0
2015-11-19 21:00:00.250   -16.0
2015-11-19 21:00:00.500   -12.0
2015-11-19 21:00:00.750    -4.0
2015-11-19 21:00:01.000    -6.0
2015-11-19 21:00:01.250     2.0
2015-11-19 21:00:01.500     4.0
2015-11-19 21:00:01.750     4.0
2015-11-19 21:00:02.000     2.0
2015-11-19 21:00:02.250     4.0
2015-11-19 21:00:02.750    -2.0
2015-11-19 21:00:03.000    -4.0
2015-11-19 21:00:03.750    -2.0
2015-11-19 21:00:04.250     2.0
2015-11-19 21:00:04.500    -2.0
2015-11-19 21:00:04.750    -4.0
2015-11-19 21:00:05.000    -6.0
2015-11-19 21:00:05.250    -4.0
2015-11-19 21:00:05.750    -4.0
2015-11-19 21:00:06.000    -2.0
2015-11-19 21:00:07.000    -2.0
2015-11-19 21:00:07.250    -6.0
2015-11-19 21:00:07.500    -4.0
2015-11-19 21:00:07.750    -6.0
2015-11-19 21:00:08.000    -4.0
2015-11-19 21:00:08.500    -2.0
2015-11-19 21:00:09.250    -6.0
2015-11-19 21:00:09.500    -6.0
2015-11-19 21:00:10.250    -2.0
2015-11-19 21:00:10.500    -2.0
                           ... 
2015-12-03 14:59:34.750     2.0
2015-12-03 14:59:35.000     2.0
2015-12-03 14:59:35.250     2.0
2015-12-03 14:59:35.500     2.0
2015-12-03 14:59:35.750     2.0
2015-12-03 14:59:36.000     2.0
2015-12-03 14:59:37.250     2.0
2015-12-03 14:59:37.500     2.0
2015-12-03 14:59:38.000     2.0
2015-12-03 14:59:38.250     2.0
2015-12-03 14:59:38.500     2.0
2015-12-03 14:59:38.750     2.0
2015-12-03 14:59:39.250     2.0
2015-12-03 14:59:39.750    -2.0
2015-12-03 14:59:40.000    -2.0
2015-12-03 14:59:41.000     2.0
2015-12-03 14:59:41.250     2.0
2015-12-03 14:59:41.500     2.0
2015-12-03 14:59:41.750     2.0
2015-12-03 14:59:42.750     2.0
2015-12-03 14:59:43.250    -2.0
2015-12-03 14:59:44.000     2.0
2015-12-03 14:59:44.250     2.0
2015-12-03 14:59:44.500     2.0
2015-12-03 14:59:45.500    -2.0
2015-12-03 14:59:45.750    -2.0
2015-12-03 14:59:46.500     2.0
2015-12-03 14:59:46.750     2.0
2015-12-03 14:59:47.250    -2.0
2015-12-03 14:59:48.000    -2.0
Name: last, dtype: float64

In [134]:
temp = 0
sns.swarmplot(x=ta_10day_pm.ix[temp: temp + 100000:100], y=bidask_pc.ix[pm_index, 'bid_pc1'].ix[temp: temp + 100000:100])


Out[134]:
<matplotlib.axes.AxesSubplot at 0x7f5588d31dd0>

In [135]:
temp = 0
plt.figure()
plt.scatter(ta_10day_pm.ix[temp: temp + 100000:100], bidask_pc.ix[pm_index, 'bid_pc1'].ix[temp: temp + 100000:100])


Out[135]:
<matplotlib.collections.PathCollection at 0x7f5588ab25d0>

In [438]:
res = myols(bidask_pc.ix[:, 'bid_pc1'])
print(res.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   last   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     181.6
Date:                Sun, 10 Jul 2016   Prob (F-statistic):           2.22e-41
Time:                        03:43:07   Log-Likelihood:            -1.8303e+06
No. Observations:              898710   AIC:                         3.661e+06
Df Residuals:                  898708   BIC:                         3.661e+06
Df Model:                           1                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const          0.0099      0.002      5.074      0.000         0.006     0.014
bid_pc1        0.0205      0.002     13.475      0.000         0.017     0.023
==============================================================================
Omnibus:                   220930.113   Durbin-Watson:                   0.348
Prob(Omnibus):                  0.000   Jarque-Bera (JB):         13630874.951
Skew:                           0.222   Prob(JB):                         0.00
Kurtosis:                      22.074   Cond. No.                         1.29
==============================================================================

In [110]:
res = myols(bidask_pc.ix[pm_index,
                         ['bid_pc1', 'bid_pc2', 'bid_pc3', 'bid_pc4', 'ask_pc1', 'ask_pc2', 'ask_pc3', 'ask_pc4']
                         ], norm=False)
print(res.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   last   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     87.79
Date:                Fri, 08 Jul 2016   Prob (F-statistic):          2.96e-146
Time:                        14:40:23   Log-Likelihood:            -1.0775e+06
No. Observations:              453026   AIC:                         2.155e+06
Df Residuals:                  453017   BIC:                         2.155e+06
Df Model:                           8                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const          0.0215      0.004      5.546      0.000         0.014     0.029
bid_pc1        0.0505      0.003     16.022      0.000         0.044     0.057
bid_pc2        0.0353      0.004      9.437      0.000         0.028     0.043
bid_pc3       -0.0012      0.004     -0.266      0.790        -0.010     0.007
bid_pc4        0.0400      0.005      8.822      0.000         0.031     0.049
ask_pc1       -0.0281      0.003     -8.194      0.000        -0.035    -0.021
ask_pc2        0.0230      0.004      5.906      0.000         0.015     0.031
ask_pc3       -0.0474      0.004    -11.800      0.000        -0.055    -0.040
ask_pc4        0.0259      0.004      6.339      0.000         0.018     0.034
==============================================================================
Omnibus:                    71459.997   Durbin-Watson:                   0.278
Prob(Omnibus):                  0.000   Jarque-Bera (JB):          1257815.650
Skew:                           0.156   Prob(JB):                         0.00
Kurtosis:                      11.157   Cond. No.                         1.84
==============================================================================

In [114]:
temp = ta_10day.ix[pm_index, ['TotalAskLot', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0', 
                            'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4', 'TotalBidLot']]
res = myols(temp)
print(res.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   last   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     60.35
Date:                Fri, 08 Jul 2016   Prob (F-statistic):          3.76e-147
Time:                        14:41:52   Log-Likelihood:            -1.0775e+06
No. Observations:              453026   AIC:                         2.155e+06
Df Residuals:                  453013   BIC:                         2.155e+06
Df Model:                          12                                         
===============================================================================
                  coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------
const           0.0197      0.004      5.076      0.000         0.012     0.027
TotalAskLot    -0.0239      0.004     -5.604      0.000        -0.032    -0.016
askQty_4        0.0073      0.004      1.835      0.067        -0.001     0.015
askQty_3        0.0046      0.004      1.147      0.252        -0.003     0.012
askQty_2        0.0116      0.004      2.877      0.004         0.004     0.019
askQty_1       -0.0333      0.004     -8.270      0.000        -0.041    -0.025
askQty_0       -0.0504      0.004    -12.728      0.000        -0.058    -0.043
bidQty_0        0.0600      0.004     15.119      0.000         0.052     0.068
bidQty_1        0.0196      0.004      4.832      0.000         0.012     0.027
bidQty_2        0.0281      0.004      6.881      0.000         0.020     0.036
bidQty_3        0.0225      0.004      5.523      0.000         0.014     0.030
bidQty_4        0.0054      0.004      1.342      0.180        -0.002     0.013
TotalBidLot    -0.0040      0.005     -0.868      0.386        -0.013     0.005
==============================================================================
Omnibus:                    71500.161   Durbin-Watson:                   0.278
Prob(Omnibus):                  0.000   Jarque-Bera (JB):          1259758.664
Skew:                           0.157   Prob(JB):                         0.00
Kurtosis:                      11.163   Cond. No.                         1.94
==============================================================================

In [115]:
Toalpressure_index = ((ta_10day.ix[:, 'TotalBidLot'] - ta_10day.ix[:, 'TotalAskLot']) / 
                  (ta_10day.ix[:, 'TotalBidLot'] + ta_10day.ix[:, 'TotalAskLot']))

temp = pressure_index.ix[pm_index]
res = myols(temp)
print(res.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   last   R-squared:                       0.000
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     18.18
Date:                Fri, 08 Jul 2016   Prob (F-statistic):           2.01e-05
Time:                        14:41:57   Log-Likelihood:            -1.0778e+06
No. Observations:              453026   AIC:                         2.156e+06
Df Residuals:                  453024   BIC:                         2.156e+06
Df Model:                           1                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const          0.0197      0.004      5.072      0.000         0.012     0.027
None           0.0165      0.004      4.263      0.000         0.009     0.024
==============================================================================
Omnibus:                    70962.605   Durbin-Watson:                   0.279
Prob(Omnibus):                  0.000   Jarque-Bera (JB):          1246831.266
Skew:                           0.142   Prob(JB):                         0.00
Kurtosis:                      11.122   Cond. No.                         1.00
==============================================================================

In [121]:
pressure_index = ((ta_10day.ix[:, 'bidQty_0'] * ta_10day.ix[:, 'bidPrc_0'] - ta_10day.ix[:, 'askQty_0'] * ta_10day.ix[:, 'askPrc_0']) / 
                  (ta_10day.ix[:, 'bidQty_0'] * ta_10day.ix[:, 'bidPrc_0'] + ta_10day.ix[:, 'askQty_0'] * ta_10day.ix[:, 'askPrc_0']))

temp = pressure_index.ix[pm_index]
res = myols(temp)
print(res.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                   last   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     2049.
Date:                Fri, 08 Jul 2016   Prob (F-statistic):               0.00
Time:                        15:31:34   Log-Likelihood:            -1.8294e+06
No. Observations:              898710   AIC:                         3.659e+06
Df Residuals:                  898708   BIC:                         3.659e+06
Df Model:                           1                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const          0.0099      0.002      5.078      0.000         0.006     0.014
None           0.0885      0.002     45.268      0.000         0.085     0.092
==============================================================================
Omnibus:                   221545.879   Durbin-Watson:                   0.348
Prob(Omnibus):                  0.000   Jarque-Bera (JB):         13808811.360
Skew:                           0.222   Prob(JB):                         0.00
Kurtosis:                      22.198   Cond. No.                         1.00
==============================================================================

last, bid0, ask0 indicator

find extreme points

Extreme Points Summary:

In sample ta_10day,

there are 12070 points where spread > 1 * minimum_change

there are 4445 points where last are out of spread

The length of sample: 900040


In [331]:
temp1 = ta_10day.ix[:, 'last'] != ta_10day.ix[:, 'bidPrc_0']
temp2 = ta_10day.ix[:, 'last'] != ta_10day.ix[:, 'askPrc_0']
temp3 = np.logical_and(temp1, temp2)
ta_10day.ix[temp3, ['last', 'askPrc_0', 'bidPrc_0']]


Out[331]:
last askPrc_0 bidPrc_0
2015-11-19 21:00:03.500 4508.0 4510.0 4506.0
2015-11-19 21:00:10.500 4512.0 4514.0 4510.0
2015-11-19 21:00:14.500 4510.0 4512.0 4508.0
2015-11-19 21:00:15.000 4508.0 4512.0 4510.0
2015-11-19 21:00:58.750 4508.0 4510.0 4506.0
2015-11-19 21:01:09.250 4508.0 4506.0 4504.0
2015-11-19 21:02:03.750 4506.0 4508.0 4504.0
2015-11-19 21:02:04.250 4506.0 4508.0 4504.0
2015-11-19 21:02:34.500 4498.0 4502.0 4500.0
2015-11-19 21:03:01.000 4504.0 4502.0 4500.0
2015-11-19 21:03:01.500 4504.0 4502.0 4500.0
2015-11-19 21:03:01.750 4504.0 4502.0 4500.0
2015-11-19 21:03:22.500 4502.0 4500.0 4498.0
2015-11-19 21:03:22.750 4502.0 4500.0 4498.0
2015-11-19 21:03:23.000 4502.0 4500.0 4498.0
2015-11-19 21:03:40.500 4494.0 4498.0 4496.0
2015-11-19 21:04:23.000 4498.0 4496.0 4494.0
2015-11-19 21:04:23.250 4498.0 4496.0 4494.0
2015-11-19 21:04:23.500 4498.0 4496.0 4494.0
2015-11-19 21:06:18.000 4496.0 4494.0 4492.0
2015-11-19 21:06:18.250 4496.0 4494.0 4492.0
2015-11-19 21:06:18.500 4496.0 4494.0 4492.0
2015-11-19 21:08:21.000 4494.0 4496.0 4492.0
2015-11-19 21:11:23.500 4496.0 4494.0 4492.0
2015-11-19 21:11:24.000 4494.0 4496.0 4492.0
2015-11-19 21:13:11.250 4494.0 4496.0 4492.0
2015-11-19 21:14:48.750 4494.0 4492.0 4490.0
2015-11-19 21:14:50.000 4490.0 4494.0 4492.0
2015-11-19 21:14:50.250 4490.0 4494.0 4492.0
2015-11-19 21:15:30.250 4490.0 4492.0 4488.0
... ... ... ...
2015-12-03 14:41:52.500 4482.0 4484.0 4480.0
2015-12-03 14:41:52.750 4482.0 4484.0 4480.0
2015-12-03 14:41:53.000 4482.0 4484.0 4480.0
2015-12-03 14:41:53.250 4482.0 4484.0 4480.0
2015-12-03 14:41:53.500 4482.0 4484.0 4480.0
2015-12-03 14:43:03.500 4478.0 4480.0 4476.0
2015-12-03 14:43:03.750 4480.0 4478.0 4476.0
2015-12-03 14:46:18.750 4478.0 4480.0 4476.0
2015-12-03 14:48:34.000 4484.0 4482.0 4480.0
2015-12-03 14:48:34.250 4484.0 4482.0 4480.0
2015-12-03 14:48:39.750 4482.0 4484.0 4480.0
2015-12-03 14:48:40.000 4482.0 4484.0 4480.0
2015-12-03 14:48:40.250 4482.0 4484.0 4480.0
2015-12-03 14:48:40.750 4482.0 4484.0 4480.0
2015-12-03 14:48:41.000 4482.0 4484.0 4480.0
2015-12-03 14:51:38.750 4484.0 4486.0 4482.0
2015-12-03 14:51:39.000 4484.0 4486.0 4482.0
2015-12-03 14:51:39.250 4484.0 4486.0 4482.0
2015-12-03 14:55:54.000 4490.0 4492.0 4488.0
2015-12-03 14:57:01.750 4488.0 4486.0 4484.0
2015-12-03 14:58:23.250 4484.0 4486.0 4482.0
2015-12-03 14:59:06.750 4486.0 4488.0 4484.0
2015-12-03 14:59:19.250 4486.0 4488.0 4484.0
2015-12-03 14:59:19.500 4486.0 4488.0 4484.0
2015-12-03 14:59:19.750 4486.0 4488.0 4484.0
2015-12-03 14:59:20.000 4486.0 4488.0 4484.0
2015-12-03 14:59:20.250 4486.0 4488.0 4484.0
2015-12-03 14:59:32.000 4488.0 4490.0 4486.0
2015-12-03 14:59:32.500 4490.0 4488.0 4486.0
2015-12-03 14:59:33.500 4488.0 4490.0 4486.0

12070 rows × 3 columns


In [325]:
ta_10day.ix[temp3, ['last', 'askPrc_0', 'bidPrc_0']].plot(marker='*')


Out[325]:
<matplotlib.axes.AxesSubplot at 0x7f557e7ce590>

In [326]:
len(ta_10day)


Out[326]:
900040

In [328]:
temp1 = ta_10day.ix[:, 'last'] < ta_10day.ix[:, 'bidPrc_0']
temp2 = ta_10day.ix[:, 'last'] > ta_10day.ix[:, 'askPrc_0']
temp3 = np.logical_or(temp1, temp2)
ta_10day.ix[temp3, ['last', 'askPrc_0', 'bidPrc_0']]


Out[328]:
last askPrc_0 bidPrc_0
2015-11-19 21:00:15.000 4508.0 4512.0 4510.0
2015-11-19 21:01:09.250 4508.0 4506.0 4504.0
2015-11-19 21:02:34.500 4498.0 4502.0 4500.0
2015-11-19 21:03:01.000 4504.0 4502.0 4500.0
2015-11-19 21:03:01.500 4504.0 4502.0 4500.0
2015-11-19 21:03:01.750 4504.0 4502.0 4500.0
2015-11-19 21:03:22.500 4502.0 4500.0 4498.0
2015-11-19 21:03:22.750 4502.0 4500.0 4498.0
2015-11-19 21:03:23.000 4502.0 4500.0 4498.0
2015-11-19 21:03:40.500 4494.0 4498.0 4496.0
2015-11-19 21:04:23.000 4498.0 4496.0 4494.0
2015-11-19 21:04:23.250 4498.0 4496.0 4494.0
2015-11-19 21:04:23.500 4498.0 4496.0 4494.0
2015-11-19 21:06:18.000 4496.0 4494.0 4492.0
2015-11-19 21:06:18.250 4496.0 4494.0 4492.0
2015-11-19 21:06:18.500 4496.0 4494.0 4492.0
2015-11-19 21:11:23.500 4496.0 4494.0 4492.0
2015-11-19 21:14:48.750 4494.0 4492.0 4490.0
2015-11-19 21:14:50.000 4490.0 4494.0 4492.0
2015-11-19 21:14:50.250 4490.0 4494.0 4492.0
2015-11-19 21:15:49.750 4486.0 4490.0 4488.0
2015-11-19 21:15:50.000 4486.0 4490.0 4488.0
2015-11-19 21:17:20.750 4492.0 4490.0 4488.0
2015-11-19 21:18:56.500 4486.0 4490.0 4488.0
2015-11-19 21:18:56.750 4486.0 4490.0 4488.0
2015-11-19 21:18:57.000 4486.0 4490.0 4488.0
2015-11-19 21:18:57.250 4486.0 4490.0 4488.0
2015-11-19 21:24:18.500 4488.0 4492.0 4490.0
2015-11-19 21:24:18.750 4488.0 4492.0 4490.0
2015-11-19 21:24:19.000 4488.0 4492.0 4490.0
... ... ... ...
2015-12-03 14:30:01.250 4488.0 4492.0 4490.0
2015-12-03 14:30:19.750 4488.0 4492.0 4490.0
2015-12-03 14:30:20.000 4488.0 4492.0 4490.0
2015-12-03 14:30:20.250 4488.0 4492.0 4490.0
2015-12-03 14:30:20.500 4488.0 4492.0 4490.0
2015-12-03 14:30:20.750 4488.0 4492.0 4490.0
2015-12-03 14:30:21.000 4488.0 4492.0 4490.0
2015-12-03 14:30:21.250 4488.0 4492.0 4490.0
2015-12-03 14:30:21.500 4488.0 4492.0 4490.0
2015-12-03 14:30:21.750 4488.0 4492.0 4490.0
2015-12-03 14:30:22.000 4488.0 4492.0 4490.0
2015-12-03 14:33:14.750 4492.0 4490.0 4488.0
2015-12-03 14:33:46.000 4488.0 4492.0 4490.0
2015-12-03 14:34:27.250 4488.0 4492.0 4490.0
2015-12-03 14:34:27.500 4488.0 4492.0 4490.0
2015-12-03 14:34:27.750 4488.0 4492.0 4490.0
2015-12-03 14:34:28.000 4488.0 4492.0 4490.0
2015-12-03 14:34:28.250 4488.0 4492.0 4490.0
2015-12-03 14:34:28.500 4488.0 4492.0 4490.0
2015-12-03 14:34:28.750 4488.0 4492.0 4490.0
2015-12-03 14:34:29.000 4488.0 4492.0 4490.0
2015-12-03 14:38:01.750 4482.0 4486.0 4484.0
2015-12-03 14:38:53.250 4480.0 4484.0 4482.0
2015-12-03 14:38:53.500 4480.0 4484.0 4482.0
2015-12-03 14:38:53.750 4480.0 4484.0 4482.0
2015-12-03 14:43:03.750 4480.0 4478.0 4476.0
2015-12-03 14:48:34.000 4484.0 4482.0 4480.0
2015-12-03 14:48:34.250 4484.0 4482.0 4480.0
2015-12-03 14:57:01.750 4488.0 4486.0 4484.0
2015-12-03 14:59:32.500 4490.0 4488.0 4486.0

4445 rows × 3 columns


In [329]:
ta_10day.ix[temp3, ['last', 'askPrc_0', 'bidPrc_0']].plot(marker='*')


Out[329]:
<matplotlib.axes.AxesSubplot at 0x7f5581a55a90>

calculate mid price and join


In [333]:
ta_10day.ix[:, ['askPrc_0', 'bidPrc_0']]


Out[333]:
askPrc_0 bidPrc_0
2015-11-19 21:00:00.000 4530.0 4528.0
2015-11-19 21:00:00.250 4528.0 4522.0
2015-11-19 21:00:00.500 4524.0 4516.0
2015-11-19 21:00:00.750 4518.0 4516.0
2015-11-19 21:00:01.000 4516.0 4514.0
2015-11-19 21:00:01.250 4514.0 4510.0
2015-11-19 21:00:01.500 4510.0 4508.0
2015-11-19 21:00:01.750 4510.0 4508.0
2015-11-19 21:00:02.000 4508.0 4506.0
2015-11-19 21:00:02.250 4512.0 4506.0
2015-11-19 21:00:02.500 4512.0 4510.0
2015-11-19 21:00:02.750 4512.0 4510.0
2015-11-19 21:00:03.000 4512.0 4510.0
2015-11-19 21:00:03.250 4512.0 4510.0
2015-11-19 21:00:03.500 4510.0 4506.0
2015-11-19 21:00:03.750 4512.0 4508.0
2015-11-19 21:00:04.000 4512.0 4510.0
2015-11-19 21:00:04.250 4512.0 4508.0
2015-11-19 21:00:04.500 4514.0 4512.0
2015-11-19 21:00:04.750 4514.0 4512.0
2015-11-19 21:00:05.000 4514.0 4512.0
2015-11-19 21:00:05.250 4514.0 4512.0
2015-11-19 21:00:05.500 4512.0 4510.0
2015-11-19 21:00:05.750 4514.0 4512.0
2015-11-19 21:00:06.000 4514.0 4512.0
2015-11-19 21:00:06.250 4512.0 4510.0
2015-11-19 21:00:06.500 4512.0 4510.0
2015-11-19 21:00:06.750 4512.0 4510.0
2015-11-19 21:00:07.000 4514.0 4512.0
2015-11-19 21:00:07.250 4514.0 4512.0
... ... ...
2015-12-03 14:59:52.750 4490.0 4488.0
2015-12-03 14:59:53.000 4490.0 4488.0
2015-12-03 14:59:53.250 4490.0 4488.0
2015-12-03 14:59:53.500 4490.0 4488.0
2015-12-03 14:59:53.750 4490.0 4488.0
2015-12-03 14:59:54.000 4490.0 4488.0
2015-12-03 14:59:54.250 4490.0 4488.0
2015-12-03 14:59:54.500 4490.0 4488.0
2015-12-03 14:59:54.750 4490.0 4488.0
2015-12-03 14:59:55.000 4490.0 4488.0
2015-12-03 14:59:55.250 4490.0 4488.0
2015-12-03 14:59:55.500 4490.0 4488.0
2015-12-03 14:59:55.750 4490.0 4488.0
2015-12-03 14:59:56.000 4490.0 4488.0
2015-12-03 14:59:56.250 4490.0 4488.0
2015-12-03 14:59:56.500 4490.0 4488.0
2015-12-03 14:59:56.750 4490.0 4488.0
2015-12-03 14:59:57.000 4490.0 4488.0
2015-12-03 14:59:57.250 4490.0 4488.0
2015-12-03 14:59:57.500 4490.0 4488.0
2015-12-03 14:59:57.750 4490.0 4488.0
2015-12-03 14:59:58.000 4490.0 4488.0
2015-12-03 14:59:58.250 4490.0 4488.0
2015-12-03 14:59:58.500 4490.0 4488.0
2015-12-03 14:59:58.750 4490.0 4488.0
2015-12-03 14:59:59.000 4492.0 4490.0
2015-12-03 14:59:59.250 4492.0 4490.0
2015-12-03 14:59:59.500 4492.0 4490.0
2015-12-03 14:59:59.750 4492.0 4490.0
2015-12-03 15:00:00.000 4492.0 4490.0

900040 rows × 2 columns


In [335]:
temp = (ta_10day.ix[:, 'askPrc_0'] + ta_10day.ix[:, 'bidPrc_0']) / 2.
temp = temp.rename('mid')
temp
ta_10day = ta_10day.join(temp)


Out[335]:
2015-11-19 21:00:00.000    4529.0
2015-11-19 21:00:00.250    4525.0
2015-11-19 21:00:00.500    4520.0
2015-11-19 21:00:00.750    4517.0
2015-11-19 21:00:01.000    4515.0
2015-11-19 21:00:01.250    4512.0
2015-11-19 21:00:01.500    4509.0
2015-11-19 21:00:01.750    4509.0
2015-11-19 21:00:02.000    4507.0
2015-11-19 21:00:02.250    4509.0
2015-11-19 21:00:02.500    4511.0
2015-11-19 21:00:02.750    4511.0
2015-11-19 21:00:03.000    4511.0
2015-11-19 21:00:03.250    4511.0
2015-11-19 21:00:03.500    4508.0
2015-11-19 21:00:03.750    4510.0
2015-11-19 21:00:04.000    4511.0
2015-11-19 21:00:04.250    4510.0
2015-11-19 21:00:04.500    4513.0
2015-11-19 21:00:04.750    4513.0
2015-11-19 21:00:05.000    4513.0
2015-11-19 21:00:05.250    4513.0
2015-11-19 21:00:05.500    4511.0
2015-11-19 21:00:05.750    4513.0
2015-11-19 21:00:06.000    4513.0
2015-11-19 21:00:06.250    4511.0
2015-11-19 21:00:06.500    4511.0
2015-11-19 21:00:06.750    4511.0
2015-11-19 21:00:07.000    4513.0
2015-11-19 21:00:07.250    4513.0
                            ...  
2015-12-03 14:59:52.750    4489.0
2015-12-03 14:59:53.000    4489.0
2015-12-03 14:59:53.250    4489.0
2015-12-03 14:59:53.500    4489.0
2015-12-03 14:59:53.750    4489.0
2015-12-03 14:59:54.000    4489.0
2015-12-03 14:59:54.250    4489.0
2015-12-03 14:59:54.500    4489.0
2015-12-03 14:59:54.750    4489.0
2015-12-03 14:59:55.000    4489.0
2015-12-03 14:59:55.250    4489.0
2015-12-03 14:59:55.500    4489.0
2015-12-03 14:59:55.750    4489.0
2015-12-03 14:59:56.000    4489.0
2015-12-03 14:59:56.250    4489.0
2015-12-03 14:59:56.500    4489.0
2015-12-03 14:59:56.750    4489.0
2015-12-03 14:59:57.000    4489.0
2015-12-03 14:59:57.250    4489.0
2015-12-03 14:59:57.500    4489.0
2015-12-03 14:59:57.750    4489.0
2015-12-03 14:59:58.000    4489.0
2015-12-03 14:59:58.250    4489.0
2015-12-03 14:59:58.500    4489.0
2015-12-03 14:59:58.750    4489.0
2015-12-03 14:59:59.000    4491.0
2015-12-03 14:59:59.250    4491.0
2015-12-03 14:59:59.500    4491.0
2015-12-03 14:59:59.750    4491.0
2015-12-03 15:00:00.000    4491.0
Name: mid, dtype: float64

In [336]:


In [339]:
ta_10day.ix[:5, 30:]


Out[339]:
lowLimit open openInterest prevClose prevOpenInterest prevSettle settle volume vol_diff mid
2015-11-19 21:00:00.000 4368.0 4530.0 769978.0 4542.0 769788.0 4552.0 4530.0 1526.0 NaN 4529.0
2015-11-19 21:00:00.250 4368.0 4530.0 770122.0 4542.0 769788.0 4552.0 4530.0 2440.0 915.0 4525.0
2015-11-19 21:00:00.500 4368.0 4530.0 770576.0 4542.0 769788.0 4552.0 4526.0 3802.0 1363.0 4520.0
2015-11-19 21:00:00.750 4368.0 4530.0 770600.0 4542.0 769788.0 4552.0 4526.0 4258.0 457.0 4517.0
2015-11-19 21:00:01.000 4368.0 4530.0 770408.0 4542.0 769788.0 4552.0 4524.0 4522.0 265.0 4515.0

map up as +1, down as -1


In [ ]:
up = rm_10day.ix[:, 'last'] > rm_10day.ix[:, 'mid']
down = rm_10day.ix[:, 'last'] < rm_10day.ix[:, 'mid']

In [350]:
up *= 1

In [351]:
down


Out[351]:
2015-11-19 21:00:00.000    False
2015-11-19 21:00:00.250    False
2015-11-19 21:00:00.500    False
2015-11-19 21:00:00.750    False
2015-11-19 21:00:01.000    False
2015-11-19 21:00:01.250     True
2015-11-19 21:00:01.500     True
2015-11-19 21:00:01.750     True
2015-11-19 21:00:02.000    False
2015-11-19 21:00:02.250     True
2015-11-19 21:00:02.500     True
2015-11-19 21:00:02.750    False
2015-11-19 21:00:03.000    False
2015-11-19 21:00:03.250     True
2015-11-19 21:00:03.500    False
2015-11-19 21:00:03.750    False
2015-11-19 21:00:04.000     True
2015-11-19 21:00:04.250     True
2015-11-19 21:00:04.500     True
2015-11-19 21:00:04.750     True
2015-11-19 21:00:05.000    False
2015-11-19 21:00:05.250     True
2015-11-19 21:00:05.500     True
2015-11-19 21:00:05.750    False
2015-11-19 21:00:06.000     True
2015-11-19 21:00:06.250    False
2015-11-19 21:00:06.500    False
2015-11-19 21:00:06.750     True
2015-11-19 21:00:07.000     True
2015-11-19 21:00:07.250    False
                           ...  
2015-12-03 14:59:52.750    False
2015-12-03 14:59:53.000    False
2015-12-03 14:59:53.250     True
2015-12-03 14:59:53.500     True
2015-12-03 14:59:53.750     True
2015-12-03 14:59:54.000    False
2015-12-03 14:59:54.250    False
2015-12-03 14:59:54.500    False
2015-12-03 14:59:54.750    False
2015-12-03 14:59:55.000    False
2015-12-03 14:59:55.250    False
2015-12-03 14:59:55.500     True
2015-12-03 14:59:55.750     True
2015-12-03 14:59:56.000    False
2015-12-03 14:59:56.250     True
2015-12-03 14:59:56.500    False
2015-12-03 14:59:56.750    False
2015-12-03 14:59:57.000    False
2015-12-03 14:59:57.250     True
2015-12-03 14:59:57.500    False
2015-12-03 14:59:57.750    False
2015-12-03 14:59:58.000     True
2015-12-03 14:59:58.250    False
2015-12-03 14:59:58.500    False
2015-12-03 14:59:58.750    False
2015-12-03 14:59:59.000     True
2015-12-03 14:59:59.250     True
2015-12-03 14:59:59.500    False
2015-12-03 14:59:59.750     True
2015-12-03 15:00:00.000     True
dtype: bool

In [352]:
down *= -1
down


Out[352]:
2015-11-19 21:00:00.000    0
2015-11-19 21:00:00.250    0
2015-11-19 21:00:00.500    0
2015-11-19 21:00:00.750    0
2015-11-19 21:00:01.000    0
2015-11-19 21:00:01.250   -1
2015-11-19 21:00:01.500   -1
2015-11-19 21:00:01.750   -1
2015-11-19 21:00:02.000    0
2015-11-19 21:00:02.250   -1
2015-11-19 21:00:02.500   -1
2015-11-19 21:00:02.750    0
2015-11-19 21:00:03.000    0
2015-11-19 21:00:03.250   -1
2015-11-19 21:00:03.500    0
2015-11-19 21:00:03.750    0
2015-11-19 21:00:04.000   -1
2015-11-19 21:00:04.250   -1
2015-11-19 21:00:04.500   -1
2015-11-19 21:00:04.750   -1
2015-11-19 21:00:05.000    0
2015-11-19 21:00:05.250   -1
2015-11-19 21:00:05.500   -1
2015-11-19 21:00:05.750    0
2015-11-19 21:00:06.000   -1
2015-11-19 21:00:06.250    0
2015-11-19 21:00:06.500    0
2015-11-19 21:00:06.750   -1
2015-11-19 21:00:07.000   -1
2015-11-19 21:00:07.250    0
                          ..
2015-12-03 14:59:52.750    0
2015-12-03 14:59:53.000    0
2015-12-03 14:59:53.250   -1
2015-12-03 14:59:53.500   -1
2015-12-03 14:59:53.750   -1
2015-12-03 14:59:54.000    0
2015-12-03 14:59:54.250    0
2015-12-03 14:59:54.500    0
2015-12-03 14:59:54.750    0
2015-12-03 14:59:55.000    0
2015-12-03 14:59:55.250    0
2015-12-03 14:59:55.500   -1
2015-12-03 14:59:55.750   -1
2015-12-03 14:59:56.000    0
2015-12-03 14:59:56.250   -1
2015-12-03 14:59:56.500    0
2015-12-03 14:59:56.750    0
2015-12-03 14:59:57.000    0
2015-12-03 14:59:57.250   -1
2015-12-03 14:59:57.500    0
2015-12-03 14:59:57.750    0
2015-12-03 14:59:58.000   -1
2015-12-03 14:59:58.250    0
2015-12-03 14:59:58.500    0
2015-12-03 14:59:58.750    0
2015-12-03 14:59:59.000   -1
2015-12-03 14:59:59.250   -1
2015-12-03 14:59:59.500    0
2015-12-03 14:59:59.750   -1
2015-12-03 15:00:00.000   -1
dtype: int64

In [478]:
updown = updown.rename('updown')

In [359]:
updown = up + down
updown


Out[359]:
2015-11-19 21:00:00.000    1
2015-11-19 21:00:00.250    1
2015-11-19 21:00:00.500    1
2015-11-19 21:00:00.750    1
2015-11-19 21:00:01.000    1
2015-11-19 21:00:01.250   -1
2015-11-19 21:00:01.500   -1
2015-11-19 21:00:01.750   -1
2015-11-19 21:00:02.000    1
2015-11-19 21:00:02.250   -1
2015-11-19 21:00:02.500   -1
2015-11-19 21:00:02.750    1
2015-11-19 21:00:03.000    1
2015-11-19 21:00:03.250   -1
2015-11-19 21:00:03.500    0
2015-11-19 21:00:03.750    1
2015-11-19 21:00:04.000   -1
2015-11-19 21:00:04.250   -1
2015-11-19 21:00:04.500   -1
2015-11-19 21:00:04.750   -1
2015-11-19 21:00:05.000    1
2015-11-19 21:00:05.250   -1
2015-11-19 21:00:05.500   -1
2015-11-19 21:00:05.750    1
2015-11-19 21:00:06.000   -1
2015-11-19 21:00:06.250    1
2015-11-19 21:00:06.500    1
2015-11-19 21:00:06.750   -1
2015-11-19 21:00:07.000   -1
2015-11-19 21:00:07.250    1
                          ..
2015-12-03 14:59:52.750    1
2015-12-03 14:59:53.000    1
2015-12-03 14:59:53.250   -1
2015-12-03 14:59:53.500   -1
2015-12-03 14:59:53.750   -1
2015-12-03 14:59:54.000    1
2015-12-03 14:59:54.250    1
2015-12-03 14:59:54.500    1
2015-12-03 14:59:54.750    1
2015-12-03 14:59:55.000    1
2015-12-03 14:59:55.250    1
2015-12-03 14:59:55.500   -1
2015-12-03 14:59:55.750   -1
2015-12-03 14:59:56.000    1
2015-12-03 14:59:56.250   -1
2015-12-03 14:59:56.500    1
2015-12-03 14:59:56.750    1
2015-12-03 14:59:57.000    1
2015-12-03 14:59:57.250   -1
2015-12-03 14:59:57.500    1
2015-12-03 14:59:57.750    1
2015-12-03 14:59:58.000   -1
2015-12-03 14:59:58.250    1
2015-12-03 14:59:58.500    1
2015-12-03 14:59:58.750    1
2015-12-03 14:59:59.000   -1
2015-12-03 14:59:59.250   -1
2015-12-03 14:59:59.500    1
2015-12-03 14:59:59.750   -1
2015-12-03 15:00:00.000   -1
dtype: int64

In [458]:
plt.figure()
plt.hist(updown, bins=50)


Out[458]:
(array([ 455528.,       0.,       0.,       0.,       0.,       0.,
             0.,       0.,       0.,       0.,       0.,       0.,
             0.,       0.,       0.,       0.,       0.,       0.,
             0.,       0.,       0.,       0.,       0.,       0.,
             0.,    7573.,       0.,       0.,       0.,       0.,
             0.,       0.,       0.,       0.,       0.,       0.,
             0.,       0.,       0.,       0.,       0.,       0.,
             0.,       0.,       0.,       0.,       0.,       0.,
             0.,  436939.]),
 array([-1.  , -0.96, -0.92, -0.88, -0.84, -0.8 , -0.76, -0.72, -0.68,
       -0.64, -0.6 , -0.56, -0.52, -0.48, -0.44, -0.4 , -0.36, -0.32,
       -0.28, -0.24, -0.2 , -0.16, -0.12, -0.08, -0.04,  0.  ,  0.04,
        0.08,  0.12,  0.16,  0.2 ,  0.24,  0.28,  0.32,  0.36,  0.4 ,
        0.44,  0.48,  0.52,  0.56,  0.6 ,  0.64,  0.68,  0.72,  0.76,
        0.8 ,  0.84,  0.88,  0.92,  0.96,  1.  ]),
 <a list of 50 Patch objects>)

In [459]:
mywindow = 40
r = updown.rolling(window=mywindow)
r


Out[459]:
Rolling [window=10,center=False,axis=0]

In [460]:
lastmid_indicator = (r.mean())
lastmid_indicator = lastmid_indicator.rename('lastmid_indicator')

In [401]:
plt.figure()
plt.hist(lastmid_indicator.ix[pm_index].dropna(), bins=20)


Out[401]:
(array([ 46968.,  27963.,  16714.,  37004.,  42746.,  48006.,  54443.,
        57924.,  60034.,  61105.,  60592.,  58698.,  55586.,  51417.,
        46370.,  40399.,  35055.,  29101.,  24825.,  43721.]),
 array([-1. , -0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1,  0. ,
        0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ]),
 <a list of 20 Patch objects>)

In [388]:
plt.figure()
plt.hist(ta_10day_pm, bins=np.arange(-5.5, 5.5, 1))


Out[388]:
(array([      0.,   23550.,       0.,  198574.,       0.,  445684.,
             0.,  195326.,       0.,   24615.]),
 array([-5.5, -4.5, -3.5, -2.5, -1.5, -0.5,  0.5,  1.5,  2.5,  3.5,  4.5]),
 <a list of 10 Patch objects>)

In [483]:
res = myols(updown)
print(res.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:             price move   R-squared:                       0.170
Model:                            OLS   Adj. R-squared:                  0.170
Method:                 Least Squares   F-statistic:                 1.845e+05
Date:                Sun, 10 Jul 2016   Prob (F-statistic):               0.00
Time:                        05:01:16   Log-Likelihood:            -1.7022e+06
No. Observations:              897908   AIC:                         3.404e+06
Df Residuals:                  897906   BIC:                         3.404e+06
Df Model:                           1                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const         -0.0089      0.002     -5.256      0.000        -0.012    -0.006
updown        -0.7334      0.002   -429.479      0.000        -0.737    -0.730
==============================================================================
Omnibus:                    64083.972   Durbin-Watson:                   0.278
Prob(Omnibus):                  0.000   Jarque-Bera (JB):           322780.334
Skew:                           0.127   Prob(JB):                         0.00
Kurtosis:                       5.926   Cond. No.                         1.02
==============================================================================

In [461]:
res = myols(lastmid_indicator)
print(res.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:             price move   R-squared:                       0.032
Model:                            OLS   Adj. R-squared:                  0.032
Method:                 Least Squares   F-statistic:                 2.999e+04
Date:                Sun, 10 Jul 2016   Prob (F-statistic):               0.00
Time:                        04:11:09   Log-Likelihood:            -1.8155e+06
No. Observations:              898701   AIC:                         3.631e+06
Df Residuals:                  898699   BIC:                         3.631e+06
Df Model:                           1                                         
=====================================================================================
                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
const                 0.0004      0.002      0.228      0.820        -0.003     0.004
lastmid_indicator    -0.4619      0.003   -173.171      0.000        -0.467    -0.457
==============================================================================
Omnibus:                   227546.592   Durbin-Watson:                   0.353
Prob(Omnibus):                  0.000   Jarque-Bera (JB):         15324784.174
Skew:                           0.243   Prob(JB):                         0.00
Kurtosis:                      23.224   Cond. No.                         1.39
==============================================================================

Use sample with no extreme values, rsquare does not increase much(about 0.003)


In [472]:
res = myols_no_outlier(lastmid_indicator)
print(res.summary())


                            OLS Regression Results                            
==============================================================================
Dep. Variable:             price move   R-squared:                       0.035
Model:                            OLS   Adj. R-squared:                  0.035
Method:                 Least Squares   F-statistic:                 3.287e+04
Date:                Sun, 10 Jul 2016   Prob (F-statistic):               0.00
Time:                        04:31:06   Log-Likelihood:            -1.7699e+06
No. Observations:              897902   AIC:                         3.540e+06
Df Residuals:                  897900   BIC:                         3.540e+06
Df Model:                           1                                         
=====================================================================================
                        coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------
const                -0.0034      0.002     -1.837      0.066        -0.007     0.000
lastmid_indicator    -0.4606      0.003   -181.304      0.000        -0.466    -0.456
==============================================================================
Omnibus:                    46755.142   Durbin-Watson:                   0.386
Prob(Omnibus):                  0.000   Jarque-Bera (JB):           177950.728
Skew:                           0.094   Prob(JB):                         0.00
Kurtosis:                       5.173   Cond. No.                         1.39
==============================================================================

find good window parameter


In [474]:
for mywindow in np.arange(1, 60, 1):
#     mywindow = 
    r = updown.rolling(window=mywindow)
    lastmid_indicator = (r.mean())
    lastmid_indicator = lastmid_indicator.rename('lastmid_indicator')
    res = myols(lastmid_indicator)
    print '\n--------------------'
    print ('window = %d, Rsquare = %f. ' %(mywindow, res.rsquared))


--------------------
window = 1, Rsquare = 0.155951. 

--------------------
window = 2, Rsquare = 0.121138. 

--------------------
window = 3, Rsquare = 0.096055. 

--------------------
window = 4, Rsquare = 0.078307. 

--------------------
window = 5, Rsquare = 0.065318. 

--------------------
window = 6, Rsquare = 0.055410. 

--------------------
window = 7, Rsquare = 0.047652. 

--------------------
window = 8, Rsquare = 0.041431. 

--------------------
window = 9, Rsquare = 0.036406. 

--------------------
window = 10, Rsquare = 0.032291. 

--------------------
window = 11, Rsquare = 0.028890. 

--------------------
window = 12, Rsquare = 0.026066. 

--------------------
window = 13, Rsquare = 0.023667. 

--------------------
window = 14, Rsquare = 0.021616. 

--------------------
window = 15, Rsquare = 0.019834. 

--------------------
window = 16, Rsquare = 0.018274. 

--------------------
window = 17, Rsquare = 0.016890. 

--------------------
window = 18, Rsquare = 0.015659. 

--------------------
window = 19, Rsquare = 0.014569. 

--------------------
window = 20, Rsquare = 0.013592. 

--------------------
window = 21, Rsquare = 0.012714. 

--------------------
window = 22, Rsquare = 0.011920. 

--------------------
window = 23, Rsquare = 0.011210. 

--------------------
window = 24, Rsquare = 0.010574. 

--------------------
window = 25, Rsquare = 0.009998. 

--------------------
window = 26, Rsquare = 0.009460. 

--------------------
window = 27, Rsquare = 0.008965. 

--------------------
window = 28, Rsquare = 0.008506. 

--------------------
window = 29, Rsquare = 0.008090. 

--------------------
window = 30, Rsquare = 0.007713. 

--------------------
window = 31, Rsquare = 0.007354. 

--------------------
window = 32, Rsquare = 0.007011. 

--------------------
window = 33, Rsquare = 0.006692. 

--------------------
window = 34, Rsquare = 0.006386. 

--------------------
window = 35, Rsquare = 0.006106. 

--------------------
window = 36, Rsquare = 0.005844. 

--------------------
window = 37, Rsquare = 0.005600. 

--------------------
window = 38, Rsquare = 0.005374. 

--------------------
window = 39, Rsquare = 0.005166. 

--------------------
window = 40, Rsquare = 0.004967. 

--------------------
window = 41, Rsquare = 0.004773. 

--------------------
window = 42, Rsquare = 0.004592. 

--------------------
window = 43, Rsquare = 0.004422. 

--------------------
window = 44, Rsquare = 0.004259. 

--------------------
window = 45, Rsquare = 0.004100. 

--------------------
window = 46, Rsquare = 0.003948. 

--------------------
window = 47, Rsquare = 0.003807. 

--------------------
window = 48, Rsquare = 0.003679. 

--------------------
window = 49, Rsquare = 0.003558. 

--------------------
window = 50, Rsquare = 0.003442. 

--------------------
window = 51, Rsquare = 0.003331. 

--------------------
window = 52, Rsquare = 0.003222. 

--------------------
window = 53, Rsquare = 0.003122. 

--------------------
window = 54, Rsquare = 0.003027. 

--------------------
window = 55, Rsquare = 0.002934. 

--------------------
window = 56, Rsquare = 0.002843. 

--------------------
window = 57, Rsquare = 0.002754. 

--------------------
window = 58, Rsquare = 0.002667. 

--------------------
window = 59, Rsquare = 0.002584. 

plot fit


In [482]:
fig, ax = plt.subplots()
fig = sm.graphics.plot_fit(res, 'updown', ax=ax)

are u ok?


In [136]:
ta_10day.ix[0:55, ['last', 'volume']]


Out[136]:
last volume
2015-11-19 21:00:00.000 4530.0 1526.0
2015-11-19 21:00:00.250 4528.0 2440.0
2015-11-19 21:00:00.500 4524.0 3802.0
2015-11-19 21:00:00.750 4518.0 4258.0
2015-11-19 21:00:01.000 4516.0 4522.0
2015-11-19 21:00:01.250 4510.0 5190.0
2015-11-19 21:00:01.500 4508.0 6418.0
2015-11-19 21:00:01.750 4508.0 7158.0
2015-11-19 21:00:02.000 4508.0 8626.0
2015-11-19 21:00:02.250 4506.0 9398.0
2015-11-19 21:00:02.500 4510.0 9722.0
2015-11-19 21:00:02.750 4512.0 10258.0
2015-11-19 21:00:03.000 4512.0 10348.0
2015-11-19 21:00:03.250 4510.0 10762.0
2015-11-19 21:00:03.500 4508.0 11068.0
2015-11-19 21:00:03.750 4512.0 11198.0
2015-11-19 21:00:04.000 4510.0 11262.0
2015-11-19 21:00:04.250 4508.0 11338.0
2015-11-19 21:00:04.500 4512.0 11462.0
2015-11-19 21:00:04.750 4512.0 11504.0
2015-11-19 21:00:05.000 4514.0 11600.0
2015-11-19 21:00:05.250 4512.0 11652.0
2015-11-19 21:00:05.500 4510.0 11862.0
2015-11-19 21:00:05.750 4514.0 11968.0
2015-11-19 21:00:06.000 4512.0 12164.0
2015-11-19 21:00:06.250 4512.0 12516.0
2015-11-19 21:00:06.500 4512.0 12572.0
2015-11-19 21:00:06.750 4510.0 13022.0
2015-11-19 21:00:07.000 4512.0 13626.0
2015-11-19 21:00:07.250 4514.0 13670.0
2015-11-19 21:00:07.500 4514.0 13836.0
2015-11-19 21:00:07.750 4514.0 13988.0
2015-11-19 21:00:08.000 4512.0 14108.0
2015-11-19 21:00:08.250 4510.0 14148.0
2015-11-19 21:00:08.500 4512.0 14284.0
2015-11-19 21:00:08.750 4510.0 14400.0
2015-11-19 21:00:09.000 4510.0 14448.0
2015-11-19 21:00:09.250 4514.0 14574.0
2015-11-19 21:00:09.500 4514.0 14586.0
2015-11-19 21:00:09.750 4510.0 14644.0
2015-11-19 21:00:10.000 4510.0 14794.0
2015-11-19 21:00:10.250 4512.0 14800.0
2015-11-19 21:00:10.500 4512.0 14818.0
2015-11-19 21:00:10.750 4514.0 14844.0
2015-11-19 21:00:11.000 4510.0 14918.0
2015-11-19 21:00:11.250 4512.0 15182.0
2015-11-19 21:00:11.500 4512.0 15200.0
2015-11-19 21:00:11.750 4512.0 15310.0
2015-11-19 21:00:12.000 4510.0 15520.0
2015-11-19 21:00:12.250 4510.0 15606.0
2015-11-19 21:00:12.500 4510.0 15798.0
2015-11-19 21:00:12.750 4510.0 15918.0
2015-11-19 21:00:13.000 4508.0 16006.0
2015-11-19 21:00:13.250 4510.0 16078.0
2015-11-19 21:00:13.500 4508.0 16194.0

In [137]:
ta_10day_vol


Out[137]:
2015-11-19 21:00:00.000    13268.0
2015-11-19 21:00:00.250    12360.0
2015-11-19 21:00:00.500    11016.0
2015-11-19 21:00:00.750    10586.0
2015-11-19 21:00:01.000    10396.0
2015-11-19 21:00:01.250     9992.0
2015-11-19 21:00:01.500     8782.0
2015-11-19 21:00:01.750     8152.0
2015-11-19 21:00:02.000     6894.0
2015-11-19 21:00:02.250     6208.0
2015-11-19 21:00:02.500     6076.0
2015-11-19 21:00:02.750     5660.0
2015-11-19 21:00:03.000     5658.0
2015-11-19 21:00:03.250     5316.0
2015-11-19 21:00:03.500     5126.0
2015-11-19 21:00:03.750     5204.0
2015-11-19 21:00:04.000     5168.0
2015-11-19 21:00:04.250     5238.0
2015-11-19 21:00:04.500     5232.0
2015-11-19 21:00:04.750     5418.0
2015-11-19 21:00:05.000     5358.0
2015-11-19 21:00:05.250     5416.0
2015-11-19 21:00:05.500     5262.0
2015-11-19 21:00:05.750     5174.0
2015-11-19 21:00:06.000     4994.0
2015-11-19 21:00:06.250     4656.0
2015-11-19 21:00:06.500     4604.0
2015-11-19 21:00:06.750     4470.0
2015-11-19 21:00:07.000     3874.0
2015-11-19 21:00:07.250     3832.0
                            ...   
2015-12-03 14:59:41.500     1426.0
2015-12-03 14:59:41.750     1462.0
2015-12-03 14:59:42.000     1464.0
2015-12-03 14:59:42.250     1504.0
2015-12-03 14:59:42.500     1518.0
2015-12-03 14:59:42.750     1578.0
2015-12-03 14:59:43.000     1192.0
2015-12-03 14:59:43.250     1204.0
2015-12-03 14:59:43.500     1332.0
2015-12-03 14:59:43.750     1356.0
2015-12-03 14:59:44.000     1372.0
2015-12-03 14:59:44.250     1362.0
2015-12-03 14:59:44.500     1436.0
2015-12-03 14:59:44.750     1426.0
2015-12-03 14:59:45.000     1398.0
2015-12-03 14:59:45.250     1400.0
2015-12-03 14:59:45.500     1414.0
2015-12-03 14:59:45.750     1422.0
2015-12-03 14:59:46.000     1422.0
2015-12-03 14:59:46.250     1420.0
2015-12-03 14:59:46.500     1608.0
2015-12-03 14:59:46.750     1792.0
2015-12-03 14:59:47.000     1794.0
2015-12-03 14:59:47.250     1794.0
2015-12-03 14:59:47.500     1952.0
2015-12-03 14:59:47.750     1942.0
2015-12-03 14:59:48.000     2016.0
2015-12-03 14:59:48.250     2018.0
2015-12-03 14:59:48.500     2108.0
2015-12-03 14:59:48.750     2364.0
Name: volume, dtype: float64

yes it's ok


In [340]:
prstd, iv_l, iv_u = wls_prediction_std(res2)

fig, ax = plt.subplots(figsize=(8,6))

ax.plot(x, y, 'o', label="Data")
ax.plot(x, y_true, 'b-', label="True")
ax.plot(x, res2.fittedvalues, 'r--.', label="Predicted")
ax.plot(x, iv_u, 'r--')
ax.plot(x, iv_l, 'r--')
legend = ax.legend(loc="best")


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-340-7f956343d067> in <module>()
      3 fig, ax = plt.subplots(figsize=(8,6))
      4 
----> 5 ax.plot(x, y, 'o', label="data")
      6 ax.plot(x, y_true, 'b-', label="True")
      7 ax.plot(x, res.fittedvalues, 'r--.', label="OLS")

NameError: name 'x' is not defined

In [ ]:

PCA with totalbid and totalask


In [12]:
from matplotlib.mlab import PCA

In [18]:
rm_totalbidask = rm.ix[:, ['TotalBidLot', 'TotalAskLot']]

In [32]:
mydata = rm_totalbidask.ix[:day_len,:].values
mydata


Out[32]:
array([[  2975.,   3103.],
       [  3010.,   3424.],
       [  3170.,   3648.],
       ..., 
       [  7762.,  13098.],
       [  7771.,  13091.],
       [  7772.,  13029.]])

In [33]:
bidaskpca = PCA(mydata)

In [34]:
bidaskpca.fracs


Out[34]:
array([ 0.52077116,  0.47922884])

In [35]:
yy = bidaskpca.Y
plt.scatter(yy[:,0], yy[:,1])
plt.show()

relation between index and price move

rm_bidask_pressure_index and rm_last_diff2


In [188]:
plt.figure(figsize=(18,10))
temp = day_len * 11 + 1000
sns.swarmplot(x=rm_last_diff2.ix[temp:temp+60000:20], y=rm_bidask_pressure_index2.ix[temp:temp+60000:20])


Out[188]:
<matplotlib.axes.AxesSubplot at 0x7f7e248355d0>