In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.finance as mf
from matplotlib.widgets import MultiCursor

In [2]:
import statsmodels.tsa.stattools as stt
import scipy.signal as sgn

In [3]:
import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std

In [4]:
from matplotlib.mlab import PCA

In [26]:
%matplotlib inline

In [5]:
sns.set_context('paper')
sns.set_style("darkgrid")

In [5]:
sns.set_context('paper')
sns.set_style("dark", rc={'axes.facecolor': 'black', 'grid.color': 'red', 
                              'grid.linestyle': '--',
                              'figure.facecolor': 'grey'})

In [7]:
hft = pd.read_hdf('HFT_SR_RM_MA_TA.hdf')

In [8]:
ta = hft.minor_xs('TA0001')

In [197]:
sr = hft.minor_xs('SR0001')

In [9]:
rm = hft.minor_xs('RM0001')
type(rm)


Out[9]:
pandas.core.frame.DataFrame

In [9]:
night_len = int(4*3600*2.5)
mor_len = int(4*3600*2.25)
aftn_len = int(4*3600*1.5)
day_len = night_len + mor_len + aftn_len + 3

high low limit


In [11]:
dates1 = pd.date_range('2015-11-19 21:01:01', '2015-12-31  21:01:01', freq='D')

In [12]:
dates2 = pd.date_range('2015-11-20 14:59:59', '2015-12-31  14:59:59', freq='D')

In [14]:
type(dates1)


Out[14]:
pandas.tseries.index.DatetimeIndex

In [17]:
dates1.weekday


Out[17]:
array([3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4,
       5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3], dtype=int32)

In [18]:
trade_day1 = dates1[dates1.weekday != 5]
trade_day2 = dates2[np.logical_and(dates2.weekday != 5, dates2.weekday != 6)]
trade_day1


Out[18]:
DatetimeIndex(['2015-11-19 21:01:01', '2015-11-20 21:01:01',
               '2015-11-22 21:01:01', '2015-11-23 21:01:01',
               '2015-11-24 21:01:01', '2015-11-25 21:01:01',
               '2015-11-26 21:01:01', '2015-11-27 21:01:01',
               '2015-11-29 21:01:01', '2015-11-30 21:01:01',
               '2015-12-01 21:01:01', '2015-12-02 21:01:01',
               '2015-12-03 21:01:01', '2015-12-04 21:01:01',
               '2015-12-06 21:01:01', '2015-12-07 21:01:01',
               '2015-12-08 21:01:01', '2015-12-09 21:01:01',
               '2015-12-10 21:01:01', '2015-12-11 21:01:01',
               '2015-12-13 21:01:01', '2015-12-14 21:01:01',
               '2015-12-15 21:01:01', '2015-12-16 21:01:01',
               '2015-12-17 21:01:01', '2015-12-18 21:01:01',
               '2015-12-20 21:01:01', '2015-12-21 21:01:01',
               '2015-12-22 21:01:01', '2015-12-23 21:01:01',
               '2015-12-24 21:01:01', '2015-12-25 21:01:01',
               '2015-12-27 21:01:01', '2015-12-28 21:01:01',
               '2015-12-29 21:01:01', '2015-12-30 21:01:01',
               '2015-12-31 21:01:01'],
              dtype='datetime64[ns]', freq=None)

5 ningt pan per week

a complete week starts from 11-23


In [94]:
rm.ix[trade_day2, ['high', 'highLimit', 'low', 'lowLimit']]


Out[94]:
high highLimit low lowLimit
2015-11-20 14:59:59 1798.0 1866.0 1765.0 1722.0
2015-11-23 14:59:59 1796.0 1853.0 1709.0 1709.0
2015-11-24 14:59:59 1773.0 1826.0 1699.0 1684.0
2015-11-25 14:59:59 1787.0 1801.0 1751.0 1661.0
2015-11-26 14:59:59 1840.0 1842.0 1771.0 1700.0
2015-11-27 14:59:59 1821.0 1878.0 1789.0 1732.0
2015-11-30 14:59:59 1807.0 1878.0 1755.0 1732.0
2015-12-01 14:59:59 1834.0 1852.0 1790.0 1708.0
2015-12-02 14:59:59 1927.0 1957.0 1883.0 1805.0
2015-12-03 14:59:59 1899.0 1971.0 1851.0 1819.0
2015-12-04 14:59:59 1872.0 1944.0 1838.0 1794.0
2015-12-07 14:59:59 1886.0 1928.0 1857.0 1778.0
2015-12-08 14:59:59 1879.0 1948.0 1819.0 1798.0
2015-12-09 14:59:59 1852.0 1917.0 1817.0 1769.0
2015-12-10 14:59:59 1862.0 1911.0 1835.0 1763.0
2015-12-11 14:59:59 1892.0 1919.0 1838.0 1771.0
2015-12-14 14:59:59 1890.0 1938.0 1867.0 1788.0
2015-12-15 14:59:59 1921.0 1956.0 1873.0 1804.0
2015-12-16 14:59:59 1908.0 1976.0 1878.0 1824.0
2015-12-17 14:59:59 1917.0 1970.0 1864.0 1818.0
2015-12-18 14:59:59 1907.0 1967.0 1859.0 1815.0
2015-12-21 14:59:59 1941.0 1961.0 1899.0 1809.0
2015-12-22 14:59:59 1947.0 1994.0 1923.0 1840.0
2015-12-23 14:59:59 1934.0 2012.0 1909.0 1856.0
2015-12-24 14:59:59 1927.0 1998.0 1904.0 1844.0
2015-12-25 14:59:59 1920.0 1994.0 1896.0 1840.0
2015-12-28 14:59:59 1904.0 1984.0 1868.0 1830.0
2015-12-29 14:59:59 1899.0 1961.0 1868.0 1809.0
2015-12-30 14:59:59 1915.0 1960.0 1893.0 1808.0
2015-12-31 14:59:59 1924.0 1981.0 1903.0 1827.0

In [100]:
temp = rm.ix[trade_day2, 'high'] - rm.ix[trade_day2, 'highLimit'] >-3
temp


Out[100]:
2015-11-20 14:59:59    False
2015-11-23 14:59:59    False
2015-11-24 14:59:59    False
2015-11-25 14:59:59    False
2015-11-26 14:59:59     True
2015-11-27 14:59:59    False
2015-11-30 14:59:59    False
2015-12-01 14:59:59    False
2015-12-02 14:59:59    False
2015-12-03 14:59:59    False
2015-12-04 14:59:59    False
2015-12-07 14:59:59    False
2015-12-08 14:59:59    False
2015-12-09 14:59:59    False
2015-12-10 14:59:59    False
2015-12-11 14:59:59    False
2015-12-14 14:59:59    False
2015-12-15 14:59:59    False
2015-12-16 14:59:59    False
2015-12-17 14:59:59    False
2015-12-18 14:59:59    False
2015-12-21 14:59:59    False
2015-12-22 14:59:59    False
2015-12-23 14:59:59    False
2015-12-24 14:59:59    False
2015-12-25 14:59:59    False
2015-12-28 14:59:59    False
2015-12-29 14:59:59    False
2015-12-30 14:59:59    False
2015-12-31 14:59:59    False
dtype: bool

In [102]:
rm.ix[trade_day2,:].ix[temp, 'last']


Out[102]:
2015-11-26 14:59:59    1821.0
Name: last, dtype: float64

In [136]:
temp[0]


Out[136]:
Timestamp('2015-11-26 14:59:59')

In [167]:
for pinzhong in hft.minor_axis:
    print '\n\n#-------------------------------------'
    print pinzhong
    xx = hft.minor_xs(pinzhong)
    toohigh = xx.ix[trade_day2, 'high'] - xx.ix[trade_day2, 'highLimit'] > -2
    toolow = xx.ix[trade_day2, 'low'] - xx.ix[trade_day2, 'lowLimit'] < 2
    print 'too high: \n'
    if toohigh.any() == True:
        print xx.ix[trade_day2,:].ix[toohigh, ['high', 'highLimit']]
        temp = (xx.ix[trade_day2,:].ix[toohigh, :].index)[0]
        high = xx.ix[trade_day2,:].ix[toohigh, 'highLimit']
        #fig1 = plt.figure(figsize=(15,10))
        #ax1 = fig1.add_subplot(111)
        xx.ix[temp - pd.Timedelta(18, unit='h'): temp, 'last'].plot(figsize=(15,10))
        plt.hlines(high, temp - pd.Timedelta(18, unit='h'), temp, colors='r', linestyles='-')
        plt.show()
    print 'too low: \n'
    if toolow.any() == True:
        print xx.ix[trade_day2,:].ix[toolow, ['low', 'lowLimit']]
        temp = (xx.ix[trade_day2,:].ix[toolow, :].index)[0]
        low = xx.ix[trade_day2,:].ix[toolow, 'lowLimit']
        #fig2 = plt.figure(figsize=(15,10))
        #ax2 = fig2.add_subplot(111)
        xx.ix[temp - pd.Timedelta(18, unit='h'): temp, 'last'].plot(figsize=(15,10))
        plt.hlines(low, temp - pd.Timedelta(18, unit='h'), temp, colors='g', linestyles='-')
        plt.show()
plt.show()



#-------------------------------------
MA0001
too high: 

too low: 

                        low  lowLimit
2015-11-23 14:59:59  1731.0    1731.0

#-------------------------------------
RM0001
too high: 

too low: 

                        low  lowLimit
2015-11-23 14:59:59  1709.0    1709.0

#-------------------------------------
SR0001
too high: 

too low: 



#-------------------------------------
TA0001
too high: 

too low: 

                        low  lowLimit
2015-12-08 14:59:59  4296.0    4296.0

we can see that SR has no zhangting or dieting

Training dataset


In [10]:
ta.index[day_len*10 + 9]


Out[10]:
Timestamp('2015-12-03 15:00:00')

In [11]:
#------------------   ta_10day is my training dataset
ta_10day = ta.ix[:day_len*10 + 10, :]

pice move


In [12]:
def Letitforward(df, forwardnum):
    df2 = df.shift(-forwardnum) - df
    df2.dropna(inplace=True)
    return df2

In [13]:
forward_ticks = 40
, 'volume', 'openInterest', 'TotalAskLot', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0', 'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4', 'TotalBidLot']

In [21]:
ta_10day_pm =Letitforward(ta_10day.ix[:, 'last'], forward_ticks)

In [17]:
plt.plot(ta_10day_pm)
plt.show()


Out[17]:
[<matplotlib.lines.Line2D at 0x7f9277dc8490>]

In [22]:
#----------------------------exclude last 36 ticks before ending
last_44_boolean = np.logical_and.reduce((ta_10day_pm.index.hour >= 14,
                              ta_10day_pm.index.minute >= 59,
                              ta_10day_pm.index.second >= 49))
last_boolean = ta_10day_pm.index.hour == 15

In [32]:
ta_10day_pm.ix[np.logical_or(last_44_boolean, last_boolean)]


Out[32]:
2015-11-19 21:59:49.000    0.0
2015-11-19 21:59:49.250    0.0
2015-11-19 21:59:49.500    0.0
2015-11-19 21:59:49.750   -2.0
2015-11-19 21:59:50.000   -2.0
2015-11-19 21:59:50.250   -2.0
2015-11-19 21:59:50.500   -2.0
2015-11-19 21:59:50.750    0.0
2015-11-19 21:59:51.000   -2.0
2015-11-19 21:59:51.250    0.0
2015-11-19 21:59:51.500    0.0
2015-11-19 21:59:51.750    0.0
2015-11-19 21:59:52.000    0.0
2015-11-19 21:59:52.250    0.0
2015-11-19 21:59:52.500   -2.0
2015-11-19 21:59:52.750    0.0
2015-11-19 21:59:53.000   -2.0
2015-11-19 21:59:53.250   -2.0
2015-11-19 21:59:53.500   -2.0
2015-11-19 21:59:53.750   -2.0
2015-11-19 21:59:54.000   -2.0
2015-11-19 21:59:54.250   -2.0
2015-11-19 21:59:54.500    0.0
2015-11-19 21:59:54.750    0.0
2015-11-19 21:59:55.000    0.0
2015-11-19 21:59:55.250    0.0
2015-11-19 21:59:55.500    0.0
2015-11-19 21:59:55.750    0.0
2015-11-19 21:59:56.000    0.0
2015-11-19 21:59:56.250   -2.0
                          ... 
2015-12-02 22:59:53.750    2.0
2015-12-02 22:59:54.000    2.0
2015-12-02 22:59:54.250    2.0
2015-12-02 22:59:54.500    2.0
2015-12-02 22:59:54.750    2.0
2015-12-02 22:59:55.000    2.0
2015-12-02 22:59:55.250    2.0
2015-12-02 22:59:55.500    2.0
2015-12-02 22:59:55.750    2.0
2015-12-02 22:59:56.000    2.0
2015-12-02 22:59:56.250    2.0
2015-12-02 22:59:56.500    2.0
2015-12-02 22:59:56.750    2.0
2015-12-02 22:59:57.000    2.0
2015-12-02 22:59:57.250    2.0
2015-12-02 22:59:57.500    2.0
2015-12-02 22:59:57.750    0.0
2015-12-02 22:59:58.000   -2.0
2015-12-02 22:59:58.250    0.0
2015-12-02 22:59:58.500    0.0
2015-12-02 22:59:58.750    0.0
2015-12-02 22:59:59.000    0.0
2015-12-02 22:59:59.250    0.0
2015-12-02 22:59:59.500    0.0
2015-12-02 22:59:59.750    0.0
2015-12-03 14:59:49.000    2.0
2015-12-03 14:59:49.250    0.0
2015-12-03 14:59:49.500    4.0
2015-12-03 14:59:49.750    2.0
2015-12-03 14:59:50.000    2.0
Name: last, dtype: float64

In [35]:
outlier_boolean = abs(ta_10day_pm) > 10

In [38]:
ta_10day_pm.ix[outlier_boolean].plot()


Out[38]:
<matplotlib.axes.AxesSubplot at 0x7f558fb61cd0>

In [23]:
ta_10day_pm = ta_10day_pm.ix[np.logical_not(np.logical_or(last_44_boolean, last_boolean))]
#ta_10day_pm.plot(figsize=(18,10))

In [24]:
pm_index = ta_10day_pm.index

In [25]:
ta_10day_last = ta_10day.ix[:, 'last']

In [ ]:
ta_10day_last_log =

In [28]:
res = sm.tsa.dseasonal_decompose(ta_10day_last)
res.plot()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-28-9c2aeb8f4c35> in <module>()
----> 1 res = sm.tsa.seasonal_decompose(ta_10day_last)
      2 res.plot()

AttributeError: 'module' object has no attribute 'seasonal_decompose'

In [ ]: