notebook.community

Edit and run



In [1]:

    
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.finance as mf
from matplotlib.widgets import MultiCursor



In [2]:

    
import statsmodels.tsa.stattools as stt
import scipy.signal as sgn



In [3]:

    
import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std



In [4]:

    
from matplotlib.mlab import PCA



In [26]:

    
%matplotlib inline



In [5]:

    
sns.set_context('paper')
sns.set_style("darkgrid")



In [5]:

    
sns.set_context('paper')
sns.set_style("dark", rc={'axes.facecolor': 'black', 'grid.color': 'red', 
                              'grid.linestyle': '--',
                              'figure.facecolor': 'grey'})



In [7]:

    
hft = pd.read_hdf('HFT_SR_RM_MA_TA.hdf')



In [8]:

    
ta = hft.minor_xs('TA0001')



In [197]:

    
sr = hft.minor_xs('SR0001')



In [9]:

    
rm = hft.minor_xs('RM0001')
type(rm)









    Out[9]:





pandas.core.frame.DataFrame



In [9]:

    
night_len = int(4*3600*2.5)
mor_len = int(4*3600*2.25)
aftn_len = int(4*3600*1.5)
day_len = night_len + mor_len + aftn_len + 3

high low limit



In [11]:

    
dates1 = pd.date_range('2015-11-19 21:01:01', '2015-12-31  21:01:01', freq='D')



In [12]:

    
dates2 = pd.date_range('2015-11-20 14:59:59', '2015-12-31  14:59:59', freq='D')



In [14]:

    
type(dates1)









    Out[14]:





pandas.tseries.index.DatetimeIndex



In [17]:

    
dates1.weekday









    Out[17]:





array([3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4,
       5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3], dtype=int32)



In [18]:

    
trade_day1 = dates1[dates1.weekday != 5]
trade_day2 = dates2[np.logical_and(dates2.weekday != 5, dates2.weekday != 6)]
trade_day1









    Out[18]:





DatetimeIndex(['2015-11-19 21:01:01', '2015-11-20 21:01:01',
               '2015-11-22 21:01:01', '2015-11-23 21:01:01',
               '2015-11-24 21:01:01', '2015-11-25 21:01:01',
               '2015-11-26 21:01:01', '2015-11-27 21:01:01',
               '2015-11-29 21:01:01', '2015-11-30 21:01:01',
               '2015-12-01 21:01:01', '2015-12-02 21:01:01',
               '2015-12-03 21:01:01', '2015-12-04 21:01:01',
               '2015-12-06 21:01:01', '2015-12-07 21:01:01',
               '2015-12-08 21:01:01', '2015-12-09 21:01:01',
               '2015-12-10 21:01:01', '2015-12-11 21:01:01',
               '2015-12-13 21:01:01', '2015-12-14 21:01:01',
               '2015-12-15 21:01:01', '2015-12-16 21:01:01',
               '2015-12-17 21:01:01', '2015-12-18 21:01:01',
               '2015-12-20 21:01:01', '2015-12-21 21:01:01',
               '2015-12-22 21:01:01', '2015-12-23 21:01:01',
               '2015-12-24 21:01:01', '2015-12-25 21:01:01',
               '2015-12-27 21:01:01', '2015-12-28 21:01:01',
               '2015-12-29 21:01:01', '2015-12-30 21:01:01',
               '2015-12-31 21:01:01'],
              dtype='datetime64[ns]', freq=None)

5 ningt pan per week

a complete week starts from 11-23



In [94]:

    
rm.ix[trade_day2, ['high', 'highLimit', 'low', 'lowLimit']]









    Out[94]:






  
    
      
      high
      highLimit
      low
      lowLimit
    
  
  
    
      2015-11-20 14:59:59
      1798.0
      1866.0
      1765.0
      1722.0
    
    
      2015-11-23 14:59:59
      1796.0
      1853.0
      1709.0
      1709.0
    
    
      2015-11-24 14:59:59
      1773.0
      1826.0
      1699.0
      1684.0
    
    
      2015-11-25 14:59:59
      1787.0
      1801.0
      1751.0
      1661.0
    
    
      2015-11-26 14:59:59
      1840.0
      1842.0
      1771.0
      1700.0
    
    
      2015-11-27 14:59:59
      1821.0
      1878.0
      1789.0
      1732.0
    
    
      2015-11-30 14:59:59
      1807.0
      1878.0
      1755.0
      1732.0
    
    
      2015-12-01 14:59:59
      1834.0
      1852.0
      1790.0
      1708.0
    
    
      2015-12-02 14:59:59
      1927.0
      1957.0
      1883.0
      1805.0
    
    
      2015-12-03 14:59:59
      1899.0
      1971.0
      1851.0
      1819.0
    
    
      2015-12-04 14:59:59
      1872.0
      1944.0
      1838.0
      1794.0
    
    
      2015-12-07 14:59:59
      1886.0
      1928.0
      1857.0
      1778.0
    
    
      2015-12-08 14:59:59
      1879.0
      1948.0
      1819.0
      1798.0
    
    
      2015-12-09 14:59:59
      1852.0
      1917.0
      1817.0
      1769.0
    
    
      2015-12-10 14:59:59
      1862.0
      1911.0
      1835.0
      1763.0
    
    
      2015-12-11 14:59:59
      1892.0
      1919.0
      1838.0
      1771.0
    
    
      2015-12-14 14:59:59
      1890.0
      1938.0
      1867.0
      1788.0
    
    
      2015-12-15 14:59:59
      1921.0
      1956.0
      1873.0
      1804.0
    
    
      2015-12-16 14:59:59
      1908.0
      1976.0
      1878.0
      1824.0
    
    
      2015-12-17 14:59:59
      1917.0
      1970.0
      1864.0
      1818.0
    
    
      2015-12-18 14:59:59
      1907.0
      1967.0
      1859.0
      1815.0
    
    
      2015-12-21 14:59:59
      1941.0
      1961.0
      1899.0
      1809.0
    
    
      2015-12-22 14:59:59
      1947.0
      1994.0
      1923.0
      1840.0
    
    
      2015-12-23 14:59:59
      1934.0
      2012.0
      1909.0
      1856.0
    
    
      2015-12-24 14:59:59
      1927.0
      1998.0
      1904.0
      1844.0
    
    
      2015-12-25 14:59:59
      1920.0
      1994.0
      1896.0
      1840.0
    
    
      2015-12-28 14:59:59
      1904.0
      1984.0
      1868.0
      1830.0
    
    
      2015-12-29 14:59:59
      1899.0
      1961.0
      1868.0
      1809.0
    
    
      2015-12-30 14:59:59
      1915.0
      1960.0
      1893.0
      1808.0
    
    
      2015-12-31 14:59:59
      1924.0
      1981.0
      1903.0
      1827.0



In [100]:

    
temp = rm.ix[trade_day2, 'high'] - rm.ix[trade_day2, 'highLimit'] >-3
temp









    Out[100]:





2015-11-20 14:59:59    False
2015-11-23 14:59:59    False
2015-11-24 14:59:59    False
2015-11-25 14:59:59    False
2015-11-26 14:59:59     True
2015-11-27 14:59:59    False
2015-11-30 14:59:59    False
2015-12-01 14:59:59    False
2015-12-02 14:59:59    False
2015-12-03 14:59:59    False
2015-12-04 14:59:59    False
2015-12-07 14:59:59    False
2015-12-08 14:59:59    False
2015-12-09 14:59:59    False
2015-12-10 14:59:59    False
2015-12-11 14:59:59    False
2015-12-14 14:59:59    False
2015-12-15 14:59:59    False
2015-12-16 14:59:59    False
2015-12-17 14:59:59    False
2015-12-18 14:59:59    False
2015-12-21 14:59:59    False
2015-12-22 14:59:59    False
2015-12-23 14:59:59    False
2015-12-24 14:59:59    False
2015-12-25 14:59:59    False
2015-12-28 14:59:59    False
2015-12-29 14:59:59    False
2015-12-30 14:59:59    False
2015-12-31 14:59:59    False
dtype: bool



In [102]:

    
rm.ix[trade_day2,:].ix[temp, 'last']









    Out[102]:





2015-11-26 14:59:59    1821.0
Name: last, dtype: float64



In [136]:

    
temp[0]









    Out[136]:





Timestamp('2015-11-26 14:59:59')



In [167]:

    
for pinzhong in hft.minor_axis:
    print '\n\n#-------------------------------------'
    print pinzhong
    xx = hft.minor_xs(pinzhong)
    toohigh = xx.ix[trade_day2, 'high'] - xx.ix[trade_day2, 'highLimit'] > -2
    toolow = xx.ix[trade_day2, 'low'] - xx.ix[trade_day2, 'lowLimit'] < 2
    print 'too high: \n'
    if toohigh.any() == True:
        print xx.ix[trade_day2,:].ix[toohigh, ['high', 'highLimit']]
        temp = (xx.ix[trade_day2,:].ix[toohigh, :].index)[0]
        high = xx.ix[trade_day2,:].ix[toohigh, 'highLimit']
        #fig1 = plt.figure(figsize=(15,10))
        #ax1 = fig1.add_subplot(111)
        xx.ix[temp - pd.Timedelta(18, unit='h'): temp, 'last'].plot(figsize=(15,10))
        plt.hlines(high, temp - pd.Timedelta(18, unit='h'), temp, colors='r', linestyles='-')
        plt.show()
    print 'too low: \n'
    if toolow.any() == True:
        print xx.ix[trade_day2,:].ix[toolow, ['low', 'lowLimit']]
        temp = (xx.ix[trade_day2,:].ix[toolow, :].index)[0]
        low = xx.ix[trade_day2,:].ix[toolow, 'lowLimit']
        #fig2 = plt.figure(figsize=(15,10))
        #ax2 = fig2.add_subplot(111)
        xx.ix[temp - pd.Timedelta(18, unit='h'): temp, 'last'].plot(figsize=(15,10))
        plt.hlines(low, temp - pd.Timedelta(18, unit='h'), temp, colors='g', linestyles='-')
        plt.show()
plt.show()









    




#-------------------------------------
MA0001
too high: 

too low: 

                        low  lowLimit
2015-11-23 14:59:59  1731.0    1731.0






    












    




#-------------------------------------
RM0001
too high: 

too low: 

                        low  lowLimit
2015-11-23 14:59:59  1709.0    1709.0






    












    




#-------------------------------------
SR0001
too high: 

too low: 



#-------------------------------------
TA0001
too high: 

too low: 

                        low  lowLimit
2015-12-08 14:59:59  4296.0    4296.0

we can see that SR has no zhangting or dieting

Training dataset



In [10]:

    
ta.index[day_len*10 + 9]









    Out[10]:





Timestamp('2015-12-03 15:00:00')



In [11]:

    
#------------------   ta_10day is my training dataset
ta_10day = ta.ix[:day_len*10 + 10, :]

pice move



In [12]:

    
def Letitforward(df, forwardnum):
    df2 = df.shift(-forwardnum) - df
    df2.dropna(inplace=True)
    return df2



In [13]:

    
forward_ticks = 40

, 'volume', 'openInterest', 'TotalAskLot', 'askQty_4', 'askQty_3', 'askQty_2', 'askQty_1', 'askQty_0', 'bidQty_0', 'bidQty_1', 'bidQty_2', 'bidQty_3', 'bidQty_4', 'TotalBidLot']



In [21]:

    
ta_10day_pm =Letitforward(ta_10day.ix[:, 'last'], forward_ticks)



In [17]:

    
plt.plot(ta_10day_pm)
plt.show()









    Out[17]:





[<matplotlib.lines.Line2D at 0x7f9277dc8490>]



In [22]:

    
#----------------------------exclude last 36 ticks before ending
last_44_boolean = np.logical_and.reduce((ta_10day_pm.index.hour >= 14,
                              ta_10day_pm.index.minute >= 59,
                              ta_10day_pm.index.second >= 49))
last_boolean = ta_10day_pm.index.hour == 15



In [32]:

    
ta_10day_pm.ix[np.logical_or(last_44_boolean, last_boolean)]









    Out[32]:





2015-11-19 21:59:49.000    0.0
2015-11-19 21:59:49.250    0.0
2015-11-19 21:59:49.500    0.0
2015-11-19 21:59:49.750   -2.0
2015-11-19 21:59:50.000   -2.0
2015-11-19 21:59:50.250   -2.0
2015-11-19 21:59:50.500   -2.0
2015-11-19 21:59:50.750    0.0
2015-11-19 21:59:51.000   -2.0
2015-11-19 21:59:51.250    0.0
2015-11-19 21:59:51.500    0.0
2015-11-19 21:59:51.750    0.0
2015-11-19 21:59:52.000    0.0
2015-11-19 21:59:52.250    0.0
2015-11-19 21:59:52.500   -2.0
2015-11-19 21:59:52.750    0.0
2015-11-19 21:59:53.000   -2.0
2015-11-19 21:59:53.250   -2.0
2015-11-19 21:59:53.500   -2.0
2015-11-19 21:59:53.750   -2.0
2015-11-19 21:59:54.000   -2.0
2015-11-19 21:59:54.250   -2.0
2015-11-19 21:59:54.500    0.0
2015-11-19 21:59:54.750    0.0
2015-11-19 21:59:55.000    0.0
2015-11-19 21:59:55.250    0.0
2015-11-19 21:59:55.500    0.0
2015-11-19 21:59:55.750    0.0
2015-11-19 21:59:56.000    0.0
2015-11-19 21:59:56.250   -2.0
                          ... 
2015-12-02 22:59:53.750    2.0
2015-12-02 22:59:54.000    2.0
2015-12-02 22:59:54.250    2.0
2015-12-02 22:59:54.500    2.0
2015-12-02 22:59:54.750    2.0
2015-12-02 22:59:55.000    2.0
2015-12-02 22:59:55.250    2.0
2015-12-02 22:59:55.500    2.0
2015-12-02 22:59:55.750    2.0
2015-12-02 22:59:56.000    2.0
2015-12-02 22:59:56.250    2.0
2015-12-02 22:59:56.500    2.0
2015-12-02 22:59:56.750    2.0
2015-12-02 22:59:57.000    2.0
2015-12-02 22:59:57.250    2.0
2015-12-02 22:59:57.500    2.0
2015-12-02 22:59:57.750    0.0
2015-12-02 22:59:58.000   -2.0
2015-12-02 22:59:58.250    0.0
2015-12-02 22:59:58.500    0.0
2015-12-02 22:59:58.750    0.0
2015-12-02 22:59:59.000    0.0
2015-12-02 22:59:59.250    0.0
2015-12-02 22:59:59.500    0.0
2015-12-02 22:59:59.750    0.0
2015-12-03 14:59:49.000    2.0
2015-12-03 14:59:49.250    0.0
2015-12-03 14:59:49.500    4.0
2015-12-03 14:59:49.750    2.0
2015-12-03 14:59:50.000    2.0
Name: last, dtype: float64



In [35]:

    
outlier_boolean = abs(ta_10day_pm) > 10



In [38]:

    
ta_10day_pm.ix[outlier_boolean].plot()









    Out[38]:





<matplotlib.axes.AxesSubplot at 0x7f558fb61cd0>



In [23]:

    
ta_10day_pm = ta_10day_pm.ix[np.logical_not(np.logical_or(last_44_boolean, last_boolean))]
#ta_10day_pm.plot(figsize=(18,10))



In [24]:

    
pm_index = ta_10day_pm.index



In [25]:

    
ta_10day_last = ta_10day.ix[:, 'last']



In [ ]:

    
ta_10day_last_log =



In [28]:

    
res = sm.tsa.dseasonal_decompose(ta_10day_last)
res.plot()









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-28-9c2aeb8f4c35> in <module>()
----> 1 res = sm.tsa.seasonal_decompose(ta_10day_last)
      2 res.plot()

AttributeError: 'module' object has no attribute 'seasonal_decompose'



In [ ]:

	high	highLimit	low	lowLimit
2015-11-20 14:59:59	1798.0	1866.0	1765.0	1722.0
2015-11-23 14:59:59	1796.0	1853.0	1709.0	1709.0
2015-11-24 14:59:59	1773.0	1826.0	1699.0	1684.0
2015-11-25 14:59:59	1787.0	1801.0	1751.0	1661.0
2015-11-26 14:59:59	1840.0	1842.0	1771.0	1700.0
2015-11-27 14:59:59	1821.0	1878.0	1789.0	1732.0
2015-11-30 14:59:59	1807.0	1878.0	1755.0	1732.0
2015-12-01 14:59:59	1834.0	1852.0	1790.0	1708.0
2015-12-02 14:59:59	1927.0	1957.0	1883.0	1805.0
2015-12-03 14:59:59	1899.0	1971.0	1851.0	1819.0
2015-12-04 14:59:59	1872.0	1944.0	1838.0	1794.0
2015-12-07 14:59:59	1886.0	1928.0	1857.0	1778.0
2015-12-08 14:59:59	1879.0	1948.0	1819.0	1798.0
2015-12-09 14:59:59	1852.0	1917.0	1817.0	1769.0
2015-12-10 14:59:59	1862.0	1911.0	1835.0	1763.0
2015-12-11 14:59:59	1892.0	1919.0	1838.0	1771.0
2015-12-14 14:59:59	1890.0	1938.0	1867.0	1788.0
2015-12-15 14:59:59	1921.0	1956.0	1873.0	1804.0
2015-12-16 14:59:59	1908.0	1976.0	1878.0	1824.0
2015-12-17 14:59:59	1917.0	1970.0	1864.0	1818.0
2015-12-18 14:59:59	1907.0	1967.0	1859.0	1815.0
2015-12-21 14:59:59	1941.0	1961.0	1899.0	1809.0
2015-12-22 14:59:59	1947.0	1994.0	1923.0	1840.0
2015-12-23 14:59:59	1934.0	2012.0	1909.0	1856.0
2015-12-24 14:59:59	1927.0	1998.0	1904.0	1844.0
2015-12-25 14:59:59	1920.0	1994.0	1896.0	1840.0
2015-12-28 14:59:59	1904.0	1984.0	1868.0	1830.0
2015-12-29 14:59:59	1899.0	1961.0	1868.0	1809.0
2015-12-30 14:59:59	1915.0	1960.0	1893.0	1808.0
2015-12-31 14:59:59	1924.0	1981.0	1903.0	1827.0