In [1]:
from sys import path

In [2]:
path.append('/home/bingnan/ecworkspace/HFT1')

Import Modules I Write


In [3]:
from Initialize_module_part import *

In [4]:
from MyOLS import *

In [5]:
%matplotlib inline

Change Plotting Style


In [6]:
# Style. 1
sns.set_context('paper')
sns.set_style("darkgrid")

In [7]:
# Style. 2
sns.set_context('paper')
sns.set_style("dark", 
    rc={'axes.facecolor': 'black', 
    'grid.color': 'red', 
    'grid.linestyle': '--', 
    'figure.facecolor': 'grey'})

Prepare Data


In [8]:
hft = pd.read_hdf('/home/bingnan/HFT_SR_RM_MA_TA.hdf')


/usr/lib/python2.7/dist-packages/tables/leaf.py:392: PerformanceWarning: The Leaf ``/HFT/block0_values`` is exceeding the maximum recommended rowsize (104857600 bytes);
be ready to see PyTables asking for *lots* of memory and possibly slow
I/O.  You may want to reduce the rowsize by trimming the value of
dimensions that are orthogonal (and preferably close) to the *main*
dimension of this leave.  Alternatively, in case you have specified a
very small/large chunksize, you may want to increase/decrease it.
  PerformanceWarning)

In [11]:
ta = hft.minor_xs('TA0001')

In [12]:
ta = AddCol(ta)

In [13]:
ta_pm = GiveMePM(ta, nforward=60, nbackward=100, lim=[0, 30], cutdepth=0, norm=False, high_var_length=200)

In [66]:
plt.plot(ta_pm)


Out[66]:
[<matplotlib.lines.Line2D at 0x7f8369f3e690>]

In [46]:
def MyMA(ser, method='ewm', length=30):
    if method == 'ewm':
        r = ser.ewm(halflife=length)
    elif method == 'sma':
        r = ser.rolling(window=length)
    ret = r.mean()
    #ret.name = ret.name + '_MA'
    return ret

Calculate Price-aligned bid/ask Quantity Change Indicator

Because limit orders are placed on a certain place, we should investigate information aligned by prices


In [15]:
def QtyLogDiffMean(df, halfwindow=15, method='ewma'):
    '''
    mean_arr:
    row 0 is keys
    row 1 are mean Qty
    '''
    df2 = np.log(df)
    df2 = df2.diff()
    if df2.ndim > 1:
        df2 = df2.mean(axis=1, skipna=True)
    if method == 'ewma':
        roll = df2.ewm(halflife=halfwindow)
    elif method == 'sma':
        roll = df2.rolling(window=halfwindow)
    else:
        print 'ERROR!'
    indicator = roll.mean()
    return indicator

In [16]:
#--------------------------------------------------------------------------------------------
def GetQty2(samp, b=True, depth=5):
    '''
    input data of a window length
    return a dict, whose keys are bid/ask price, values are bid/ask Qty
    b: True for bidQty, False for askQty
    '''
    if b:
        s1 = 'bid'
    else:
        s1 = 'ask'
    cols = (s1+'Prc', s1+'Qty')
    df_PrcQty_all = pd.DataFrame(data=None, index=None, columns=cols)
    df_arr = []
    for i in range(depth):
        s2 = s1 + 'Prc_' + str(i)
        s3 = s1 + 'Qty_' + str(i)
        df_PrcQty = samp.ix[:, [s2, s3]]
        df_PrcQty.columns = cols
        df_arr.append(df_PrcQty)
    df_PrcQty_all = pd.concat(df_arr, axis=0, join='outer')

    grouped = df_PrcQty_all.groupby(cols[0])
    ############## only for counting
    n = grouped.ngroups
    count = 0
    ############## only for counting
    sers = []
#     ret = pd.DataFrame(data=None, index=samp.index, 
#                        #columns=grouped.groups.keys()
#                        )
    for name, group in grouped:
        if name == 0.:
            continue
        ser = group.ix[:, cols[1]]
        ser.name = name
        ser = ser.sort_index(axis=0)
        ############## only for counting
        count += 1
        #print ('\n------  %d / %d ------' % (count, n))
        ############## only for counting
        #ret[name] = ser
        ####ret = ret.join(ser)
        sers.append(ser)
    ret = pd.DataFrame(data=sers)
    print '!!!'
    ret = ret.reindex_axis(sorted(ret.columns), axis=1)
    return ret

Calculation for Each Day

Data of different days are not continuous, to avoid mixing them, we should calculate indicator for every day length then concate them


In [19]:
mywind = 15

In [59]:
def QtyDiffLog(df):
    mydiff = df.diff(axis=0)
    logscale = np.sign(mydiff) * np.log(np.abs(mydiff) + 1) # minimum is 1, so no need to add 1.
    mymean = logscale.mean(axis=1)
    return mymean

In [28]:
temp = bidask.diff()

In [29]:
temp1 = temp.values.flatten()

In [35]:
temp2 = temp1[np.logical_not(np.isnan(temp1))]

In [38]:
plt.plot(temp2)


Out[38]:
[<matplotlib.lines.Line2D at 0x7f612bf286d0>]

In [41]:
plt.hist(temp2, bins=np.linspace(-10, 10, 400))


Out[41]:
(array([  1.51300000e+03,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   2.51000000e+02,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         3.52000000e+02,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   3.30000000e+02,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   6.04000000e+02,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         1.87400000e+03,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   1.01100000e+03,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   1.47700000e+03,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         3.65500000e+03,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   1.08480000e+04,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   4.81704000e+05,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         1.14920000e+04,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   4.06500000e+03,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   1.55700000e+03,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         1.13300000e+03,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   1.94300000e+03,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   6.71000000e+02,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         3.13000000e+02,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   3.50000000e+02,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   2.73000000e+02,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   1.34800000e+03]),
 array([-10.        ,  -9.94987469,  -9.89974937,  -9.84962406,
        -9.79949875,  -9.74937343,  -9.69924812,  -9.64912281,
        -9.59899749,  -9.54887218,  -9.49874687,  -9.44862155,
        -9.39849624,  -9.34837093,  -9.29824561,  -9.2481203 ,
        -9.19799499,  -9.14786967,  -9.09774436,  -9.04761905,
        -8.99749373,  -8.94736842,  -8.89724311,  -8.84711779,
        -8.79699248,  -8.74686717,  -8.69674185,  -8.64661654,
        -8.59649123,  -8.54636591,  -8.4962406 ,  -8.44611529,
        -8.39598997,  -8.34586466,  -8.29573935,  -8.24561404,
        -8.19548872,  -8.14536341,  -8.0952381 ,  -8.04511278,
        -7.99498747,  -7.94486216,  -7.89473684,  -7.84461153,
        -7.79448622,  -7.7443609 ,  -7.69423559,  -7.64411028,
        -7.59398496,  -7.54385965,  -7.49373434,  -7.44360902,
        -7.39348371,  -7.3433584 ,  -7.29323308,  -7.24310777,
        -7.19298246,  -7.14285714,  -7.09273183,  -7.04260652,
        -6.9924812 ,  -6.94235589,  -6.89223058,  -6.84210526,
        -6.79197995,  -6.74185464,  -6.69172932,  -6.64160401,
        -6.5914787 ,  -6.54135338,  -6.49122807,  -6.44110276,
        -6.39097744,  -6.34085213,  -6.29072682,  -6.2406015 ,
        -6.19047619,  -6.14035088,  -6.09022556,  -6.04010025,
        -5.98997494,  -5.93984962,  -5.88972431,  -5.839599  ,
        -5.78947368,  -5.73934837,  -5.68922306,  -5.63909774,
        -5.58897243,  -5.53884712,  -5.4887218 ,  -5.43859649,
        -5.38847118,  -5.33834586,  -5.28822055,  -5.23809524,
        -5.18796992,  -5.13784461,  -5.0877193 ,  -5.03759398,
        -4.98746867,  -4.93734336,  -4.88721805,  -4.83709273,
        -4.78696742,  -4.73684211,  -4.68671679,  -4.63659148,
        -4.58646617,  -4.53634085,  -4.48621554,  -4.43609023,
        -4.38596491,  -4.3358396 ,  -4.28571429,  -4.23558897,
        -4.18546366,  -4.13533835,  -4.08521303,  -4.03508772,
        -3.98496241,  -3.93483709,  -3.88471178,  -3.83458647,
        -3.78446115,  -3.73433584,  -3.68421053,  -3.63408521,
        -3.5839599 ,  -3.53383459,  -3.48370927,  -3.43358396,
        -3.38345865,  -3.33333333,  -3.28320802,  -3.23308271,
        -3.18295739,  -3.13283208,  -3.08270677,  -3.03258145,
        -2.98245614,  -2.93233083,  -2.88220551,  -2.8320802 ,
        -2.78195489,  -2.73182957,  -2.68170426,  -2.63157895,
        -2.58145363,  -2.53132832,  -2.48120301,  -2.43107769,
        -2.38095238,  -2.33082707,  -2.28070175,  -2.23057644,
        -2.18045113,  -2.13032581,  -2.0802005 ,  -2.03007519,
        -1.97994987,  -1.92982456,  -1.87969925,  -1.82957393,
        -1.77944862,  -1.72932331,  -1.67919799,  -1.62907268,
        -1.57894737,  -1.52882206,  -1.47869674,  -1.42857143,
        -1.37844612,  -1.3283208 ,  -1.27819549,  -1.22807018,
        -1.17794486,  -1.12781955,  -1.07769424,  -1.02756892,
        -0.97744361,  -0.9273183 ,  -0.87719298,  -0.82706767,
        -0.77694236,  -0.72681704,  -0.67669173,  -0.62656642,
        -0.5764411 ,  -0.52631579,  -0.47619048,  -0.42606516,
        -0.37593985,  -0.32581454,  -0.27568922,  -0.22556391,
        -0.1754386 ,  -0.12531328,  -0.07518797,  -0.02506266,
         0.02506266,   0.07518797,   0.12531328,   0.1754386 ,
         0.22556391,   0.27568922,   0.32581454,   0.37593985,
         0.42606516,   0.47619048,   0.52631579,   0.5764411 ,
         0.62656642,   0.67669173,   0.72681704,   0.77694236,
         0.82706767,   0.87719298,   0.9273183 ,   0.97744361,
         1.02756892,   1.07769424,   1.12781955,   1.17794486,
         1.22807018,   1.27819549,   1.3283208 ,   1.37844612,
         1.42857143,   1.47869674,   1.52882206,   1.57894737,
         1.62907268,   1.67919799,   1.72932331,   1.77944862,
         1.82957393,   1.87969925,   1.92982456,   1.97994987,
         2.03007519,   2.0802005 ,   2.13032581,   2.18045113,
         2.23057644,   2.28070175,   2.33082707,   2.38095238,
         2.43107769,   2.48120301,   2.53132832,   2.58145363,
         2.63157895,   2.68170426,   2.73182957,   2.78195489,
         2.8320802 ,   2.88220551,   2.93233083,   2.98245614,
         3.03258145,   3.08270677,   3.13283208,   3.18295739,
         3.23308271,   3.28320802,   3.33333333,   3.38345865,
         3.43358396,   3.48370927,   3.53383459,   3.5839599 ,
         3.63408521,   3.68421053,   3.73433584,   3.78446115,
         3.83458647,   3.88471178,   3.93483709,   3.98496241,
         4.03508772,   4.08521303,   4.13533835,   4.18546366,
         4.23558897,   4.28571429,   4.3358396 ,   4.38596491,
         4.43609023,   4.48621554,   4.53634085,   4.58646617,
         4.63659148,   4.68671679,   4.73684211,   4.78696742,
         4.83709273,   4.88721805,   4.93734336,   4.98746867,
         5.03759398,   5.0877193 ,   5.13784461,   5.18796992,
         5.23809524,   5.28822055,   5.33834586,   5.38847118,
         5.43859649,   5.4887218 ,   5.53884712,   5.58897243,
         5.63909774,   5.68922306,   5.73934837,   5.78947368,
         5.839599  ,   5.88972431,   5.93984962,   5.98997494,
         6.04010025,   6.09022556,   6.14035088,   6.19047619,
         6.2406015 ,   6.29072682,   6.34085213,   6.39097744,
         6.44110276,   6.49122807,   6.54135338,   6.5914787 ,
         6.64160401,   6.69172932,   6.74185464,   6.79197995,
         6.84210526,   6.89223058,   6.94235589,   6.9924812 ,
         7.04260652,   7.09273183,   7.14285714,   7.19298246,
         7.24310777,   7.29323308,   7.3433584 ,   7.39348371,
         7.44360902,   7.49373434,   7.54385965,   7.59398496,
         7.64411028,   7.69423559,   7.7443609 ,   7.79448622,
         7.84461153,   7.89473684,   7.94486216,   7.99498747,
         8.04511278,   8.0952381 ,   8.14536341,   8.19548872,
         8.24561404,   8.29573935,   8.34586466,   8.39598997,
         8.44611529,   8.4962406 ,   8.54636591,   8.59649123,
         8.64661654,   8.69674185,   8.74686717,   8.79699248,
         8.84711779,   8.89724311,   8.94736842,   8.99749373,
         9.04761905,   9.09774436,   9.14786967,   9.19799499,
         9.2481203 ,   9.29824561,   9.34837093,   9.39849624,
         9.44862155,   9.49874687,   9.54887218,   9.59899749,
         9.64912281,   9.69924812,   9.74937343,   9.79949875,
         9.84962406,   9.89974937,   9.94987469,  10.        ]),
 <a list of 399 Patch objects>)

In [60]:
#----------------------------------------------------------
'''Calculation qty_indicator'''
qty_indicator = pd.Series(data=None, index=None)
for d in range(0, 30, 1):
    mysample = ta.ix[day_len * (d): day_len * (d+1), :]
    biddic = GetQty2(mysample, b=True, depth=3)
    askdic = GetQty2(mysample, b=False, depth=3)
    biddic = biddic.T #TODO: this transformation is because the GetQty2 func is not good enough
    askdic = askdic.T
    bidask = biddic.combine_first((-1)*askdic)
    print '\n-------%d -------------------' %d
#     bidask2 = bidask + 2 * abs(bidask).max().max()
    qty_indicator = pd.concat([qty_indicator, QtyDiffLog(bidask)], axis=0, join='outer')


!!!
!!!

-------0 -------------------
!!!
!!!

-------1 -------------------
!!!
!!!

-------2 -------------------
!!!
!!!

-------3 -------------------
!!!
!!!

-------4 -------------------
!!!
!!!

-------5 -------------------
!!!
!!!

-------6 -------------------
!!!
!!!

-------7 -------------------
!!!
!!!

-------8 -------------------
!!!
!!!

-------9 -------------------
!!!
!!!

-------10 -------------------
!!!
!!!

-------11 -------------------
!!!
!!!

-------12 -------------------
!!!
!!!

-------13 -------------------
!!!
!!!

-------14 -------------------
!!!
!!!

-------15 -------------------
!!!
!!!

-------16 -------------------
!!!
!!!

-------17 -------------------
!!!
!!!

-------18 -------------------
!!!
!!!

-------19 -------------------
!!!
!!!

-------20 -------------------
!!!
!!!

-------21 -------------------
!!!
!!!

-------22 -------------------
!!!
!!!

-------23 -------------------
!!!
!!!

-------24 -------------------
!!!
!!!

-------25 -------------------
!!!
!!!

-------26 -------------------
!!!
!!!

-------27 -------------------
!!!
!!!

-------28 -------------------
!!!
!!!

-------29 -------------------

In [44]:
%matplotlib auto


Using matplotlib backend: TkAgg

In [46]:
fig = plt.figure()
ax1 = fig.add_subplot(211)
ax1.plot(mysample.ix[:, 'midPrc'])
ax2 = fig.add_subplot(212, sharex=ax1)
ax2.plot(QtyLogDiffMean2(bidask, halfwindow=mywind))


Out[46]:
[<matplotlib.lines.Line2D at 0x7f7bf6ee0890>]

In [17]:
def QtyLogDiffMean2(df, halfwindow=15, method='ewm'):
    mydiff = df.diff(axis=0)
    logscale = np.sign(mydiff) * np.log(np.abs(mydiff) + 1) # minimum is 1, so no need to add 1.
    mymean = logscale.mean(axis=1)
    ret = MyMA(mymean, method, halfwindow)
    return ret

Check out Distribution of the Indicator


In [61]:
qty_indicator.hist(bins=100)


Out[61]:
<matplotlib.axes.AxesSubplot at 0x7f8388524850>

In [38]:
qty_indicator[ta_pm.index].hist(bins=100)


Out[38]:
<matplotlib.axes.AxesSubplot at 0x7f7bf7f5b650>

Use power transformation


In [94]:
qty_indicator_power = np.power(qty_indicator, 1./3)

In [146]:
len(qty_indicator_out.index)


Out[146]:
450020

In [135]:
len(qty_indicator.index)


Out[135]:
1800080

In [32]:
%matplotlib auto


Using matplotlib backend: TkAgg

Near lowLimit, price depth is shalow


In [116]:
ta_in.ix[164841, ['last', 'bidPrc_0', 'lowLimit',]]


Out[116]:
last        1712.0
bidPrc_0    1711.0
lowLimit    1709.0
Name: 2015-11-23 13:56:48.500000, dtype: float64

In [108]:
ta_in.ix[164841: 199946, 'last'].plot()


Out[108]:
<matplotlib.axes.AxesSubplot at 0x7f045c868cd0>

In [112]:
(ta_in.ix[:, 'bidPrc_0'] == 0).sum()


Out[112]:
7262

In [110]:
ta_in.ix[164841-13000: 199946, 'bidPrc_0'].plot()


Out[110]:
<matplotlib.axes.AxesSubplot at 0x7f04577a1c90>

Test out-sample Performance


In [44]:
insample_index0, outsample_index0 = GiveMeIndex([[0,2], [4, 6], [8, 10], [14, 16], [18,20], [22, 24], [26,28]],
                                               [[2, 4], [6, 8], [12, 14], [16, 18], [20,22], [24, 26], [28,30]])
insample_index1, outsample_index1 = GiveMeIndex([[0, 20]], [[20, 25]])
insample_index2, outsample_index2 = GiveMeIndex([[5, 25]], [[0, 5]])
insample_index3, outsample_index3 = GiveMeIndex([[0, 5], [10, 25]], [[5, 10]])
insample_index4, outsample_index4 = GiveMeIndex([[0, 10], [15, 25]], [[10, 15]])
insample_index5, outsample_index5 = GiveMeIndex([[10, 30]], [[0, 5]])
insample_index6, outsample_index6 = GiveMeIndex([[0, 18]], [[18, 25]])
# different_io_index = ((insample_index0, outsample_index0), (insample_index1, outsample_index1), 
#                       (insample_index2, outsample_index2), (insample_index3, outsample_index3),
#                       (insample_index4, outsample_index4), (insample_index5, outsample_index5))
insample_index00, outsample_index00 = GiveMeIndex([[0, 20]],
                                               [[25, 30]])

In [75]:
for forward_ticks in range(44, 45, 1):
    ta_in_pm = GiveMePM(ta, nforward=forward_ticks, nbackward=mywind, lim=insample_index1)
    ta_out_pm = GiveMePM(ta, nforward=forward_ticks, nbackward=mywind, lim=outsample_index1)
    for mywind in range(12, 13, 1):
        qty_indicator_ma = MyMA(qty_indicator, method='sma', length=mywind)
        
        res = myols(qty_indicator_ma, ta_in_pm)
        print ('\n\n\n------halflife: %d,     forward_ticks: %d ------------' %(mywind, forward_ticks))
        print res.summary()
        print '\n[OutSample R_SQUARE: %f]' % (PredictedRsquare(res, qty_indicator_ma, ta_out_pm))[1]




------halflife: 12,     forward_ticks: 44 ------------
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 midPrc   R-squared:                       0.018
Model:                            OLS   Adj. R-squared:                  0.018
Method:                 Least Squares   F-statistic:                 3.243e+04
Date:                Tue, 19 Jul 2016   Prob (F-statistic):               0.00
Time:                        09:34:53   Log-Likelihood:            -3.0901e+06
No. Observations:             1786394   AIC:                         6.180e+06
Df Residuals:                 1786392   BIC:                         6.180e+06
Df Model:                           1                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const          0.0038      0.001      3.705      0.000         0.002     0.006
None           1.3327      0.007    180.079      0.000         1.318     1.347
==============================================================================
Omnibus:                   633345.200   Durbin-Watson:                   0.089
Prob(Omnibus):                  0.000   Jarque-Bera (JB):        134099992.967
Skew:                           0.514   Prob(JB):                         0.00
Kurtosis:                      45.433   Cond. No.                         7.25
==============================================================================

[OutSample R_SQUARE: 0.017340]

In [67]:
qty_indicator_ma.to_hdf('qty_indicator_ma.hdf', 'Series')

In [47]:
fig, ax = plt.subplots()
fig = sm.graphics.plot_fit(res, res.model.exog_names[1], ax=ax)
fig.show()


/usr/lib/pymodules/python2.7/matplotlib/figure.py:371: UserWarning: matplotlib is currently using a non-GUI backend, so cannot show the figure
  "matplotlib is currently using a non-GUI backend, "
/usr/lib/pymodules/python2.7/matplotlib/collections.py:548: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == 'face':

Total bid/ask Lot Indicator


In [17]:
def Normalize(ser):
    mx = ser.max()
    mn = ser.min()
    ret = (ser - mn + .01) / (mx + mn - .001)
    return ret

In [18]:
def Standardize(ser):
    mystd = ser.std()
    mymean = ser.mean()
    ret = (ser - mymean) / mystd
    return ret

In [19]:
def PowerTrans(ser, power):
    condi = np.where(ser > 0, 1, -1)
    ret = np.power(np.abs(ser), power) * condi
    return ret

In [31]:
def NormDiffMean(df, halfwindow=15, method='ewma'):
    '''
    mean_arr:
    row 0 is keys
    row 1 are mean Qty
    '''
    df2 = Normalize(df)
    df2 = np.log(df2)
    df2 = df2.diff()
    if df2.ndim > 1:
        df2 = df2.mean(axis=1, skipna=True)
    if method == 'ewma':
        roll = df2.ewm(halflife=halfwindow)
    elif method == 'sma':
        roll = df2.rolling(window=halfwindow)
    else:
        print 'ERROR!'
    ret = roll.mean()
    return ret

In [37]:
temp = NormDiffMean(ta.ix[:, 'TotalBidLot'], 15) - NormDiffMean(ta.ix[:, 'TotalAskLot'], 15)
#temp = temp[ta_pm.index].dropna()
NormDiffMean(temp).hist(bins=100)


Out[37]:
<matplotlib.axes.AxesSubplot at 0x7f46a5fa0310>

In [35]:
fig = plt.figure(figsize=(16,8))
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
NormDiffMean(ta.ix[:, 'TotalBidLot'], 15).hist(ax=ax1)
NormDiffMean(ta.ix[:, 'TotalAskLot'], 15).hist(ax=ax2)


Out[35]:
<matplotlib.axes.AxesSubplot at 0x7f46a52955d0>

In [86]:
fig = plt.figure(figsize=(16,8))
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)
ta.ix[:, 'TotalBidLot'].hist(ax=ax1)
ta.ix[:, 'TotalAskLot'].hist(ax=ax2)


Out[86]:
<matplotlib.axes.AxesSubplot at 0x7fc0659aafd0>

In [76]:
ta.ix[:, 'TotalBidLot'].describe()


Out[76]:
count    2.700120e+06
mean     4.558636e+04
std      1.672607e+04
min      0.000000e+00
25%      3.189700e+04
50%      4.357200e+04
75%      5.826400e+04
max      8.950600e+04
Name: TotalBidLot, dtype: float64

In [23]:
insample_index0, outsample_index0 = GiveMeIndex([[0,2], [4, 6], [8, 10], [14, 16], [18,20], [22, 24], [26,28]],
                                               [[2, 4], [6, 8], [12, 14], [16, 18], [20,22], [24, 26], [28,30]])
insample_index1, outsample_index1 = GiveMeIndex([[0, 12], [14,20]], [[20, 25]])
insample_index2, outsample_index2 = GiveMeIndex([[5, 25]], [[0, 5]])
insample_index3, outsample_index3 = GiveMeIndex([[0, 5], [10, 25]], [[5, 10]])
insample_index4, outsample_index4 = GiveMeIndex([[0, 10], [15, 25]], [[10, 15]])
insample_index5, outsample_index5 = GiveMeIndex([[0, 13]], [[13, 25]])
insample_index6, outsample_index6 = GiveMeIndex([[0, 18]], [[18, 25]])
# different_io_index = ((insample_index0, outsample_index0), (insample_index1, outsample_index1), 
#                       (insample_index2, outsample_index2), (insample_index3, outsample_index3),
#                       (insample_index4, outsample_index4), (insample_index5, outsample_index5))
insample_index00, outsample_index00 = GiveMeIndex([[0, 20]],
                                               [[25, 30]])

In [28]:
import statsmodels.api as sm
import matplotlib.pyplot as plt
def myols(ser, pm, norm=False):
    '''
    ser is indicator Series
    pm is Price move Series
    sm is satatsmodel module
    this function also automatically align index of df and pm
    '''
    global sm
    ser = ser[pm.index]
    ser = ser.dropna()
    if norm:
        ser = (ser - ser.mean()) / ser.std()
    X = sm.add_constant(ser)
    Y = pm[X.index]
    model = sm.OLS(Y, X)
    ret = model.fit()
    return ret

def Rsquare(y, yhat):
    # ret = 1 - (y-yhat).var() / y.var()
    ret = 1 - ((y-yhat)**2).mean() / y.var()
    return ret

def PredictedRsquare(res, xnew, pm):
    '''
    pm: outsample price move Series
    xnew: indicator Series (or DataFrame)
    res: insample regression results (comes from statsmodel's model.fit() )
    '''
    # first we need to align xnew with outsample
    xnew = xnew[pm.index]
    xnew = xnew.dropna()
    y = pm[xnew.index]
    
    xnew = sm.add_constant(xnew)
    ynew = res.predict(xnew)
    rsq = Rsquare(y, ynew)
    return ynew, rsq

def PlotFit(fitres):
    fig, ax = plt.subplots()
    fig = sm.graphics.plot_fit(fitres, fitres.model.exog_names[1], ax=ax)
    fig.show()

In [38]:
for mywind in range(5, 30, 5):
    totallot_indicator =QtyLogDiffMean(ta.ix[:, 'TotalBidLot'], halfwindow=mywind) 
    - QtyLogDiffMean(ta.ix[:, 'TotalAskLot'], halfwindow=mywind)
    
#     totallot_indicator = (QtyLogDiffMean(ta.ix[:, 'TotalBidLot'], halfwindow=windou) 
#                            - QtyLogDiffMean(ta.ix[:, 'TotalAskLot'], halfwindow=windou))
   # totallot_indicator = totallot_indicator[ta_pm].dropna()
    totallot_indicator.name = 'totallot_indicator'
    for forward_ticks in range(44, 45, 1):
        ta_in_pm = GiveMePM(ta, nforward=forward_ticks, nbackward=mywind, lim=insample_index1)
        ta_out_pm = GiveMePM(ta, nforward=forward_ticks, nbackward=mywind, lim=outsample_index1)
        res = myols(totallot_indicator, ta_in_pm)
        print ('\n\n\n------halflife: %d,     forward_ticks: %d ------------' %(mywind, forward_ticks))
        print res.summary()
        print '\n[OutSample R_SQUARE: %f]' % (PredictedRsquare(res, totallot_indicator, ta_out_pm))[1]




------halflife: 5,     forward_ticks: 44 ------------
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 midPrc   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                     5511.
Date:                Sun, 17 Jul 2016   Prob (F-statistic):               0.00
Time:                        11:35:17   Log-Likelihood:            -2.8086e+06
No. Observations:             1612062   AIC:                         5.617e+06
Df Residuals:                 1612060   BIC:                         5.617e+06
Df Model:                           1                                         
======================================================================================
                         coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
const                  0.0035      0.001      3.189      0.001         0.001     0.006
totallot_indicator   155.2199      2.091     74.238      0.000       151.122   159.318
==============================================================================
Omnibus:                   608914.445   Durbin-Watson:                   0.085
Prob(Omnibus):                  0.000   Jarque-Bera (JB):        135465283.861
Skew:                           0.638   Prob(JB):                         0.00
Kurtosis:                      47.890   Cond. No.                     1.92e+03
==============================================================================

Warnings:
[1] The condition number is large, 1.92e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

[OutSample R_SQUARE: -0.000107]



------halflife: 10,     forward_ticks: 44 ------------
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 midPrc   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     6723.
Date:                Sun, 17 Jul 2016   Prob (F-statistic):               0.00
Time:                        11:35:26   Log-Likelihood:            -2.8080e+06
No. Observations:             1612062   AIC:                         5.616e+06
Df Residuals:                 1612060   BIC:                         5.616e+06
Df Model:                           1                                         
======================================================================================
                         coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
const                  0.0024      0.001      2.237      0.025         0.000     0.005
totallot_indicator   231.0883      2.818     81.996      0.000       225.564   236.612
==============================================================================
Omnibus:                   610401.057   Durbin-Watson:                   0.084
Prob(Omnibus):                  0.000   Jarque-Bera (JB):        135548141.646
Skew:                           0.644   Prob(JB):                         0.00
Kurtosis:                      47.904   Cond. No.                     2.59e+03
==============================================================================

Warnings:
[1] The condition number is large, 2.59e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

[OutSample R_SQUARE: -0.000413]



------halflife: 15,     forward_ticks: 44 ------------
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 midPrc   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     7070.
Date:                Sun, 17 Jul 2016   Prob (F-statistic):               0.00
Time:                        11:35:34   Log-Likelihood:            -2.8078e+06
No. Observations:             1612062   AIC:                         5.616e+06
Df Residuals:                 1612060   BIC:                         5.616e+06
Df Model:                           1                                         
======================================================================================
                         coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
const                  0.0017      0.001      1.602      0.109        -0.000     0.004
totallot_indicator   280.8695      3.340     84.082      0.000       274.322   287.417
==============================================================================
Omnibus:                   610844.146   Durbin-Watson:                   0.084
Prob(Omnibus):                  0.000   Jarque-Bera (JB):        135575904.728
Skew:                           0.645   Prob(JB):                         0.00
Kurtosis:                      47.908   Cond. No.                     3.07e+03
==============================================================================

Warnings:
[1] The condition number is large, 3.07e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

[OutSample R_SQUARE: -0.000459]



------halflife: 20,     forward_ticks: 44 ------------
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 midPrc   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     7079.
Date:                Sun, 17 Jul 2016   Prob (F-statistic):               0.00
Time:                        11:35:43   Log-Likelihood:            -2.8078e+06
No. Observations:             1612062   AIC:                         5.616e+06
Df Residuals:                 1612060   BIC:                         5.616e+06
Df Model:                           1                                         
======================================================================================
                         coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
const                  0.0012      0.001      1.139      0.255        -0.001     0.003
totallot_indicator   316.7386      3.765     84.138      0.000       309.360   324.117
==============================================================================
Omnibus:                   610870.805   Durbin-Watson:                   0.084
Prob(Omnibus):                  0.000   Jarque-Bera (JB):        135552880.987
Skew:                           0.645   Prob(JB):                         0.00
Kurtosis:                      47.904   Cond. No.                     3.46e+03
==============================================================================

Warnings:
[1] The condition number is large, 3.46e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

[OutSample R_SQUARE: -0.000459]



------halflife: 25,     forward_ticks: 44 ------------
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 midPrc   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     6936.
Date:                Sun, 17 Jul 2016   Prob (F-statistic):               0.00
Time:                        11:35:52   Log-Likelihood:            -2.8079e+06
No. Observations:             1612062   AIC:                         5.616e+06
Df Residuals:                 1612060   BIC:                         5.616e+06
Df Model:                           1                                         
======================================================================================
                         coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
const                  0.0009      0.001      0.789      0.430        -0.001     0.003
totallot_indicator   343.9778      4.130     83.283      0.000       335.883   352.073
==============================================================================
Omnibus:                   610729.860   Durbin-Watson:                   0.084
Prob(Omnibus):                  0.000   Jarque-Bera (JB):        135512148.997
Skew:                           0.645   Prob(JB):                         0.00
Kurtosis:                      47.898   Cond. No.                     3.80e+03
==============================================================================

Warnings:
[1] The condition number is large, 3.8e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

[OutSample R_SQUARE: -0.000459]

In [63]:
fig, ax = plt.subplots()
fig = sm.graphics.plot_fit(res, res.model.exog_names[1], ax=ax)


/usr/lib/pymodules/python2.7/matplotlib/collections.py:548: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == 'face':

check the distribution of indicator


In [69]:
totallot_indicator.describe()


Out[69]:
count    2.700119e+06
mean    -3.549160e-04
std      1.086576e-02
min     -4.168572e-01
25%     -1.137495e-04
50%     -2.959191e-07
75%      1.126383e-04
max      3.923453e+00
Name: totallot_indicator, dtype: float64

In [30]:
totallot_indicator.hist(bins=100)


Out[30]:
<matplotlib.axes.AxesSubplot at 0x7f46a5dc9ad0>

In [58]:
ta_pm = ForwardDiff(ta.ix[:, 'last'], forward_ticks)

In [185]:
roll = ta.ix[:, 'last'].rolling(window=80000)

In [186]:
temp = roll.std()

In [188]:
plt.plot(temp)


Out[188]:
[<matplotlib.lines.Line2D at 0x7ffd0b107790>]

In [189]:
plt.plot(res.resid)


Out[189]:
[<matplotlib.lines.Line2D at 0x7ffd0b97e090>]

In [309]:
%matplotlib inline

In [26]:
qty_indicator.hist(bins=np.arange(-.01005, .01005, .0001))


Out[26]:
<matplotlib.axes.AxesSubplot at 0x7f4a98a95750>

Using Power transformation to adjust the distribution of indicator


In [136]:
def PowerTrans(ser, power):
condi = np.where(ser > 0, 1, -1)
indicator_power = np.power(np.abs(power),power) * condi

In [169]:
indicator_power = np.power(np.abs(totallot_indicator), 1.05) * condi

In [171]:
indicator_power.hist(bins=np.arange(-.01005, .01005, .0001))


Out[171]:
<matplotlib.axes.AxesSubplot at 0x7ffd0c00b590>

In [53]:
qty_indicator.hist(bins=np.arange(-0.505, 0.505, .01), color='lightyellow')


Out[53]:
<matplotlib.axes.AxesSubplot at 0x7ff354b3e890>

Try Boolinger Bands Indicator


In [ ]:
def BoolingerIndicator(df, window=33):
    mid_roll = df.ix[:, 'midPrc'].rolling(window=200)
    mymean = mid_roll.mean()
    mystd =mid_roll.std()
    ret = mymean - df.ix[:, 'midPrc']) / mystd

In [110]:
temp = pd.Series(data=None, index=[0,1,2,3])
temp.fillna(value=0.)


Out[110]:
0    0.0
1    0.0
2    0.0
3    0.0
dtype: float64

In [119]:
def LWAP(df, level=4):
    fenzi = pd.Series(data=None, index=df.index)
    fenzi = fenzi.fillna(value=0.)
    fenmu = pd.Series(data=None, index=df.index)
    fenmu = fenmu.fillna(value=0.)
    for i in range(level):
        s1 = 'bidPrc_' + str(i)
        s2 = 'askPrc_' + str(i)
        s3 = 'bidQty_' + str(i)
        s4 = 'askQty_' + str(i)
        fenzi += df[s1] * df[s3] + df[s2] * df[s4]
        fenmu += df[s3] + df[s4]
    lwap = fenzi/fenmu
    lwap.name = 'lwap_indicator'
#     indicator = 
    return lwap

In [137]:
lwap = LWAP(ta)

In [138]:
temp = lwap - ta['midPrc']
#temp = temp[selected_arr]

In [198]:
selected_arr = CuthlLimit(ta, 44, 10, depth=4)

In [194]:
plt.plot(temp[selected_arr])
plt.figure()
temp[selected_arr].hist(bins=200)


Out[194]:
<matplotlib.axes.AxesSubplot at 0x7f4654b24190>

In [200]:
plt.plot((ta.ix[:, 'bidPrc_4'] - ta.ix[:, 'bidPrc_0'])[selected_arr])
plt.figure()
plt.hist((ta.ix[:, 'bidPrc_4'] - ta.ix[:, 'bidPrc_0'])[selected_arr])


Out[200]:
(array([  1.00000000e+00,   0.00000000e+00,   3.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   3.10000000e+01,
         0.00000000e+00,   4.39000000e+02,   0.00000000e+00,
         2.68311600e+06]),
 array([-16. , -15.2, -14.4, -13.6, -12.8, -12. , -11.2, -10.4,  -9.6,
        -8.8,  -8. ]),
 <a list of 10 Patch objects>)

In [176]:
temp1 = np.sign(temp) * np.log(np.abs(temp) + 1)

In [175]:
plt.plot(temp1[selected_arr])
plt.figure()
temp1[selected_arr].hist(bins=50)


Out[175]:
<matplotlib.axes.AxesSubplot at 0x7f465608bf90>

In [184]:
temp2 = np.where(np.abs(temp) > 1, np.sign(temp) * np.log(np.abs(temp)), temp)

In [190]:
temp2[selected_arr]


Out[190]:
array([-0.07307692, -0.07307692, -0.07307692, ..., -0.07307692,
       -0.07307692, -0.07307692])

In [193]:
plt.plot(temp2)
plt.figure()
plt.hist(temp2, bins=100)


Out[193]:
(array([  1.10000000e+01,   2.94000000e+02,   3.68300000e+03,
         1.38990000e+04,   1.92340000e+04,   2.46640000e+04,
         4.40810000e+04,   4.68020000e+04,   5.62610000e+04,
         5.55430000e+04,   1.06975000e+05,   1.15086000e+05,
         1.22516000e+05,   1.15142000e+05,   1.09652000e+05,
         9.66190000e+04,   9.46790000e+04,   9.50350000e+04,
         8.77130000e+04,   9.46230000e+04,   8.86780000e+04,
         8.86030000e+04,   9.54420000e+04,   9.84320000e+04,
         1.04834000e+05,   1.09023000e+05,   1.22273000e+05,
         1.25661000e+05,   1.16100000e+05,   1.09326000e+05,
         1.11775000e+05,   6.80870000e+04,   5.25300000e+04,
         3.79070000e+04,   2.46790000e+04,   1.82240000e+04,
         1.21080000e+04,   9.13200000e+03,   7.42000000e+02,
         1.60000000e+02,   3.00000000e+01,   0.00000000e+00,
         2.00000000e+00,   1.00000000e+00,   1.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   0.00000000e+00,   0.00000000e+00,
         3.85800000e+03]),
 array([-1.98786188, -1.89124058, -1.79461928, -1.69799798, -1.60137668,
       -1.50475538, -1.40813408, -1.31151278, -1.21489147, -1.11827017,
       -1.02164887, -0.92502757, -0.82840627, -0.73178497, -0.63516367,
       -0.53854237, -0.44192107, -0.34529977, -0.24867846, -0.15205716,
       -0.05543586,  0.04118544,  0.13780674,  0.23442804,  0.33104934,
        0.42767064,  0.52429194,  0.62091325,  0.71753455,  0.81415585,
        0.91077715,  1.00739845,  1.10401975,  1.20064105,  1.29726235,
        1.39388365,  1.49050495,  1.58712626,  1.68374756,  1.78036886,
        1.87699016,  1.97361146,  2.07023276,  2.16685406,  2.26347536,
        2.36009666,  2.45671797,  2.55333927,  2.64996057,  2.74658187,
        2.84320317,  2.93982447,  3.03644577,  3.13306707,  3.22968837,
        3.32630967,  3.42293098,  3.51955228,  3.61617358,  3.71279488,
        3.80941618,  3.90603748,  4.00265878,  4.09928008,  4.19590138,
        4.29252269,  4.38914399,  4.48576529,  4.58238659,  4.67900789,
        4.77562919,  4.87225049,  4.96887179,  5.06549309,  5.16211439,
        5.2587357 ,  5.355357  ,  5.4519783 ,  5.5485996 ,  5.6452209 ,
        5.7418422 ,  5.8384635 ,  5.9350848 ,  6.0317061 ,  6.12832741,
        6.22494871,  6.32157001,  6.41819131,  6.51481261,  6.61143391,
        6.70805521,  6.80467651,  6.90129781,  6.99791912,  7.09454042,
        7.19116172,  7.28778302,  7.38440432,  7.48102562,  7.57764692,
        7.67426822]),
 <a list of 100 Patch objects>)

In [177]:
for mywind in range(10, 15, 5):
    for forward_ticks in range(44, 45, 1):
        ta_in_pm = GiveMePM(ta, nforward=forward_ticks, nbackward=mywind, lim=insample_index1)
        ta_out_pm = GiveMePM(ta, nforward=forward_ticks, nbackward=mywind, lim=outsample_index1)
        res = myols(temp1, ta_in_pm)
        print ('\n\n\n------halflife: %d,     forward_ticks: %d ------------' %(mywind, forward_ticks))
        print res.summary()
        print '\n[OutSample R_SQUARE: %f]' % (PredictedRsquare(res, temp1, ta_out_pm))[1]




------halflife: 10,     forward_ticks: 44 ------------
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                 midPrc   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                     2447.
Date:                Mon, 18 Jul 2016   Prob (F-statistic):               0.00
Time:                        01:53:50   Log-Likelihood:            -2.8101e+06
No. Observations:             1612062   AIC:                         5.620e+06
Df Residuals:                 1612060   BIC:                         5.620e+06
Df Model:                           1                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
const          0.0039      0.001      3.618      0.000         0.002     0.006
None          -0.0536      0.001    -49.463      0.000        -0.056    -0.052
==============================================================================
Omnibus:                   606008.457   Durbin-Watson:                   0.082
Prob(Omnibus):                  0.000   Jarque-Bera (JB):        135839676.831
Skew:                           0.625   Prob(JB):                         0.00
Kurtosis:                      47.953   Cond. No.                         1.03
==============================================================================

[OutSample R_SQUARE: 0.000994]

In [89]:
plt.plot(bdown[:10000])


Out[89]:
[<matplotlib.lines.Line2D at 0x7f4659acba90>]

In [90]:
plt.plot(mid_rmean[:10000], marker='*')


Out[90]:
[<matplotlib.lines.Line2D at 0x7f4659ad4ad0>]

In [201]:
openpos = (ta.ix[:, 'openInt_diff'] + ta.ix[:, 'vol_diff']) / 2.
closepos = (- ta.ix[:, 'openInt_diff'] + ta.ix[:, 'vol_diff']) / 2.

In [203]:
%matplotlib auto


Using matplotlib backend: TkAgg

In [206]:
plt.plot(openpos[ta_ta_pm.index


Out[206]:
[<matplotlib.lines.Line2D at 0x7f4653f408d0>]