In [4]:
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_context('talk')

In [5]:
import matplotlib.pyplot as plt
%matplotlib inline

In [6]:
hft = pd.read_hdf('HFT_SR_RM_MA_TA.hdf')


/usr/lib/python2.7/dist-packages/tables/leaf.py:392: PerformanceWarning: The Leaf ``/HFT/block0_values`` is exceeding the maximum recommended rowsize (104857600 bytes);
be ready to see PyTables asking for *lots* of memory and possibly slow
I/O.  You may want to reduce the rowsize by trimming the value of
dimensions that are orthogonal (and preferably close) to the *main*
dimension of this leave.  Alternatively, in case you have specified a
very small/large chunksize, you may want to increase/decrease it.
  PerformanceWarning)

In [7]:
hft.head


Out[7]:
<bound method Panel.head of <class 'pandas.core.panel.Panel'>
Dimensions: 38 (items) x 2700120 (major_axis) x 4 (minor_axis)
Items axis: AveragePrice to volume
Major_axis axis: 2015-11-19 21:00:00 to 2015-12-31 15:00:00
Minor_axis axis: MA0001 to TA0001>

In [8]:
hft.items


Out[8]:
Index([u'AveragePrice', u'LifeHigh', u'LifeLow', u'TotalAskLot',
       u'TotalBidLot', u'askPrc_0', u'askPrc_1', u'askPrc_2', u'askPrc_3',
       u'askPrc_4', u'askQty_0', u'askQty_1', u'askQty_2', u'askQty_3',
       u'askQty_4', u'bidPrc_0', u'bidPrc_1', u'bidPrc_2', u'bidPrc_3',
       u'bidPrc_4', u'bidQty_0', u'bidQty_1', u'bidQty_2', u'bidQty_3',
       u'bidQty_4', u'close', u'high', u'highLimit', u'last', u'low',
       u'lowLimit', u'open', u'openInterest', u'prevClose',
       u'prevOpenInterest', u'prevSettle', u'settle', u'volume'],
      dtype='object')

In [9]:
# output cols to csv
cols = hft.items
cols = np.array(cols).astype(str)
np.savetxt('cols.csv', cols, delimiter=',', fmt='%s')

In [10]:
hft.major_axis


Out[10]:
DatetimeIndex([       '2015-11-19 21:00:00', '2015-11-19 21:00:00.250000',
               '2015-11-19 21:00:00.500000', '2015-11-19 21:00:00.750000',
                      '2015-11-19 21:00:01', '2015-11-19 21:00:01.250000',
               '2015-11-19 21:00:01.500000', '2015-11-19 21:00:01.750000',
                      '2015-11-19 21:00:02', '2015-11-19 21:00:02.250000',
               ...
               '2015-12-31 14:59:57.750000',        '2015-12-31 14:59:58',
               '2015-12-31 14:59:58.250000', '2015-12-31 14:59:58.500000',
               '2015-12-31 14:59:58.750000',        '2015-12-31 14:59:59',
               '2015-12-31 14:59:59.250000', '2015-12-31 14:59:59.500000',
               '2015-12-31 14:59:59.750000',        '2015-12-31 15:00:00'],
              dtype='datetime64[ns]', length=2700120, freq=None)

In [11]:
hft.minor_axis


Out[11]:
Index([u'MA0001', u'RM0001', u'SR0001', u'TA0001'], dtype='object')

In [113]:
night_len = int(4*3600*2.5)
mor_len = int(4*3600*2.25)
aftn_len = int(4*3600*1.5)

In [29]:
mor_len


Out[29]:
32400

FOCUS on RM


In [14]:
rm = hft.minor_xs('RM0001')
type(rm)


Out[14]:
pandas.core.frame.DataFrame

show columns


In [15]:
rm.iloc[:5, :12]


Out[15]:
AveragePrice LifeHigh LifeLow TotalAskLot TotalBidLot askPrc_0 askPrc_1 askPrc_2 askPrc_3 askPrc_4 askQty_0 askQty_1
2015-11-19 21:00:00.000 1775.0 2322.0 1768.0 3103.0 2975.0 1776.0 1777.0 1778.0 1779.0 1780.0 21.0 7.0
2015-11-19 21:00:00.250 1775.0 2322.0 1768.0 3424.0 3010.0 1776.0 1777.0 1778.0 1779.0 1780.0 20.0 7.0
2015-11-19 21:00:00.500 1775.0 2322.0 1768.0 3648.0 3170.0 1776.0 1777.0 1778.0 1779.0 1780.0 8.0 13.0
2015-11-19 21:00:00.750 1775.0 2322.0 1768.0 3830.0 3234.0 1775.0 1776.0 1777.0 1778.0 1779.0 38.0 37.0
2015-11-19 21:00:01.000 1775.0 2322.0 1768.0 4012.0 3294.0 1775.0 1776.0 1777.0 1778.0 1779.0 96.0 88.0

In [16]:
rm.iloc[:5, 12:24]


Out[16]:
askQty_2 askQty_3 askQty_4 bidPrc_0 bidPrc_1 bidPrc_2 bidPrc_3 bidPrc_4 bidQty_0 bidQty_1 bidQty_2 bidQty_3
2015-11-19 21:00:00.000 58.0 30.0 262.0 1775.0 1774.0 1773.0 1772.0 1771.0 83.0 26.0 68.0 154.0
2015-11-19 21:00:00.250 58.0 352.0 262.0 1775.0 1774.0 1773.0 1772.0 1771.0 83.0 56.0 68.0 159.0
2015-11-19 21:00:00.500 80.0 362.0 263.0 1775.0 1774.0 1773.0 1772.0 1771.0 83.0 109.0 89.0 160.0
2015-11-19 21:00:00.750 55.0 84.0 362.0 1774.0 1773.0 1772.0 1771.0 1770.0 183.0 90.0 164.0 88.0
2015-11-19 21:00:01.000 77.0 84.0 369.0 1774.0 1773.0 1772.0 1771.0 1770.0 185.0 94.0 174.0 89.0

In [17]:
rm.iloc[:5, 24:]


Out[17]:
bidQty_4 close high highLimit last low lowLimit open openInterest prevClose prevOpenInterest prevSettle settle volume
2015-11-19 21:00:00.000 60.0 0.0 1775.0 1866.0 1775.0 1775.0 1722.0 1775.0 587568.0 1779.0 587568.0 1794.0 1775.0 520.0
2015-11-19 21:00:00.250 60.0 0.0 1776.0 1866.0 1776.0 1775.0 1722.0 1775.0 587566.0 1779.0 587568.0 1794.0 1775.0 522.0
2015-11-19 21:00:00.500 80.0 0.0 1777.0 1866.0 1775.0 1775.0 1722.0 1775.0 587534.0 1779.0 587568.0 1794.0 1775.0 674.0
2015-11-19 21:00:00.750 407.0 0.0 1777.0 1866.0 1775.0 1775.0 1722.0 1775.0 587648.0 1779.0 587568.0 1794.0 1775.0 1036.0
2015-11-19 21:00:01.000 422.0 0.0 1777.0 1866.0 1775.0 1774.0 1722.0 1775.0 587652.0 1779.0 587568.0 1794.0 1775.0 1086.0

ALL datatype are float64


In [18]:
for col, ser in rm.iteritems():
    print col, ser.dtype, ser.std(axis=0)
    #ser.plot(marker='*', linestyle='-')
    #plt.show()


AveragePrice float64 54.434424448
LifeHigh float64 21.2264025889
LifeLow float64 23.243718561
TotalAskLot float64 4143.52344434
TotalBidLot float64 5431.52860007
askPrc_0 float64 54.7464276683
askPrc_1 float64 54.7464246804
askPrc_2 float64 54.746428347
askPrc_3 float64 55.0503771675
askPrc_4 float64 58.4466642821
askQty_0 float64 748.917923457
askQty_1 float64 421.785572043
askQty_2 float64 418.087426985
askQty_3 float64 413.856144793
askQty_4 float64 394.690490296
bidPrc_0 float64 110.444738874
bidPrc_1 float64 126.184326121
bidPrc_2 float64 138.026853276
bidPrc_3 float64 143.214587735
bidPrc_4 float64 147.346242573
bidQty_0 float64 390.154663217
bidQty_1 float64 431.977763012
bidQty_2 float64 397.773367011
bidQty_3 float64 393.917308311
bidQty_4 float64 382.138728729
close float64 2.53507178231
high float64 52.5286801604
highLimit float64 55.9599265724
last float64 54.7501530753
low float64 57.651762213
lowLimit float64 51.8069267635
open float64 55.7954302954
openInterest float64 87128.4161217
prevClose float64 55.845033993
prevOpenInterest float64 87144.7523699
prevSettle float64 53.8828045932
settle float64 54.434424448
volume float64 288637.682074

Standard Deviation


In [22]:
rm_20 = rm.ix[night_len + mor_len + aftn_len + 1: night_len + mor_len + aftn_len +night_len + mor_len + aftn_len, :]
for col, ser in rm_20.iteritems():
    print col, ser.dtype, ser.std(axis=0)
    print ser.mean(axis=0)
    #ser.plot(marker='*', linestyle='-')
    #plt.show()


AveragePrice float64 9.78584380897
1778.29026989
LifeHigh float64 0.0
2322.0
LifeLow float64 21.9351354691
1749.2633807
TotalAskLot float64 4756.83462798
17630.6455627
TotalBidLot float64 3782.283291
8361.70233003
askPrc_0 float64 30.1693681976
1762.12290137
askPrc_1 float64 30.1693681976
1763.12290137
askPrc_2 float64 30.1693681976
1764.12290137
askPrc_3 float64 30.1693681976
1765.12290137
askPrc_4 float64 30.1693681976
1766.12290137
askQty_0 float64 3464.61119866
1296.75515284
askQty_1 float64 383.672196245
566.726741408
askQty_2 float64 300.231166411
522.411326793
askQty_3 float64 329.549605872
523.192935477
askQty_4 float64 308.92017189
474.665229614
bidPrc_0 float64 481.373354751
1623.45279392
bidPrc_1 float64 560.935358248
1566.35854843
bidPrc_2 float64 616.038855766
1518.06502294
bidPrc_3 float64 638.889974348
1494.88815431
bidPrc_4 float64 656.518057259
1475.57688419
bidQty_0 float64 820.390116646
502.691007678
bidQty_1 float64 892.580542505
643.946377182
bidQty_2 float64 530.384970412
498.109745664
bidQty_3 float64 522.438849381
524.685940955
bidQty_4 float64 486.345020246
523.983210925
close float64 0.0
0.0
high float64 3.88692879213
1794.10597896
highLimit float64 0.0750551179951
1853.00043334
last float64 30.1111491551
1761.64696274
low float64 26.3038950071
1755.01507795
lowLimit float64 0.0750551179951
1709.00043334
open float64 0.01154694123
1776.99993333
openInterest float64 11622.1768732
538866.873599
prevClose float64 0.01154694123
1777.00006667
prevOpenInterest float64 237.047156511
546511.368615
prevSettle float64 0.0750551179951
1781.00043334
settle float64 9.78584380897
1778.29026989
volume float64 416314.41521
635516.389782

In [24]:
rm_20['last'].plot()


Out[24]:
<matplotlib.axes.AxesSubplot at 0x7f3e24283910>

In [28]:
rm.ix[night_len + mor_len, :]


Out[28]:
AveragePrice          1782.0
LifeHigh              2322.0
LifeLow               1765.0
TotalAskLot          18021.0
TotalBidLot           9921.0
askPrc_0              1778.0
askPrc_1              1779.0
askPrc_2              1780.0
askPrc_3              1781.0
askPrc_4              1782.0
askQty_0               361.0
askQty_1               267.0
askQty_2               942.0
askQty_3               481.0
askQty_4               508.0
bidPrc_0              1777.0
bidPrc_1              1776.0
bidPrc_2              1775.0
bidPrc_3              1774.0
bidPrc_4              1773.0
bidQty_0               155.0
bidQty_1               476.0
bidQty_2               540.0
bidQty_3                95.0
bidQty_4               315.0
close                    0.0
high                  1798.0
highLimit             1866.0
last                  1778.0
low                   1765.0
lowLimit              1722.0
open                  1775.0
openInterest        556792.0
prevClose             1779.0
prevOpenInterest    587568.0
prevSettle            1794.0
settle                1782.0
volume              767392.0
Name: 2015-11-20 11:29:59.500000, dtype: float64

Resample

datetimeindex format: 2015-11-20 09:30:00.000000

In this data set, Monday's night trading begins at Sunday night.


In [120]:
rm.ix['2015-11-20 14:59:00': '2015-11-23 09:01:00', 'last'].plot()


Out[120]:
<matplotlib.axes.AxesSubplot at 0x7f3e20084f10>

In [122]:
rm_23 = rm.ix['2015-11-22 21:00:00': '2015-11-23 15:00:00', :]
rm_23.describe()


Out[122]:
AveragePrice LifeHigh LifeLow TotalAskLot TotalBidLot askPrc_0 askPrc_1 askPrc_2 askPrc_3 askPrc_4 ... last low lowLimit open openInterest prevClose prevOpenInterest prevSettle settle volume
count 90004.000000 90004.0 90004.000000 90004.000000 90004.000000 90004.000000 90004.000000 90004.000000 90004.000000 90004.000000 ... 90004.000000 90004.000000 90004.0 90004.0 90004.000000 90004.0 90004.0 90004.0 90004.000000 9.000400e+04
mean 1778.288109 2322.0 1749.259277 17630.756733 8360.978768 1762.117684 1763.117684 1764.117684 1765.117684 1766.117684 ... 1761.641805 1755.010655 1709.0 1777.0 538863.792909 1777.0 546510.0 1781.0 1778.288109 6.355760e+05
std 9.788023 0.0 21.937622 4756.631191 3782.998119 30.172564 30.172564 30.172564 30.172564 30.172564 ... 30.114289 26.306678 0.0 0.0 11625.633963 0.0 0.0 0.0 9.788023 4.163665e+05
min 1755.000000 2322.0 1709.000000 4594.000000 0.000000 1709.000000 1710.000000 1711.000000 1712.000000 1713.000000 ... 1709.000000 1709.000000 1709.0 1777.0 507074.000000 1777.0 546510.0 1781.0 1755.000000 4.900000e+02
25% 1775.000000 2322.0 1742.000000 15060.000000 6522.000000 1746.000000 1747.000000 1748.000000 1749.000000 1750.000000 ... 1745.000000 1742.000000 1709.0 1777.0 532352.000000 1777.0 546510.0 1781.0 1775.000000 2.731060e+05
50% 1782.000000 2322.0 1765.000000 17409.000000 8422.000000 1778.000000 1779.000000 1780.000000 1781.000000 1782.000000 ... 1778.000000 1776.000000 1709.0 1777.0 535940.000000 1777.0 546510.0 1781.0 1782.000000 5.432880e+05
75% 1786.000000 2322.0 1765.000000 19278.000000 11218.000000 1787.000000 1788.000000 1789.000000 1790.000000 1791.000000 ... 1787.000000 1776.000000 1709.0 1777.0 549208.000000 1777.0 546510.0 1781.0 1786.000000 9.371845e+05
max 1788.000000 2322.0 1765.000000 37160.000000 15932.000000 1797.000000 1798.000000 1799.000000 1800.000000 1801.000000 ... 1796.000000 1777.000000 1709.0 1777.0 560880.000000 1777.0 546510.0 1781.0 1788.000000 1.436512e+06

8 rows × 38 columns


In [95]:
rm_1min = rm.ix[::240, :]
rm_1min.head()


Out[95]:
AveragePrice LifeHigh LifeLow TotalAskLot TotalBidLot askPrc_0 askPrc_1 askPrc_2 askPrc_3 askPrc_4 ... last low lowLimit open openInterest prevClose prevOpenInterest prevSettle settle volume
2015-11-19 21:00:00 1775.0 2322.0 1768.0 3103.0 2975.0 1776.0 1777.0 1778.0 1779.0 1780.0 ... 1775.0 1775.0 1722.0 1775.0 587568.0 1779.0 587568.0 1794.0 1775.0 520.0
2015-11-19 21:01:00 1777.0 2322.0 1768.0 4754.0 3981.0 1778.0 1779.0 1780.0 1781.0 1782.0 ... 1778.0 1774.0 1722.0 1775.0 586902.0 1779.0 587568.0 1794.0 1777.0 8562.0
2015-11-19 21:02:00 1777.0 2322.0 1768.0 6424.0 4349.0 1776.0 1777.0 1778.0 1779.0 1780.0 ... 1775.0 1774.0 1722.0 1775.0 587470.0 1779.0 587568.0 1794.0 1777.0 13102.0
2015-11-19 21:03:00 1775.0 2322.0 1768.0 6589.0 4588.0 1773.0 1774.0 1775.0 1776.0 1777.0 ... 1772.0 1770.0 1722.0 1775.0 588386.0 1779.0 587568.0 1794.0 1775.0 20892.0
2015-11-19 21:04:00 1773.0 2322.0 1767.0 8793.0 4045.0 1768.0 1769.0 1770.0 1771.0 1772.0 ... 1768.0 1767.0 1722.0 1775.0 589904.0 1779.0 587568.0 1794.0 1773.0 34234.0

5 rows × 38 columns


In [128]:
rm_1min_last = rm_1min.ix[:, 'last']
rm_1min_lastd = rm_1min_last.diff()
rm_1min_lastd


Out[128]:
2015-11-19 21:00:00.000    NaN
2015-11-19 21:01:00.000    3.0
2015-11-19 21:02:00.000   -3.0
2015-11-19 21:03:00.000   -3.0
2015-11-19 21:04:00.000   -4.0
2015-11-19 21:05:00.000    2.0
2015-11-19 21:06:00.000    1.0
2015-11-19 21:07:00.000   -4.0
2015-11-19 21:08:00.000    5.0
2015-11-19 21:09:00.000    3.0
2015-11-19 21:10:00.000   -1.0
2015-11-19 21:11:00.000    4.0
2015-11-19 21:12:00.000    0.0
2015-11-19 21:13:00.000    0.0
2015-11-19 21:14:00.000    1.0
2015-11-19 21:15:00.000   -1.0
2015-11-19 21:16:00.000    0.0
2015-11-19 21:17:00.000    0.0
2015-11-19 21:18:00.000   -1.0
2015-11-19 21:19:00.000    1.0
2015-11-19 21:20:00.000   -1.0
2015-11-19 21:21:00.000   -1.0
2015-11-19 21:22:00.000    0.0
2015-11-19 21:23:00.000    1.0
2015-11-19 21:24:00.000    1.0
2015-11-19 21:25:00.000    1.0
2015-11-19 21:26:00.000    2.0
2015-11-19 21:27:00.000    3.0
2015-11-19 21:28:00.000   -2.0
2015-11-19 21:29:00.000    2.0
                          ... 
2015-12-31 14:30:30.250    1.0
2015-12-31 14:31:30.250   -1.0
2015-12-31 14:32:30.250    0.0
2015-12-31 14:33:30.250    1.0
2015-12-31 14:34:30.250   -1.0
2015-12-31 14:35:30.250    0.0
2015-12-31 14:36:30.250    0.0
2015-12-31 14:37:30.250    0.0
2015-12-31 14:38:30.250    0.0
2015-12-31 14:39:30.250    0.0
2015-12-31 14:40:30.250   -1.0
2015-12-31 14:41:30.250    0.0
2015-12-31 14:42:30.250   -1.0
2015-12-31 14:43:30.250    1.0
2015-12-31 14:44:30.250    1.0
2015-12-31 14:45:30.250   -2.0
2015-12-31 14:46:30.250    0.0
2015-12-31 14:47:30.250    1.0
2015-12-31 14:48:30.250    0.0
2015-12-31 14:49:30.250    0.0
2015-12-31 14:50:30.250   -1.0
2015-12-31 14:51:30.250    0.0
2015-12-31 14:52:30.250    1.0
2015-12-31 14:53:30.250   -1.0
2015-12-31 14:54:30.250    2.0
2015-12-31 14:55:30.250   -1.0
2015-12-31 14:56:30.250    0.0
2015-12-31 14:57:30.250   -1.0
2015-12-31 14:58:30.250    0.0
2015-12-31 14:59:30.250    2.0
Name: last, dtype: float64

In [133]:
rm_23_1min_lastd = rm_1min_lastd.ix['2015-11-22 21:00:00': '2015-11-23 15:00:00']
rm_23_1min_lastd.hist(bins=50)


Out[133]:
<matplotlib.axes.AxesSubplot at 0x7f3e182d9110>

Prove that there is no missing points


In [68]:
for i in xrange(len(inde) - 1):
    if (inde[i + 1] - inde[i] != pd.Timedelta(250, 'ms') 
        and inde[i].strftime('%T') != '23:30:00'
        and inde[i].strftime('%T') != '10:15:00'
        and inde[i].strftime('%T') != '11:30:00'
        and inde[i].strftime('%T') != '15:00:00'):
        print inde[i], inde[i+1]

The number of ticks = the number of time intervals + 1


In [83]:
np.where(np.array(inde) == np.datetime64('2015-11-20T11:30:00.000000000+0000'))


Out[83]:
(array([68402]),)

In [89]:
suspect1 = rm.ix['2015-11-20 09:00:00.000000': '2015-11-20 10:15:00.000000', :]
suspect2 = rm.ix['2015-11-20 10:30:00.000000': '2015-11-20 11:30:00.000000', :]

In [90]:
suspect1.index


Out[90]:
DatetimeIndex([       '2015-11-20 09:00:00', '2015-11-20 09:00:00.250000',
               '2015-11-20 09:00:00.500000', '2015-11-20 09:00:00.750000',
                      '2015-11-20 09:00:01', '2015-11-20 09:00:01.250000',
               '2015-11-20 09:00:01.500000', '2015-11-20 09:00:01.750000',
                      '2015-11-20 09:00:02', '2015-11-20 09:00:02.250000',
               ...
               '2015-11-20 10:14:57.750000',        '2015-11-20 10:14:58',
               '2015-11-20 10:14:58.250000', '2015-11-20 10:14:58.500000',
               '2015-11-20 10:14:58.750000',        '2015-11-20 10:14:59',
               '2015-11-20 10:14:59.250000', '2015-11-20 10:14:59.500000',
               '2015-11-20 10:14:59.750000',        '2015-11-20 10:15:00'],
              dtype='datetime64[ns]', length=18001, freq=None)

In [91]:
suspect2.index


Out[91]:
DatetimeIndex([       '2015-11-20 10:30:00', '2015-11-20 10:30:00.250000',
               '2015-11-20 10:30:00.500000', '2015-11-20 10:30:00.750000',
                      '2015-11-20 10:30:01', '2015-11-20 10:30:01.250000',
               '2015-11-20 10:30:01.500000', '2015-11-20 10:30:01.750000',
                      '2015-11-20 10:30:02', '2015-11-20 10:30:02.250000',
               ...
               '2015-11-20 11:29:57.750000',        '2015-11-20 11:29:58',
               '2015-11-20 11:29:58.250000', '2015-11-20 11:29:58.500000',
               '2015-11-20 11:29:58.750000',        '2015-11-20 11:29:59',
               '2015-11-20 11:29:59.250000', '2015-11-20 11:29:59.500000',
               '2015-11-20 11:29:59.750000',        '2015-11-20 11:30:00'],
              dtype='datetime64[ns]', length=14401, freq=None)

In [ ]: