In [ ]:
# Trading Physics To Orders Cancels Trades 2.0 -converter
##### Usage examples:
In [ ]:
# Example 1: Download orderbook data from trading physics
obdt = OrderBookDataTool()
dl_path = 'D:/Data' # target folder
ticker = 'SPY' # ticker
min_date = '2014-05-26' # first date to get
max_date = '2015-01-01' # first date not to get
c_num = '111-222-333' # trading physics account customer number
pw_hash = 'asd1GHlaR9IU13094u8dLi' # trading physics account password hash
obdt.getdata(ticker,min_date,max_date,c_num,pw_hash,dl_path)
In [ ]:
# Example 2: Convert a folder of trading physics input data into OCT2-format.
obdt = OrderBookDataTool()
input_folder = 'F:/Data/20130111-SPY.csv'
output_folder = 'F:/Output'
mode = 'w'
obdt.convert(input_folder,output_folder,mode) # convert data to oct2 format
In [ ]:
# Example 3: Extend initially converted OCT2 data with order book state dependent features.
ob = OrderBook('SPY','F:/Output/SPY_OCT2.h5')
ob.build_features(date='2013_01_11',save_path='F:/Output/SPY_OCT2_PLUS.h5',debug_mode=True)
In [ ]:
# Example 4: plot ob data using step
ob = OrderBook('SPY','C:/temp/Data/output/SPY_OCT2.h5')
ylim = [1468000, 1473100]
xlim = [-30000, 32000]
ob.plot('2013_01_11',67000,ylim,xlim)
In [ ]:
# Example 5: plot ob data using ms time (12:00:00.000)
ob = OrderBook('SPY','C:/temp/Data/output/SPY_OCT2.h5')
ylim = [1468000, 1473100]
xlim = [-30000, 32000]
date= '2013_01_11'
ob.plot(date,ob.time2step(date,43200000)[0],ylim,xlim)
In [ ]:
# Example 6: plot ob data using time string
ob = OrderBook('SPY','C:/temp/Data/output/SPY_OCT2.h5')
ylim = [1468000, 1473100]
xlim = [-30000, 32000]
date= '2013_01_11'
strtime = '12:00:00.000'
step = ob.str2step(date,strtime)[0]
ob.plot(date,step,ylim,xlim)
In [ ]:
import h5py as h5 # 2.6.0 + hdf5 1.8.15.1
import numpy as np # 1.11.1
import pandas as pd # 0.19.2
import bisect as bs
import random as rn
import warnings as wrn
import requests as rq
from matplotlib import pyplot as plt
from matplotlib.figure import Figure
import matplotlib.patches as patches
from datetime import datetime as dt
import math
import time
import os
import re
from IPython import display
# figure/plotting specifications
%matplotlib inline
fig_size = [20,16]
plt.rcParams["figure.figsize"] = fig_size
wrn.filterwarnings('ignore')
class OrderBookDataTool(object):
    """Download Trading Physics order-flow CSVs and convert them into the
    OCT2 HDF5 format (per-date ORDERS / CANCELS / TRADES / CROSS_EVENTS tables).
    """
    def __init__(self):
        pass
    def convert(self,input_path,output_path,mode):
        """Convert Trading Physics CSV input file(s) into one OCT2-format HDF5 file.

        params:
            input_path  - a single input file, or a folder containing (only) input
                          files named 'yyyymmdd-TICKER.csv'
            output_path - existing output folder; TICKER_OCT2.h5 is written there
            mode        - 'w' (over)write or 'a' append; append requires the
                          output file to already exist
        returns 1 on success, a negative error code on invalid arguments.
        """
        # time the function
        start_time = time.time()
        # suppress warnings generated by having hdf5 group named with yyyy_mm_dd since it is not really a problem
        wrn.filterwarnings('ignore', '.*NaturalNameWarning.*')
        # handle params
        if os.path.isfile(input_path): # if input_path points to a single file
            file_path ,file_name = os.path.split(input_path)
            files = [file_name]
        elif os.path.isdir(input_path): # else if input_path points to a directory
            files = [f for f in os.listdir(input_path) if os.path.isfile(os.path.join(input_path, f))] # get all files in the dir
            file_path = input_path
        else: # invalid input_path
            print('Invalid input_path. Stopping.')
            return -2
        ticker = files[0].split('-',2)[1].split('.',2)[0] # get the ticker code from the first file
        file_re = re.compile(r'[0-9]{8}-'+ticker+'.csv') # construct file name re
        convert_files = (list(filter(file_re.match, files))) # find all that match the re
        convert_files = [os.path.join(file_path, f) for f in convert_files] # extend to full paths
        if (len(convert_files) != len(files)): # if all files didn't match
            print('Invalid output_path. All files should match: "[0-9]{8}-'+ticker+'.csv". Stopping.')
            return -2
        file_out = ticker + '_OCT2.h5' # output file name
        if os.path.isdir(output_path): # if output_path is an existing directory
            path_out_full = os.path.join(output_path,file_out)# everything ok
        else: # invalid output_path
            print('Invalid output_path. Stopping.')
            return -3
        if ((mode=='w')|((mode=='a')&(os.path.isfile(path_out_full)))):
            pass # everything ok
        else: # invalid mode
            print('Invalid mode. Needs to be "w"(rite) or "a"(ppend). To append the file must exist in output_path. Stopping.')
            return -4
        print('Started converting '+str(len(convert_files))+' file(s).')
        # Loop input files
        for file in convert_files:
            loop_time = time.time() # time the loop
            ## get the date from the input file name
            path ,file_name = os.path.split(file)
            yyyy_str = file_name[0:4] # yyyy date str
            mm_str = file_name[4:6] # mm date str
            dd_str = file_name[6:8] # dd date str
            date = yyyy_str + mm_str + dd_str # yyyymmdd date str
            grp_date = yyyy_str + '_' + mm_str + '_' + dd_str # yyyy_mm_dd date str, used as the HDF5 group name
            ## Open/Read input file
            allcols = {'Time','Ticker','Order','T','Shares','Price', 'MPID', 'X'} # full column set (unused; kept for reference/testing)
            cols = {'Time','Order','T','Shares','Price', 'MPID'} # Bring in just the columns that we want
            cols_dtype = {'Time': 'u4', 'Ticker':'a3', 'Order':'u4', 'T': 'object', 'Shares': 'i4', 'Price': 'u4', 'MPID': 'object', 'X': 'object'}
            df = pd.read_csv(file, dtype = cols_dtype, engine = 'c',usecols = cols) # read input file into pandas dataframe
            df.reset_index(level=0, inplace=True) # create a column from the index
            ## Reorganize the input data into the OCT2 format
            ### Create the orders table first
            or_df = df.query('T == "B" or T == "S" or T == "E" or T == "C"').copy() # filter a dataframe with only new limit order creations
            rn_cols = {'index':'STEP','Time':'T_CREATED','Order':'ORDER','T':'SIDE','Shares':'DELTA_Q','Price':'PRICE','MPID':'PARTICIPANT'};
            or_df.rename(columns=rn_cols, inplace=True) # rename the columns to match the output format
            or_df.loc[:,'STEP'] = or_df['STEP'].astype('u4') # convert the key from int64 to uint32
            or_df.loc[or_df['PRICE'] == 0,'DELTA_Q'] = -or_df.loc[or_df['PRICE']==0,'DELTA_Q'] # multiply by -1 the quantities of partials
            or_df['UPDATE'] = or_df['SIDE'].copy() # get a copy of the 'SIDE'-column and name it as 'UPDATE'
            or_df['STEP_PRIORITY'] = or_df['STEP'].copy() # get a copy of the 'STEP'-column and name it as 'STEP_PRIORITY'
            ind_update = (or_df['PRICE'] == 0) # find boolean array indicating rows with order updates
            or_df.loc[(ind_update == 0),'UPDATE'] = 'L'; # the non-updates are new (L)imit orders
            or_df.loc[ind_update,'STEP_PRIORITY'] = np.nan # the update rows priority is invalid so fill it with nan's
            or_df.loc[ind_update,'SIDE'] = np.nan # the update rows data side & price is empty/invalid so fill them with nan's
            or_df.loc[ind_update,'PRICE'] = np.nan # the update rows data side & price is empty/invalid so fill them with nan's
            ### Filter and use the data on full executions and cancellations to get the destruction times
            fd_df = df[['index','Order','Time','T']].query('T == "F" or T == "D"').copy().set_index('Order') # filter to get full executions or cancellations
            orj_df = or_df.join(fd_df,on = ['ORDER'],how = 'left') # join the destruction times back to the order submission dataframe
            orj_df.rename(columns={'Time':'T_DESTROYED','index':'S_DESTROYED','T':'DESTROYED_BY'},inplace=True) # rename the newly acquired destruction times accordingly
            orj_df.set_index(['ORDER','STEP'],drop=False,inplace=True) # change to a multi-index to group the orders' rows together
            orj_df.sort_index(ascending=True, inplace=True) # sort by the new index
            ind_update_next = (orj_df.shift(-1)['UPDATE'] != 'L') # find boolean array indicating rows prior to order updates
            ind_update_next.iloc[-1] = False # there cannot be an update after the last entry
            orj_df.loc[ind_update_next,'S_DESTROYED'] = orj_df.shift(-1).loc[ind_update_next,'STEP'].astype('u4') # assign correct destruction STEPs
            orj_df.loc[ind_update_next,'T_DESTROYED'] = orj_df.shift(-1).loc[ind_update_next,'T_CREATED'].astype('u4') # assign correct destruction times
            orj_df[['PRICE','SIDE','STEP_PRIORITY']] = orj_df[['PRICE','SIDE','STEP_PRIORITY']].fillna(method='ffill') # forward fill in the price, side and step_priority for the orders updates
            ### Create the traded column:
            ind_cancel_next = (orj_df.shift(-1)['UPDATE'] == 'C') # find boolean array indicating rows prior to partial cancellations
            ind_trade_next = (ind_update_next & ~(ind_cancel_next)) # find boolean array indicating rows prior to partial trades
            ind_final_before_full_trade = (~(ind_update_next) & (orj_df['DESTROYED_BY'] == 'F')) # find barray to indicate that there is no more updates and order was eventually traded (F messages)
            orj_df['TRADED'] = False # create boolean field to indicate if the order is eventually traded or is eventually cancelled
            orj_df.loc[ind_trade_next,'TRADED'] = True # traded bool is true when there is a partial trade next
            orj_df.loc[ind_final_before_full_trade,'TRADED'] = True # ... or when there is no more updates and order was eventually traded (F messages)
            orj_df.drop(['ORDER','STEP','DESTROYED_BY'],axis=1,inplace=True) # drop the extra columns (they are in the index as well)
            ### Cumsum to get the remaining quantities. This will take a couple of minutes.
            orj_df['QUANTITY'] = orj_df.groupby(level=[0])['DELTA_Q'].transform(pd.Series.cumsum) # cumsum over ORDERs to get quantities
            ### Cumsum to get the quantities to be traded.
            orj_df['DELTA_Q_TO_TRADE'] = 0 # create new temporary column for the following trades delta q's
            orj_df.loc[ind_trade_next,'DELTA_Q_TO_TRADE'] = orj_df.shift(-1).loc[ind_trade_next,'DELTA_Q'].astype('i4') # get the partial trade quantities
            orj_df.loc[ind_final_before_full_trade,'DELTA_Q_TO_TRADE'] = -orj_df.loc[ind_final_before_full_trade,'QUANTITY'] # get the full trade quantities
            orj_df.sort_index(level=[0,1], ascending=[True,False], inplace=True, sort_remaining=False) # sort again to get the descending step order
            orj_df['Q_TO_TRADE'] = -orj_df.groupby(level=[0])['DELTA_Q_TO_TRADE'].transform(pd.Series.cumsum) # cumsum to get the remaining quantities to be traded
            orj_df.sort_index(ascending=True,inplace=True) # return to the original order
            ### Create the cancellations table
            cd_df = df[['index','Order', 'Time','T','Shares','MPID']].query('T == "C" or T == "D"').copy() # filter all of the cancellations
            rn_cols = {'index':'STEP','Time':'TIME','Order':'ORDER','T':'UPDATE','Shares':'DELTA_Q','MPID':'PARTICIPANT'};
            cd_df.rename(columns=rn_cols, inplace=True) # rename the columns to match the output format
            cd_df.set_index(['ORDER','STEP'],inplace =True) # change to a multi-index to group the orders' rows together
            cd_df.sort_index(ascending=True, inplace=True) # sort by the new index so that ORDERs are grouped together
            ### Join the last updates' remaining quantities back to the cancellation data to determine the delta_q
            cdj_df = cd_df.join(orj_df.groupby(level=[0],as_index=True).last(),how = 'left',rsuffix = '_OR')
            ixd = (cdj_df['UPDATE'] == 'D') # filter for full cancellations
            ixc = (cdj_df['UPDATE'] == 'C') # filter for partial cancellations
            cdj_df.loc[ixd,'DELTA_Q'] = -cdj_df.loc[ixd,'QUANTITY'] # update full cancellations to match the remaining quantity times -1
            cdj_df.loc[ixc,'DELTA_Q'] = -cdj_df.loc[ixc,'DELTA_Q'] # update partial cancellations to be negative in delta_q
            cdj_df.drop(['T_CREATED','DELTA_Q_OR','UPDATE_OR','T_DESTROYED','QUANTITY','S_DESTROYED'], axis=1,inplace=True) # drop the extra columns
            ### Create the trades table
            ef_df = df[['index','Order', 'Time','T','Price','Shares','MPID']].query('T == "E" or T == "F" or T == "T"').copy() # filter all of the trades
            rn_cols = {'index':'STEP','Time':'TIME','Price':'TMP_PRICE','Order':'ORDER','T':'UPDATE','Shares':'DELTA_Q','MPID':'PARTICIPANT'};
            ef_df.rename(columns=rn_cols, inplace=True) # rename the columns to match the output format
            ef_df.set_index(['ORDER','STEP'],inplace =True) # change to a multi-index to group the orders' rows together
            ef_df.sort_index(ascending=True, inplace=True) # sort by the new index so that ORDERs are grouped together
            ### Join the last updates' remaining quantities back to the trade data to determine the delta_q
            efj_df = ef_df.join(orj_df.groupby(level=[0],as_index=True).last(),how = 'left',rsuffix = '_OR')
            ixf = (efj_df['UPDATE'] == 'F') # filter for full executions (trades)
            ixe = (efj_df['UPDATE'] == 'E') # filter for partial executions
            efj_df.loc[ixf,'DELTA_Q'] = -efj_df.loc[ixf,'QUANTITY'] # update full executions to match the remaining quantity times -1
            efj_df.loc[~(ixf),'DELTA_Q'] = -efj_df.loc[~(ixf),'DELTA_Q'] # update partial and non-display trades to be negative in delta_q
            efj_df.loc[~(ixe|ixf),'PRICE'] = efj_df.loc[~(ixe|ixf),'TMP_PRICE'] # set correct prices for non-display trades
            efj_df.drop(['T_CREATED','DELTA_Q_OR','UPDATE_OR','TMP_PRICE','T_DESTROYED','QUANTITY','S_DESTROYED'],
                        axis=1,inplace=True) # drop extra columns
            efj_df.loc[(efj_df['UPDATE'] == 'T'),['STEP_PRIORITY']] = 0 # fill nans with 0s ...
            efj_df['STEP_PRIORITY'] = efj_df['STEP_PRIORITY'].astype('u4') # ... so it's possible to convert them back to unsigned ints
            ### Get the cross events
            x_df = df.query('T == "X"').copy() # filter a dataframe with only cross events
            x_df.drop(['T','MPID'], axis=1,inplace=True)
            rn_cols = {'index':'STEP','Time':'TIME','Order':'ORDER','Shares':'QUANTITY','Price':'PRICE'};
            x_df.rename(columns=rn_cols, inplace=True) # rename the columns to match the output format
            x_df['QUANTITY'] = x_df['QUANTITY'].astype("u4") # dtype to uint32
            ### Convert datatypes and prepare the data to be written into the hdf5 file
            orj_df.reset_index(inplace=True) # Switch back to using row number as the index
            orj_df.index.names = ['INDEX'] # Rename with caps so it fits in with the other column names
            orj_df['ORDER'] = orj_df['ORDER'].astype("u4") # dtype to uint32
            orj_df['T_DESTROYED'] = orj_df['T_DESTROYED'].astype("u4") # dtype to uint32
            orj_df['S_DESTROYED'] = orj_df['S_DESTROYED'].astype("u4") # dtype to uint32
            orj_df['STEP_PRIORITY'] = orj_df['STEP_PRIORITY'].astype("u4") # dtype to uint32
            orj_df['QUANTITY'] = orj_df['QUANTITY'].astype("u4") # dtype to uint32
            orj_df['PRICE'] = orj_df['PRICE'].astype("u4") # dtype to uint32
            orj_df['DELTA_Q'] = orj_df['DELTA_Q'].astype("i4") # dtype to int32
            orj_df['SIDE'] = orj_df['SIDE'].astype('category') # dtype to category
            orj_df['UPDATE'] = orj_df['UPDATE'].astype('category') # dtype to category
            orj_df['PARTICIPANT'] = orj_df['PARTICIPANT'].astype('category') # dtype to category
            rn_cols = {'STEP':'STEP_CREATED','S_DESTROYED':'STEP_DESTROYED','T_DESTROYED':'TIME_DESTROYED','T_CREATED':'TIME_CREATED','DELTA_Q':'DELTA_QUANTITY','Q_TO_TRADE':'QUANTITY_TO_BE_TRADED'}
            orj_df.rename(columns=rn_cols, inplace=True) # rename
            orj_df['SIDE'].cat.rename_categories(['BID','ASK'],inplace=True) # rename category labels to be more descriptive
            orj_df['UPDATE'].cat.rename_categories(['CANCEL','TRADE','ORDER'],inplace=True) # rename category labels to be more descriptive
            cdj_df.reset_index(inplace=True) # Switch back to using row number as the index
            cdj_df.index.names = ['INDEX'] # Rename with caps so it fits in with the other column names
            cdj_df['ORDER'] = cdj_df['ORDER'].astype("u4") # dtype to uint32
            cdj_df['STEP'] = cdj_df['STEP'].astype("u4") # dtype to uint32
            cdj_df['TIME'] = cdj_df['TIME'].astype("u4") # dtype to uint32
            cdj_df['PRICE'] = cdj_df['PRICE'].astype("u4") # dtype to uint32
            cdj_df['DELTA_Q'] = cdj_df['DELTA_Q'].astype("i4") # dtype to int32
            cdj_df['SIDE'] = cdj_df['SIDE'].astype('category') # dtype to category
            cdj_df['UPDATE'] = cdj_df['UPDATE'].astype('category') # dtype to category
            cdj_df['PARTICIPANT'] = cdj_df['PARTICIPANT'].astype('category') # dtype to category
            rn_cols = {'DELTA_Q':'DELTA_QUANTITY'}
            cdj_df.rename(columns=rn_cols, inplace=True) # rename
            cdj_df['SIDE'].cat.rename_categories(['BID','ASK'],inplace=True) # rename category labels to be more descriptive
            cdj_df['UPDATE'].cat.rename_categories(['PART','FULL'],inplace=True) # rename category labels to be more descriptive
            efj_df.reset_index(inplace=True) # Switch back to using row number as the index
            efj_df.index.names = ['INDEX'] # Rename with caps so it fits in with the other column names
            efj_df['ORDER'] = efj_df['ORDER'].astype("u4") # dtype to uint32
            efj_df['STEP'] = efj_df['STEP'].astype("u4") # dtype to uint32
            efj_df['TIME'] = efj_df['TIME'].astype("u4") # dtype to uint32
            efj_df['PRICE'] = efj_df['PRICE'].astype("u4") # dtype to uint32
            efj_df['DELTA_Q'] = efj_df['DELTA_Q'].astype("i4") # dtype to int32
            efj_df['SIDE'] = efj_df['SIDE'].astype('category') # dtype to category
            efj_df['UPDATE'] = efj_df['UPDATE'].astype('category') # dtype to category
            efj_df['PARTICIPANT'] = efj_df['PARTICIPANT'].astype('category') # dtype to category
            rn_cols = {'DELTA_Q':'DELTA_QUANTITY'}
            efj_df.rename(columns=rn_cols, inplace=True) # rename
            efj_df['SIDE'].cat.rename_categories(['BID','ASK'],inplace=True) # rename category labels to be more descriptive
            efj_df['UPDATE'].cat.rename_categories(['PART','FULL','HIDDEN'],inplace=True) # rename category labels to be more descriptive
            ## Write the tables into the HDF5 file
            cols = ['ORDER','STEP_CREATED','STEP_DESTROYED','STEP_PRIORITY','TIME_CREATED','TIME_DESTROYED','SIDE',
                    'UPDATE','PRICE','QUANTITY','DELTA_QUANTITY','TRADED','QUANTITY_TO_BE_TRADED','PARTICIPANT']
            grp_orders = ('/' + grp_date + '/ORDERS')
            orj_df[cols].to_hdf(path_out_full, grp_orders, mode=mode,format='table',data_columns=cols,complevel=0,complib='blosc') # orders
            mode = 'a' # even if mode was write it still needs to be 'a'ppend for the rest of the datasets
            cols = ['ORDER','STEP','STEP_PRIORITY','TIME','SIDE','UPDATE','PRICE','DELTA_QUANTITY','PARTICIPANT']
            grp_cancels = ('/' + grp_date + '/CANCELS')
            cdj_df[cols].to_hdf(path_out_full, grp_cancels, mode=mode,format='table',data_columns=cols,complevel=0,complib='blosc') # cancels
            cols = ['ORDER','STEP','STEP_PRIORITY','TIME','SIDE','UPDATE','PRICE','DELTA_QUANTITY','PARTICIPANT']
            grp_trades = ('/' + grp_date + '/TRADES')
            efj_df[cols].to_hdf(path_out_full, grp_trades, mode=mode,format='table',data_columns=cols,complevel=0,complib='blosc') # trades
            cols = ['ORDER','STEP','TIME','PRICE','QUANTITY']
            grp_crosses = ('/' + grp_date + '/CROSS_EVENTS')
            x_df[cols].to_hdf(path_out_full, grp_crosses, mode=mode,format='table',data_columns=cols,complevel=0,complib='blosc') # cross events
            print('Conversion of '+ file +' complete. It took '+str(time.time()-loop_time)+' seconds.')
        print('Conversion complete. It took '+str(time.time()-start_time)+' seconds.')
        return 1
    def getdata(self,ticker,min_date,max_date,c_num,pw_hash,dl_path): # Get the data files using trading physics api
        """Download daily order-flow CSV files from the Trading Physics API.

        params:
            ticker   - ticker symbol, e.g. 'SPY'
            min_date - first date to get, 'YYYY-MM-DD'
            max_date - first date NOT to get, 'YYYY-MM-DD'
            c_num    - trading physics account customer number
            pw_hash  - trading physics account password hash
            dl_path  - existing folder to download the files into
        returns 1 on success, a negative error code on failure.
        """
        start_time = time.time() # time the function
        if os.path.isdir(dl_path): # if dl_path is directory
            pass # everything ok
        else: # invalid dl_path
            print('Invalid dl_path. Stopping.')
            return -2
        data_type = 'orderflow' # type of data to get - it should always be 'orderflow' or the converter will not work
        data_format = 'CSV' # format of data to get - it should always be 'CSV' or the converter will not work
        data_comp = 'stream' # compression of data to use - it should always be 'stream' or the converter will not work
        api_url = 'http://api.tradingphysics.com' # trading physics data api url
        # Get the available dates and filter the ones we want
        r = rq.get(api_url+'/getdates',params={'type':data_type})
        datelist = r.text.splitlines(); # split by newline
        all_dts = np.array([dt.strptime(datestr, '%Y-%m-%d') for datestr in datelist]) # convert strings to datetime objects
        min_dt = dt.strptime(min_date, '%Y-%m-%d') # datetime lower limit
        max_dt = dt.strptime(max_date, '%Y-%m-%d') # datetime upper limit (exclusive)
        get_dts = all_dts[(all_dts < max_dt)&(all_dts >= min_dt)] # find the span of dates to get
        get_dts = get_dts[::-1] # flip
        # download data for date each in get_dts
        for date in get_dts:
            strdate = dt.strftime(date,'%Y%m%d') # make a YYYYmmdd -string from date
            filename = strdate+'-'+ticker+'.csv' # create name for the file that will be downloaded
            # get the ticket
            ticket_params = (('date',strdate),('stock',ticker),('format',data_format),('compression',data_comp))
            ticket_url = api_url+'/getticket?C='+c_num+'&P='+pw_hash+'?getdata?type=orderflow' # api doesn't follow normal syntax so this gets messy
            r = rq.get(ticket_url,params=ticket_params) # request to get the ticket
            if (r.status_code!=200): # if there is a problem with the request
                print('Could not get a dl ticket using url: '+r.url)
                print('Status code: '+str(r.status_code))
                print('Reason: '+r.reason)
                print('Stopping because otherwise download credits might be wasted.')
                return -1
            ticket = r.text # get the ticket from the response
            data_params = (('type',data_type),('date',strdate),('stock',ticker),('format',data_format),
                           ('compression',data_comp),('t',ticket))
            data_url = api_url+'/getdata' # use the ticket to get the data
            r = rq.get(data_url,params=data_params,stream=True) # request to get the data
            if (r.status_code==200): # if there is no problem with the request
                file = open(os.path.join(dl_path,filename), 'wb')
                for chunk in r.iter_content(chunk_size=512 * 1024): # avoid running out of memory with large files -> write in chunks
                    if chunk: # filter out keep-alive new chunks
                        file.write(chunk)
                file.close()
            else:
                print('Could not download file using url: '+r.url)
                print('Status code: '+str(r.status_code))
                print('Reason: '+r.reason)
                print('Stopping because otherwise download credits might be wasted.')
                return -1
        print('Completed loading of '+str(get_dts.size)+' files.')
        print('Time taken: ' +str((time.time()-start_time))+' seconds.')
        return 1
## Class to build and hold order book states from oct2 data
class OrderBookData(object):
# orders, pd.DataFrame - orders table as a pandas dataframe
# cancels, pd.DataFrame - cancels table as a pandas dataframe
# trades, pd.DataFrame - trades table as a pandas dataframe
# cross_trades, pd.DataFrame - cross_trades table as a pandas dataframe
# tick_size - int price tick size *10000
def __init__(self,date,orders,cancels,trades,cross_trades,tick_size):
self.date = date # date of the data
self.tick_size = tick_size # size of price tick
# self.fig = plt.figure()
# self.ax = self.fig.add_subplot(1, 1, 1, facecolor=[0.15,0.15,0.15])
orders.set_index(['STEP_CREATED','STEP_DESTROYED'],drop=False,inplace=True) # index by timestamps
orders.sort_index(inplace=True) # sort by index so they are in chronological order by creation time (S_CREATED)
self.bid_orders = orders[(orders['SIDE'] == 'BID')].copy() # filter to get only bid side orders
self.ask_orders = orders[(orders['SIDE'] == 'ASK')].copy() # filter to get only bid side orders
self.bsc = self.bid_orders.ix[:,'STEP_CREATED'] # get list of bid state creation times
self.bsd = self.bid_orders.ix[:,'STEP_DESTROYED'] # get list of bid state destruction times
self.ssc = self.ask_orders.ix[:,'STEP_CREATED'] # get list of bid state creation times
self.ssd = self.ask_orders.ix[:,'STEP_DESTROYED'] # get list of bid state destruction times
self.orders = orders
self.cancels = cancels.set_index(['STEP'],drop=False)
self.trades = trades.set_index(['STEP'],drop=False)
self.cross_trades = cross_trades.set_index(['STEP'],drop=False)
self.nd_trades = self.trades[self.trades['ORDER']==0]
# create events dataframe
events = orders[['STEP_CREATED','STEP_PRIORITY','TIME_CREATED','SIDE','PRICE','UPDATE']]
rncols = {'STEP_CREATED':'STEP','TIME_CREATED':'TIME','UPDATE':'UPDATE_OLD'}
events.rename(columns=rncols,inplace=True)
events.set_index(['STEP'],drop=False,inplace=True)
events.loc[events['UPDATE_OLD']=='ORDER','UPDATE'] = 'O'
events.loc[events['UPDATE_OLD']=='TRADE','UPDATE'] = 'PT'
events.loc[events['UPDATE_OLD']=='CANCEL','UPDATE'] = 'PC'
cancels = cancels[cancels['UPDATE']=='FULL']
cancels = cancels[['STEP','STEP_PRIORITY','TIME','SIDE','PRICE']]
cancels.set_index(['STEP'],drop=False,inplace=True)
cancels['UPDATE'] = 'C'
events = events.append(cancels)
trades = trades[trades['UPDATE']=='FULL']
trades = trades[['STEP','STEP_PRIORITY','TIME','SIDE','PRICE']]
trades.set_index(['STEP'],drop=False,inplace=True)
trades['UPDATE'] = 'T'
events = events.append(trades)
events['UPDATE'] = events['UPDATE'].astype('category')
events.drop('UPDATE_OLD',axis=1,inplace=True)
events['UPDATE'].cat.rename_categories(['CANCEL','ORDER','PART_CANCEL','PART_TRADE','TRADE'],inplace=True)
events.sort_index(ascending=True, inplace=True)
self.events = events
self.ask_events = events[events['SIDE']=='ASK'].copy()
self.bid_events = events[events['SIDE']=='BID'].copy()
def update_rm_ask_state(self,step): # update ask side by removing everything that has expired up to current step
self.ask_state = self.ask_state[self.ask_state['STEP_DESTROYED']!=step]
def update_rm_bid_state(self,step): # update bid side by removing everything that has expired up to current step
self.bid_state = self.bid_state[self.bid_state['STEP_DESTROYED']!=step]
def update_add_ask_state(self,step): # update ask side by adding current step order
self.ask_state = pd.concat([self.ask_state, self.ask_orders.xs(step, level=0, drop_level=False)])
def update_add_bid_state(self,step): # update bid side by adding current step order
self.bid_state = pd.concat([self.bid_state, self.bid_orders.xs(step, level=0, drop_level=False)])
def update_addrm_ask_state(self,step): # update bid side by removing everything that has expired up to current step
self.ask_state = pd.concat([self.ask_state[self.ask_state['STEP_DESTROYED']!=step], self.ask_orders.xs(step, level=0, drop_level=False)])
def update_addrm_bid_state(self,step): # update bid side by adding current step order
self.bid_state = pd.concat([self.bid_state[self.bid_state['STEP_DESTROYED']!=step], self.bid_orders.xs(step, level=0, drop_level=False)])
def init_ask_state(self,step): # get the collection of orders on ask side (regardless of sequence of states)
self.ask_state = self.ask_orders[(self.ssc <= step)&(self.ssd > step)]
pass
def init_bid_state(self,step): # get the collection of orders on bid side (regardless of sequence of states)
self.bid_state = self.bid_orders[(self.bsc <= step)&(self.bsd > step)]
pass
def update_event_ask_lvl(self,p): # get the level relative to ask side events price: 0.5 if p is a new best, 1 at lvl1, 1.5 if at [lvl1-lvl2] etc...
if not self.ask_levels.empty:
lvl = bs.bisect_left(self.ask_levels.index.values,p) + 1 # ask level of the event
return (lvl if (p in self.ask_levels.index.values) else lvl - 0.5)
else:
return 0
def update_event_bid_lvl(self,p): # get the level relative to bid side events price: 0.5 if p is a new best, 1 at lvl1, 1.5 if at [lvl1-lvl2] etc...
if not self.bid_levels.empty:
lvl = bs.bisect_left(-self.bid_levels.index.values[::-1],-p) + 1 # bid level of the event
return (lvl if (p in self.bid_levels.index.values) else lvl - 0.5)
else:
return 0
def update_ask_tick(self,p): # get the ask side updates tick depth of ask order (deeper means lower price priority negatives are inside the spread)
return (p-self.s_best)/self.tick_size if not(np.isnan(self.s_best)) else np.nan
def update_bid_tick(self,p): # get the bid side updates tick depth of ask order (deeper means lower price priority negatives are inside the spread)
return (self.b_best-p)/self.tick_size if not(np.isnan(self.b_best)) else np.nan
def update_nth_ask_lvl(self,n): # update ask levels and return nth level [p,q]
self.ask_levels = self.ask_state[['PRICE','QUANTITY']].groupby(['PRICE']).agg({'QUANTITY': np.sum}).sort_index()
if not self.ask_levels.empty:
self.s_best = self.ask_levels.index[n-1]
return [self.ask_levels.index[n-1], self.ask_levels['QUANTITY'].iloc[n-1]]
else:
return [-1,-1]
def update_nth_bid_lvl(self,n): # update bid levels and return nth level [p,q]
self.bid_levels = self.bid_state[['PRICE','QUANTITY']].groupby(['PRICE']).agg({'QUANTITY': np.sum}).sort_index()
if not self.bid_levels.empty:
self.b_best = self.bid_levels.index[-n]
return [self.bid_levels.index[-n], self.bid_levels['QUANTITY'].iloc[-n]]
else:
return [-1,-1]
def update_ask_q_in_front(self,p,sp): # cumulative quantity of orders with higher priority on ask side
orders_in_front = ((self.ask_state['PRICE']<p) | ((self.ask_state['PRICE'] == p) & (self.ask_state['STEP_PRIORITY'] < sp)))
if any(orders_in_front):
return self.ask_state.loc[orders_in_front,'QUANTITY'].sum()
else:
return 0
def update_bid_q_in_front(self,p,sp): # cumulative quantity of orders with higher priority on bid side
orders_in_front = ((self.bid_state['PRICE']>p) | ((self.bid_state['PRICE'] == p) & (self.bid_state['STEP_PRIORITY'] < sp)))
if any(orders_in_front):
return self.bid_state.loc[orders_in_front,'QUANTITY'].sum()
else:
return 0
def init_first_state(self): # start the order book at the first (empty) state
self.init_bid_state(-1)
self.init_ask_state(-1)
self.update_nth_ask_lvl(1)
self.update_nth_bid_lvl(1)
self.b_best = np.nan
self.s_best = np.nan
def update_ask_data_on_order(self,row): # calculate and return various features that depend on the order book state and the ask side event
al = self.update_event_ask_lvl(row['PRICE'])
at = self.update_ask_tick(row['PRICE']) if (al > 0) else np.nan # event & old state relative
self.update_add_ask_state(row['STEP']) # update state with event
aqif = self.update_ask_q_in_front(row['PRICE'],row['STEP_PRIORITY'])
[ap,aq] = self.update_nth_ask_lvl(1) # 1st level price and quantity at the new state
return pd.Series([ap ,aq , al, at, aqif])
def update_bid_data_on_order(self,row): # calculate and return various features that depend on the order book state and the bid side event
bl = self.update_event_bid_lvl(row['PRICE']) # event & old state relative
bt = self.update_bid_tick(row['PRICE']) if (bl > 0) else np.nan # event & old state relative
self.update_add_bid_state(row['STEP']) # update state with event
bqif = self.update_bid_q_in_front(row['PRICE'],row['STEP_PRIORITY'])
[bp,bq] = self.update_nth_bid_lvl(1) # 1st level price and quantity at the new state
return pd.Series([bp ,bq , bl, bt, bqif])
def update_ask_data_on_cancel(self,row): # calculate and return various features that depend on the order book state and the ask side event
al = self.update_event_ask_lvl(row['PRICE'])
at = self.update_ask_tick(row['PRICE']) if (al > 0) else np.nan # event & old state relative
self.update_rm_ask_state(row['STEP']) # update state with event
aqif = self.update_ask_q_in_front(row['PRICE'],row['STEP_PRIORITY'])
[ap,aq] = self.update_nth_ask_lvl(1) # 1st level price and quantity at the new state
return pd.Series([ap ,aq , al, at, aqif])
def update_bid_data_on_cancel(self,row): # calculate and return various features that depend on the order book state and the bid side event
bl = self.update_event_bid_lvl(row['PRICE']) # event & old state relative
bt = self.update_bid_tick(row['PRICE']) if (bl > 0) else np.nan# event & old state relative
self.update_rm_bid_state(row['STEP']) # update state with event
bqif = self.update_bid_q_in_front(row['PRICE'],row['STEP_PRIORITY'])
[bp,bq] = self.update_nth_bid_lvl(1) # 1st level price and quantity at the new state
return pd.Series([bp ,bq , bl, bt, bqif])
def update_ask_data_on_part_cancel(self,row): # calculate and return various features that depend on the order book state and the ask side event
al = self.update_event_ask_lvl(row['PRICE'])
at = self.update_ask_tick(row['PRICE']) if (al > 0) else np.nan # event & old state relative
self.update_addrm_ask_state(row['STEP']) # update state with event
aqif = self.update_ask_q_in_front(row['PRICE'],row['STEP_PRIORITY'])
[ap,aq] = self.update_nth_ask_lvl(1) # 1st level price and quantity at the new state
return pd.Series([ap ,aq , al, at, aqif])
def update_bid_data_on_part_cancel(self,row): # calculate and return various features that depend on the order book state and the bid side event
bl = self.update_event_bid_lvl(row['PRICE']) # event & old state relative
bt = self.update_bid_tick(row['PRICE']) if (bl > 0) else np.nan# event & old state relative
self.update_addrm_bid_state(row['STEP']) # update state with event
bqif = self.update_bid_q_in_front(row['PRICE'],row['STEP_PRIORITY'])
[bp,bq] = self.update_nth_bid_lvl(1) # 1st level price and quantity at the new state
return pd.Series([bp ,bq , bl, bt, bqif])
def update_ask_data_on_trade(self,row): # feature set for a full ask-side trade
    """Remove the fully traded ask order at this step and return
    [best_ask_price, best_ask_qty, 1.0, 0.0, 0.0]."""
    self.update_rm_ask_state(row['STEP'])
    best_price, best_qty = self.update_nth_ask_lvl(1)  # best level AFTER the trade
    return pd.Series([best_price, best_qty, 1.0, 0.0, 0.0])
def update_bid_data_on_trade(self,row): # feature set for a full bid-side trade
    """Remove the fully traded bid order at this step and return
    [best_bid_price, best_bid_qty, 1.0, 0.0, 0.0]."""
    self.update_rm_bid_state(row['STEP'])
    best_price, best_qty = self.update_nth_bid_lvl(1)  # best level AFTER the trade
    return pd.Series([best_price, best_qty, 1.0, 0.0, 0.0])
def update_ask_data_on_part_trade(self,row): # feature set for a partial ask-side trade
    """Reduce the partially traded ask order at this step and return
    [best_ask_price, best_ask_qty, 1.0, 0.0, 0.0]."""
    self.update_addrm_ask_state(row['STEP'])
    best_price, best_qty = self.update_nth_ask_lvl(1)  # best level AFTER the trade
    return pd.Series([best_price, best_qty, 1.0, 0.0, 0.0])
def update_bid_data_on_part_trade(self,row): # feature set for a partial bid-side trade
    """Reduce the partially traded bid order at this step and return
    [best_bid_price, best_bid_qty, 1.0, 0.0, 0.0]."""
    self.update_addrm_bid_state(row['STEP'])
    best_price, best_qty = self.update_nth_bid_lvl(1)  # best level AFTER the trade
    return pd.Series([best_price, best_qty, 1.0, 0.0, 0.0])
def update_data_on_nd_trade(self,row): # process non-display trades (rare, so the full state rebuild is acceptable)
    """Rebuild both book sides at the trade's step and return
    [ask_price, ask_qty, bid_price, bid_qty, tick_dist_from_mid]."""
    self.init_bid_state(row['STEP'])  # full bid-side rebuild at this step
    bid_price, bid_qty = self.update_nth_bid_lvl(1)
    self.init_ask_state(row['STEP'])  # full ask-side rebuild at this step
    ask_price, ask_qty = self.update_nth_ask_lvl(1)
    # Distance of the trade price from the mid price, in ticks.
    # BUG FIX: the mid price is (ask+bid)/2; the original used (ask-bid)/2,
    # which is the half-spread rather than a price level, so the "distance"
    # was essentially price/tick_size.
    mid_dist = (row['PRICE'] - ((ask_price + bid_price) / 2.0)) / self.tick_size
    return pd.Series([ask_price, ask_qty, bid_price, bid_qty, mid_dist])
def init_updates(self): # build the per-event-type dispatch tables
    """Populate self.ask_updates / self.bid_updates, mapping an event's
    UPDATE code to the handler method that processes it."""
    self.ask_updates = dict(
        ORDER=self.update_ask_data_on_order,
        CANCEL=self.update_ask_data_on_cancel,
        PART_CANCEL=self.update_ask_data_on_part_cancel,
        TRADE=self.update_ask_data_on_trade,
        PART_TRADE=self.update_ask_data_on_part_trade,
    )
    self.bid_updates = dict(
        ORDER=self.update_bid_data_on_order,
        CANCEL=self.update_bid_data_on_cancel,
        PART_CANCEL=self.update_bid_data_on_part_cancel,
        TRADE=self.update_bid_data_on_trade,
        PART_TRADE=self.update_bid_data_on_part_trade,
    )
def update_ask_data(self,row): # dispatch an ask-side event to its handler
    """Route *row* to the ask-side handler registered for its UPDATE code."""
    handler = self.ask_updates[row['UPDATE']]
    return handler(row)
def update_bid_data(self,row): # dispatch a bid-side event to its handler
    """Route *row* to the bid-side handler registered for its UPDATE code."""
    handler = self.bid_updates[row['UPDATE']]
    return handler(row)
def build_ask_features(self): # derive all ask-side state-dependent feature columns
    """Replay every ask-side event against the book state and write the five
    resulting feature columns onto self.ask_events in place."""
    self.init_first_state()
    feature_cols = ['ASK_PRICE','ASK_QUANTITY','UPDATE_LEVEL','TICK_DIST','QUANTITY_IN_FRONT']
    self.ask_events[feature_cols] = self.ask_events.apply(self.update_ask_data, axis=1)
def build_bid_features(self): # derive all bid-side state-dependent feature columns
    """Replay every bid-side event against the book state and write the five
    resulting feature columns onto self.bid_events in place."""
    self.init_first_state()
    feature_cols = ['BID_PRICE','BID_QUANTITY','UPDATE_LEVEL','TICK_DIST','QUANTITY_IN_FRONT']
    self.bid_events[feature_cols] = self.bid_events.apply(self.update_bid_data, axis=1)
def build_nd_features(self): # derive features for the non-display trades
    """Attach book-state columns to every non-display trade row in place."""
    feature_cols = ['ASK_PRICE','ASK_QUANTITY','BID_PRICE','BID_QUANTITY','TICK_DIST']
    self.nd_trades[feature_cols] = self.nd_trades.apply(self.update_data_on_nd_trade, axis=1)
def join_event_data(self): # assemble the event stream and broadcast book-state columns onto orders/cancels/trades
    """Merge bid, ask and non-display events into one step-indexed table,
    forward-fill the best-quote columns, and left-join the resulting book
    state onto the orders, cancels and trades tables.
    """
    # pd.concat replaces the removed/deprecated DataFrame.append (same result)
    self.events = pd.concat([self.bid_events, self.ask_events])
    self.nd_trades = self.nd_trades[['STEP','STEP_PRIORITY','TIME','SIDE','PRICE','ASK_PRICE','ASK_QUANTITY','BID_PRICE','BID_QUANTITY','TICK_DIST']]
    self.nd_trades['UPDATE_LEVEL'] = np.nan
    # NOTE(review): this overwrites the TICK_DIST computed in build_nd_features
    # with NaN -- kept as-is, but verify it is intentional.
    self.nd_trades['TICK_DIST'] = np.nan
    self.nd_trades.set_index(['STEP'],drop=False,inplace=True)
    self.events = pd.concat([self.events, self.nd_trades])
    self.events.sort_index(ascending=True,inplace=True)
    # forward-fill the best-quote columns, mark the leading gap (before the
    # first quote) with -1, and store them as int32
    for col in ['ASK_PRICE','ASK_QUANTITY','BID_PRICE','BID_QUANTITY']:
        self.events[col] = self.events[col].ffill().fillna(-1).astype('i4')
    state_cols = ['ASK_PRICE','ASK_QUANTITY','BID_PRICE','BID_QUANTITY','UPDATE_LEVEL','TICK_DIST','QUANTITY_IN_FRONT']
    # BUG FIX: sort_index() is not in-place; the original discarded its return
    # value on all three tables, leaving them unsorted.
    self.orders = self.orders.set_index('STEP_CREATED',drop=False).sort_index()
    self.cancels = self.cancels.set_index('STEP',drop=False).sort_index()
    self.trades = self.trades.set_index('STEP',drop=False).sort_index()
    self.orders = self.orders.join(self.events[state_cols],how='left')
    self.cancels = self.cancels.join(self.events[state_cols],how='left')
    self.trades = self.trades.join(self.events[state_cols],how='left')
def build_features(self,debug_mode): # orchestrate the full feature build for this date
    """Build every order-book-state dependent feature column for this date.

    debug_mode -- when True, restrict bid/ask events to the first 5000 steps
    so the (slow) replay finishes quickly during testing.
    """
    start = time.time()
    print('Building features for ' + self.date + '.')
    if debug_mode == True:
        lim = 5000  # step cutoff used while testing and debugging
        self.bid_events = self.bid_events[self.bid_events['STEP'] < lim]
        self.ask_events = self.ask_events[self.ask_events['STEP'] < lim]
    self.init_updates()
    self.build_ask_features()
    self.build_bid_features()
    self.build_nd_features()
    self.join_event_data()
    print('Building features done. The process took ' +str((time.time()-start)/60)+' minutes.')
def save_data(self,save_path): # persist the extended tables for this date
    """Write the orders / cancels / trades / cross-trade tables of this date
    into the HDF5 file at *save_path*, one group per date."""
    print('Saving extended data for ' + self.date + '.')
    # categorize the repetitive text columns before writing to shrink the file
    for frame in (self.orders, self.cancels, self.trades):
        for col in ('UPDATE', 'SIDE', 'PARTICIPANT'):
            frame[col] = frame[col].astype('category')
    order_cols = ['ORDER','STEP_CREATED','STEP_DESTROYED','STEP_PRIORITY','TIME_CREATED','TIME_DESTROYED','SIDE','UPDATE','PRICE','QUANTITY','DELTA_QUANTITY','TRADED','QUANTITY_TO_BE_TRADED','PARTICIPANT','ASK_PRICE','ASK_QUANTITY','BID_PRICE','BID_QUANTITY','UPDATE_LEVEL','TICK_DIST','QUANTITY_IN_FRONT']
    event_cols = ['ORDER','STEP','STEP_PRIORITY','TIME','SIDE','UPDATE','PRICE','DELTA_QUANTITY','PARTICIPANT','ASK_PRICE','ASK_QUANTITY','BID_PRICE','BID_QUANTITY','UPDATE_LEVEL','TICK_DIST','QUANTITY_IN_FRONT']
    cross_cols = ['ORDER','STEP','TIME','PRICE','QUANTITY']
    # the first table (re)creates the file ('w'); every later table must be
    # appended ('a') even though the file was just written
    self.orders[order_cols].to_hdf(save_path, '/' + self.date + '/ORDERS', mode='w', format='table', data_columns=order_cols, complevel=0, complib='blosc')
    for frame, cols, group in ((self.cancels, event_cols, '/' + self.date + '/CANCELS'),
                               (self.trades, event_cols, '/' + self.date + '/TRADES'),
                               (self.cross_trades, cross_cols, '/' + self.date + '/CROSS_EVENTS')):
        frame[cols].to_hdf(save_path, group, mode='a', format='table', data_columns=cols, complevel=0, complib='blosc')
    print('Extended data for ' + self.date +' saved to ' +save_path+'.')
def time2step(self,t): # map a time (ms from midnight) to its [first_step, last_step] pair
    """Return [first, last] step recorded exactly at time *t*; if no event
    happened at *t*, return the last step strictly before it (twice)."""
    matches = self.events.loc[self.events['TIME'] == t, 'STEP']
    if matches.empty:
        earlier = self.events.loc[self.events['TIME'] < t, 'STEP']
        last_step = earlier.iloc[-1]
        return [last_step, last_step]
    return [matches.min(), matches.max()]
def step2time(self,s): # map a step to its event time (ms from midnight)
    """Return the TIME value recorded for step *s* in the events table."""
    return self.events['TIME'].loc[s]
def time2str(self,t): # convert time (ms from start of day) to an 'HH:MM:SS.mmm' string
    """Format *t* (milliseconds since midnight) as 'HH:MM:SS.mmm'.

    BUG FIX: the original formatted the milliseconds with "{}", dropping the
    leading zeros (e.g. 5 ms rendered as '.5' instead of '.005'); the field is
    now zero-padded to three digits.
    """
    hrs, rem = divmod(int(t), 60*60*1000)
    mins, rem = divmod(rem, 60*1000)
    secs, mils = divmod(rem, 1000)
    # strftime tolerates the dummy 1970-01-01 date fields; only H:M:S are used
    return time.strftime("%H:%M:%S.{:03d}".format(mils), (1970,1,1,hrs,mins,secs,0,0,0))
def str2time(self,s): # convert an 'HH:MM:SS.mmm' string to ms since midnight
    """Parse *s* ('HH:MM:SS.mmm') and return milliseconds since midnight."""
    parsed = time.strptime(s[:-4], "%H:%M:%S")
    millis = int(s[-3:])
    return ((parsed.tm_hour * 60 + parsed.tm_min) * 60 + parsed.tm_sec) * 1000 + millis
def plot(self,step,ylim,xlim):
    """Draw the resting order book at *step* onto self.ax.

    Ask-side orders are stacked to the right of x=0 and bid-side orders to the
    left; each resting order becomes one rectangle, placed in price/priority
    order, with its to-be-traded portion overdrawn in a second colour.

    step -- event step whose book state is drawn
    ylim -- [ymin, ymax] price-axis limits
    xlim -- [xmin, xmax] quantity-axis limits
    Returns 1.

    NOTE(review): relies on `patches` (presumably matplotlib.patches) and
    `display` (presumably IPython.display) being available in the surrounding
    environment -- neither is imported in the visible import cell; confirm.
    """
    print(self.time2str(self.step2time(step)))
    tick = self.tick_size
    border = 30  # vertical margin so adjacent price-level rows do not touch
    alp = 0.5  # rectangle transparency
    self.ax.clear()
    # rebuild both book sides as of *step*
    self.init_ask_state(step)
    self.init_bid_state(step)
    # sort resting orders by price, then queue priority, then order id
    ss = self.ask_state.set_index(['PRICE','STEP_PRIORITY','ORDER'],drop=False)
    ss.sort_index(inplace=True)
    # NOTE(review): local `bs` shadows the module-level `bisect as bs` import
    bs = self.bid_state.set_index(['PRICE','STEP_PRIORITY','ORDER'],drop=False)
    bs.sort_index(inplace=True)
    maxq = 0  # widest ask level encountered (tracked but not used afterwards)
    minq = 0  # widest bid level encountered, negative (tracked but not used afterwards)
    lastp = 0  # price of the previously drawn order
    lastq = 0  # cumulative quantity already drawn at the current price level
    bflip = False  # alternates the colour of neighbouring orders within a level
    for ind, row in ss.iterrows():
        if (row['PRICE']!=lastp):
            # entering a new price level: restart the horizontal stacking
            maxq = max(lastq,maxq)
            lastp = 0
            lastq = 0
            bflip = False
        col_ca = 'sandybrown' if bflip==True else 'orangered'
        col_tr = 'cyan' if bflip==True else 'darkcyan'
        # full resting quantity of this ask order
        self.ax.add_patch(
            patches.Rectangle(
                (lastq, row['PRICE']-(tick-border)/2), # (x,y)
                row['QUANTITY'], # width
                (tick-border), # height
                facecolor=col_ca,
                alpha=alp
            )
        )
        # portion of the order that is due to trade, drawn on top
        self.ax.add_patch(
            patches.Rectangle(
                (lastq, row['PRICE']-(tick-border)/2), # (x,y)
                row['QUANTITY_TO_TRADE'], # width
                (tick-border), # height
                facecolor=col_tr,
                alpha=alp
            )
        )
        lastp = row['PRICE']
        lastq += row['QUANTITY']
        bflip = not bflip
    # reset the stacking state before drawing the bid side
    lastp = 0
    lastq = 0
    bflip = False
    for ind, row in bs.iterrows():
        if (row['PRICE']!=lastp):
            # entering a new price level: restart the horizontal stacking
            minq = min(-lastq,minq)
            lastp = 0
            lastq = 0
            bflip = False
        col_ca = 'palegreen' if bflip==True else 'seagreen'
        col_tr = 'magenta' if bflip==True else 'darkmagenta'
        # full resting quantity of this bid order (drawn towards negative x)
        self.ax.add_patch(
            patches.Rectangle(
                (-row['QUANTITY']-lastq, row['PRICE']-(tick-border)/2), # (x,y)
                row['QUANTITY'], # width
                (tick-border), # height
                facecolor=col_ca,
                alpha=alp
            )
        )
        # portion of the order that is due to trade, drawn on top
        self.ax.add_patch(
            patches.Rectangle(
                (-row['QUANTITY_TO_TRADE']-lastq, row['PRICE']-(tick-border)/2), # (x,y)
                row['QUANTITY_TO_TRADE'], # width
                (tick-border), # height
                facecolor=col_tr,
                alpha=alp
            )
        )
        lastp = row['PRICE']
        lastq += row['QUANTITY']
        bflip = not bflip
    self.ax.set_ylim(ylim[0],ylim[1])
    self.ax.set_xlim(xlim[0],xlim[1])
    display.display(self.fig)
    return 1
class OrderBook(object):
    """Per-date order-book container loaded from an OCT2-format HDF5 file.

    On construction, every top-level (date) group of *data_path* is read into
    an OrderBookData instance; the public methods are thin per-date delegates
    for plotting, feature building and time/step conversion.
    """
    def __init__(self,ticker,data_path):
        print('Started order book ' + ticker)
        self.ticker = ticker
        self.data_path = data_path
        self.data = {}  # date string -> OrderBookData
        # enumerate the top-level (date) groups of the HDF5 file
        self.file = h5.File(data_path,'r')
        self.dates = [date for date in self.file]
        self.file.close()
        for date in self.dates:
            print('Reading in data for '+ date)
            orders = pd.read_hdf(data_path,date+'/ORDERS')
            cancels = pd.read_hdf(data_path,date+'/CANCELS')
            trades = pd.read_hdf(data_path,date+'/TRADES')
            cross_trades = pd.read_hdf(data_path,date+'/CROSS_EVENTS')
            self.data[date] = OrderBookData(date,orders,cancels,trades,cross_trades,100)
    def build_features(self,date,save_path,debug_mode=False):
        """Build the state-dependent features for *date* and save them to *save_path*."""
        self.data[date].build_features(debug_mode)
        self.data[date].save_data(save_path)
    def plot(self,date,step,ylim,xlim):
        """Plot the order-book state of *date* at *step*."""
        self.data[date].plot(step,ylim,xlim)
    def time2step(self,date,time):
        """Convert ms-from-midnight *time* to its [first_step, last_step] pair."""
        return self.data[date].time2step(time)
    def step2time(self,date,step):
        """Convert *step* to ms from midnight.

        BUG FIX: the original delegated via ``self.date[date]`` (an attribute
        that does not exist -> AttributeError); it must go through self.data.
        """
        return self.data[date].step2time(step)
    def time2str(self,date,time):
        """Format ms-from-midnight *time* as 'HH:MM:SS.mmm'."""
        return self.data[date].time2str(time)
    def str2step(self,date,strtime):
        """Convert an 'HH:MM:SS.mmm' string to its [first_step, last_step] pair."""
        return self.data[date].time2step(self.data[date].str2time(strtime))
In [ ]: