In [1]:
'''
The python script to analyze the trade data for lykke

Author: Jinhua Wang, University of Toronto

January 2017

License: The MIT License (MIT)

Copyright (c) 2016 Jinhua Wang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
'''


Out[1]:
'\nThe python script to analyze the trade data for lykke\n\nAuthor: Jinhua Wang, University of Toronto\n\nJanuary 2017\n\nLicense: The MIT License (MIT)\n\nCopyright (c) 2016 Jinhua Wang\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the "Software"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n'

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import json
import math
#the transaction log
transaction_log=[]
transaction_log_json=[]

In [3]:
#pd.read_csv("trade_log_20160801_20161130.csv")

In [4]:
df=pd.read_csv("trade_log_20160801_20161130.csv")

In [5]:
df["time"]=pd.to_datetime(df.TradeDt)

In [6]:
#sort the data by time
df=df.sort_values(by="time")

In [7]:
#drop the useless columns
df.drop('TraderWalletId',1,inplace=True)
df.drop('MktMakerWalletId',1,inplace=True)
df.drop('UsdVol',1,inplace=True)
df.drop('BtcVol',1,inplace=True)
df.drop('TxHashId',1,inplace=True)
df.drop('BlockchainDt',1,inplace=True)
df.drop('ConfTimeHours',1,inplace=True)
df.drop('ConfTimeMinutes',1,inplace=True)
df.drop('TxUrl',1,inplace=True)
df.drop('MarketOrderId',1,inplace=True)
df.drop('MarketOrderDt',1,inplace=True)
df.drop('LimitOrderld',1,inplace=True)
df.drop('LimitOrderDt',1,inplace=True)
df.drop('TradeDt',1,inplace=True)
df=df.rename(columns={'Asset1':'Sell', 'Asset2':'Buy', 'Qty1':'Sell_Vol', 'Qty2':'Buy_Vol'})
df["Sell_Vol"]=df["Sell_Vol"].abs()
df['Price'] = df['Price'].astype('float64') 
#df

In [8]:
#the set used to count the number of unique assets trading in the exchange
s={}
#construct the orders and put the orders into the order book
for index, row in df.iterrows():
    if row["Buy"] not in s:
        s[row["Buy"]]=True
    if row["Sell"] not in s:
        s[row["Sell"]]=True
print "the number of unique assets are " + str(len(s))
print "the list of unique assets follows:"
print "**********************************"
for key in s: 
    print "Asset " + key
print "**********************************"


the number of unique assets are 8
the list of unique assets follows:
**********************************
Asset USD
Asset BTC
Asset CHF
Asset Germany30
Asset JPY
Asset GBP
Asset EUR
Asset LKK
**********************************

In [9]:
#we assume all orders transact at the mid price
df_orderbook=df.values.tolist()
log_result = []
i=0
for row in df_orderbook:
    if i<len(df_orderbook)-1:
        i+=1
        current_time=row[7]
        current_date = current_time.date()
        #query downwards to find trades on the same date
        for n_row in df_orderbook[i:]:
            #a trade match found on the same trading day
            if n_row[3]==row[1] and n_row[1]==row[3] and n_row[7].date()==current_date and row[2]!=0 and row[4]!=0 and n_row[2]!=0 and n_row[4]!=0:
                #calculate the average price for the trade
                #for example, sell row[1]=BTC, buy row[3]=USD
                if (row[1]<row[3]):
                    price_1 = float(row[5])
                else:
                    price_1 = float(row[6])
                
                if (n_row[1]<n_row[3]):
                    price_2 = float(n_row[5])
                else:
                    price_2 = float(n_row[6])
                bid=0
                ask=0
                if price_1>price_2:
                    bid=price_2
                    ask=price_1
                else:
                    bid=price_1
                    ask=price_2
                price_avg=(price_1+price_2)/2
                #the quantity that the first trader can buy
                #for example, sell row[1]=BTC, buy row[3]=USD
                if (row[1]<row[3]):
                    qty_buy_1=row[2]
                else:
                    qty_buy_1=row[4]
                #the actual transacted volume
                vol_tmp=0
                #check if the next trader has enough currency to sell 
                #for example, sell n_row[1]=USD, buy n_row[3]=BTC
                if (n_row[1]>n_row[3]):
                    if qty_buy_1<n_row[4]:
                        n_row[4]=n_row[4]-qty_buy_1
                        vol_tmp=qty_buy_1
                        row[2]=0
                    else:
                        vol_tmp=n_row[4]
                        row[2]=row[2]-n_row[4]
                        n_row[4]=0
                else:
                    if qty_buy_1<n_row[2]:
                        n_row[2]=n_row[2]-qty_buy_1
                        vol_tmp=qty_buy_1
                        row[4]=0
                    else:
                        vol_tmp=n_row[2]
                        row[4]=row[4]-n_row[2]
                        n_row[2]=0
                trader1_id=row[0]
                trader2_id=n_row[0]
                #write log of the transaction
                tmp_arr = []
                tmp_arr.append(trader1_id)
                tmp_arr.append(trader2_id)
                tmp_arr.append(bid)
                tmp_arr.append(ask)
                tmp_arr.append(price_avg)
                tmp_arr.append(row[1])
                tmp_arr.append(row[3])
                tmp_arr.append(n_row[1])
                tmp_arr.append(n_row[3])
                tmp_arr.append(vol_tmp)
                tmp_arr.append(row[7].time())
                tmp_arr.append(n_row[7])
                tmp_arr.append(n_row[7].date())
                log_result.append(tmp_arr)
                tmp_log={"trader1":trader1_id, "trader2":trader2_id, "bid":bid, "ask":ask , "price":price_avg, "trader1_sell":row[1], "trader1_buy":row[3], "trader2_sell":n_row[1], "trader2_buy":n_row[3], "vol_trader1_buy/sell":vol_tmp,"start_time":row[7],"transaction_time":n_row[7], "date":n_row[7].date()} 
                transaction_log.append(tmp_log)
                tmp_log_json={"trader1":trader1_id, "trader2":trader2_id, "bid":bid, "ask":ask , "price":price_avg, "trader1_sell":row[1], "trader1_buy":row[3], "trader2_sell":n_row[1], "trader2_buy":n_row[3], "vol_trader1_buy/sell":vol_tmp,"start_time":str(row[7]),"transaction_time":str(n_row[7]), "date":str(n_row[7].date())} 
                transaction_log_json.append(tmp_log_json)
                if(row[2]<=0): 
                    df_orderbook.remove(row)
                    #if row[7].month == 11:
                        #print "remove"+row[0]+" time:" + str(row[7])
                    break
                if(n_row[2]<=0):
                    #remove the transaction since the order has already been filled
                    df_orderbook.remove(n_row)
                    #if n_row[7].month == 11:
                        #print "remove"+n_row[0]+" time:" + str(n_row[7])
log_list = ["trader1", "trader2", "bid", "ask", "price", "trader1_sell", "trader1_buy", "trader2_sell", "trader2_buy", "vol_trader1_buy","start_time","transaction_time", "date"]
transaction_log_df = pd.DataFrame(log_result,columns=log_list)
transaction_log_df.to_csv("python_csv/hypo_trade_log.csv")
df


Out[9]:
TradeId Sell Sell_Vol Buy Buy_Vol Price InvPrice time
3456 20160802091718670_00189 LKK 1.000000 CHF 0.050000 0.050000 20 2016-08-02 09:17:18.670
3455 20160802091912238_00193 LKK 500.000000 BTC 0.042741 0.000085 11698.44041 2016-08-02 09:19:12.238
3454 20160802101745121_00197 BTC 0.100000 EUR 53.610000 536.100000 0.001865324 2016-08-02 10:17:45.121
3453 20160802101831880_00201 EUR 54.570000 CHF 59.030000 1.081730 0.924445197 2016-08-02 10:18:31.880
3452 20160802134855913_00205 CHF 59.030000 EUR 54.560000 0.924276 1.081928152 2016-08-02 13:48:55.913
3451 20160802134930680_00209 EUR 54.370000 BTC 0.100000 0.001839 543.7 2016-08-02 13:49:30.680
3450 20160802164746973_00213 BTC 0.034654 LKK 400.000000 11542.579130 8.66358E-05 2016-08-02 16:47:46.973
3449 20160802213915426_00217 BTC 0.100000 EUR 49.710000 497.100000 0.002011668 2016-08-02 21:39:15.426
3448 20160802214057360_00221 EUR 49.900000 USD 55.990000 1.122044 0.891230577 2016-08-02 21:40:57.360
3447 20160803060056038_00225 LKK 100.000000 CHF 5.000000 0.050000 20 2016-08-03 06:00:56.038
3446 20160803060504304_00229 BTC 0.050000 CHF 26.440000 528.800000 0.001891074 2016-08-03 06:05:04.304
3445 20160803081211811_00233 USD 55.890000 CHF 54.000000 0.966184 1.035 2016-08-03 08:12:11.811
3444 20160803182757562_00241 USD 1127.940000 BTC 2.000000 0.001773 563.97 2016-08-03 18:27:57.562
3443 20160803191259156_00245 CHF 5.000000 LKK 100.000000 20.000000 0.05 2016-08-03 19:12:59.156
3442 20160803233700698_00249 EUR 0.500000 CHF 0.540000 1.080000 0.925925926 2016-08-03 23:37:00.698
3441 20160804130722975_00253 LKK 100.000000 CHF 5.000000 0.050000 20 2016-08-04 13:07:22.975
3440 20160805145514411_00257 USD 1151.760000 BTC 2.000000 0.001736 575.88 2016-08-05 14:55:14.411
3439 20160805153253156_00261 BTC 2.000000 USD 1150.540000 575.270000 0.001738314 2016-08-05 15:32:53.156
3438 20160805161410348_00265 LKK 11.000000 BTC 0.000887 0.000081 12401.35287 2016-08-05 16:14:10.348
3437 20160806220116393_00269 CHF 109.870000 EUR 101.000000 0.919268 1.087821782 2016-08-06 22:01:16.393
3436 20160808173005986_00001 EUR 0.300000 CHF 0.320000 1.066667 0.9375 2016-08-08 17:30:05.986
3435 20160808180640556_00009 CHF 5.000000 USD 5.080000 1.016000 0.984251969 2016-08-08 18:06:40.556
3434 20160809071417487_00013 LKK 1159.000000 BTC 0.100000 0.000086 11590 2016-08-09 07:14:17.487
3433 20160809171553486_00021 EUR 49.580000 GBP 42.320000 0.853570 1.171550095 2016-08-09 17:15:53.486
3432 20160810070045061_00025 USD 5.110000 LKK 100.000000 19.569472 0.0511 2016-08-10 07:00:45.061
3431 20160810192953887_00029 CHF 6.550000 EUR 6.000000 0.916031 1.091666667 2016-08-10 19:29:53.887
3430 20160810204650104_00033 GBP 36.360000 EUR 42.320000 1.163916 0.859168242 2016-08-10 20:46:50.104
3429 20160810204728382_00037 GBP 5.120000 EUR 5.960000 1.164062 0.859060403 2016-08-10 20:47:28.382
3428 20160810204815531_00041 GBP 0.840000 EUR 0.970000 1.154762 0.865979381 2016-08-10 20:48:15.531
3427 20160810205531504_00045 EUR 49.250000 CHF 53.670000 1.089746 0.917644867 2016-08-10 20:55:31.504
... ... ... ... ... ... ... ... ...
29 20161130074238672_00561 BTC 0.005380 CHF 4.000000 743.571800 0.001345 2016-11-30 07:40:48.000
28 20161130075628094_00577 BTC 0.001340 CHF 1.000000 745.712200 0.001341 2016-11-30 07:55:12.000
27 20161130075216280_00573 BTC 0.001340 CHF 1.000000 744.967700 0.001342 2016-11-30 07:55:12.000
26 20161130074936699_00569 BTC 0.001340 CHF 1.000000 744.790200 0.001343 2016-11-30 07:55:12.000
25 20161130080554853_00589 BTC 0.001340 CHF 1.000000 746.263100 0.00134 2016-11-30 08:09:36.000
24 20161130080458482_00585 LKK 11820.700000 BTC 0.740000 0.000063 15973.93 2016-11-30 08:09:36.000
23 20161130083041294_00593 BTC 2.010960 CHF 1500.030000 745.928600 0.001341 2016-11-30 08:24:00.000
22 20161130085025127_00597 LKK 3214.000000 BTC 0.201570 0.000063 15944.83 2016-11-30 08:52:48.000
21 20161130100131181_00601 CHF 1000.000000 BTC 1.338905 0.001339 746.879 2016-11-30 10:04:48.000
20 20161130105249395_00609 BTC 0.002670 CHF 2.000000 748.906600 0.001335 2016-11-30 10:48:00.000
19 20161130104720455_00605 CHF 500.000000 BTC 0.667538 0.001335 749.021 2016-11-30 10:48:00.000
18 20161130105936639_00613 BTC 0.001330 CHF 1.000000 749.417300 0.001334 2016-11-30 11:02:24.000
17 20161130111104335_00617 BTC 0.667660 CHF 500.000000 748.882000 0.001335 2016-11-30 11:16:48.000
16 20161130111638866_00621 BTC 0.389420 LKK 6200.000000 15921.170000 6.28E-05 2016-11-30 11:16:48.000
15 20161130113801120_00625 BTC 1.334780 CHF 1000.300000 749.414100 0.001334 2016-11-30 11:31:12.000
14 20161130132315882_00633 BTC 8.000000 USD 5935.440000 741.930000 0.001348 2016-11-30 13:26:24.000
13 20161130135518213_00637 EUR 50.000000 USD 53.190000 1.063800 0.940026 2016-11-30 13:55:12.000
11 20161130144022900_00641 BTC 0.082430 EUR 57.680000 699.766200 0.001429 2016-11-30 14:38:24.000
12 20161130144113198_00645 BTC 0.654780 USD 485.660000 741.715200 0.001348 2016-11-30 14:38:24.000
10 20161130145153155_00657 USD 6421.100000 BTC 8.676224 0.001351 740.08 2016-11-30 14:52:48.000
9 20161130144654709_00649 LKK 2554.000000 USD 117.040000 0.045826 21.8216 2016-11-30 14:52:48.000
8 20161130145126403_00653 USD 117.000000 BTC 0.158091 0.001351 740.08 2016-11-30 14:52:48.000
7 20161130153436533_00661 BTC 0.436840 LKK 7000.000000 16024.260000 6.24E-05 2016-11-30 15:36:00.000
6 20161130155604648_00665 BTC 0.275000 LKK 4405.200000 16018.910000 6.24E-05 2016-11-30 15:50:24.000
5 20161130160615599_00669 BTC 8.676220 USD 6429.160000 741.009000 0.00135 2016-11-30 16:04:48.000
4 20161130160706923_00677 BTC 0.760000 LKK 12190.080000 16039.580000 6.23E-05 2016-11-30 16:04:48.000
3 20161130161627773_00681 USD 6429.160000 BTC 8.682891 0.001351 740.44 2016-11-30 16:19:12.000
2 20161130162914585_00685 BTC 8.682890 USD 6434.360000 741.038900 0.001349 2016-11-30 16:33:36.000
1 20161130163512814_00689 BTC 1.240000 LKK 19916.630000 16061.800000 6.23E-05 2016-11-30 16:33:36.000
0 20161130164852233_00697 USD 6434.360000 BTC 8.683581 0.001350 740.98 2016-11-30 16:48:00.000

3457 rows × 8 columns


In [10]:
def convert_milli_hr(x):
    x = float(x)
    x = x / 1000
    seconds = x % 60
    x /= 60
    minutes = x % 60
    x /= 60
    hours = x % 24
    x /= 24
    days = x
    return hours
#calculate the average transaction time
time_arr = []
for log in transaction_log:
    t1_ms = (log["start_time"].hour*60*60 + log["start_time"].minute*60 + log["start_time"].second)*1000 + log["start_time"].microsecond
    t2_ms = (log["transaction_time"].hour*60*60 + log["transaction_time"].minute*60 + log["transaction_time"].second)*1000 + log["transaction_time"].microsecond
    diff=t2_ms-t1_ms
    if diff>0:
        time_arr.append(diff)
np_time_arr = np.array(time_arr)
    
print "the average trade wait time for all transactions (without market maker) in milliseconds: "+ str(np.mean(np_time_arr))
print "the median is :" + str(np.median(np_time_arr))

avg_time_dict={}
for log in transaction_log:
    tu = ()
    if log["trader1_buy"]> log["trader1_sell"]:
        tu = (log["trader1_buy"], log["trader1_sell"])  
    else:
        tu =(log["trader1_sell"], log["trader1_buy"])
    if log["transaction_time"].month not in avg_time_dict or avg_time_dict[log["transaction_time"].month] is None:
        avg_time_dict[log["transaction_time"].month] = {}
    if tu not in avg_time_dict[log["transaction_time"].month] or avg_time_dict[log["transaction_time"].month][tu] is None:
        avg_time_dict[log["transaction_time"].month][tu] = []
    t1_ms = (log["start_time"].hour*60*60 + log["start_time"].minute*60 + log["start_time"].second)*1000 + log["start_time"].microsecond/1000
    t2_ms = (log["transaction_time"].hour*60*60 + log["transaction_time"].minute*60 + log["transaction_time"].second)*1000 + log["transaction_time"].microsecond/1000
    #if t2_ms-t1_ms>0:
    avg_time_dict[log["transaction_time"].month][tu].append(convert_milli_hr(t2_ms-t1_ms)) 
    
result=[]
for month in avg_time_dict:
    for key in avg_time_dict[month]:
        if len(avg_time_dict[month][key])>0:
            arr = np.array(avg_time_dict[month][key])
            #if month == 11:
                #print key
                #print len(avg_time_dict[month][key])
            key_new = ()
            tmp = []
            if key[0]<key[1]:
                key_new=(key[1],key[0])
            else:
                key_new = key
            tmp.append(key_new)
            tmp.append(month)
            tmp.append(round(np.mean(arr),2))
            tmp.append(round(np.std(arr), 2))
            tmp.append(round(np.median(arr),2))
            tmp.append(len(avg_time_dict[month][key]))
            result.append(tmp)
            #print "the average trade time for "+str(key)+" (without the market maker) in milliseconds is " + str(avg) + " standard deviation is: "+str(std)+" the number of observation is: " + str(len(avg_time_dict[key]))
        else: 
            #print "None enough data for " + str(key)
            pass
header_list = ["Currency Pair", "Month", "Trader Wait Time","Std. Deviation", "Median","Observations"]
df_result = pd.DataFrame(result, columns=header_list)
df_result = df_result.sort(['Currency Pair', 'Month'], ascending=False)
df_result = df_result.reset_index(drop=True)
print df_result.to_latex()
df_result.to_csv("python_csv/hypo_trade_interaction_tim.csv")


the average trade wait time for all transactions (without market maker) in milliseconds: 13308945.4306
the median is :5184000.0
\begin{tabular}{llrrrrr}
\toprule
{} &     Currency Pair &  Month &  Trader Wait Time &  Std. Deviation &  Median &  Observations \\
\midrule
0  &        (USD, LKK) &     11 &              4.77 &            4.14 &    3.36 &             9 \\
1  &        (USD, LKK) &     10 &              8.42 &            6.23 &    6.98 &            28 \\
2  &        (USD, LKK) &      9 &              5.66 &            4.63 &    4.76 &            25 \\
3  &        (USD, LKK) &      8 &             10.67 &            3.65 &   10.67 &             2 \\
4  &        (USD, GBP) &     10 &             10.18 &            0.00 &   10.18 &             1 \\
5  &        (USD, EUR) &     11 &              7.92 &            4.80 &    7.92 &             2 \\
6  &        (USD, EUR) &     10 &              5.89 &            0.00 &    5.89 &             1 \\
7  &        (USD, EUR) &      9 &              0.03 &            0.02 &    0.03 &             2 \\
8  &        (USD, EUR) &      8 &              1.65 &            0.22 &    1.58 &             3 \\
9  &        (USD, CHF) &     11 &              3.12 &            2.76 &    2.64 &             3 \\
10 &        (USD, CHF) &     10 &              5.78 &            5.72 &    5.78 &             2 \\
11 &        (USD, CHF) &      9 &              4.13 &            3.17 &    3.37 &             5 \\
12 &        (USD, CHF) &      8 &              8.62 &            3.81 &    9.23 &             4 \\
13 &        (USD, BTC) &     11 &              0.88 &            1.75 &    0.24 &           977 \\
14 &        (USD, BTC) &     10 &              6.59 &            4.74 &    5.06 &            28 \\
15 &        (USD, BTC) &      9 &              5.47 &            4.12 &    4.53 &            13 \\
16 &        (USD, BTC) &      8 &              2.42 &            2.62 &    1.46 &             6 \\
17 &        (LKK, JPY) &      9 &              5.13 &            3.16 &    3.47 &             3 \\
18 &        (LKK, GBP) &     11 &              0.48 &            0.24 &    0.48 &             2 \\
19 &        (LKK, GBP) &     10 &              1.98 &            2.55 &    1.18 &             9 \\
20 &        (LKK, GBP) &      9 &              1.82 &            0.00 &    1.82 &             1 \\
21 &        (LKK, EUR) &     11 &              3.84 &            6.24 &    0.24 &             4 \\
22 &        (LKK, EUR) &     10 &              4.11 &            2.74 &    3.33 &             9 \\
23 &        (LKK, EUR) &      9 &              7.37 &            5.74 &    8.07 &             3 \\
24 &        (LKK, EUR) &      8 &              2.72 &            0.00 &    2.72 &             1 \\
25 &        (LKK, CHF) &     11 &              1.88 &            3.01 &    0.12 &            12 \\
26 &        (LKK, CHF) &     10 &              6.58 &            5.46 &    4.93 &            41 \\
27 &        (LKK, CHF) &      9 &              2.61 &            2.23 &    1.52 &             7 \\
28 &        (LKK, CHF) &      8 &             13.20 &            0.00 &   13.20 &             1 \\
29 &        (LKK, BTC) &     11 &              6.37 &            5.00 &    5.04 &           140 \\
30 &        (LKK, BTC) &     10 &              6.07 &            5.35 &    4.52 &           222 \\
31 &        (LKK, BTC) &      9 &              4.65 &            5.17 &    3.07 &            91 \\
32 &        (LKK, BTC) &      8 &              3.38 &            3.07 &    2.57 &             3 \\
33 &        (JPY, EUR) &      9 &             12.15 &            0.00 &   12.15 &             1 \\
34 &        (JPY, EUR) &      8 &              0.48 &            0.00 &    0.48 &             1 \\
35 &        (JPY, CHF) &      9 &              0.71 &            0.24 &    0.61 &             3 \\
36 &        (JPY, BTC) &     10 &             13.36 &            0.00 &   13.36 &             1 \\
37 &        (JPY, BTC) &      9 &             16.72 &            0.00 &   16.72 &             1 \\
38 &  (Germany30, EUR) &     10 &              0.53 &            0.44 &    0.53 &             2 \\
39 &        (GBP, EUR) &     10 &              3.73 &            0.00 &    3.73 &             1 \\
40 &        (GBP, CHF) &     11 &              0.00 &            0.00 &    0.00 &             1 \\
41 &        (GBP, CHF) &     10 &              0.83 &            0.81 &    0.83 &             2 \\
42 &        (GBP, BTC) &     11 &             12.96 &            0.00 &   12.96 &             1 \\
43 &        (GBP, BTC) &     10 &              3.71 &            4.39 &    1.22 &             3 \\
44 &        (GBP, BTC) &      9 &              0.01 &            0.00 &    0.01 &             1 \\
45 &        (EUR, CHF) &     11 &             18.48 &            0.00 &   18.48 &             1 \\
46 &        (EUR, CHF) &     10 &              5.90 &            4.17 &    3.37 &             3 \\
47 &        (EUR, CHF) &      9 &             12.34 &            0.37 &   12.34 &             2 \\
48 &        (EUR, CHF) &      8 &              3.90 &            4.53 &    1.43 &             3 \\
49 &        (EUR, BTC) &     10 &              3.58 &            3.88 &    1.79 &            40 \\
50 &        (EUR, BTC) &      9 &              0.28 &            0.00 &    0.28 &             1 \\
51 &        (EUR, BTC) &      8 &              1.79 &            1.74 &    1.79 &             2 \\
52 &        (CHF, BTC) &     11 &              4.59 &            3.74 &    4.32 &            82 \\
53 &        (CHF, BTC) &     10 &              7.62 &            5.32 &    7.82 &            20 \\
54 &        (CHF, BTC) &      9 &              0.74 &            0.72 &    0.74 &             2 \\
\bottomrule
\end{tabular}

/Library/Python/2.7/site-packages/ipykernel/__main__.py:68: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)

In [11]:
#transaction_log

In [12]:
print "Total Number of Trades (without the market maker): " + str(len(transaction_log))
header_count_list = ["Currency Pair", "Total number of trades","Percentile"]
result_count=[]
#get the number of trades per currency pair
count_dict = {}
for log in transaction_log:
    tu = ()
    if log["trader1_buy"]<log["trader1_sell"]:
        tu =(log["trader1_sell"],log["trader1_buy"])
    else:
        tu =(log["trader1_buy"],log["trader1_sell"])   
    if tu not in count_dict or count_dict[tu] is None:
        count_dict[tu]=0
    count_dict[tu]+=1
for key in count_dict:
    tmp = []
    tmp.append(key)
    tmp.append(count_dict[key])
    tmp.append(round(float(count_dict[key])/len(transaction_log),4)) 
    result_count.append(tmp)
    #print "Total number of trades (without the market maker) for "+str(key)+" is:" + str(count_dict[key])
df_count = pd.DataFrame(result_count, columns=header_count_list)
df_count = df_count.sort('Currency Pair', ascending=False)
df_count = df_count.reset_index(drop=True)
print df_count.to_latex()
df_count.to_csv("python_csv/trade_summary.csv")


Total Number of Trades (without the market maker): 1834
\begin{tabular}{llrr}
\toprule
{} &     Currency Pair &  Total number of trades &  Percentile \\
\midrule
0  &        (USD, LKK) &                      64 &      0.0349 \\
1  &        (USD, GBP) &                       1 &      0.0005 \\
2  &        (USD, EUR) &                       8 &      0.0044 \\
3  &        (USD, CHF) &                      14 &      0.0076 \\
4  &        (USD, BTC) &                    1024 &      0.5583 \\
5  &        (LKK, JPY) &                       3 &      0.0016 \\
6  &        (LKK, GBP) &                      12 &      0.0065 \\
7  &        (LKK, EUR) &                      17 &      0.0093 \\
8  &        (LKK, CHF) &                      61 &      0.0333 \\
9  &        (LKK, BTC) &                     456 &      0.2486 \\
10 &        (JPY, EUR) &                       2 &      0.0011 \\
11 &        (JPY, CHF) &                       3 &      0.0016 \\
12 &        (JPY, BTC) &                       2 &      0.0011 \\
13 &  (Germany30, EUR) &                       2 &      0.0011 \\
14 &        (GBP, EUR) &                       1 &      0.0005 \\
15 &        (GBP, CHF) &                       3 &      0.0016 \\
16 &        (GBP, BTC) &                       5 &      0.0027 \\
17 &        (EUR, CHF) &                       9 &      0.0049 \\
18 &        (EUR, BTC) &                      43 &      0.0234 \\
19 &        (CHF, BTC) &                     104 &      0.0567 \\
\bottomrule
\end{tabular}

/Library/Python/2.7/site-packages/ipykernel/__main__.py:23: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)

In [13]:
#calculate the average bid ask spread (Without the Market Maker)
avg_spread={}
avg_spread_mid={}
for log in transaction_log:
    tmp=()
    if log['trader1_buy']<log['trader1_sell']:
        tmp=(log['trader1_buy'], log['trader1_sell'])
    else:
        tmp=(log['trader1_sell'], log['trader1_buy'])
    spread=abs(log['ask']-log['bid'])
    if tmp not in avg_spread or avg_spread[tmp] is None:
        avg_spread[tmp] = []
    if tmp not in avg_spread_mid or avg_spread_mid[tmp] is None:
        avg_spread_mid[tmp] = []
    avg_spread[tmp].append(spread)
    if log["price"]!=0:
        avg_spread_mid[tmp].append(spread/log["price"])
header_list_spread = ["Currency Pair", "Bid-Ask Spread", "Standard Deviation", "Observations"]
result_spread = []
for key in avg_spread: 
    key_new = ()
    tmp = []
    arr = np.array(avg_spread[key])
    ave = np.mean(arr)
    std = np.std(arr)
    if key[0]<key[1]:
        key_new=(key[1],key[0])
    else:
        key_new = key
    tmp.append(key_new)
    tmp.append(round(ave,6))
    tmp.append(round(std,6))
    tmp.append(len(avg_spread[key]))
    result_spread.append(tmp)
df_result_spread = pd.DataFrame(result_spread, columns=header_list_spread)
df_result_spread = df_result_spread.sort('Currency Pair', ascending=False)
df_result_spread = df_result_spread.reset_index(drop=True)
    
header_list_spread_mid = ["Currency Pair", "Bid-Ask Spread/Mid Price", "Standard Deviation", "Observations"] 
result_spread_mid=[]
for key in avg_spread_mid:
    key_new = ()
    tmp = []
    arr_ = np.array(avg_spread_mid[key])
    ave_ = np.mean(arr_) * 10000
    std_ = np.std(arr_)
    if key[0]<key[1]:
        key_new=(key[1],key[0])
    else:
        key_new = key
    tmp.append(key_new)
    tmp.append(round(ave_,6))
    tmp.append(round(std_,6))
    tmp.append(len(avg_spread_mid[key]))
    result_spread_mid.append(tmp)
df_result_spread_mid = pd.DataFrame(result_spread_mid, columns=header_list_spread_mid)
df_result_spread_mid = df_result_spread_mid.sort('Currency Pair', ascending=False)
df_result_spread_mid = df_result_spread_mid.reset_index(drop=True)


/Library/Python/2.7/site-packages/ipykernel/__main__.py:36: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
/Library/Python/2.7/site-packages/ipykernel/__main__.py:57: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)

In [14]:
print "*******WITHOUT MARKET MAKER MEASURE*******"
print df_result_spread.to_latex()
df_result_spread.to_csv("without_MM_ave_spread.csv")


*******WITHOUT MARKET MAKER MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} &     Currency Pair &  Bid-Ask Spread &  Standard Deviation &  Observations \\
\midrule
0  &        (USD, LKK) &        0.000768 &            0.001749 &            64 \\
1  &        (USD, GBP) &        0.007391 &            0.000000 &             1 \\
2  &        (USD, EUR) &        0.005918 &            0.007241 &             8 \\
3  &        (USD, CHF) &        0.027887 &            0.084851 &            14 \\
4  &        (USD, BTC) &        2.599197 &           11.121981 &          1024 \\
5  &        (LKK, JPY) &        0.000468 &            0.000244 &             3 \\
6  &        (LKK, GBP) &        2.634538 &            6.350860 &            12 \\
7  &        (LKK, EUR) &        2.846734 &            5.591686 &            17 \\
8  &        (LKK, CHF) &        1.678506 &            4.665309 &            61 \\
9  &        (LKK, BTC) &      187.490187 &          825.778980 &           456 \\
10 &        (JPY, EUR) &        1.749906 &            0.732749 &             2 \\
11 &        (JPY, CHF) &        0.038182 &            0.020088 &             3 \\
12 &        (JPY, BTC) &      114.268225 &           81.673685 &             2 \\
13 &  (Germany30, EUR) &        0.000000 &            0.000000 &             2 \\
14 &        (GBP, EUR) &        0.000000 &            0.000000 &             1 \\
15 &        (GBP, CHF) &        0.003582 &            0.003219 &             3 \\
16 &        (GBP, BTC) &       11.558638 &           15.314320 &             5 \\
17 &        (EUR, CHF) &        0.033715 &            0.080489 &             9 \\
18 &        (EUR, BTC) &        7.843314 &           25.418067 &            43 \\
19 &        (CHF, BTC) &        3.861814 &            4.076568 &           104 \\
\bottomrule
\end{tabular}


In [15]:
print "*******WITHOUT MARKET MAKER MEASURE*******"
print df_result_spread_mid.to_latex()
df_result_spread_mid.to_csv("without_MM_ave_spread_basis_pts.csv")


*******WITHOUT MARKET MAKER MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} &     Currency Pair &  Bid-Ask Spread/Mid Price &  Standard Deviation &  Observations \\
\midrule
0  &        (USD, LKK) &                145.509032 &            0.031944 &            64 \\
1  &        (USD, GBP) &                 60.790274 &            0.000000 &             1 \\
2  &        (USD, EUR) &                 53.140491 &            0.006517 &             8 \\
3  &        (USD, CHF) &                243.742386 &            0.072632 &            14 \\
4  &        (USD, BTC) &                 39.318166 &            0.021757 &          1024 \\
5  &        (LKK, JPY) &                 24.495258 &            0.001281 &             3 \\
6  &        (LKK, GBP) &               1528.555248 &            0.417395 &            12 \\
7  &        (LKK, EUR) &               1888.683958 &            0.440394 &            17 \\
8  &        (LKK, CHF) &               1398.332195 &            0.446645 &            61 \\
9  &        (LKK, BTC) &                130.699477 &            0.054153 &           456 \\
10 &        (JPY, EUR) &                153.710017 &            0.006379 &             2 \\
11 &        (JPY, CHF) &                  3.682977 &            0.000194 &             3 \\
12 &        (JPY, BTC) &                 17.548062 &            0.001233 &             2 \\
13 &  (Germany30, EUR) &                  7.165744 &            0.000616 &             2 \\
14 &        (GBP, EUR) &                  0.000000 &            0.000000 &             1 \\
15 &        (GBP, CHF) &                 45.027692 &            0.004044 &             3 \\
16 &        (GBP, BTC) &                210.533517 &            0.026392 &             5 \\
17 &        (EUR, CHF) &                415.429151 &            0.101142 &             9 \\
18 &        (EUR, BTC) &                131.077892 &            0.040661 &            43 \\
19 &        (CHF, BTC) &                 53.215690 &            0.005642 &           104 \\
\bottomrule
\end{tabular}


In [16]:
#output the transaction log to file
with open("transaction_log.json", 'wb') as outfile:
    json.dump(transaction_log_json, outfile)

In [17]:
'''The Roll Measure to infer Bid Ask spread'''
#BTCE-USDBTC.csv
#the dataframe for the bitcoin data
df_btcusd = pd.read_csv("BTCE-USDBTC.csv")
#convert the Date to date object
df_btcusd["Date"]=pd.to_datetime(df_btcusd.Date)
#reinitialize the df_orderbook - the data has been changed since last time
df_orderbook=df.values.tolist()
def filter_direction(df_orderbook, freq):
    '''
    The function to set the trades to a uniform direction
    '''
    #the dictionary for tuple currency pairs and price arrays
    price_dict = {}
    for row in df_orderbook:
        #sell USD, buy BTC
        if row[1]=='USD' and row[3]=='BTC':
            tuple_tmp = ('BTC', 'USD')
            if tuple_tmp not in price_dict or price_dict[tuple_tmp] is None:
                price_dict[tuple_tmp] = {}
            try:
                if freq == "daily":
                    if row[7].date() not in price_dict[tuple_tmp] or price_dict[tuple_tmp][row[7].date()] is None:
                        price_dict[tuple_tmp][row[7].date()] = []
                    price_dict[tuple_tmp][row[7].date()].append(float(row[6])) #InvPrice, Time
                elif freq == "monthly":
                    if row[7].month not in price_dict[tuple_tmp] or price_dict[tuple_tmp][row[7].month] is None:
                        price_dict[tuple_tmp][row[7].month] = []
                    price_dict[tuple_tmp][row[7].month].append(float(row[6]))
                elif freq == "weekly":
                    week = ""
                    if row[7].day < 7:
                        week = str(row[7].month)+"_"+str(1)
                    elif row[7].day >=7 and row[7].day < 14:
                        week = str(row[7].month)+"_"+str(2)
                    elif row[7].day >=14 and row[7].day < 21:
                        week = str(row[7].month)+"_"+str(3)
                    elif row[7].day >=21 and row[7].day <= 31:
                        week = str(row[7].month)+"_"+str(4) 
                    if week not in price_dict[tuple_tmp] or price_dict[tuple_tmp][week] is None:
                        price_dict[tuple_tmp][week] = []
                    price_dict[tuple_tmp][week].append(float(row[6])) #InvPrice   
                elif freq == "all":
                    if 'all' not in price_dict[tuple_tmp] or price_dict[tuple_tmp]['all'] is None:
                        price_dict[tuple_tmp]['all'] = []
                    price_dict[tuple_tmp]['all'].append(float(row[6]))
            except ValueError:
                pass
        elif row[1]=='BTC' and row[3]=='USD':
            tuple_tmp = ('BTC', 'USD')
            if tuple_tmp not in price_dict or price_dict[tuple_tmp] is None:
                price_dict[tuple_tmp] = {}
            try:
                if freq == "daily":
                    if row[7].date() not in price_dict[tuple_tmp] or price_dict[tuple_tmp][row[7].date()] is None:
                        price_dict[tuple_tmp][row[7].date()] = []
                    price_dict[tuple_tmp][row[7].date()].append(float(row[5])) #Price, Time
                elif freq == "monthly":
                    if row[7].month not in price_dict[tuple_tmp] or price_dict[tuple_tmp][row[7].month] is None:
                        price_dict[tuple_tmp][row[7].month] = []
                    price_dict[tuple_tmp][row[7].month].append(float(row[5]))
                elif freq == "weekly":
                    week = 0
                    if row[7].day < 7:
                        week = str(row[7].month)+"_"+str(1)
                    elif row[7].day >=7 and row[7].day < 14:
                        week = str(row[7].month)+"_"+str(2)
                    elif row[7].day >=14 and row[7].day < 21:
                        week = str(row[7].month)+"_"+str(3)
                    elif row[7].day >=21 and row[7].day <= 31:
                        week = str(row[7].month)+"_"+str(4) 
                    if week not in price_dict[tuple_tmp] or price_dict[tuple_tmp][week] is None:
                        price_dict[tuple_tmp][week] = []
                    price_dict[tuple_tmp][week].append(float(row[5])) #Price   
                elif freq == "all":
                    if 'all' not in price_dict[tuple_tmp] or price_dict[tuple_tmp]['all'] is None:
                        price_dict[tuple_tmp]['all'] = []
                    price_dict[tuple_tmp]['all'].append(float(row[5]))
            except ValueError:
                pass
        else: 
            if row[1]<row[3]:
                tuple_tmp = (row[1],  row[3])
                if tuple_tmp not in price_dict or price_dict[tuple_tmp] is None:
                    price_dict[tuple_tmp] = {}
                try:
                    if freq == "daily":
                        if row[7].date() not in price_dict[tuple_tmp] or price_dict[tuple_tmp][row[7].date()] is None:
                            price_dict[tuple_tmp][row[7].date()] = []
                        price_dict[tuple_tmp][row[7].date()].append(float(row[6])) #InvPrice
                    elif freq == "monthly":
                        if row[7].month not in price_dict[tuple_tmp] or price_dict[tuple_tmp][row[7].month] is None:
                            price_dict[tuple_tmp][row[7].month] = []
                        price_dict[tuple_tmp][row[7].month].append(float(row[6]))
                    elif freq == "weekly":
                        week = 0
                        if row[7].day < 7:
                            week = str(row[7].month)+"_"+str(1)
                        elif row[7].day >=7 and row[7].day < 14:
                            week = str(row[7].month)+"_"+str(2)
                        elif row[7].day >=14 and row[7].day < 21:
                            week = str(row[7].month)+"_"+str(3)
                        elif row[7].day >=21 and row[7].day <= 31:
                            week = str(row[7].month)+"_"+str(4)      
                        if week not in price_dict[tuple_tmp] or price_dict[tuple_tmp][week] is None:
                            price_dict[tuple_tmp][week] = []
                        price_dict[tuple_tmp][week].append(float(row[6])) #InvPrice       
                    elif freq == "all":
                        if 'all' not in price_dict[tuple_tmp] or price_dict[tuple_tmp]['all'] is None:
                            price_dict[tuple_tmp]['all'] = []
                        price_dict[tuple_tmp]['all'].append(float(row[6]))
                except ValueError:
                    pass
            else:
                tuple_tmp = (row[3],  row[1])
                if tuple_tmp not in price_dict or price_dict[tuple_tmp] is None:
                    price_dict[tuple_tmp] = {}
                try:
                    if freq == "daily":
                        if row[7].date() not in price_dict[tuple_tmp] or price_dict[tuple_tmp][row[7].date()] is None:
                            price_dict[tuple_tmp][row[7].date()] = []
                        price_dict[tuple_tmp][row[7].date()].append(float(row[5])) #Price
                    elif freq == "monthly":
                        if row[7].month not in price_dict[tuple_tmp] or price_dict[tuple_tmp][row[7].month] is None:
                            price_dict[tuple_tmp][row[7].month] = []
                        price_dict[tuple_tmp][row[7].month].append(float(row[5]))
                    elif freq == "weekly":
                        week = 0
                        if row[7].day < 7:
                            week = str(row[7].month)+"_"+str(1)
                        elif row[7].day >=7 and row[7].day < 14:
                            week = str(row[7].month)+"_"+str(2)
                        elif row[7].day >=14 and row[7].day < 21:
                            week = str(row[7].month)+"_"+str(3)
                        elif row[7].day >=21 and row[7].day <= 31:
                            week = str(row[7].month)+"_"+str(4) 
                        if week not in price_dict[tuple_tmp] or price_dict[tuple_tmp][week] is None:
                            price_dict[tuple_tmp][week] = []
                        price_dict[tuple_tmp][week].append(float(row[5])) #Price
                    elif freq == "all":
                        if 'all' not in price_dict[tuple_tmp] or price_dict[tuple_tmp]['all'] is None:
                            price_dict[tuple_tmp]['all'] = []
                        price_dict[tuple_tmp]['all'].append(float(row[5]))
                except ValueError:
                    pass
    return price_dict

def avg_bench_mark(df_orderbook, freq):
    '''
    The function to calculate the benchmark as average price
    '''
    bench_price_dict = {}
    for row in df_orderbook:
        if row[1]=='USD' and row[3]=='BTC':
            tuple_tmp = ('BTC', 'USD')
            if tuple_tmp not in bench_price_dict or bench_price_dict[tuple_tmp] is None:
                bench_price_dict[tuple_tmp] = {}
            try: 
                if freq == "daily":
                    if row[7].date() not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp][row[7].date()] is None:
                        bench_price_dict[tuple_tmp][row[7].date()] = []
                    bench_price_dict[tuple_tmp][row[7].date()].append(float(row[6])) #InvPrice                    
                elif freq == "monthly":
                    if row[7].month not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp][row[7].month] is None:
                        bench_price_dict[tuple_tmp][row[7].month] = []
                    bench_price_dict[tuple_tmp][row[7].month].append(float(row[6]))                    
                elif freq == "weekly":
                    week = 0
                    if row[7].day < 7:
                        week = str(row[7].month)+"_"+str(1)
                    elif row[7].day >=7 and row[7].day < 14:
                        week = str(row[7].month)+"_"+str(2)
                    elif row[7].day >=14 and row[7].day < 21:
                        week = str(row[7].month)+"_"+str(3)
                    elif row[7].day >=21 and row[7].day <= 31:
                        week = str(row[7].month)+"_"+str(4) 
                    if week not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp][week] is None:
                        bench_price_dict[tuple_tmp][week] = []
                    bench_price_dict[tuple_tmp][week].append(float(row[6])) #InvPrice                    
                elif freq == "all":
                    if 'all' not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp]['all'] is None:
                        bench_price_dict[tuple_tmp]['all'] = []
                    bench_price_dict[tuple_tmp]['all'].append(float(row[6]))                       
            except ValueError:
                pass
        elif row[1]=='BTC' and row[3]=='USD':
            tuple_tmp = ('BTC', 'USD')
            if tuple_tmp not in bench_price_dict or bench_price_dict[tuple_tmp] is None:
                bench_price_dict[tuple_tmp] = {}
            try: 
                if freq == "daily":
                    if row[7].date() not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp][row[7].date()] is None:
                        bench_price_dict[tuple_tmp][row[7].date()] = []
                    bench_price_dict[tuple_tmp][row[7].date()].append(float(row[5])) #Price                    
                elif freq == "monthly":
                    if row[7].month not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp][row[7].month] is None:
                        bench_price_dict[tuple_tmp][row[7].month] = []
                    bench_price_dict[tuple_tmp][row[7].month].append(float(row[5]))                    
                elif freq == "weekly":
                    week = 0
                    if row[7].day < 7:
                        week = str(row[7].month)+"_"+str(1)
                    elif row[7].day >=7 and row[7].day < 14:
                        week = str(row[7].month)+"_"+str(2)
                    elif row[7].day >=14 and row[7].day < 21:
                        week = str(row[7].month)+"_"+str(3)
                    elif row[7].day >=21 and row[7].day <= 31:
                        week = str(row[7].month)+"_"+str(4) 
                    if week not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp][week] is None:
                        bench_price_dict[tuple_tmp][week] = []
                    bench_price_dict[tuple_tmp][week].append(float(row[5])) #Price                    
                elif freq == "all":
                    if 'all' not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp]['all'] is None:
                        bench_price_dict[tuple_tmp]['all'] = []
                    bench_price_dict[tuple_tmp]['all'].append(float(row[5]))                       
            except ValueError:
                pass            
        if row[1]<row[3]:
            tuple_tmp = (row[1],  row[3])
            if tuple_tmp not in bench_price_dict or bench_price_dict[tuple_tmp] is None:
                bench_price_dict[tuple_tmp] = {}
            try: 
                if freq == "daily":
                    if row[7].date() not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp][row[7].date()] is None:
                        bench_price_dict[tuple_tmp][row[7].date()] = []
                    bench_price_dict[tuple_tmp][row[7].date()].append(float(row[6])) #InvPrice                    
                elif freq == "monthly":
                    if row[7].month not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp][row[7].month] is None:
                        bench_price_dict[tuple_tmp][row[7].month] = []
                    bench_price_dict[tuple_tmp][row[7].month].append(float(row[6]))                    
                elif freq == "weekly":
                    week = 0
                    if row[7].day < 7:
                        week = str(row[7].month)+"_"+str(1)
                    elif row[7].day >=7 and row[7].day < 14:
                        week = str(row[7].month)+"_"+str(2)
                    elif row[7].day >=14 and row[7].day < 21:
                        week = str(row[7].month)+"_"+str(3)
                    elif row[7].day >=21 and row[7].day <= 31:
                        week = str(row[7].month)+"_"+str(4) 
                    if week not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp][week] is None:
                        bench_price_dict[tuple_tmp][week] = []
                    bench_price_dict[tuple_tmp][week].append(float(row[6])) #InvPrice                    
                elif freq == "all":
                    if 'all' not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp]['all'] is None:
                        bench_price_dict[tuple_tmp]['all'] = []
                    bench_price_dict[tuple_tmp]['all'].append(float(row[6]))                       
            except ValueError:
                pass
        else:
            tuple_tmp = (row[3],  row[1])
            if tuple_tmp not in bench_price_dict or bench_price_dict[tuple_tmp] is None:
                bench_price_dict[tuple_tmp] = {}
            try: 
                if freq == "daily":
                    if row[7].date() not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp][row[7].date()] is None:
                        bench_price_dict[tuple_tmp][row[7].date()] = []
                    bench_price_dict[tuple_tmp][row[7].date()].append(float(row[5])) #Price                    
                elif freq == "monthly":
                    if row[7].month not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp][row[7].month] is None:
                        bench_price_dict[tuple_tmp][row[7].month] = []
                    bench_price_dict[tuple_tmp][row[7].month].append(float(row[5]))                    
                elif freq == "weekly":
                    week = 0
                    if row[7].day < 7:
                        week = str(row[7].month)+"_"+str(1)
                    elif row[7].day >=7 and row[7].day < 14:
                        week = str(row[7].month)+"_"+str(2)
                    elif row[7].day >=14 and row[7].day < 21:
                        week = str(row[7].month)+"_"+str(3)
                    elif row[7].day >=21 and row[7].day <= 31:
                        week = str(row[7].month)+"_"+str(4) 
                    if week not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp][week] is None:
                        bench_price_dict[tuple_tmp][week] = []
                    bench_price_dict[tuple_tmp][week].append(float(row[5])) #Price                    
                elif freq == "all":
                    if 'all' not in bench_price_dict[tuple_tmp] or bench_price_dict[tuple_tmp]['all'] is None:
                        bench_price_dict[tuple_tmp]['all'] = []
                    bench_price_dict[tuple_tmp]['all'].append(float(row[5]))                       
            except ValueError:
                pass
    result_dict = {}
    for key in bench_price_dict:
        if key not in result_dict or result_dict[key] is None: 
            result_dict[key]={}
        for k in bench_price_dict[key]:
            if k not in result_dict[key] or result_dict[key][k] is None:
                result_dict[key][k] = 0
            arr = np.array(bench_price_dict[key][k])
            result_dict[key][k] = np.average(arr)
    return result_dict
        
def bench_mark(ref_btc_df, freq):
    '''
    The function to calculate the benchmark for BTC/USD Pair 
    '''
    #convert ref_btc_df to a list 
    ref_btc_df_list_tmp=ref_btc_df.values.tolist()
    ref_btc_df_dict = {}
    #get the set of reference USD/BTC prices
    for row in ref_btc_df_list_tmp:
        ref_btc_df_dict[row[0]]=row[4]
    #provide a benchmark for price_dict
    bench_price_dict = {}   
    if freq == "daily":
        for key in ref_btc_df_dict:
            bench_price_dict[key.date()]=ref_btc_df_dict[key]
    elif freq == "monthly":
        ave = {}
        for key in ref_btc_df_dict:
            if key.month not in ave or ave[key.month] is None:
                ave[key.month] = []
            ave[key.month].append(ref_btc_df_dict[key])
        for key in ave:
            arr = ave[key]
            bench_price_dict[key] = sum(arr)/len(arr)
    elif freq == "weekly":
        ave = {}
        for key in ref_btc_df_dict:
            week = 0
            if key.day < 7:
                week = str(key.month)+"_"+str(1)
            elif key.day >= 7 and key.day < 14:
                week = str(key.month)+"_"+str(2)
            elif key.day >= 14 and key.day < 21:
                week = str(key.month)+"_"+str(3)
            elif key.day >=21 and key.day <= 31:
                week = str(key.month)+"_"+str(4)
            if week not in ave or ave[week] is None:
                ave[week]=[]
            ave[week].append(ref_btc_df_dict[key])
        for key in ave: 
            arr = ave[key]
            bench_price_dict[key] = sum(arr)/len(arr)
    elif freq == "all":
        ave = {}
        ave["all"]=[]
        for key in ref_btc_df_dict:
            ave["all"].append(ref_btc_df_dict[key])
        for key in ave:
            arr = ave[key]
            bench_price_dict[key] = sum(arr)/len(arr)
    return bench_price_dict

def autocovariance(Xi, N, k, Xs, Xs_):
    '''
    To calculate the autocovariance
    '''
    N=float(N)
    autoCov = 0
    for i in np.arange(1, N-k):
        autoCov += ((Xi[i+k])-Xs_)*(Xi[i]-Xs)
    return float((1/(N-1)))*autoCov

def calculate_roll(df_orderbook, ref_btc_df, freq, nov):
    '''
    Function to calculate the roll measure according to the frequency passed in.
    When nov is True, this script calculates the Novemeber data only. Because the Nov. data is speical in the way
    that the data volume is large. 
    '''
    bench_dict = bench_mark(ref_btc_df, freq) #the benchmark for usd/btc pair
    avg_bench_dict = avg_bench_mark(df_orderbook, freq)
    price_dict = filter_direction(df_orderbook, freq)
    header_list_spread_rolls = ["Currency Pair("+freq+")", "Average Spread", "Standard Deviation", "Observations"] 
    result_spread_rolls=[]
    header_list_spread_noon = ["Currency Pair("+freq+")", "Average Spread/Noon Rate (Basis Points)", "Standard Deviation", "Observations"]
    result_spread_rolls_noon = []
    header_list_roll_avg = ["Currency Pair("+freq+")", "Roll/Ave. Price", "Standard Deviation", "Observations"]
    result_roll_avg = []
    for key in price_dict:
        #to calculate the average value
        ave = []
        #to calculate the average spread/noon rate
        ave_noon = []
        #average spread / average rate 
        roll_ave = []
        #print "*****"+str(key)+" pair starts"+"*****"
        pair_dict = price_dict[key]
        #calcualte the roll measure based on the daily data
        for pair_key in pair_dict:
            #Prof. Park's speical request to calcualte Nov results only 
            if freq == "daily" and nov == True:
                if pair_key.month !=11:
                    continue
            #print "***"+str(pair_key)+" starts ***"
            if len(pair_dict[pair_key])>4:
                price_arr = pair_dict[pair_key]
                #calculate the price delta 
                price_delta_tmp = []
                j=0
                for price in price_arr:
                    if j+1<len(price_arr):
                        tmp_delta = price_arr[j+1]-price_arr[j]
                        price_delta_tmp.append(tmp_delta)
                        j+=1
                Xi = np.array(price_delta_tmp[1:])
                Xi_ = np.array(price_delta_tmp[2:])
                N = np.size(Xi)
                k = 1
                Xs = np.average(Xi)
                Xs_ = np.average(Xi_)
                auto_corr = autocovariance(Xi, N, k, Xs, Xs_)   
                #print "the autocorrelation is: "
                #print auto_corr
                if auto_corr<0:
                    spread = 2 * (math.sqrt(-auto_corr))
                    ave.append(spread)
                    if pair_key in bench_dict:
                        if key == ("BTC", "USD"):
                            ave_noon.append((spread/bench_dict[pair_key])*10000)
                        else:
                            ave_noon.append(0)
                    if key in avg_bench_dict and pair_key in avg_bench_dict[key]:
                        roll_ave.append((spread/avg_bench_dict[key][pair_key])*10000)
                    #print "the roll measure bid and ask spread is:"
                    #print str(spread)
                else:
                    #print "Error: "+"autocorrelation should be negative!"
                    pass
            else:
                #print "The data for "+str(pair_key)+" is not enough (less than 3) for calculation"
                pass
            #print "***"+str(pair_key)+" ends ***"
        if len(ave)>0:
            if key[0]<key[1]:
                key_new=(key[1],key[0])
            else:
                key_new = key
            tmp = []
            arr = np.array(ave)
            mean = np.mean(arr)
            std = np.std(arr)
            tmp.append(key_new)
            tmp.append(round(mean,6))
            tmp.append(round(std,6))
            tmp.append(round(len(ave),6))
            result_spread_rolls.append(tmp)
            #print str(key)+" average spread is: "+str(mean)+" the standard deviation is: "+str(std)
            tmp_noon = []
            arr_noon = np.array(ave_noon)
            mean_noon = np.mean(arr_noon)
            std_noon = np.std(arr_noon)
            tmp_noon.append(key_new)
            tmp_noon.append(round(mean_noon,6))
            tmp_noon.append(round(std_noon,6))
            tmp_noon.append(round(len(ave),6))
            result_spread_rolls_noon.append(tmp_noon)
            #roll/ave
            tmp_avg = []
            arr_avg = np.array(roll_ave)
            mean_avg = np.mean(arr_avg)
            std_avg = np.std(arr_avg)
            tmp_avg.append(key_new)
            tmp_avg.append(round(mean_avg,6))
            tmp_avg.append(round(std_avg,6))
            tmp_avg.append(round(len(ave),6))
            result_roll_avg.append(tmp_avg)
            #print str(key)+" average spread/noon rate (in basis points) is:"+str(mean_noon*10000) + " the standard deviation is: "+str(std_noon)
            #print str(key)+" "+str(len(ave))+" days used in the calculation"
        else:
            pass
            #print str(key) + " not enough data "
        #print "*****"+str(key)+" pair ends"+"*****"
    df_result_spread_rolls = pd.DataFrame(result_spread_rolls, columns=header_list_spread_rolls)
    df_result_spread_rolls = df_result_spread_rolls.sort("Currency Pair("+freq+")", ascending=False)
    df_result_spread_rolls = df_result_spread_rolls.reset_index(drop=True)
    
    df_result_spread_rolls_noon = pd.DataFrame(result_spread_rolls_noon, columns=header_list_spread_noon)
    df_result_spread_rolls_noon=df_result_spread_rolls_noon.sort("Currency Pair("+freq+")", ascending=False)
    df_result_spread_rolls_noon=df_result_spread_rolls_noon.reset_index(drop=True)
    
    df_result_spread_rolls_ave = pd.DataFrame(result_roll_avg, columns=header_list_roll_avg)
    df_result_spread_rolls_ave=df_result_spread_rolls_ave.sort("Currency Pair("+freq+")", ascending=False)
    df_result_spread_rolls_ave=df_result_spread_rolls_ave.reset_index(drop=True)
    
    return (df_result_spread_rolls, df_result_spread_rolls_noon, df_result_spread_rolls_ave)
    
def calculate_noon_rate(df_orderbook, ref_btc_df):
    '''
    Function to calculate direction * (price-noon)/noon 
    USD/BTC Pair
    '''
    #convert ref_btc_df to a list 
    ref_btc_df_list_tmp=ref_btc_df.values.tolist()
    ref_btc_df_dict = {}
    #get the set of reference USD/BTC prices
    for row in ref_btc_df_list_tmp:
        ref_btc_df_dict[row[0].date()]=float(row[4])
    #the result 
    ave = []
    for row in df_orderbook:
        date_row = row[7].date()
        #sell USD, buy BTC
        if row[1]=='USD' and row[3]=='BTC' and date_row in ref_btc_df_dict:
            direction = 1
            price = float(row[6])
            relative = direction * (price-ref_btc_df_dict[date_row])/ref_btc_df_dict[date_row]
            ave.append(relative)
        #sell BTC, buy USD
        elif row[1]=='BTC' and row[3]=='USD' and date_row in ref_btc_df_dict:
            direction = -1
            price = float(row[5])
            relative = direction * (price-ref_btc_df_dict[date_row])/ref_btc_df_dict[date_row]
            ave.append(relative)
    arr = np.array(ave)
    mean = np.mean(arr)
    std = np.std(arr)
    print "the (daily) average direction * (price-noon)/noon  for USD-BTC (in basis points) is: "+str(mean * 10000)+" the standard deviation is: "+str(std)
    
daily_roll = calculate_roll(df_orderbook, df_btcusd, "daily", False)
monthly_roll = calculate_roll(df_orderbook, df_btcusd, "monthly", False)
nov_roll = calculate_roll(df_orderbook, df_btcusd, "daily", True)
weekly_roll = calculate_roll(df_orderbook, df_btcusd, "weekly", False)
all_roll = calculate_roll(df_orderbook, df_btcusd, "all", False)
calculate_noon_rate(df_orderbook, df_btcusd)
#df_btcusd


/Library/Python/2.7/site-packages/ipykernel/__main__.py:352: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Library/Python/2.7/site-packages/ipykernel/__main__.py:465: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
/Library/Python/2.7/site-packages/ipykernel/__main__.py:469: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
/Library/Python/2.7/site-packages/ipykernel/__main__.py:473: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
the (daily) average direction * (price-noon)/noon  for USD-BTC (in basis points) is: 11.2177339994 the standard deviation is: 0.0234720651966

In [18]:
print "*******ROLLS MEASURE*******"
print daily_roll[0].to_latex()
daily_roll[0].to_csv("python_csv/daily_rolls.csv")


*******ROLLS MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(daily) &  Average Spread &  Standard Deviation &  Observations \\
\midrule
0 &           (USD, LKK) &        0.623481 &            0.761159 &          11.0 \\
1 &           (USD, BTC) &        9.535521 &           29.633419 &          40.0 \\
2 &           (LKK, EUR) &        0.122991 &            0.207823 &           4.0 \\
3 &           (LKK, CHF) &        0.088291 &            0.210252 &           7.0 \\
4 &           (LKK, BTC) &        0.000002 &            0.000006 &          62.0 \\
5 &           (JPY, CHF) &        0.000016 &            0.000000 &           1.0 \\
6 &           (GBP, EUR) &        0.006222 &            0.000000 &           1.0 \\
7 &           (EUR, BTC) &        0.000235 &            0.000395 &           8.0 \\
8 &           (CHF, BTC) &        0.000007 &            0.000007 &          12.0 \\
\bottomrule
\end{tabular}


In [19]:
print "*******ROLLS MEASURE*******"
print daily_roll[1].to_latex()
daily_roll[1].to_csv("python_csv/daily_rolls_basis_pts.csv")


*******ROLLS MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(daily) &  Average Spread/Noon Rate (Basis Points) &  Standard Deviation &  Observations \\
\midrule
0 &           (USD, LKK) &                                 0.000000 &            0.000000 &          11.0 \\
1 &           (USD, BTC) &                               152.364503 &          477.673712 &          40.0 \\
2 &           (LKK, EUR) &                                 0.000000 &            0.000000 &           4.0 \\
3 &           (LKK, CHF) &                                 0.000000 &            0.000000 &           7.0 \\
4 &           (LKK, BTC) &                                 0.000000 &            0.000000 &          62.0 \\
5 &           (JPY, CHF) &                                 0.000000 &            0.000000 &           1.0 \\
6 &           (GBP, EUR) &                                 0.000000 &            0.000000 &           1.0 \\
7 &           (EUR, BTC) &                                 0.000000 &            0.000000 &           8.0 \\
8 &           (CHF, BTC) &                                 0.000000 &            0.000000 &          12.0 \\
\bottomrule
\end{tabular}


In [20]:
print "*******ROLLS MEASURE*******"
print daily_roll[2].to_latex()
daily_roll[2].to_csv("python_csv/daily_rolls_divide_avg.csv")


*******ROLLS MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(daily) &  Roll/Ave. Price &  Standard Deviation &  Observations \\
\midrule
0 &           (USD, LKK) &       320.324724 &          393.089123 &          11.0 \\
1 &           (USD, BTC) &       300.870914 &          968.383390 &          40.0 \\
2 &           (LKK, EUR) &      5503.820420 &         8423.002682 &           4.0 \\
3 &           (LKK, CHF) &      7490.941280 &        17118.962743 &           7.0 \\
4 &           (LKK, BTC) &       288.656422 &          806.998640 &          62.0 \\
5 &           (JPY, CHF) &        16.187055 &            0.000000 &           1.0 \\
6 &           (GBP, EUR) &        54.064573 &            0.000000 &           1.0 \\
7 &           (EUR, BTC) &      1235.556608 &         2062.264657 &           8.0 \\
8 &           (CHF, BTC) &        49.554563 &           52.743640 &          12.0 \\
\bottomrule
\end{tabular}


In [21]:
print nov_roll[0].to_latex()
nov_roll[0].to_csv("python_csv/Nov_Rolls.csv")


\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(daily) &  Average Spread &  Standard Deviation &  Observations \\
\midrule
0 &           (USD, LKK) &        0.190102 &            0.000000 &           1.0 \\
1 &           (USD, BTC) &        2.575987 &            6.927764 &          27.0 \\
2 &           (LKK, EUR) &        0.482941 &            0.000000 &           1.0 \\
3 &           (LKK, CHF) &        0.001751 &            0.001192 &           2.0 \\
4 &           (LKK, BTC) &        0.000001 &            0.000001 &          22.0 \\
5 &           (CHF, BTC) &        0.000005 &            0.000005 &           9.0 \\
\bottomrule
\end{tabular}


In [22]:
print nov_roll[1].to_latex()
nov_roll[0].to_csv("python_csv/Nov_Rolls_BPS.csv")


\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(daily) &  Average Spread/Noon Rate (Basis Points) &  Standard Deviation &  Observations \\
\midrule
0 &           (USD, LKK) &                                 0.000000 &            0.000000 &           1.0 \\
1 &           (USD, BTC) &                                38.156032 &          104.339384 &          27.0 \\
2 &           (LKK, EUR) &                                 0.000000 &            0.000000 &           1.0 \\
3 &           (LKK, CHF) &                                 0.000000 &            0.000000 &           2.0 \\
4 &           (LKK, BTC) &                                 0.000000 &            0.000000 &          22.0 \\
5 &           (CHF, BTC) &                                 0.000000 &            0.000000 &           9.0 \\
\bottomrule
\end{tabular}


In [23]:
print "*******ROLLS MEASURE*******"
print monthly_roll[0].to_latex()
monthly_roll[0].to_csv("python_csv/monthly_rolls.csv")


*******ROLLS MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(monthly) &  Average Spread &  Standard Deviation &  Observations \\
\midrule
0  &             (USD, LKK) &        0.400040 &            0.398272 &           4.0 \\
1  &             (USD, GBP) &        0.001153 &            0.000000 &           1.0 \\
2  &             (USD, EUR) &        0.045263 &            0.030331 &           4.0 \\
3  &             (USD, CHF) &        0.107909 &            0.136917 &           3.0 \\
4  &             (USD, BTC) &       27.407447 &           25.848086 &           4.0 \\
5  &             (LKK, JPY) &        0.072167 &            0.050316 &           2.0 \\
6  &             (LKK, GBP) &        0.071056 &            0.100180 &           3.0 \\
7  &             (LKK, EUR) &        0.154712 &            0.211220 &           3.0 \\
8  &             (LKK, CHF) &        0.068024 &            0.094842 &           3.0 \\
9  &             (LKK, BTC) &        0.000004 &            0.000003 &           3.0 \\
10 &             (JPY, BTC) &        0.000000 &            0.000000 &           1.0 \\
11 &             (GBP, EUR) &        0.046845 &            0.044720 &           4.0 \\
12 &             (GBP, CHF) &        0.018689 &            0.000000 &           1.0 \\
13 &             (GBP, BTC) &        0.000234 &            0.000152 &           2.0 \\
14 &             (EUR, CHF) &        0.034459 &            0.034834 &           4.0 \\
15 &             (EUR, BTC) &        0.000134 &            0.000073 &           3.0 \\
16 &             (CHF, BTC) &        0.000022 &            0.000002 &           2.0 \\
\bottomrule
\end{tabular}


In [24]:
print "*******ROLLS MEASURE*******"
print monthly_roll[1].to_latex()
monthly_roll[1].to_csv("python_csv/monthly_rolls_basis_pts.csv")


*******ROLLS MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(monthly) &  Average Spread/Noon Rate (Basis Points) &  Standard Deviation &  Observations \\
\midrule
0  &             (USD, LKK) &                                 0.000000 &            0.000000 &           4.0 \\
1  &             (USD, GBP) &                                 0.000000 &            0.000000 &           1.0 \\
2  &             (USD, EUR) &                                 0.000000 &            0.000000 &           4.0 \\
3  &             (USD, CHF) &                                 0.000000 &            0.000000 &           3.0 \\
4  &             (USD, BTC) &                               440.504344 &          408.782125 &           4.0 \\
5  &             (LKK, JPY) &                                 0.000000 &            0.000000 &           2.0 \\
6  &             (LKK, GBP) &                                 0.000000 &            0.000000 &           3.0 \\
7  &             (LKK, EUR) &                                 0.000000 &            0.000000 &           3.0 \\
8  &             (LKK, CHF) &                                 0.000000 &            0.000000 &           3.0 \\
9  &             (LKK, BTC) &                                 0.000000 &            0.000000 &           3.0 \\
10 &             (JPY, BTC) &                                 0.000000 &            0.000000 &           1.0 \\
11 &             (GBP, EUR) &                                 0.000000 &            0.000000 &           4.0 \\
12 &             (GBP, CHF) &                                 0.000000 &            0.000000 &           1.0 \\
13 &             (GBP, BTC) &                                 0.000000 &            0.000000 &           2.0 \\
14 &             (EUR, CHF) &                                 0.000000 &            0.000000 &           4.0 \\
15 &             (EUR, BTC) &                                 0.000000 &            0.000000 &           3.0 \\
16 &             (CHF, BTC) &                                 0.000000 &            0.000000 &           2.0 \\
\bottomrule
\end{tabular}


In [25]:
print "*******ROLLS MEASURE*******"
print monthly_roll[2].to_latex()
monthly_roll[2].to_csv("python_csv/monthly_rolls_divide_avg.csv")


*******ROLLS MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(monthly) &  Roll/Ave. Price &  Standard Deviation &  Observations \\
\midrule
0  &             (USD, LKK) &       202.148889 &          203.232933 &           4.0 \\
1  &             (USD, GBP) &        14.610007 &            0.000000 &           1.0 \\
2  &             (USD, EUR) &       501.384555 &          339.183890 &           4.0 \\
3  &             (USD, CHF) &      1152.359960 &         1467.760023 &           3.0 \\
4  &             (USD, BTC) &       888.036646 &          829.075855 &           4.0 \\
5  &             (LKK, JPY) &       139.534983 &           97.705373 &           2.0 \\
6  &             (LKK, GBP) &      5235.990599 &         7325.585967 &           3.0 \\
7  &             (LKK, EUR) &     16359.700284 &        21516.243508 &           3.0 \\
8  &             (LKK, CHF) &     11457.020852 &        15918.131335 &           3.0 \\
9  &             (LKK, BTC) &       516.999775 &          288.809514 &           3.0 \\
10 &             (JPY, BTC) &        39.861795 &            0.000000 &           1.0 \\
11 &             (GBP, EUR) &       402.192247 &          384.927162 &           4.0 \\
12 &             (GBP, CHF) &       152.395005 &            0.000000 &           1.0 \\
13 &             (GBP, BTC) &      1106.323380 &          623.381587 &           2.0 \\
14 &             (EUR, CHF) &       312.336375 &          312.957512 &           4.0 \\
15 &             (EUR, BTC) &       738.789208 &          432.773516 &           3.0 \\
16 &             (CHF, BTC) &       134.767290 &            7.914854 &           2.0 \\
\bottomrule
\end{tabular}


In [26]:
print "*******ROLLS MEASURE*******"
print weekly_roll[0].to_latex()
weekly_roll[0].to_csv("python_csv/weekly_rolls.csv")


*******ROLLS MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(weekly) &  Average Spread &  Standard Deviation &  Observations \\
\midrule
0  &            (USD, LKK) &        0.379917 &            0.371523 &          12.0 \\
1  &            (USD, GBP) &        0.004161 &            0.000000 &           1.0 \\
2  &            (USD, EUR) &        0.032857 &            0.055868 &           7.0 \\
3  &            (USD, CHF) &        0.143955 &            0.256524 &           6.0 \\
4  &            (USD, BTC) &       20.647815 &           30.010809 &          14.0 \\
5  &            (LKK, JPY) &        0.031791 &            0.000000 &           1.0 \\
6  &            (LKK, GBP) &        0.000303 &            0.000214 &           3.0 \\
7  &            (LKK, EUR) &        0.088107 &            0.238654 &           9.0 \\
8  &            (LKK, CHF) &        0.049378 &            0.124269 &           8.0 \\
9  &            (LKK, BTC) &        0.000003 &            0.000004 &          12.0 \\
10 &            (JPY, CHF) &        0.000014 &            0.000000 &           1.0 \\
11 &            (JPY, BTC) &        0.000000 &            0.000000 &           1.0 \\
12 &            (GBP, EUR) &        0.005907 &            0.000000 &           1.0 \\
13 &            (GBP, BTC) &        0.000030 &            0.000000 &           1.0 \\
14 &            (EUR, CHF) &        0.040449 &            0.064480 &           5.0 \\
15 &            (EUR, BTC) &        0.000168 &            0.000139 &           5.0 \\
16 &            (CHF, BTC) &        0.000008 &            0.000010 &           6.0 \\
\bottomrule
\end{tabular}


In [27]:
print "*******ROLLS MEASURE*******"
print weekly_roll[1].to_latex()
weekly_roll[1].to_csv("python_csv/weekly_rolls_basis_pts.csv")


*******ROLLS MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(weekly) &  Average Spread/Noon Rate (Basis Points) &  Standard Deviation &  Observations \\
\midrule
0  &            (USD, LKK) &                                 0.000000 &            0.000000 &          12.0 \\
1  &            (USD, GBP) &                                 0.000000 &            0.000000 &           1.0 \\
2  &            (USD, EUR) &                                 0.000000 &            0.000000 &           7.0 \\
3  &            (USD, CHF) &                                 0.000000 &            0.000000 &           6.0 \\
4  &            (USD, BTC) &                               334.060293 &          489.458322 &          14.0 \\
5  &            (LKK, JPY) &                                 0.000000 &            0.000000 &           1.0 \\
6  &            (LKK, GBP) &                                 0.000000 &            0.000000 &           3.0 \\
7  &            (LKK, EUR) &                                 0.000000 &            0.000000 &           9.0 \\
8  &            (LKK, CHF) &                                 0.000000 &            0.000000 &           8.0 \\
9  &            (LKK, BTC) &                                 0.000000 &            0.000000 &          12.0 \\
10 &            (JPY, CHF) &                                 0.000000 &            0.000000 &           1.0 \\
11 &            (JPY, BTC) &                                 0.000000 &            0.000000 &           1.0 \\
12 &            (GBP, EUR) &                                 0.000000 &            0.000000 &           1.0 \\
13 &            (GBP, BTC) &                                 0.000000 &            0.000000 &           1.0 \\
14 &            (EUR, CHF) &                                 0.000000 &            0.000000 &           5.0 \\
15 &            (EUR, BTC) &                                 0.000000 &            0.000000 &           5.0 \\
16 &            (CHF, BTC) &                                 0.000000 &            0.000000 &           6.0 \\
\bottomrule
\end{tabular}


In [28]:
print "*******ROLLS MEASURE*******"
print weekly_roll[2].to_latex()
weekly_roll[2].to_csv("python_csv/weekly_rolls_divide_avg.csv")


*******ROLLS MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(weekly) &  Roll/Ave. Price &  Standard Deviation &  Observations \\
\midrule
0  &            (USD, LKK) &       191.022383 &          189.230065 &          12.0 \\
1  &            (USD, GBP) &        50.957956 &            0.000000 &           1.0 \\
2  &            (USD, EUR) &       360.193446 &          616.135261 &           7.0 \\
3  &            (USD, CHF) &      1594.731153 &         2869.926150 &           6.0 \\
4  &            (USD, BTC) &       671.206917 &          987.782121 &          14.0 \\
5  &            (LKK, JPY) &        60.975237 &            0.000000 &           1.0 \\
6  &            (LKK, GBP) &        73.339035 &           51.286945 &           3.0 \\
7  &            (LKK, EUR) &      5915.942743 &        14659.039046 &           9.0 \\
8  &            (LKK, CHF) &      6155.438333 &        14971.670776 &           8.0 \\
9  &            (LKK, BTC) &       419.115272 &          463.972090 &          12.0 \\
10 &            (JPY, CHF) &        14.407204 &            0.000000 &           1.0 \\
11 &            (JPY, BTC) &        29.140013 &            0.000000 &           1.0 \\
12 &            (GBP, EUR) &        51.566653 &            0.000000 &           1.0 \\
13 &            (GBP, BTC) &       154.194840 &            0.000000 &           1.0 \\
14 &            (EUR, CHF) &       360.040251 &          569.752992 &           5.0 \\
15 &            (EUR, BTC) &       931.796127 &          765.306637 &           5.0 \\
16 &            (CHF, BTC) &        56.908046 &           65.885048 &           6.0 \\
\bottomrule
\end{tabular}


In [29]:
print "*******ROLLS MEASURE*******"
print all_roll[0].to_latex()
all_roll[0].to_csv("python_csv/all_rolls.csv")


*******ROLLS MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(all) &  Average Spread &  Standard Deviation &  Observations \\
\midrule
0  &         (USD, LKK) &        0.702773 &                 0.0 &           1.0 \\
1  &         (USD, JPY) &        0.340601 &                 0.0 &           1.0 \\
2  &         (USD, GBP) &        0.116833 &                 0.0 &           1.0 \\
3  &         (USD, EUR) &        0.046704 &                 0.0 &           1.0 \\
4  &         (USD, CHF) &        0.142628 &                 0.0 &           1.0 \\
5  &         (USD, BTC) &       19.134828 &                 0.0 &           1.0 \\
6  &         (LKK, JPY) &        0.047369 &                 0.0 &           1.0 \\
7  &         (LKK, GBP) &        0.274759 &                 0.0 &           1.0 \\
8  &         (LKK, EUR) &        0.216060 &                 0.0 &           1.0 \\
9  &         (LKK, CHF) &        0.150449 &                 0.0 &           1.0 \\
10 &         (LKK, BTC) &        0.000004 &                 0.0 &           1.0 \\
11 &         (JPY, GBP) &        0.000143 &                 0.0 &           1.0 \\
12 &         (JPY, EUR) &        0.000332 &                 0.0 &           1.0 \\
13 &         (JPY, CHF) &        0.000156 &                 0.0 &           1.0 \\
14 &         (JPY, BTC) &        0.000000 &                 0.0 &           1.0 \\
15 &         (GBP, EUR) &        0.048809 &                 0.0 &           1.0 \\
16 &         (GBP, CHF) &        0.009167 &                 0.0 &           1.0 \\
17 &         (GBP, BTC) &        0.000159 &                 0.0 &           1.0 \\
18 &         (EUR, CHF) &        0.117633 &                 0.0 &           1.0 \\
19 &         (EUR, BTC) &        0.000197 &                 0.0 &           1.0 \\
20 &         (CHF, BTC) &        0.000010 &                 0.0 &           1.0 \\
\bottomrule
\end{tabular}


In [30]:
print "*******ROLLS MEASURE*******"
print all_roll[1].to_latex()
all_roll[1].to_csv("python_csv/all_rolls_basis_pts.csv")


*******ROLLS MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(all) &  Average Spread/Noon Rate (Basis Points) &  Standard Deviation &  Observations \\
\midrule
0  &         (USD, LKK) &                                 0.000000 &                 0.0 &           1.0 \\
1  &         (USD, JPY) &                                 0.000000 &                 0.0 &           1.0 \\
2  &         (USD, GBP) &                                 0.000000 &                 0.0 &           1.0 \\
3  &         (USD, EUR) &                                 0.000000 &                 0.0 &           1.0 \\
4  &         (USD, CHF) &                                 0.000000 &                 0.0 &           1.0 \\
5  &         (USD, BTC) &                               302.124411 &                 0.0 &           1.0 \\
6  &         (LKK, JPY) &                                 0.000000 &                 0.0 &           1.0 \\
7  &         (LKK, GBP) &                                 0.000000 &                 0.0 &           1.0 \\
8  &         (LKK, EUR) &                                 0.000000 &                 0.0 &           1.0 \\
9  &         (LKK, CHF) &                                 0.000000 &                 0.0 &           1.0 \\
10 &         (LKK, BTC) &                                 0.000000 &                 0.0 &           1.0 \\
11 &         (JPY, GBP) &                                 0.000000 &                 0.0 &           1.0 \\
12 &         (JPY, EUR) &                                 0.000000 &                 0.0 &           1.0 \\
13 &         (JPY, CHF) &                                 0.000000 &                 0.0 &           1.0 \\
14 &         (JPY, BTC) &                                 0.000000 &                 0.0 &           1.0 \\
15 &         (GBP, EUR) &                                 0.000000 &                 0.0 &           1.0 \\
16 &         (GBP, CHF) &                                 0.000000 &                 0.0 &           1.0 \\
17 &         (GBP, BTC) &                                 0.000000 &                 0.0 &           1.0 \\
18 &         (EUR, CHF) &                                 0.000000 &                 0.0 &           1.0 \\
19 &         (EUR, BTC) &                                 0.000000 &                 0.0 &           1.0 \\
20 &         (CHF, BTC) &                                 0.000000 &                 0.0 &           1.0 \\
\bottomrule
\end{tabular}


In [ ]:
print "*******ROLLS MEASURE*******"
print all_roll[2].to_latex()
all_roll[2].to_csv("python_csv/all_rolls_divide_avg.csv")


*******ROLLS MEASURE*******
\begin{tabular}{llrrr}
\toprule
{} & Currency Pair(all) &  Roll/Ave. Price &  Standard Deviation &  Observations \\
\midrule
0  &         (USD, LKK) &       354.311486 &                 0.0 &           1.0 \\
1  &         (USD, JPY) &        31.919954 &                 0.0 &           1.0 \\
2  &         (USD, GBP) &      1464.158937 &                 0.0 &           1.0 \\
3  &         (USD, EUR) &       515.036388 &                 0.0 &           1.0 \\
4  &         (USD, CHF) &      1474.615795 &                 0.0 &           1.0 \\
5  &         (USD, BTC) &       539.274037 &                 0.0 &           1.0 \\
6  &         (LKK, JPY) &        90.770627 &                 0.0 &           1.0 \\
7  &         (LKK, GBP) &     44470.514054 &                 0.0 &           1.0 \\
8  &         (LKK, EUR) &     36347.648905 &                 0.0 &           1.0 \\
9  &         (LKK, CHF) &     27501.133385 &                 0.0 &           1.0 \\
10 &         (LKK, BTC) &       587.180562 &                 0.0 &           1.0 \\
11 &         (JPY, GBP) &       189.676507 &                 0.0 &           1.0 \\
12 &         (JPY, EUR) &       378.631721 &                 0.0 &           1.0 \\
13 &         (JPY, CHF) &       164.430852 &                 0.0 &           1.0 \\
14 &         (JPY, BTC) &        68.429187 &                 0.0 &           1.0 \\
15 &         (GBP, EUR) &       425.139580 &                 0.0 &           1.0 \\
16 &         (GBP, CHF) &        74.098045 &                 0.0 &           1.0 \\
17 &         (GBP, BTC) &       804.690649 &                 0.0 &           1.0 \\
18 &         (EUR, CHF) &      1074.895910 &                 0.0 &           1.0 \\
19 &         (EUR, BTC) &      1132.067397 &                 0.0 &           1.0 \\
20 &         (CHF, BTC) &        72.757215 &                 0.0 &           1.0 \\
\bottomrule
\end{tabular}


In [ ]:
#draw the graph of trades
def draw_network(transaction_log):
    G=nx.Graph()
    #dictionary to record the volume that a trader trades
    freq={}
    #define the trade relations between the two traders, {(trader1, trader2), frequency}
    relations = {}
    #the array of sizes for graph drawing
    sizes=[]
    for log in transaction_log:
        if log["trader1"] not in freq or freq[log["trader1"]] is None:
            freq[log["trader1"]]=1
        else: 
            freq[log["trader1"]]+=1
        if log["trader2"] not in freq or freq[log["trader2"]] is None:
            freq[log["trader2"]]=1
        else:
            freq[log["trader2"]]+=1
    for log in transaction_log:
        tmp = ()
        if log["trader1"] < log["trader2"]:
            tmp = (log["trader1"],log["trader2"])
        else:
            tmp = (log["trader2"],log["trader1"])
        #if the edges are already connected 
        if tmp in relations:
            relations[tmp] += 1
        else: 
            relations[tmp] = 1
        G.add_edge(log["trader1"],log["trader2"], weight=relations[tmp]*10)
    elarge=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] >0.5]
    esmall=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] <=0.5]
    #get the list of nodes from the graph
    nodes=G.nodes()
    for node in nodes:
        if node in freq:
            #the maximum size
            if freq[node]>=500:
                sizes.append(500*10)
            else:
                sizes.append(freq[node]*10)
        else:
            sizes.append(0)
    #pos=nx.spring_layout(G,k=0.5,iterations=500) # positions for all nodes
    pos=nx.random_layout(G)
    # nodes
    nx.draw_networkx_nodes(G,pos,node_size=sizes)
    # edges
    nx.draw_networkx_edges(G,pos,edgelist=elarge,
                        width=1)
    nx.draw_networkx_edges(G,pos,edgelist=esmall,
                        width=1,alpha=0.5,edge_color='b',style='dashed')
    plt.axis('off')
    plt.savefig("transaction_network.png") # save as png
    plt.show() # display
    
#draw_network(transaction_log)

'''
The function that draws the node for certain sizes only
'''
def draw_limited_network(transaction_log):
    G=nx.Graph()
    #dictionary to record the volume that a trader trades
    freq={}
    #define the trade relations between the two traders, {(trader1, trader2), frequency}
    relations = {}
    #the array of sizes for graph drawing
    sizes=[] 
    for log in transaction_log:
        if log["trader1"] not in freq or freq[log["trader1"]] is None:
            freq[log["trader1"]]=1
        else: 
            freq[log["trader1"]]+=1
        if log["trader2"] not in freq or freq[log["trader2"]] is None:
            freq[log["trader2"]]=1
        else:
            freq[log["trader2"]]+=1
    for log in transaction_log:
        if log["trader1"] in freq and freq[log["trader1"]]>5 or log["trader2"] in freq and freq[log["trader2"]]>5:
            if (log["trader1_buy"]=="BTC" and log["trader1_sell"] =="USD") or (log["trader1_buy"]=="USD" and log["trader1_sell"] =="BTC") or (log["trader1_buy"]=="LKK" and log["trader1_sell"] =="USD") or (log["trader1_buy"]=="USD" and log["trader1_sell"] =="LKK") or (log["trader1_buy"]=="BTC" and log["trader1_sell"] =="LKK") or (log["trader1_buy"]=="LKK" and log["trader1_sell"] =="BTC"):
                tmp = ()
                if log["trader1"] < log["trader2"]:
                    tmp = (log["trader1"],log["trader2"])
                else:
                    tmp = (log["trader2"],log["trader1"])
                #if the edges are already connected 
                if tmp in relations:
                    relations[tmp] += 1
                else: 
                    relations[tmp] = 1
                G.add_edge(log["trader1"],log["trader2"], weight=relations[tmp])
    elarge=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] >0.5]
    esmall=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] <=0.5]
    #get the list of nodes from the graph
    nodes=G.nodes()
    for node in nodes:
        if node in freq:
            #the maximum size
            if freq[node]>=500:
                sizes.append(500*10)
            else:
                sizes.append(freq[node]*10)
        else:
            sizes.append(0)
    pos=nx.spring_layout(G,k=0.028,iterations=300) # positions for all nodes
    #pos=nx.random_layout(G)
    # nodes
    nx.draw_networkx_nodes(G,pos,node_size=sizes)
    # edges
    nx.draw_networkx_edges(G,pos,edgelist=elarge,
                        width=1)
    nx.draw_networkx_edges(G,pos,edgelist=esmall,
                        width=1,alpha=0.34,edge_color='b',style='dashed')
    plt.axis('off')
    plt.savefig("transaction_network.eps" ,format='eps', dpi=2000) # save as eps
    plt.savefig("transaction_network.png",dpi=2000) # save as png
    plt.show() # display
    
draw_limited_network(transaction_log)


/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/matplotlib/collections.py:608: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors_original != 'face':
/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/matplotlib/collections.py:548: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == 'face':