In [1]:
import pandas as pd
import matplotlib.pyplot as plt
# load data
df = pd.read_csv('./executions.csv', index_col='Unnamed: 0')
df.filled_time = pd.to_datetime(df.filled_time)
# convert to $
eur2usd = 1.14
gbp2usd = 1.26

In [2]:
# Remove Fix income
ETNID = 'etn_id'
DES = 'des'
OVALID = 'oval_id'
CURRENCY = 'currency'
INDEXID = 'index_id'
LAST_REBALANCE_DATE = 'last_rebalance_date'
POSITION = 'position'
BUY = 'buy'
SELL = 'sell'
TYPE = 'type'
LINEAR = 'linear'
PRATIO = 'performance_ratio'
RATE = 'rate'
FIXEDINCOME = 'fixed_income'
NOTIONAL = 'notional'
STARTDATE = 'start_date'
ENDDATE = 'end_date'
ETN = 'etn'
fixed_income_database = {
'a0b3bb23-75a1-4935-9fc4-7ba292fd3d3e' : {
    OVALID : 'a0b3bb23-75a1-4935-9fc4-7ba292fd3d3e',
    DES:  'Fix_income_GBP_20190508',
    RATE: 2.5,
    NOTIONAL: 100,
    STARTDATE: '2019-05-08',
    ENDDATE: '2020-05-08'
    },
'83e5f27b-cff9-4942-81d7-f63f5dd68c55' : {
    OVALID :'83e5f27b-cff9-4942-81d7-f63f5dd68c55',
    DES:  'Fix_income_EUR_20190508',
    RATE: 2.5,
    NOTIONAL: 100,
    STARTDATE: '2019-05-08',
    ENDDATE: '2020-05-08',
    },
'38e60122-b68e-48c1-9bac-5e5b05f3ba2f' : {
    OVALID : '38e60122-b68e-48c1-9bac-5e5b05f3ba2f',
    DES:  'Fix_income_EUR_20190701',
    RATE: 2.5,
    NOTIONAL: 100,
    STARTDATE: '2019-05-08',
    ENDDATE: '2020-05-08',
    },
'3fee5038-c52f-4178-8f0a-fef062ded60c' : {
    OVALID :'3fee5038-c52f-4178-8f0a-fef062ded60c',
    DES:  'Fix_income_GBP_20190701',
    RATE: 2.5,
    NOTIONAL: 100,
    STARTDATE: '2019-05-08',
    ENDDATE: '2020-05-08',
    }
}
# Create fix income

start = "2010-01-01"
end = "2019-11-30"
for key in fixed_income_database:
    _tmp = df[df.certificate_uuid==key]
    _tmp = _tmp[_tmp.filled_time <= end]
    _tmp['buy'] = _tmp.loc[_tmp.order_action=='buy'].filled_amount
    _tmp['sell'] = _tmp.loc[_tmp.order_action=='sell'].filled_amount
    _tmp.buy = _tmp.buy.fillna(0.0)
    _tmp.sell = _tmp.sell.fillna(0.0)
    print(fixed_income_database[key][DES])
    print(sum(_tmp['buy']) - sum(_tmp['sell']))


Fix_income_EUR_20190508
626489.0
Fix_income_EUR_20190701
387470.0
Fix_income_GBP_20190701
22996.0
Fix_income_GBP_20190508
24656.0

In [3]:
# Remove fix income
for key in fixed_income_database:
    df = df[df.certificate_uuid!=key]

df.index = df.filled_time
df = df.sort_index()

In [4]:
def ADF_Stationarity_Test(self, timeseries, printResults=True):

        # Dickey-Fuller test:
        adfTest = adfuller(timeseries, autolag="AIC")

        self.pValue = adfTest[1]

        if (self.pValue < self.SignificanceLevel):
            self.isStationary = True
        else:
            self.isStationary = False

        if printResults:
            dfResults = pd.Series(adfTest[0:4],
                                  index=["ADF Test Statistic", "P-Value", "# Lags Used", "# Observations Used"])

            # Add Critical Values
            for key, value in adfTest[4].items():
                dfResults["Critical Value (%s)" % key] = value

            print("Augmented Dickey-Fuller Test Results:")
            print(dfResults)
# I use the method above like so:
            self.isStationary = False
            print(symbol + ":")
            self.ADF_Stationarity_Test(lra[symbol].values)
            print("Is the time series stationary? {0}".format(self.isStationary))

In [5]:
# Average, Std. Dev initial contribution across all products for Oval (in USD - can convert at current rate)

In [6]:
# A measure of the distribution i.e. histogram of initial investment (in USD).

In [7]:
# Average, Std. Dev subsequent contributions by period (in %)

In [8]:
data=df.groupby('filled_time')
orderCount=data.count()
# print(orderCount.filled_amount)
orderSum=data.sum()
# print(orderSum.filled_amount)
perOrder=orderSum.filled_amount/orderCount.filled_amount
# print(perOrder)
userPerWeek=data.client_uuid.nunique()
# print(userPerWeek)
buy = df[df.order_action=='buy']
sell = df[df.order_action=='sell']
buyOrderSum = buy.groupby('filled_time').sum()
sellOrderSum = sell.groupby('filled_time').sum()
buyOrderCount = buy.groupby('filled_time').count().filled_amount
sellOrderCount = sell.groupby('filled_time').count().filled_amount


/home/wz/__pyEnvs/notebook3/lib/python3.5/site-packages/ipykernel_launcher.py:1: FutureWarning: 'filled_time' is both a column name and an index level.
Defaulting to column but this will raise an ambiguity error in a future version
  """Entry point for launching an IPython kernel.
/home/wz/__pyEnvs/notebook3/lib/python3.5/site-packages/ipykernel_launcher.py:12: FutureWarning: 'filled_time' is both a column name and an index level.
Defaulting to column but this will raise an ambiguity error in a future version
  if sys.path[0] == '':
/home/wz/__pyEnvs/notebook3/lib/python3.5/site-packages/ipykernel_launcher.py:13: FutureWarning: 'filled_time' is both a column name and an index level.
Defaulting to column but this will raise an ambiguity error in a future version
  del sys.path[0]
/home/wz/__pyEnvs/notebook3/lib/python3.5/site-packages/ipykernel_launcher.py:14: FutureWarning: 'filled_time' is both a column name and an index level.
Defaulting to column but this will raise an ambiguity error in a future version
  
/home/wz/__pyEnvs/notebook3/lib/python3.5/site-packages/ipykernel_launcher.py:15: FutureWarning: 'filled_time' is both a column name and an index level.
Defaulting to column but this will raise an ambiguity error in a future version
  from ipykernel import kernelapp as app

In [9]:
buyFills = buyOrderSum.filled_amount.resample('W').sum()
buyFills.plot()
plt.show()
sellFills = sellOrderSum.filled_amount.resample('W').sum()
sellFills.plot()
plt.show()
buyvsSell = buyFills / sellFills
buyvsSell[4:].plot()
plt.show()
bms = buyFills - sellFills
bms.plot()
plt.show()



In [10]:
# Check stikness of each product
buyByProduct = buy.groupby('certificate_uuid')
print(buyByProduct.sum().filled_amount)
sellByProduct = sell.groupby('certificate_uuid')
print(sellByProduct.sum().filled_amount.plot())
plt.show()
uuids = sellByProduct.sum().filled_amount.index


certificate_uuid
06359e98-73c9-46cd-b1ca-1354b6ab0c76     433704.0
1936995e-e906-467d-8510-0651c815b077    1289962.0
24f35ee0-a280-4524-b592-907c599c75d4      57437.0
282f64de-4977-49d0-bd63-f25db41b1fe3      37506.0
45c2ade4-4b75-42e3-8f87-ab9054d5a2cb      94425.0
5ca26422-438d-4433-8b04-9358ed6816d2     771888.0
6b5f381f-7c8a-451e-be88-a1f40b9cb035     356245.0
6ca2f46b-51e8-4c83-99ec-5888d4e7869a          0.0
89dbaeb5-61c7-4a4c-926e-72c014b06fbb      80811.0
93900df1-7475-45a2-9583-3300bdbde244     969906.0
9f157858-b898-4d33-859d-8ed7a955e0ad     626907.0
ad3c83b4-eea2-4fee-84b6-fa85f7138841          0.0
d7647cbd-ff75-400f-a95a-b34cdf1c8e0c     405907.0
ecc5cd65-6aec-4539-9e40-866eaa7763ca          0.0
Name: filled_amount, dtype: float64
Axes(0.125,0.125;0.775x0.755)

In [11]:
# Time series with each product
uuid = '89dbaeb5-61c7-4a4c-926e-72c014b06fbb'
# <----- Parameters UUID

prodBuy = buy[buy.certificate_uuid==uuid].groupby('filled_time').sum()
prodSell = sell[sell.certificate_uuid==uuid].groupby('filled_time').sum()
balance = pd.DataFrame()
balance['buy'] = prodBuy.filled_amount
balance['sell'] = prodSell.filled_amount
balance.sell.fillna(0, inplace=True)
balance.buy.fillna(0, inplace=True)
balance['difference'] = balance.buy - balance.sell
# balance.difference.plot()

# cumsum
cumsum = balance.difference.cumsum()
cumsum.plot()
plt.show()

balance.difference.plot()
plt.show()

sellDivAum = balance.sell / (balance.sell+balance.buy).cumsum()
sellDivAum.plot()
plt.show()


/home/wz/__pyEnvs/notebook3/lib/python3.5/site-packages/ipykernel_launcher.py:5: FutureWarning: 'filled_time' is both a column name and an index level.
Defaulting to column but this will raise an ambiguity error in a future version
  """
/home/wz/__pyEnvs/notebook3/lib/python3.5/site-packages/ipykernel_launcher.py:6: FutureWarning: 'filled_time' is both a column name and an index level.
Defaulting to column but this will raise an ambiguity error in a future version
  

In [12]:
# indicator function
def indicator(buy, sell, uuid: str):
    """
    
    """
    prodBuy = buy[buy.certificate_uuid==uuid].groupby('filled_time').sum()
    prodSell = sell[sell.certificate_uuid==uuid].groupby('filled_time').sum()
    balance = pd.DataFrame()
    balance['buy'] = prodBuy.filled_amount
    balance['sell'] = prodSell.filled_amount
    balance.sell.fillna(0, inplace=True)
    balance.buy.fillna(0, inplace=True)
    balance['difference'] = balance.buy - balance.sell
    sellDivAum = balance.sell / (balance.sell+balance.buy).cumsum()
    return balance, sellDivAum

# ---- Stickness analysis
alpha = 0.999
ratios = {}
for uuid in uuids:
    print(uuid)
    b, s = indicator(buy, sell, uuid=uuid)
#     print(s)
    print('%s%% ratio: %s' % (alpha * 100, s.quantile(alpha)))
    print()
    ratios[uuid] = s.quantile(alpha)

# import json
# print(json.dumps(ratios))


06359e98-73c9-46cd-b1ca-1354b6ab0c76
99.9% ratio: 0.04502549680200325

1936995e-e906-467d-8510-0651c815b077
99.9% ratio: 0.049095749655247586

24f35ee0-a280-4524-b592-907c599c75d4
99.9% ratio: 0.08208777046328274

282f64de-4977-49d0-bd63-f25db41b1fe3
99.9% ratio: 0.06988143992355092

45c2ade4-4b75-42e3-8f87-ab9054d5a2cb
99.9% ratio: 0.06237946442318311

5ca26422-438d-4433-8b04-9358ed6816d2
99.9% ratio: 0.07189869264968368

6b5f381f-7c8a-451e-be88-a1f40b9cb035
99.9% ratio: 0.04003272224915464

89dbaeb5-61c7-4a4c-926e-72c014b06fbb
99.9% ratio: 0.07791012021646389

93900df1-7475-45a2-9583-3300bdbde244
/home/wz/__pyEnvs/notebook3/lib/python3.5/site-packages/ipykernel_launcher.py:6: FutureWarning: 'filled_time' is both a column name and an index level.
Defaulting to column but this will raise an ambiguity error in a future version
  
/home/wz/__pyEnvs/notebook3/lib/python3.5/site-packages/ipykernel_launcher.py:7: FutureWarning: 'filled_time' is both a column name and an index level.
Defaulting to column but this will raise an ambiguity error in a future version
  import sys
99.9% ratio: 0.06254095373474232

9f157858-b898-4d33-859d-8ed7a955e0ad
99.9% ratio: 0.03507184014813446

d7647cbd-ff75-400f-a95a-b34cdf1c8e0c
99.9% ratio: 0.035850002001216984


In [13]:
# Split plot for buy and sell amoun
buyOrderSum.filled_amount.plot()
print('Buy order value: ')
plt.show()
# Split plot for buy and sell
sellOrderSum.filled_amount.plot()
print('Sell order value: ')
plt.show()
print('Buy amount / total amount: ')
ratio = buyOrderSum.filled_amount / (sellOrderSum.filled_amount+buyOrderSum.filled_amount)
ratio.plot()
plt.show()
print('Sell amount / total amount: ')
ratio = sellOrderSum.filled_amount / (sellOrderSum.filled_amount+buyOrderSum.filled_amount)
ratio.plot()
plt.show()
print('Buy order number / total number: ')
ratio = buyOrderCount / (buyOrderCount+sellOrderCount)
ratio.plot()
plt.show()
print('Buy value per order: ')
r = buyOrderSum.filled_amount / buyOrderCount
r.plot()
plt.show()
print('Sell value per order: ')
r = sellOrderSum.filled_amount / sellOrderCount
r.plot()
plt.show()


Buy order value: 
Sell order value: 
Buy amount / total amount: 
Sell amount / total amount: 
Buy order number / total number: 
Buy value per order: 
Sell value per order: 

In [14]:
# Plot weekly unique user
userPerWeek.plot()
print('Weekly unique client: ')
plt.show()


Weekly unique client: 

In [15]:
# Plot Weekly Data
import matplotlib.pyplot as plt
orderCount.filled_amount.plot()
print('Order count: ')
plt.show()
print(len(orderCount.filled_amount))


Order count: 
97

In [16]:
# Plot
orderSum.filled_amount.plot()
print('Order amount in total in EUR (including buy/sell): ')
plt.show()


Order amount in total in EUR (including buy/sell): 

In [17]:
# Plot
perOrder.plot()
print('value per order in EUR: ')
plt.show()


value per order in EUR: 

In [18]:
# Per client
clients = df.groupby('client_uuid')
fills = clients.filled_amount
start = fills.first()
suma = fills.sum()
end = fills.last()
# print(end)
# print(start)
# print(suma)
increase = (suma - start)
# print(increase)


weeks = (df.index[-1] - df.index[0]).days / 7
print('Weeks: ', weeks)
transPerWeek = suma/weeks
print('Weekly Transactions Avg of each client: ', transPerWeek.mean())
print('Weekly Transactions Std of each client: ', transPerWeek.std())
transPerWeek.hist(bins=40)
plt.show()
ratio = increase / weeks

# average
avgRatio = ratio.mean()
stdRatio = ratio.std()
ratio.hist(bins=20)
plt.show()
print(avgRatio)
print(start.mean())


Weeks:  58.57142857142857
Weekly Transactions Avg of each client:  11.3879318219
Weekly Transactions Std of each client:  27.4386152691
10.247626938
66.7892860584

In [19]:
# ------------ Report ------------- #
print('----- Summary ----')
print('* Total unique clients: ', len(pd.unique(df.client_uuid)))
print('* Avg. client transacations: ', fills.mean().mean())
print('* Std. client transacations: ', fills.mean().std())
print('* Total AUM: ')
print('* Total transcation: ')
print('')
print('----- Weekly Data ----')


----- Summary ----
* Total unique clients:  10379
* Avg. client transacations:  44.06452704
* Std. client transacations:  81.3983060497
* Total AUM: 
* Total transcation: 

----- Weekly Data ----

In [ ]:


In [ ]: