In [1]:
# IPython magic: render matplotlib figures inline, below the cell that draws them.
%matplotlib inline
In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
In [3]:
# Shell escape: ask sbt to run the `ZeroIntelligenceApp` main class.
# NOTE(review): presumably this run writes the JSON file that is loaded from
# ./data/zero-intelligence/ in the next cell -- confirm.
!sbt "run-main ZeroIntelligenceApp"
In [4]:
# Load the zero-intelligence output (JSON, records orientation) into a frame.
zi_json_path = './data/zero-intelligence/Gpi2.json'
tmp_df = pd.read_json(zi_json_path, orient='records')

# Flip the row order, then index the rows by their timestamp.
# NOTE(review): presumably the file is stored newest-first, so that the flip
# yields chronological order -- confirm against the simulation's writer.
zi_data = tmp_df.iloc[::-1].set_index(keys='timestamp')
In [5]:
# Preview the first rows of the zero-intelligence data.
zi_data.head()
Out[5]:
In [6]:
# Preview the last rows of the zero-intelligence data.
zi_data.tail()
Out[6]:
In [7]:
# Summary statistics for every numeric column of the zero-intelligence data.
zi_data.describe()
Out[7]:
In [8]:
# Spread on each row, computed as bid price minus ask price.
# NOTE(review): this is the reverse of the usual quoted spread (ask - bid).
# Later cells compare `price` against `bidPrice` / `askPrice`, which suggests
# each row is a matched trade -- confirm that bid - ask is the intended sign.
zi_data['spread'] = zi_data['bidPrice'].sub(zi_data['askPrice'])
In [9]:
# Summary statistics of the spread column.
zi_data['spread'].describe()
Out[9]:
In [10]:
# Draw the spread against the index as tiny, nearly transparent markers
# (no connecting line), so that dense regions show up darker.
point_style = dict(linestyle='none', marker='o', markersize=1, alpha=0.05)
zi_data['spread'].plot(**point_style)
plt.show()
Note that raw returns are bounded below! Bounds on price (at least in these simulations!) are $1 \le p_t \le 200$. Therefore the lower bound on raw returns is...
$$ \underline{r} = \frac{1 - 200}{200} = -0.995 \approx -1. $$Similarly, the upper bound on raw returns is...
$$ \overline{r} = \frac{200 - 1}{1} = 199. $$
In [11]:
# One-period raw return: fractional change of `price` from the previous row.
# The first row has no predecessor, so its value is NaN.
zi_data['raw_returns'] = zi_data['price'].pct_change()
In [12]:
# Summary statistics of the raw returns.
zi_data['raw_returns'].describe()
Out[12]:
In [13]:
# Draw the raw returns against the index as tiny, nearly transparent markers
# (no connecting line).
point_style = dict(linestyle='none', marker='o', markersize=1, alpha=0.05)
zi_data['raw_returns'].plot(**point_style)
plt.show()
Starting from raw returns, $r_{t+k}$, we see that...
$$ 1 + r_{t+k} = 1 + \frac{p_{t+k} - p_t}{p_t} = \frac{p_{t+k}}{p_t} $$...taking logs yields...
$$ \ln \big(1 + r_{t+k}\big) = \ln p_{t+k} - \ln p_t = \Delta \ln p_{t+k} $$...since $\ln \big(1 + r_{t+k}\big) \approx r_{t+k}$ for $|r_{t+k}| \ll 1$, we should expect the raw and logarithmic return formulas to yield similar results for "small" returns.
In [14]:
# Logarithmic return ln(1 + r) derived from the raw return r.
# np.log1p evaluates ln(1 + r) without first rounding the sum 1 + r, so it
# keeps precision when |r| is very small, where np.log(1 + r) loses digits.
# NaN raw returns (e.g. the first row) stay NaN.
zi_data['log_returns'] = np.log1p(zi_data['raw_returns'])
In [15]:
# Draw the log returns against the index as tiny, nearly transparent markers
# (no connecting line).
point_style = dict(linestyle='none', marker='o', markersize=1, alpha=0.05)
zi_data['log_returns'].plot(**point_style)
plt.show()
In [49]:
# Kernel density estimate of the log returns (missing values removed first).
zi_log_returns = zi_data['log_returns'].dropna()
zi_log_returns.plot.kde()
plt.show()
In [20]:
# Autocorrelation function of the log returns (missing values removed first),
# computed via FFT and drawn as a line over the lag number.
log_returns_acf = sm.tsa.acf(zi_data['log_returns'].dropna(), fft=True)
plt.plot(log_returns_acf)
plt.show()
In [50]:
# Mid price: the arithmetic mean of the ask price and the bid price of a row.
zi_data['mid_price'] = (zi_data['askPrice'] + zi_data['bidPrice']) / 2
How to best measure effective size? Particularly of ask orders!
In [51]:
# Classify each row by comparing the traded price with the bid / ask price:
# a row counts as an incoming ask when price == bidPrice, and as an incoming
# bid when price == askPrice.
incoming_ask_orders = zi_data['bidPrice'] == zi_data['price']
incoming_bid_orders = zi_data['askPrice'] == zi_data['price']

# Per-row multipliers: -askPrice on incoming-ask rows, +bidPrice on
# incoming-bid rows, and 1 (i.e. no change) everywhere else.
# NOTE(review): a row with bidPrice == askPrice == price satisfies both masks
# and therefore receives both multipliers -- confirm this is intended.
ask_multiplier = (-zi_data['askPrice']).where(incoming_ask_orders, 1)
bid_multiplier = zi_data['bidPrice'].where(incoming_bid_orders, 1)

# Effective size: the quantity scaled by whichever multipliers apply.
zi_data['effective_size'] = zi_data['quantity'] * ask_multiplier * bid_multiplier
Compute the measure of price impact: $\Delta \ln$ mid_price...
In [52]:
# Price impact: change in the natural log of the mid price from the previous
# row to the current one (NaN on the first row).
log_mid_price = np.log(zi_data['mid_price'])
zi_data['price_impact'] = log_mid_price.diff(periods=1)
In [53]:
# Preview the first rows flagged as incoming ask orders.
zi_data.loc[incoming_ask_orders].head()
Out[53]:
In [54]:
# Price impact versus effective size, using every row of zi_data (the data is
# not filtered to one order side here). The red line is the mean price impact
# at each distinct effective size; the faint blue points are the individual
# observations.
fig, ax = plt.subplots(nrows=1, ncols=1)

grouped_data = zi_data.groupby("effective_size")
mean_price_impact = grouped_data["price_impact"].mean()
mean_price_impact.plot(ax=ax, color='r', alpha=0.75)

ax.scatter(
    zi_data["effective_size"],
    zi_data["price_impact"],
    s=1,
    edgecolor='b',
    alpha=0.05,
)
ax.set_ylabel("price_impact")
plt.show()
In [55]:
# Empirical CDF of the absolute returns, one per return definition.
# Missing values (e.g. the first row, which has no previous price) are dropped
# before fitting: ECDF would otherwise count them as observations and carry
# NaN into its support points.
ecdfs = {}
for column_name in ['raw_returns', 'log_returns']:
    absolute_returns = zi_data[column_name].abs().dropna()
    ecdfs[column_name] = sm.distributions.ECDF(absolute_returns)
In [56]:
# Complementary ECDF (1 - ECDF) of the absolute raw and log returns, drawn on
# log-log axes.
for column_name, curve_label in [('raw_returns', 'raw'), ('log_returns', 'log')]:
    ecdf = ecdfs[column_name]
    plt.plot(ecdf.x, 1 - ecdf.y, label=curve_label)

plt.xscale('log')
plt.yscale('log')
plt.legend()
plt.show()
In [84]:
# Shell escape: ask sbt to run the `ZeroIntelligenceConstrainedApp` main class.
# NOTE(review): presumably this run writes the JSON file that is loaded from
# ./data/zero-intelligence-constrained/ in the next cell -- confirm.
!sbt "run-main ZeroIntelligenceConstrainedApp"
In [85]:
# Load the constrained zero-intelligence output (JSON, records orientation).
zic_json_path = './data/zero-intelligence-constrained/Gpi2.json'
tmp_df = pd.read_json(zic_json_path, orient='records')

# Flip the row order, then index the rows by their timestamp.
# NOTE(review): presumably the file is stored newest-first, so that the flip
# yields chronological order -- confirm against the simulation's writer.
zic_data = tmp_df.iloc[::-1].set_index(keys='timestamp')
In [89]:
# Summary statistics for every numeric column of the constrained data.
zic_data.describe()
Out[89]:
In [87]:
# Derived columns for the constrained data set.

# Spread on each row, computed as bid price minus ask price.
# NOTE(review): this is the reverse of the usual quoted spread (ask - bid) --
# confirm that bid - ask is the intended sign.
zic_data['spread'] = zic_data['bidPrice'] - zic_data['askPrice']

# One-period raw return: fractional change of `price` from the previous row
# (NaN on the first row).
zic_data['raw_returns'] = zic_data['price'].pct_change(periods=1)

# Logarithmic return ln(1 + r). np.log1p avoids rounding the sum 1 + r first,
# so it keeps precision for very small |r| where np.log(1 + r) loses digits.
zic_data['log_returns'] = np.log1p(zic_data['raw_returns'])
In [81]:
# Summary statistics of the spread in the (unconstrained) zero-intelligence data.
zi_data['spread'].describe()
Out[81]:
In [88]:
# Summary statistics of the spread in the (unconstrained) zero-intelligence data.
# NOTE(review): this cell repeats the summary shown by the cell directly
# before it -- possibly a leftover; consider removing one of the two.
zi_data['spread'].describe()
Out[88]:
In [72]:
# Summary statistics of the spread in the constrained data.
zic_data['spread'].describe()
Out[72]:
In [83]:
# Kernel density estimate of the constrained spread (missing values removed).
zic_spread = zic_data['spread'].dropna()
zic_spread.plot.kde()
plt.show()
In [70]:
# Overlay the kernel density estimates of the log returns from both data sets
# on the same axes. Each curve is labelled and a legend is drawn so the two
# curves can be told apart.
zi_data['log_returns'].dropna().plot(kind='kde', label='zero-intelligence')
zic_data['log_returns'].dropna().plot(kind='kde', label='zero-intelligence-constrained')
plt.legend()
plt.show()
In [61]:
# Summary statistics of the log returns in the zero-intelligence data.
zi_data['log_returns'].describe()
Out[61]:
In [62]:
# Summary statistics of the log returns in the constrained data.
zic_data['log_returns'].describe()
Out[62]:
In [64]:
# Line plot of the constrained price series against the timestamp index.
zic_data['price'].plot.line()
plt.show()
In [65]:
# Summary statistics of the constrained price series.
zic_data['price'].describe()
Out[65]:
In [ ]: