In [4]:
%matplotlib inline
import math
import numpy as np
import numpy.random as nrand
import matplotlib.pyplot as plt
import pandas as pd
import itertools
import scipy.stats as stats
#from pandas_datareader import data
from stockmarket.stylizedfacts import *
from math import isclose
import matplotlib.cm as cm
from pandas.tools.plotting import autocorrelation_plot
from stockmarket.database import create_CDA_tables, record_state_variables
#import powerlaw
from tqdm import *
from stockmarket import baselinemodel
import sqlite3
import ast
import json
import re
In [5]:
from matplotlib import style
# 'seaborn-white' was renamed to 'seaborn-v0_8-white' in matplotlib 3.6 and
# removed in 3.8; use whichever name this installation provides.
if 'seaborn-white' in style.available:
    style.use('seaborn-white')
else:
    style.use('seaborn-v0_8-white')
In [194]:
#style.available
In [6]:
# Also needed when only running the data-analysis cells below.
# Number of simulated trading periods per run.
SIMTIME = 10
# Number of independent simulation runs (one seed each).
NRUNS = 1
In [7]:
# Parameter set produced by the model calibration (values copied verbatim).
calibratedParams = {
    # fundamentalist valuation
    "price_to_earnings_base": 11,
    "price_to_earnings_gap": 11,
    "price_to_earnings_heterogeneity": 1.2809853358963714,
    # chartist moving-average horizons
    "agent_ma_short": 50,
    "agent_ma_long": 65,
    "shortMA_heterogeneity": 1.204473734660668,
    "longMA_heterogeneity": 1.441986880130183,
    "shortMA_memory_divider": 3.539424126160756,
    # population shares
    "share_chartists": 0.6360219452317543,
    "share_mean_reversion": 0.3380862907737759,
    # trading behaviour
    "agents_hold_thresholds": 0.009787868843112115,
    "agent_order_price_variability": 2,
    "agent_order_variability": 0.41645196996101297,
    "agent_volume_risk_aversion": 0.15710589508611844,
    "agent_propensity_to_switch": 1.879037190114386,
    "order_expiration_time": 6791,
    "profit_announcement_working_days": 13,
    # initial conditions
    'init_money': 26000,
    'init_profit': 1000,
    'init_discount_rate': 0.17,
}
In [8]:
# Unpack the calibrated dictionary into module-level names so the
# simulation call below stays readable.

# -- initial conditions --
initial_total_money = calibratedParams['init_money']
init_profit = calibratedParams['init_profit']
init_discount_rate = calibratedParams['init_discount_rate']

# -- fundamentalist (price-to-earnings) parameters --
price_to_earnings_base = calibratedParams['price_to_earnings_base']
price_to_earnings_gap = calibratedParams['price_to_earnings_gap']
price_to_earnings_heterogeneity = calibratedParams['price_to_earnings_heterogeneity']

# -- chartist / moving-average parameters --
agent_ma_long = calibratedParams['agent_ma_long']
agent_ma_short = calibratedParams['agent_ma_short']
longMA_heterogeneity = calibratedParams['longMA_heterogeneity']
shortMA_heterogeneity = calibratedParams['shortMA_heterogeneity']
shortMA_memory_divider = calibratedParams['shortMA_memory_divider']
share_chartists = calibratedParams['share_chartists']
share_mean_reversion = calibratedParams['share_mean_reversion']

# -- order / behaviour parameters --
agents_hold_thresholds = calibratedParams['agents_hold_thresholds']
agent_order_price_variability = calibratedParams['agent_order_price_variability']
agent_order_variability = calibratedParams['agent_order_variability']
agent_volume_risk_aversion = calibratedParams['agent_volume_risk_aversion']
agent_propensity_to_switch = calibratedParams['agent_propensity_to_switch']
profit_announcement_working_days = calibratedParams['profit_announcement_working_days']
order_expiration_time = calibratedParams['order_expiration_time']
In [9]:
# Market size and firm-profit process parameters (not calibrated).
n_agents = 1000
n_firms = 1
firm_profit_mu = 0.058  # drift of the firm profit process
firm_profit_delta = 0.00396825396  # NOTE(review): presumably 1/252 (per-trading-day step) — confirm
firm_profit_sigma = 0.125  # volatility of the firm profit process
In [10]:
# Price-to-earnings valuation windows for the two fundamentalist groups:
# the "low" group uses [PE_low_low, PE_low_high], the "high" group uses
# [PE_high_low, PE_high_high], shifted up by price_to_earnings_gap.
PE_low_low = price_to_earnings_base
PE_low_high = int(price_to_earnings_heterogeneity*price_to_earnings_base)
PE_high_low = PE_low_high + price_to_earnings_gap
PE_high_high = int(price_to_earnings_heterogeneity*PE_high_low)
In [11]:
# Open (or create) the results database; `cur` is used by the simulation
# loop below to write per-seed tables.
conn = sqlite3.connect('baseline.sqlite')
cur = conn.cursor()
In [12]:
# Run NRUNS simulations, one seed each; state is persisted to sqlite.
for seed in tqdm(range(NRUNS)): # optional, remove tqdm(), it is used for the progress bar
    # Create fresh per-seed tables before the run.
    create_CDA_tables(cur, seed)
    agents, firms, stocks, order_books = baselinemodel.stockMarketSimulation(
        seed=seed,
        simulation_time=SIMTIME,
        # warm-up history long enough for the longest moving average used
        init_backward_simulated_time=int(agent_ma_long*longMA_heterogeneity),
        number_of_agents=n_agents,
        share_chartists=share_chartists,
        share_mean_reversion=share_mean_reversion,
        amount_of_firms=n_firms,
        # (low, high) ranges from which heterogeneous agents draw
        initial_total_money=(initial_total_money,int(initial_total_money*1.1)),
        initial_profit=(init_profit, init_profit),
        discount_rate=init_discount_rate,
        init_price_to_earnings_window=((PE_low_low,
                                        PE_low_high),
                                       (PE_high_low,
                                        PE_high_high)),
        order_expiration_time=order_expiration_time,
        agent_order_price_variability=(agent_order_price_variability,agent_order_price_variability),
        agent_order_variability=agent_order_variability,
        agent_ma_short=(agent_ma_short, int(agent_ma_short*shortMA_heterogeneity)),
        agent_ma_long=(agent_ma_long, int(agent_ma_long*longMA_heterogeneity)),
        agents_hold_thresholds=(1-agents_hold_thresholds, 1+agents_hold_thresholds),
        agent_volume_risk_aversion=agent_volume_risk_aversion,
        agent_propensity_to_switch=agent_propensity_to_switch,
        firm_profit_mu=firm_profit_mu,
        firm_profit_delta=firm_profit_delta,
        firm_profit_sigma=firm_profit_sigma,
        profit_announcement_working_days=profit_announcement_working_days,
        # NOTE(review): hard-coded 4 while the calibrated
        # shortMA_memory_divider (~3.54) is unpacked above but never
        # used — confirm which value is intended.
        mean_reversion_memory_divider=4,
        printProgress=False,
        )
    # Persist the end-of-run state for this seed, then commit.
    record_state_variables(cur, seed, agents, SIMTIME, firms, stocks, order_books)
    conn.commit()
In [13]:
cur.close()
In [10]:
# `data` comes from pandas_datareader; its top-of-file import is commented
# out, which made this cell raise NameError — import it locally so the
# cell is self-contained.
from pandas_datareader import data

start_date = '2010-01-01'
end_date = '2016-12-31'
# Download SPY once and take both columns, instead of issuing the same
# request twice.
# NOTE(review): the 'google' data source has been discontinued; switch to
# a supported source (e.g. 'stooq') if this call fails.
spy_data = data.DataReader("SPY",
                           start=start_date,
                           end=end_date,
                           data_source='google')
spy = spy_data['Close']
#spy.plot(title='AAPL Adj. Closing Price')
spy_returns = spy.pct_change()
spy_volume = spy_data['Volume']
In [11]:
# Re-connect to the results database for the analysis phase.
conn = sqlite3.connect('baseline.sqlite')
cur = conn.cursor()
In [195]:
# One query per seed: every state variable attached to a 'stock' object.
# Table names carry the seed as a suffix, hence the .format(seed).
stockVariables = [
    pd.read_sql_query(
        """
    SELECT Statevariables{0}.seed ,Statevariables{0}.period, Variabletypes{0}.variable_type, Objects{0}.object_name, Objects{0}.object_type ,Statevariables{0}.value
    FROM Statevariables{0} JOIN Variabletypes{0} JOIN Objects{0} ON Statevariables{0}.variable_id = Variabletypes{0}.id and Statevariables{0}.owner_id = Objects{0}.id
    WHERE Objects{0}.object_type = 'stock'
    """.format(seed),
        conn,
    )
    for seed in range(NRUNS)
]
In [196]:
stockVariables = pd.concat(stockVariables)
In [197]:
stockVariables['variable_type'].unique()
Out[197]:
In [198]:
len(stockVariables.query('variable_type == "price_to_earnings_history" and seed == 0'))
Out[198]:
In [199]:
# Parse the price-to-earnings history (stored as TEXT) into floats,
# one list per seed.
pe_per_seed = [
    [float(ptoe) for ptoe in
     stockVariables.query('seed == {} and variable_type == "price_to_earnings_history"'.format(seed))['value']]
    for seed in range(NRUNS)
]
In [200]:
price_to_earnings = pe_per_seed[0]
In [201]:
def calculate_returns(orderbook_transaction_price_history):
    """Daily returns from the last transaction price of each day.

    Parameters
    ----------
    orderbook_transaction_price_history : list of list of float
        One inner list of transaction prices per trading day.

    Returns
    -------
    pd.Series
        Percentage change of the daily closing prices; the first
        (undefined) observation is dropped.
    """
    closing = pd.Series([day[-1] for day in orderbook_transaction_price_history])
    return closing.pct_change()[1:]
In [202]:
# One query per seed: transaction price and volume histories of the
# order-book objects.
# Fix: the original WHERE clause was `A AND prices OR volumes`; SQL AND
# binds tighter than OR, so volume histories were matched regardless of
# object type while price histories required object_type = 'order'.
# The IN (...) form applies the object-type filter to both variable types.
orderbook_price_and_volumes = []
for seed in range(NRUNS):
    orderbook_price_and_volumes.append(pd.read_sql_query(
        """
    SELECT Statevariables{0}.seed ,Statevariables{0}.period, Variabletypes{0}.variable_type, Objects{0}.object_name, Objects{0}.object_type ,Statevariables{0}.value
    FROM Statevariables{0} JOIN Variabletypes{0} JOIN Objects{0} ON Statevariables{0}.variable_id = Variabletypes{0}.id and Statevariables{0}.owner_id = Objects{0}.id
    WHERE Objects{0}.object_type = 'order' AND Variabletypes{0}.variable_type IN ('transaction_prices_history', 'transaction_volumes_history')
    """.format(seed), conn))
In [203]:
orderbook_price_and_volumes = pd.concat(orderbook_price_and_volumes)
In [204]:
# Transaction prices per seed: each stored value is the text repr of a
# Python list of within-day prices, parsed safely with ast.literal_eval.
prices_per_seed = [
    [ast.literal_eval(prices) for prices in
     orderbook_price_and_volumes.query('seed == {} and variable_type == "transaction_prices_history"'.format(seed))['value']]
    for seed in range(NRUNS)
]
In [205]:
# Closing price per day = last transaction price of that day, per seed.
closingprices = [[day[-1] for day in prices] for prices in prices_per_seed]
In [206]:
# Daily simple returns per seed; index 0 is NaN (no prior price) and is
# kept, matching how the series are consumed below.
returns = [pd.Series(np.array(seed_close_prices)).pct_change()
           for seed_close_prices in closingprices]
In [207]:
# Transaction volumes per seed, parsed from their TEXT representation.
volumes_per_seed = [
    [ast.literal_eval(vol) for vol in
     orderbook_price_and_volumes.query('seed == {} and variable_type == "transaction_volumes_history"'.format(seed))['value']]
    for seed in range(NRUNS)
]
In [208]:
# Total traded volume per day, per seed.
totalvolume = [[sum(day) for day in seed_vol] for seed_vol in volumes_per_seed]
In [209]:
# Four-panel overview of the first seed: closing price, returns,
# price-to-earnings (with both valuation windows) and traded volume.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10, 5))

ax1.plot(range(len(closingprices[0])), closingprices[0])
ax1.set_ylim(0.02, 0.09)
ax1.set_ylabel('Closing price', fontsize='12')

ax2.plot(range(len(returns[0])), returns[0])
ax2.set_ylabel('Returns', fontsize='12')

# P/E series plus dashed guides: green = low window, red = high window.
pe_days = range(len(price_to_earnings))
ax3.plot(pe_days, price_to_earnings)
for level, fmt in [(PE_low_low, 'g--'), (PE_low_high, 'g--'),
                   (PE_high_low, 'r--'), (PE_high_high, 'r--')]:
    ax3.plot(pe_days, [level] * len(price_to_earnings), fmt)
ax3.set_ylim(10, 33)
ax3.set_ylabel('Price to earnings', fontsize='12')
ax3.set_xlabel('Days', fontsize='12')

ax4.bar(range(len(totalvolume[0])), totalvolume[0], edgecolor='b')
ax4.set_ylabel('Volume', fontsize='12')
ax4.set_xlabel('Days', fontsize='12')

for ax in [ax1, ax2, ax3, ax4]:
    ax.ticklabel_format(axis='y', style='sci', scilimits=(-2, 2))
fig.savefig('model_dynamics.png')
In [92]:
# One query per seed: every agent's strategy-function history.
function_history = [
    pd.read_sql_query(
        """
    SELECT Statevariables{0}.seed ,Statevariables{0}.period, Variabletypes{0}.variable_type, Objects{0}.object_name, Objects{0}.object_type ,Statevariables{0}.value
    FROM Statevariables{0} JOIN Variabletypes{0} JOIN Objects{0} ON Statevariables{0}.variable_id = Variabletypes{0}.id and Statevariables{0}.owner_id = Objects{0}.id
    WHERE Variabletypes{0}.variable_type = 'function_history'
    """.format(seed),
        conn,
    )
    for seed in range(NRUNS)
]
In [93]:
function_history= pd.concat(function_history)
In [94]:
def percentageMomentum(period, df):
    """Fraction of recorded strategy functions in `period` whose recorded
    value contains the substring 'momentum'.

    Parameters
    ----------
    period : int
        Simulation period to inspect.
    df : pd.DataFrame
        Function-history frame with 'period' and 'value' columns.

    Returns
    -------
    float
        Share in [0, 1]; 0.0 when the period has no observations
        (the original raised ZeroDivisionError in that case).
    """
    # Filter first, then look only at the recorded function values;
    # avoids shadowing the caller's frame with the filtered one.
    functions = df.query('period == {}'.format(period))['value']
    if len(functions) == 0:
        return 0.0
    momentum = sum('momentum' in func for func in functions)
    return momentum / len(functions)
In [95]:
# Split the stacked function history back into one frame per seed.
functions_per_seed = [function_history.query('seed == {}'.format(seed))
                      for seed in range(NRUNS)]
In [97]:
# Share of momentum traders in each period, for the first seed only.
percMom = [percentageMomentum(period, functions_per_seed[0])
           for period in range(SIMTIME)]
In [98]:
# Per-period population shares: the noise-trader share is constant by
# construction; mean reversion is the remainder after noise + momentum.
share_noise_traders = np.full(SIMTIME, 1 - calibratedParams['share_chartists'])
share_momentum = np.array(percMom)
percMeanRev = np.ones(SIMTIME) - share_noise_traders - share_momentum
In [99]:
colors= plt.cm.Set1(np.linspace(0, 1, 12))
In [157]:
# Stacked-area chart of the trader population over time; the three bands
# (mean reversion, momentum, zero intelligence) sum to 1 each day.
fig, ax1 = plt.subplots(1, 1, figsize=(10,5))
ax1.fill_between(range(len(percMom)), percMeanRev, 0,label='mean reversion', alpha=1, color=colors[0])
ax1.fill_between(range(len(percMom)), share_momentum + percMeanRev, percMeanRev, label='momentum', color=colors[3], alpha=1)
ax1.fill_between(range(len(percMom)),
                 share_noise_traders+share_momentum + percMeanRev, share_momentum + percMeanRev,label='zero intelligence', color=colors[2])
ax1.set_ylabel('Percentage', fontsize='14')
ax1.set_xlabel('Days', fontsize='14')
ax1.legend(frameon=True, loc='best', framealpha=0.8)
fig.savefig('trader_population.png')
In [87]:
from stockmarket.stylizedfacts import *
In [133]:
# Return autocorrelation for lags 0..24, one entry per seed; the leading
# NaN return (index 0) is excluded.
autocorr_returns = {
    'seed_{}'.format(idx): [seed_returns[1:].autocorr(lag=lag) for lag in range(25)]
    for idx, seed_returns in enumerate(returns)
}
In [122]:
#autocorr_returns = [returns[0][1:].autocorr(lag=lag) for lag in range(25)]
# S&P 500 return autocorrelation for lags 0..24, skipping the first 200
# observations.
autocorr_spy_returns = [spy_returns[200:].autocorr(lag=lag) for lag in range(25)]
In [135]:
autocorr_returns = pd.DataFrame(autocorr_returns)
In [156]:
# Simulated mean autocorrelation (shaded: ±1 std across seeds) vs. S&P 500.
fig, ax1 = plt.subplots(1, 1, figsize=(10,5))
#ax1.plot(range(len(autocorr_returns[0])), autocorr_returns[0], label='Model')
ax1.plot(autocorr_returns.index, autocorr_returns.mean(axis=1), 'k-', label='Simulation')
ax1.fill_between(autocorr_returns.index,
                 autocorr_returns.mean(axis=1)+autocorr_returns.std(axis=1),
                 autocorr_returns.mean(axis=1)-autocorr_returns.std(axis=1),
                 alpha=0.3, facecolor='black')
ax1.plot(range(len(autocorr_spy_returns)), autocorr_spy_returns, label='S&P500')
ax1.legend(frameon=True, loc='best', framealpha=0.8)
ax1.set_ylabel('Autocorrelation', fontsize='14')
ax1.set_xlabel('Lags', fontsize='14')
fig.savefig('auto_correlation.png')
In [184]:
np.mean(autocorr_returns.mean(axis=1)[1:])
Out[184]:
In [176]:
# Kurtosis-based fat-tail statistic, second element of the tuple, per seed.
ftails = [fat_tails_kurtosis(ret)[1] for ret in returns]
In [180]:
np.mean(ftails)
Out[180]:
In [149]:
# Kurtosis fat-tail check: simulation (seed 0) vs. S&P 500 benchmark.
print('Fat tail in model (kurtosis): ',fat_tails_kurtosis(returns[0]))
print('Fat tail in SP500 (kurtosis): ',fat_tails_kurtosis(spy_returns[200:]))
In [140]:
# Autocorrelation of absolute returns (volatility clustering), lags 0..24,
# one entry per seed; the leading NaN observation is excluded.
absolute_returns = [ret.abs() for ret in returns]
autocorr_returns_abs = {
    'seed_{}'.format(idx): [seed_abs[1:].autocorr(lag=lag) for lag in range(25)]
    for idx, seed_abs in enumerate(absolute_returns)
}
In [144]:
autocorr_returns_abs = pd.DataFrame(autocorr_returns_abs)
In [141]:
#autocorr_abs_returns = [absolute_returns.autocorr(lag=lag) for lag in range(25)]
# Autocorrelation of absolute S&P 500 returns for lags 0..24.
absolute_spy_returns = spy_returns.abs()
autocorr_spy_abs_returns = [absolute_spy_returns.autocorr(lag=lag) for lag in range(25)]
In [152]:
# Absolute-return autocorrelation: model vs. S&P 500.
fig, ax1 = plt.subplots(1, 1, figsize=(10,5))
#ax1.plot(range(len(autocorr_abs_returns)), autocorr_abs_returns, label='Model')
ax1.plot(range(len(autocorr_spy_abs_returns)), autocorr_spy_abs_returns, label='S&P500')
# NOTE(review): the solid line shows seed_0 only while the shaded band is
# the cross-seed mean ± std — confirm this mix is intended.
ax1.plot(autocorr_returns_abs.index, autocorr_returns_abs['seed_0'], 'k-', label='Benchmark simulation')
ax1.fill_between(autocorr_returns_abs.index,
                 autocorr_returns_abs.mean(axis=1)+autocorr_returns_abs.std(axis=1),
                 autocorr_returns_abs.mean(axis=1)-autocorr_returns_abs.std(axis=1),
                 alpha=0.3, facecolor='black')
ax1.legend(frameon=True, loc='best', framealpha=0.8)
ax1.set_ylabel('Autocorrelation', fontsize='14')
ax1.set_xlabel('Lag', fontsize='14')
fig.savefig('auto_correlation_abs.png')
In [170]:
def cclustered_volatility(returns, lags):
absolute_returns = returns.abs()
autocorr_abs_returns = [absolute_returns.autocorr(lag=lag) for lag in range(lags)]
average_autocorrelation = np.mean(autocorr_abs_returns[1:])
#print(average_autocorrelation)
return average_autocorrelation
In [171]:
# Volatility-clustering statistic per seed, using 25 lags.
cvol = [cclustered_volatility(ret, 25) for ret in returns]
In [174]:
print('average volatility clustering',np.mean(cvol))
In [150]:
# Library version of the statistic: simulation (seed 0) vs. S&P 500.
print('Clustered volatility in model: ',clustered_volatility(returns[0], 25))
print('Clustered volatility in SP500: ',clustered_volatility(spy_returns[200:], 25))
In [160]:
# Hurst-based long-memory statistic (second tuple element) per seed.
hursts = [long_memory(ret, hurst, 2, 20)[1] for ret in returns]
In [164]:
# Average Hurst statistic across seeds vs. the S&P 500 benchmark.
print('Model long memory (hurst)', np.mean(hursts))
print('SP500 long memory (hurst)', long_memory(spy_returns[200:], hurst, 2, 20))
In [185]:
window = 10
In [192]:
# Rolling volatility of volume and returns for seed 0 and the S&P 500.
# (These names — voll, returns_volatility, spy_returns_volatility — are
# consumed by the print cell at the end of the notebook.)
totVol = totalvolume[0]
# volume
voll = pd.Series(totVol)
roller_sim_vol = voll.rolling(window)
volatility_sim_vol = roller_sim_vol.std(ddof=0)
# NOTE(review): only the first 200 S&P observations are used here —
# confirm this is meant to roughly match the simulation length.
roller_spy_vol = spy_volume[:200].rolling(window)
volatility_spy_vol = roller_spy_vol.std(ddof=0)
# returns
spy_roller_returns = spy_returns[:200].rolling(window)
spy_returns_volatility = spy_roller_returns.std(ddof=0)
sim_returns = returns[0]
roller_returns = sim_returns.rolling(window)
returns_volatility = roller_returns.std(ddof=0)
In [188]:
# Correlation between rolling return volatility and traded volume, per seed.
# NOTE(review): this loop rebinds `sim_returns`, `roller_returns` and
# `returns_volatility` from the previous cell; with NRUNS > 1 the final
# print cell would pair the LAST seed's volatility with seed 0's volume —
# confirm this is intended.
correlations = []
for vol, ret in zip(totalvolume, returns):
    vol = pd.Series(vol)
    sim_returns = ret
    roller_returns = sim_returns.rolling(window)
    returns_volatility = roller_returns.std(ddof=0)
    correlations.append(returns_volatility.corr(vol))
In [190]:
np.mean(correlations)
Out[190]:
In [193]:
# Volume-volatility correlation: seed 0, cross-seed average, and S&P 500.
print('Model cor vol volatility: ', returns_volatility.corr(voll))
print('Model cor vol volatility average: ', np.mean(correlations))
print('SP500 cor vol volatility: ', spy_returns_volatility.corr(spy_volume[:200]))