In [3]:
from IPython import parallel
# Create a client for the IPython parallel cluster and take a load-balanced
# view from it; the view is used further down to submit the backtests.
c = parallel.Client()
view = c.load_balanced_view()
In [108]:
%matplotlib inline
from __future__ import division
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
sns.set_context('talk')
sns.set_palette('hls', n_colors=7)
import zipline
print zipline.__version__
import pytz
from datetime import datetime
from zipline.utils.factory import load_bars_from_yahoo
# Load price/volume bars manually from Yahoo! Finance for two small universes
# over the same [start, end] window.
start = datetime(2009, 1, 1, tzinfo=pytz.utc)
end = datetime(2012, 1, 1, tzinfo=pytz.utc)

large_cap_symbols = ['CERN', 'DLTR', 'ROST', 'SBUX']
large_cap = load_bars_from_yahoo(stocks=large_cap_symbols, start=start, end=end)

# Taken from http://www.forbes.com/sites/brettnelson/2011/10/19/fifteen-small-company-stocks-to-buy-right-now/
small_cap_symbols = ['HMSY', 'BWLD', 'IRBT', 'IPGP']
small_cap = load_bars_from_yahoo(stocks=small_cap_symbols, start=start, end=end)
In [109]:
# Rough liquidity proxy for the large-cap universe: mean volume times mean
# price (presumably one value per stock -- verify the Panel axis layout),
# then averaged into a single number.
_lc_mean_volume = large_cap.loc[:, :, 'volume'].mean()
_lc_mean_price = large_cap.loc[:, :, 'price'].mean()
(_lc_mean_volume * _lc_mean_price).mean()
Out[109]:
In [110]:
# Same liquidity proxy as computed for large_cap: mean volume times mean
# price, then averaged. The volume term previously collapsed to a single
# scalar via an extra .mean() before the multiplication, so the two universes
# were not measured with the same formula.
(small_cap.loc[:, :, 'volume'].mean() * small_cap.loc[:, :, 'price'].mean()).mean()
Out[110]:
In [112]:
%%px --local
import zipline.utils.factory as factory
from zipline.algorithm import TradingAlgorithm
from zipline.api import order_target_percent, record, symbol, history, add_history, get_open_orders, get_datetime, set_slippage
from zipline.finance.slippage import VolumeShareSlippage
import numpy as np
import math
from pytz import timezone
from zipline.api import symbols
def initialize(context):
    """Set up slippage, algorithm state and history windows.

    Installs a VolumeShareSlippage model, initialises the state attributes
    read by handle_data (eps, init, previous_datetime, i) and registers
    30-bar daily history for 'price' and 'volume'.
    """
    slippage_model = VolumeShareSlippage(volume_limit=0.5, price_impact=1.5)
    set_slippage(slippage_model)

    context.eps = 1  # change epsilon here
    context.init = False
    context.previous_datetime = None
    context.i = 0

    for field in ('price', 'volume'):
        add_history(30, '1d', field)
def handle_data(context, data):
    """Per-bar callback: rebalance once per day using blended OLMAR weights.

    Flow:
      1. Skip the first 29 calls so the 30-bar history is populated.
      2. On the first processed bar, place equal-weight target orders.
      3. On each later calendar day (US/Eastern), unless an order is still
         open or a stock did not trade on this bar, recompute the current
         allocation vector context.b_t, run olmar() for every look-back
         window 3..30, blend the candidate portfolios weighted by the dot
         product of each candidate with its x_tilde, and submit
         order_target_percent for every stock.

    :param context: algorithm state prepared by initialize(); this function
        additionally sets context.stocks, context.m and context.b_t.
    :param data: per-bar data; indexed by stock, entries expose .price and
        .datetime.
    """
    warmup_bars = 30               # matches the 30-bar history requested in initialize
    windows = list(range(3, 31))   # look-back windows handed to olmar

    context.i += 1
    if context.i < warmup_bars:
        return

    current_datetime = get_datetime().astimezone(timezone('US/Eastern'))

    record(cash=context.portfolio.cash)

    if not context.init:
        context.stocks = data.keys()
        context.m = len(context.stocks)
        context.b_t = np.ones(context.m) / context.m

        # Equal weighting portfolio
        for stock, percent in zip(context.stocks, context.b_t):
            order_target_percent(stock, percent)

        context.init = True

    # Only execute the algorithm once per day. Compare full calendar dates:
    # comparing only the day-of-month would treat e.g. Jan 15 and Feb 15 as
    # the same day.
    previous_datetime = context.previous_datetime
    context.previous_datetime = current_datetime
    if previous_datetime is None:
        # First processed bar: only the initial equal-weight orders are placed.
        return
    if current_datetime.date() == previous_datetime.date():
        return

    # Skip this bar if any orders are open or any stock did not trade.
    for stock in context.stocks:
        if bool(get_open_orders(stock)) or data[stock].datetime < get_datetime():
            return

    # Compute current portfolio allocations (market value per stock).
    position_values = np.array(
        [context.portfolio.positions[stock].amount * data[stock].price
         for stock in context.stocks],
        dtype=float)
    total_value = position_values.sum()
    if total_value == 0 or np.isnan(total_value):
        # Nothing held (or a missing price): normalising would yield NaN
        # weights, which make simplex_projection fail. Keep the previous
        # weights and try again on the next bar.
        return

    # Bring portfolio vector to unit length.
    context.b_t = position_values / total_value

    # Compute candidate portfolios according to the OLMAR algo, one per window.
    n_windows = len(windows)
    b_norm = np.zeros((context.m, n_windows))
    x_tilde = np.zeros((context.m, n_windows))
    for k, window in enumerate(windows):
        b_norm[:, k], x_tilde[:, k] = olmar(context, window)

    # Blend the candidates, each weighted by s[k] = b_norm[:, k] . x_tilde[:, k].
    s = np.zeros(n_windows)
    b_norm_opt = np.zeros(context.m)
    s_sum = 0
    for k in range(n_windows):
        s[k] = np.dot(b_norm[:, k], x_tilde[:, k])
        b_norm[:, k] = np.multiply(s[k], b_norm[:, k])
        b_norm_opt += b_norm[:, k]
        s_sum += s[k]

    b_norm_opt = np.divide(b_norm_opt, s_sum)

    # Rebalance portfolio.
    for stock, percent in zip(context.stocks, b_norm_opt):
        order_target_percent(stock, percent)
def olmar(context, window):
    """Compute one OLMAR portfolio candidate for a given look-back window.

    :param context: algorithm state; reads context.m, context.stocks,
        context.b_t (current weights) and context.eps.
    :param window: look-back parameter. Rows [30 - window, -1) of the 30-bar
        history are used, i.e. window - 1 bars with the most recent bar
        excluded.
    :Returns: tuple (b_norm, x_tilde) where b_norm is the new portfolio
        vector returned by simplex_projection and x_tilde holds, per stock,
        the volume-weighted average price over the window divided by the
        last price in the window.
    """
    bar_count = 30  # must match the history length registered via add_history

    # Get history -- prices and volumes of the last `bar_count` daily bars.
    p = history(bar_count, '1d', 'price')
    v = history(bar_count, '1d', 'volume')

    # Explicit positional slicing (the original .ix relied on its positional
    # fallback for integer slices on a non-integer index).
    prices = p.iloc[bar_count - window:-1]
    volumes = v.iloc[bar_count - window:-1]

    # Find the relative moving volume-weighted average price for each security.
    x_tilde = np.zeros(context.m)
    for i, stock in enumerate(context.stocks):
        vwa_price = np.dot(prices[stock], volumes[stock]) / np.sum(volumes[stock])
        x_tilde[i] = vwa_price / prices[stock].iloc[-1]

    ###########################
    # Inside of OLMAR (algo 2)
    x_bar = x_tilde.mean()

    # Calculate terms for lambda (lam).
    dot_prod = np.dot(context.b_t, x_tilde)
    num = context.eps - dot_prod
    denom = (np.linalg.norm(x_tilde - x_bar)) ** 2

    # Test for the divide-by-zero case.
    if denom == 0.0:
        lam = 0  # no portfolio update
    else:
        lam = max(0, num / denom)

    b = context.b_t + lam * (x_tilde - x_bar)

    b_norm = simplex_projection(b)

    return b_norm, x_tilde
def simplex_projection(v, b=1):
    """Map a vector to non-negative weights using a sort-and-threshold rule.

    Steps: negative entries of ``v`` are zeroed; the result is sorted in
    descending order and cumulatively summed; the last position whose sorted
    value exceeds ``(cumsum - b) / rank`` determines a threshold ``theta``
    (floored at 0); ``theta`` is subtracted from the zeroed vector and any
    negative results are set to 0.

    :param v: 1-D array-like of numbers (not modified).
    :param b: target total used in the threshold computation (default 1).
    :returns: numpy array of the same length as ``v`` with entries >= 0.
    """
    vec = np.asarray(v)
    n = len(vec)

    # Zero out negative entries before sorting.
    clipped = vec * (vec > 0)
    descending = np.sort(clipped)[::-1]
    running_total = np.cumsum(descending)

    ranks = np.arange(1, n + 1)
    candidates = np.nonzero(descending > (running_total - b) / ranks)[0]
    rho = candidates[-1]
    theta = np.max([0, (running_total[rho] - b) / (rho + 1)])

    shifted = clipped - theta
    return np.where(shifted < 0, 0, shifted)
In [113]:
def run_algo(data, capital_base):
    """Run the trading algorithm on ``data`` with a given starting capital.

    :param data: bar data handed to TradingAlgorithm.run.
    :param capital_base: starting capital used for the simulation parameters.
    :returns: whatever TradingAlgorithm.run returns (the performance result).
    """
    algo_obj = TradingAlgorithm(
        initialize=initialize,
        handle_data=handle_data,
        sim_params=factory.create_simulation_parameters(capital_base=capital_base),
    )
    return algo_obj.run(data)
In [141]:
# Starting-capital levels to compare; each value is passed as capital_base to run_algo.
test_vals = [1e3, 1e4, 1e5, 5e5, 1e6, 5e6, 1e7]
In [142]:
# Submit one backtest per capital level on the large-cap data through the
# load-balanced view, then block via wait_interactive() until they finish.
perfs_lc = view.map_async(run_algo, [large_cap]*len(test_vals), test_vals)
perfs_lc.wait_interactive()
In [143]:
# Replace the async handle with its results (one entry per capital level).
# NOTE(review): rebinding the same name means this cell presumably cannot be
# re-run without first re-running the submission cell -- confirm.
perfs_lc = perfs_lc.get()
In [144]:
# Submit one backtest per capital level on the small-cap data through the
# load-balanced view, then block via wait_interactive() until they finish.
perfs_sc = view.map_async(run_algo, [small_cap]*len(test_vals), test_vals)
perfs_sc.wait_interactive()
In [145]:
# Replace the async handle with its results (one entry per capital level).
# NOTE(review): rebinding the same name means this cell presumably cannot be
# re-run without first re-running the submission cell -- confirm.
perfs_sc = perfs_sc.get()
In [155]:
def _cumulative_returns_pct(perf):
    """Compounded cumulative return series of one backtest, in percent.

    Assumes perf.returns holds simple per-period returns -- TODO confirm.
    """
    return (np.cumprod(1 + perf.returns) - 1) * 100


# Both panels use the same compounded-return definition so they are directly
# comparable (the large-cap panel previously plotted summed log returns while
# the small-cap panel plotted compounded returns under the same label), and
# values are scaled by 100 to match the "in %" axis label.
ax1 = plt.subplot(2, 1, 1)
for capital, perf in zip(test_vals, perfs_lc):
    _cumulative_returns_pct(perf).plot(ax=ax1, label=capital)
plt.ylabel('cumulative returns in %')
plt.title('Large cap stocks')
ax1.set_xticks([])

ax2 = plt.subplot(2, 1, 2)
for capital, perf in zip(test_vals, perfs_sc):
    _cumulative_returns_pct(perf).plot(ax=ax2, label=capital, sharex=ax1)
plt.legend(loc=3)
plt.ylabel('cumulative returns in %')
plt.title('Small cap stocks')

# Figure-level title: set as a panel title it was immediately overwritten by
# 'Large cap stocks' and never shown.
plt.suptitle('Capacity analysis\nIdentical algorithm with different capital bases')
plt.savefig('cap_rets.png')
In [152]:
def _plot_ending_capital(ax, perfs, title, ylabel, with_legend=False):
    """Draw starting vs. ending capital on log-log axes for one universe.

    The 'expected' line joins the first data point to a linear extrapolation
    (slope/intercept taken from the first two capital levels) evaluated at the
    largest capital level; the 'actual' line and markers show the final
    portfolio_value of every backtest.
    """
    points = np.array([(cpt_base, perf.iloc[-1].portfolio_value)
                       for cpt_base, perf in zip(test_vals, perfs)])
    start_caps, end_caps = points[:, 0], points[:, 1]

    gradient = (points[1, 1] - points[0, 1]) / (points[1, 0] - points[0, 0])
    offset = points[0, 1] - points[0, 0] * gradient
    extrapolated = gradient * test_vals[-1] + offset

    ax.loglog((test_vals[0], test_vals[-1]), (points[0, 1], extrapolated), label='expected')
    ax.loglog(start_caps, end_caps, 'g', label='actual')
    ax.loglog(start_caps, end_caps, 'go')
    ax.set_ylabel(ylabel)
    if with_legend:
        ax.legend(loc=0)
    ax.set_title(title)


ax1 = plt.subplot(2, 1, 1)
_plot_ending_capital(ax1, perfs_lc, 'Large cap stocks', 'Ending capital', with_legend=True)
ax1.set_xticks([])

ax2 = plt.subplot(2, 1, 2)
_plot_ending_capital(ax2, perfs_sc, 'Small cap stocks', 'Ending capital in $')
plt.xlabel('Starting capital in $')
plt.savefig('cap_capital.png')
In [121]:
# Display the large-cap data object loaded above for inspection.
large_cap
Out[121]:
In [148]:
# Look up the IRBT entry of small_cap at the 'dt' timestamp of the first
# transaction in row 30 of the first small-cap backtest's transactions.
small_cap.loc['IRBT', perfs_sc[0].transactions.iloc[30][0]['dt']] # + pd.Timedelta('1d')]
Out[148]:
In [123]:
# Map sid -> transaction price for row 30 of the fourth small-cap backtest
# (perfs_sc[3]) and show the IRBT entry.
pd.Series({p['sid']: p['price'] for p in perfs_sc[3].transactions.iloc[30]})['IRBT']
Out[123]:
In [162]:
# Top panel: per capital level, the transaction prices recorded in row 30 of
# each large-cap backtest's transactions, one column per sid.
# NOTE(review): the y-label says '% of order filled' but the plotted values
# are transaction prices (and order amounts in the lower panel) -- confirm
# the intended label.
ax1 = plt.subplot(2, 1, 1)
price = {
    capital: pd.Series({p['sid']: p['price'] for p in perf.transactions.iloc[30]})
    for capital, perf in zip(test_vals, perfs_lc)
}
prices = pd.concat(price).unstack()
prices.plot(ax=ax1)
#plt.xlabel('Starting capital in $')
plt.ylabel('% of order filled')
plt.title('Large cap stocks')
ax1.set_xticks([])

# Bottom panel: same row for the small-cap backtests, keeping both the
# transaction price and the transacted amount per sid.
ax2 = plt.subplot(2, 1, 2)
price = {
    capital: pd.DataFrame({p['sid']: {'price': p['price'], 'order amount': p['amount']}
                           for p in perf.transactions.iloc[30]})
    for capital, perf in zip(test_vals, perfs_sc)
}
prices = pd.concat(price).unstack()
prices.plot(ax=ax2, logx=True)
plt.xlabel('Starting capital in $')
plt.ylabel('% of order filled')
plt.title('Small cap stocks')
#plt.savefig('cap_filled.png')
Out[162]:
In [166]:
# Select the IRBT columns from the `prices` table built in the plotting cell
# and label its index as the starting capital.
df = prices['IRBT']
# Disabled: express price relative to the 1000-capital run.
#df['price'] = ((df['price'] / df.loc[1000, 'price']) - 1)
df.index.name = 'Capital'
df
Out[166]:
In [168]:
# Display the list of starting-capital levels.
test_vals
Out[168]:
In [172]:
# NOTE(review): in the visible notebook `txns` is only assigned in a later
# cell, so on a fresh top-to-bottom run this line would raise NameError --
# move this cell below that assignment or remove it.
txns
Out[172]:
In [203]:
plt.figure(figsize=(8, 4))
# (A large-cap variant of this panel used to be sketched here as
# commented-out code; only the small-cap panel is drawn.)
ax2 = plt.subplot(1, 1, 1)

# Transacted amount per sid in row 30 of each small-cap backtest's
# transactions, keyed by starting capital.
txn = {
    capital: pd.Series({p['sid']: p['amount'] for p in perf.transactions.iloc[30]})
    for capital, perf in zip(test_vals, perfs_sc)
}
txns = pd.concat(txn).unstack()
txns /= txns.iloc[1]

# Divide every row by its capital level expressed in thousands, then
# normalise by the first row.
capital_in_thousands = np.asarray(txns.index).T / 1000.
scaled_amounts = (np.asarray(txns).T / capital_in_thousands).T
df = pd.DataFrame(scaled_amounts, columns=txns.columns, index=txns.index)
df /= df.iloc[0]

df.plot(ax=ax2, logx=True)
plt.xlabel('Starting capital in $')
plt.ylabel('% of order filled')
plt.title('Small cap stocks')
plt.savefig('cap_filled.png')
In [194]:
# Rebuild the per-capital scaled table from `txns` (each row divided by its
# capital level in thousands) and display it.
df = pd.DataFrame((np.asarray(txns).T / (np.asarray(txns.index).T / 1000.)).T, columns=txns.columns, index=txns.index)
df
Out[194]:
In [131]:
def orders_to_df(perf):
    """Turn the per-period order lists of a backtest into a table of amounts.

    :param perf: backtest result exposing ``perf.orders``, a Series whose
        values are lists of order dicts with at least 'sid' and 'amount'.
        Its index is localized to UTC here, so it is expected to be
        timezone-naive.
    :returns: DataFrame with one row per period that had at least one order
        and one column per sid; missing sids are filled with 0. The index is
        UTC and normalized to midnight. If a sid appears more than once in a
        period, the last order's amount is kept.
    """
    def extract_order_info(orders):
        # Periods without orders become NaN so dropna() removes them below.
        if len(orders) == 0:
            return np.nan
        return {order['sid']: order['amount'] for order in orders}

    orders_tmp = perf.orders.apply(extract_order_info).dropna()
    orders = pd.DataFrame(orders_tmp.tolist()).fillna(0)
    # Public, vectorised normalisation instead of looping over the private
    # pd.tslib.normalize_date helper.
    orders.index = orders_tmp.index.tz_localize('utc').normalize()
    return orders
In [206]:
def _plot_volume_share(ax, perfs, bars):
    """Plot, per capital level, the 20-period rolling mean of the largest
    absolute ordered-amount / volume ratio across stocks (log y-axis)."""
    for val, perf in zip(test_vals, perfs):
        ratio = orders_to_df(perf) / bars.loc[:, :, 'volume']
        worst_share = ratio.dropna().abs().max(axis=1)
        pd.rolling_mean(worst_share, 20).plot(ax=ax, label=val, logy=True)


ax1 = plt.subplot(2, 1, 1)
_plot_volume_share(ax1, perfs_lc, large_cap)
plt.legend()
plt.title('Large cap')
plt.ylabel('% of total volume ordered')
ax1.set_xticks([])

ax2 = plt.subplot(2, 1, 2)
_plot_volume_share(ax2, perfs_sc, small_cap)
plt.title('Small cap')
plt.ylabel('% of total volume ordered')

plt.tight_layout()
plt.savefig('cap_perc_vol.png')