The DB_URI environment variable must point to the database before running this notebook.
In [1]:
%matplotlib inline
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from alphamind.api import *
from PyFin.api import *
plt.style.use('ggplot')
In [2]:
"""
Back test parameter settings
"""
start_date = '2016-01-01'
end_date = '2018-02-14'
freq = '10b'
industry_lower = 1.0
industry_upper = 1.0
neutralized_risk = industry_styles
industry_name = 'sw'
industry_level = 1
turn_over_target_base = 0.4
benchmark_total_lower = 0.8
benchmark_total_upper = 1.0
batch = 1
horizon = map_freq(freq)
universe = Universe('zz800')
data_source = os.environ['DB_URI']
benchmark_code = 905
weight_gap = 0.01
executor = NaiveExecutor()
ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')
engine = SqlEngine(data_source)
In [8]:
"""
Factor Model
"""
alpha_factors = {'f01': LAST('GROWTH')}
weights = dict(f01=1.)
alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)
def predict_worker(params):
    """Predict expected returns for one (ref_date, model) pair.

    Unpacks the (date string, model) tuple, assembles the shared
    DataMeta configuration and delegates to ``predict_by_model``.
    Returns the predicted expected-return vector only.
    """
    ref_date, model = params
    meta = DataMeta(freq=freq,
                    universe=universe,
                    batch=batch,
                    neutralized_risk=neutralized_risk,
                    risk_model='short',
                    pre_process=[winsorize_normal, standardize],
                    post_process=[standardize],
                    warm_start=0,
                    data_source=data_source)
    er, _ = predict_by_model(ref_date, model, meta)
    return er
In [9]:
%%time
"""
Predicting Phase
"""
predicts = [predict_worker((d.strftime('%Y-%m-%d'), alpha_model)) for d in ref_dates]
In [10]:
"""
Shared Data
"""
constraint_risk = ['SIZE', 'SIZENL', 'BETA']
total_risk_names = constraint_risk + ['benchmark', 'total']
b_type = []
l_val = []
u_val = []
for name in total_risk_names:
if name == 'benchmark':
b_type.append(BoundaryType.RELATIVE)
l_val.append(benchmark_total_lower)
u_val.append(benchmark_total_upper)
elif name in {'SIZE', 'SIZENL', 'BETA'}:
b_type.append(BoundaryType.ABSOLUTE)
l_val.append(0.0)
u_val.append(0.0)
else:
b_type.append(BoundaryType.RELATIVE)
l_val.append(industry_lower)
u_val.append(industry_upper)
bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)
industry_total = engine.fetch_industry_matrix_range(universe, dates=ref_dates, category=industry_name, level=industry_level)
benchmark_total = engine.fetch_benchmark_range(dates=ref_dates, benchmark=benchmark_code)
risk_total = engine.fetch_risk_model_range(universe, dates=ref_dates)[1]
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon, offset=1).set_index('trade_date')
In [11]:
# rebalance
def create_scenario(weight_gap):
    """Run the fixed-frequency rebalance back test.

    For each reference date: build the constraint matrix, solve a
    risk-neutral target portfolio around the benchmark, execute it, and
    record the realised log excess return, turnover, IC and leverage.

    :param weight_gap: max absolute deviation of a single stock's weight
                       from its benchmark weight.
    :return: DataFrame indexed by trade date with columns
             returns / turn_over / IC / leverage / tc_cost.
    """
    previous_pos = pd.DataFrame()  # NOTE(review): assigned but never used below — confirm
    rets = []
    turn_overs = []
    leverags = []
    ics = []
    for i, ref_date in enumerate(ref_dates):
        ref_date = ref_date.strftime('%Y-%m-%d')
        # Slice the pre-fetched range data down to this rebalance date.
        industry_matrix = industry_total[industry_total.trade_date == ref_date]
        benchmark_w = benchmark_total[benchmark_total.trade_date == ref_date]
        risk_matrix = risk_total[risk_total.trade_date == ref_date]
        # Left join keeps every stock with an industry; missing benchmark weights become 0.
        total_data = pd.merge(industry_matrix, benchmark_w, on=['code'], how='left').fillna(0.)
        total_data = pd.merge(total_data, risk_matrix, on=['code'])
        total_data = total_data.dropna()
        codes = total_data.code.values.tolist()
        benchmark_w = total_data.weight.values
        # Synthetic exposure columns: benchmark-membership indicator and all-ones ('total').
        is_in_benchmark = (benchmark_w > 0.).astype(float).reshape((-1, 1))
        total_risk_exp = np.concatenate([total_data[constraint_risk].values.astype(float),
                                         is_in_benchmark,
                                         np.ones_like(is_in_benchmark)],
                                        axis=1)
        total_risk_exp = pd.DataFrame(total_risk_exp, columns=total_risk_names)
        constraints = LinearConstraints(bounds, total_risk_exp, benchmark_w)
        # Per-stock weight box: benchmark weight +/- weight_gap, floored at zero.
        lbound = np.maximum(0., benchmark_w - weight_gap)  # np.zeros(len(total_data))
        ubound = weight_gap + benchmark_w
        er = predicts[i].loc[codes].values
        target_pos, _ = er_portfolio_analysis(er,
                                              industry_matrix.industry_name.values,
                                              None,
                                              constraints,
                                              False,
                                              benchmark_w,
                                              method='risk_neutral',
                                              lbound=lbound,
                                              ubound=ubound)
        target_pos['code'] = codes
        # Execute against the previously held position to get realised turnover.
        turn_over, executed_pos = executor.execute(target_pos=target_pos)
        executed_codes = executed_pos.code.tolist()
        # Forward returns over the holding horizon, offset one day after the decision.
        dx_returns = engine.fetch_dx_return(ref_date, executed_codes, horizon=horizon, offset=1)
        result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
        result = pd.merge(result, dx_returns, on=['code'])
        # Per-stock simple return in excess of the benchmark index return.
        excess_return = np.exp(result.dx.values) - 1. - index_return.loc[ref_date, 'dx']
        raw_weight = result.weight_x.values  # executed portfolio weight (merge suffix _x)
        activate_weight = raw_weight - result.weight_y.values  # active weight vs. benchmark (_y)
        ret = raw_weight @ excess_return
        # "Risk-adjusted IC": correlation between active weights and realised excess returns.
        risk_adjusted_ic = np.corrcoef(excess_return, activate_weight)[0, 1]
        rets.append(np.log(1. + ret))
        ics.append(risk_adjusted_ic)
        executor.set_current(executed_pos)
        turn_overs.append(turn_over)
        leverage = raw_weight.sum()
        leverags.append(leverage)
    ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'IC': ics, 'leverage': leverags}, index=ref_dates)
    # Append one extra schedule slot, then shift so each row reports the
    # return realised over the period ENDING on that date; the first row is zeroed.
    ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], freq)] = 0.
    ret_df = ret_df.shift(1)
    ret_df.iloc[0] = 0.
    ret_df['tc_cost'] = ret_df.turn_over * 0.002  # flat 20 bps cost per unit of turnover
    return ret_df
In [12]:
# Run the back test and plot cumulative return against cumulative transaction cost.
ret_df = create_scenario(weight_gap)
# Use benchmark_code in the title instead of the duplicated literal 905 so the
# label always matches the benchmark actually used above.
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
                                             title='Fixed freq rebalanced: {0} with benchmark {1}'.format(freq, benchmark_code),
                                             secondary_y='tc_cost')
Out[12]:
In [ ]: