• Please set the DB_URI environment variable to point to the database before running this notebook.
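
For example, the variable can be exported in the shell before starting Jupyter, or set at the top of the session. The connection string below is only a placeholder and should be replaced with your own database URI:

import os

# placeholder SQLAlchemy-style connection string -- replace with your own database
os.environ['DB_URI'] = 'postgresql+psycopg2://user:password@localhost:5432/alpha'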

In [1]:
%matplotlib inline
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from alphamind.api import *
from PyFin.api import *

plt.style.use('ggplot')

In [2]:
"""
Backtest parameter settings
"""

start_date = '2016-01-01'
end_date = '2018-02-14'

freq = '10b'
industry_lower = 1.0
industry_upper = 1.0
neutralized_risk = industry_styles
industry_name = 'sw'
industry_level = 1
turn_over_target_base = 0.4
benchmark_total_lower = 0.8
benchmark_total_upper = 1.0
batch = 1
horizon = map_freq(freq)
universe = Universe('zz800')
data_source = os.environ['DB_URI']
benchmark_code = 905
weight_gap = 0.01

executor = NaiveExecutor()
ref_dates = makeSchedule(start_date, end_date, freq, 'china.sse')
engine = SqlEngine(data_source)
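
As an optional sanity check, the generated schedule can be inspected directly; with freq = '10b' it should contain one rebalance date roughly every ten SSE trading days between start_date and end_date:

print(len(ref_dates))     # number of rebalance dates in the backtest window
print(ref_dates[:3])      # first few scheduled dates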

In [8]:
"""
Factor Model
"""

alpha_factors = {'f01': LAST('GROWTH')}

weights = dict(f01=1.)

alpha_model = ConstLinearModel(features=alpha_factors, weights=weights)

def predict_worker(params):
    data_meta = DataMeta(freq=freq,
                         universe=universe,
                         batch=batch,
                         neutralized_risk=neutralized_risk,
                         risk_model='short',
                         pre_process=[winsorize_normal, standardize],
                         post_process=[standardize],
                         warm_start=0,
                         data_source=data_source)
    ref_date, model = params
    er, _ = predict_by_model(ref_date, model, data_meta)
    return er
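
Since ConstLinearModel uses fixed weights, the score it produces is simply a linear combination of the processed factor values. The minimal numpy sketch below illustrates that combination for the single-factor setup above; the factor values are made up purely for illustration:

import numpy as np

factor_matrix = np.array([[0.5], [-1.2], [0.7]])   # three stocks, one standardized factor
model_weights = np.array([1.])                      # the weight assigned to 'f01' above

expected_returns = factor_matrix @ model_weights    # -> array([ 0.5, -1.2,  0.7])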

In [9]:
%%time

"""
Predicting Phase
"""
predicts = [predict_worker((d.strftime('%Y-%m-%d'), alpha_model)) for d in ref_dates]


Wall time: 2min

In [10]:
"""
Shared Data
"""

constraint_risk = ['SIZE', 'SIZENL', 'BETA']
total_risk_names = constraint_risk + ['benchmark', 'total']

b_type = []
l_val = []
u_val = []

for name in total_risk_names:
    if name == 'benchmark':
        b_type.append(BoundaryType.RELATIVE)
        l_val.append(benchmark_total_lower)
        u_val.append(benchmark_total_upper)
    elif name in {'SIZE', 'SIZENL', 'BETA'}:
        b_type.append(BoundaryType.ABSOLUTE)
        l_val.append(0.0)
        u_val.append(0.0)
    else:
        b_type.append(BoundaryType.RELATIVE)
        l_val.append(industry_lower)
        u_val.append(industry_upper)
        
bounds = create_box_bounds(total_risk_names, b_type, l_val, u_val)
industry_total = engine.fetch_industry_matrix_range(universe, dates=ref_dates, category=industry_name, level=industry_level)
benchmark_total = engine.fetch_benchmark_range(dates=ref_dates, benchmark=benchmark_code)
risk_total = engine.fetch_risk_model_range(universe, dates=ref_dates)[1]
index_return = engine.fetch_dx_return_index_range(benchmark_code, start_date, end_date, horizon=horizon, offset=1).set_index('trade_date')

In [11]:
# rebalance

def create_scenario(weight_gap):

    previous_pos = pd.DataFrame()
    rets = []
    turn_overs = []
    leverages = []
    ics = []

    for i, ref_date in enumerate(ref_dates):
        ref_date = ref_date.strftime('%Y-%m-%d')
        industry_matrix = industry_total[industry_total.trade_date == ref_date]
        benchmark_w = benchmark_total[benchmark_total.trade_date == ref_date]
        risk_matrix = risk_total[risk_total.trade_date == ref_date]

        total_data = pd.merge(industry_matrix, benchmark_w, on=['code'], how='left').fillna(0.)
        total_data = pd.merge(total_data, risk_matrix, on=['code'])
        total_data = total_data.dropna()
        codes = total_data.code.values.tolist()

        benchmark_w = total_data.weight.values
        is_in_benchmark = (benchmark_w > 0.).astype(float).reshape((-1, 1))

        total_risk_exp = np.concatenate([total_data[constraint_risk].values.astype(float),
                                         is_in_benchmark,
                                         np.ones_like(is_in_benchmark)],
                                        axis=1)
        total_risk_exp = pd.DataFrame(total_risk_exp, columns=total_risk_names)
        constraints = LinearConstraints(bounds, total_risk_exp, benchmark_w)

        lbound = np.maximum(0., benchmark_w - weight_gap)  # np.zeros(len(total_data))
        ubound = weight_gap + benchmark_w

        er = predicts[i].loc[codes].values
        
        target_pos, _ = er_portfolio_analysis(er,
                                              industry_matrix.industry_name.values,
                                              None,
                                              constraints,
                                              False,
                                              benchmark_w,
                                              method='risk_neutral',
                                              lbound=lbound,
                                              ubound=ubound)

        target_pos['code'] = codes
        turn_over, executed_pos = executor.execute(target_pos=target_pos)

        executed_codes = executed_pos.code.tolist()
        dx_returns = engine.fetch_dx_return(ref_date, executed_codes, horizon=horizon, offset=1)
        result = pd.merge(executed_pos, total_data[['code', 'weight']], on=['code'], how='inner')
        result = pd.merge(result, dx_returns, on=['code'])

        # weight_x: executed portfolio weight; weight_y: benchmark weight (from the merge above)
        excess_return = np.exp(result.dx.values) - 1. - index_return.loc[ref_date, 'dx']
        raw_weight = result.weight_x.values
        active_weight = raw_weight - result.weight_y.values
        ret = raw_weight @ excess_return
        risk_adjusted_ic = np.corrcoef(excess_return, active_weight)[0, 1]
        rets.append(np.log(1. + ret))
        ics.append(risk_adjusted_ic)
        executor.set_current(executed_pos)
        turn_overs.append(turn_over)
        
        leverage = raw_weight.sum()
        leverages.append(leverage)

    ret_df = pd.DataFrame({'returns': rets, 'turn_over': turn_overs, 'IC': ics, 'leverage': leverages}, index=ref_dates)

    # the return recorded at date d is realized over the following period,
    # so append one extra date and shift everything forward by one row
    ret_df.loc[advanceDateByCalendar('china.sse', ref_dates[-1], freq)] = 0.
    ret_df = ret_df.shift(1)
    ret_df.iloc[0] = 0.
    ret_df['tc_cost'] = ret_df.turn_over * 0.002
    return ret_df

In [12]:
ret_df = create_scenario(weight_gap)
ret_df[['returns', 'tc_cost']].cumsum().plot(figsize=(12, 6),
                                             title='Fixed freq rebalanced: {0} with benchmark {1}'.format(freq, benchmark_code),
                                             secondary_y='tc_cost')


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x1f11a5b5048>
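
Beyond the cumulative plot, the period-by-period frame returned by create_scenario can be summarized directly with pandas. A rough sketch, assuming about 25 rebalances per year at the '10b' frequency (252 trading days / 10):

periods_per_year = 25                       # assumption: ~252 trading days / 10-day frequency
net_ret = ret_df.returns - ret_df.tc_cost   # returns net of the 20 bps turnover cost above

print('annualized excess return:', net_ret.mean() * periods_per_year)
print('annualized volatility   :', net_ret.std() * np.sqrt(periods_per_year))
print('information ratio       :', net_ret.mean() / net_ret.std() * np.sqrt(periods_per_year))
print('average IC              :', ret_df.IC.mean())
print('average turnover        :', ret_df.turn_over.mean())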

In [ ]: