在本文中,我们给出一个分步骤的例子:使用原始的多因子数据,生成目标权重。
In [1]:
%matplotlib inline
import os
import numpy as np
import pandas as pd
from alphamind.data.dbmodel.models import Uqer
from alphamind.data.dbmodel.models import Universe as UniverseTable
from alphamind.data.dbmodel.models import Industry
from alphamind.data.dbmodel.models import IndexComponent
from alphamind.data import neutralize
from alphamind.portfolio.linearbuilder import linear_builder
from PyFin.api import *
import sqlalchemy as sa
from sqlalchemy import outerjoin, and_, select
from matplotlib import rc
from matplotlib import pyplot as plt
from alphamind.api import *
# Global matplotlib defaults for every figure in this notebook.
# NOTE(review): 'Microsoft YaHei' is presumably chosen so CJK labels render — confirm
# the font is installed on the machine running the notebook.
rc('font', family='Microsoft YaHei', size=10)
rc('mathtext', default='regular')
rc('legend', frameon=False)
In [2]:
# Run parameters shared by every cell in this notebook.

# Name of the membership column on the universe table; read via getattr() in the queries.
universe_name = 'zz800'
# Single trade date every query is filtered on.
trade_date = '2019-01-15'
# Value matched against Industry.industry to pick the industry classification.
industry_name = '申万行业分类'
# Value matched against IndexComponent.indexCode to pick the benchmark constituents.
benchmark_code = 906
# Symmetric bound on the active weight of each industry; 0 forces industry-neutral weights.
max_active_industry_exposure = 0.
# Cap on the absolute active weight of any single stock.
max_active_single_stock_exposure = 0.02
# SQLAlchemy engine; raises KeyError if the DB_URI environment variable is not set.
con = sa.create_engine(os.environ['DB_URI'])
In [3]:
%%time
# Raw factor columns to read from the Uqer table.
factor = ['EPS', 'ROE']

# Outer-join Uqer to the universe table on (trade_date, code) so that the
# universe membership flag can be used as a filter.
big_table = outerjoin(
    Uqer,
    UniverseTable,
    and_(
        Uqer.trade_date == UniverseTable.trade_date,
        Uqer.code == UniverseTable.code,
    ),
)

# One row per stock in the chosen universe on the trade date.
query = (
    select([Uqer.code, *(getattr(Uqer, name) for name in factor)])
    .select_from(big_table)
    .where(
        and_(
            Uqer.trade_date == trade_date,
            getattr(UniverseTable, universe_name) == 1,
        )
    )
)
factors = pd.read_sql(query, con=con)

# Composite factor: fixed linear combination of the two raw factors.
factors['factor'] = factors['EPS'] * 0.5 + factors['ROE'] * 1.5
In [4]:
# Preview the first rows of the hand-built factor table.
factors.head()
Out[4]:
In [5]:
# Build the same composite factor through alphamind's SqlEngine, using a factor
# expression instead of hand-written SQL.
sql_engine = SqlEngine(os.environ['DB_URI'])
factor_expression = 0.5*LAST('EPS') + 1.5*LAST('ROE')

# The fetched column is named after str(factor_expression); rename it to 'factor'
# so it lines up with the hand-built table.
factors2 = sql_engine.fetch_factor_range(
    universe=Universe(universe_name),
    dates=[trade_date],
    factors=factor_expression,
).rename(columns={str(factor_expression): 'factor'})

factors2.head(5)
Out[5]:
In [6]:
# Sanity check: the hand-built factor and the SqlEngine factor must agree.
# assert_array_almost_equal returns None and raises AssertionError on mismatch,
# so it is called for its effect only; the cell staying silent means the check passed.
# NOTE(review): the comparison is positional — it assumes both queries return the
# stocks in the same row order; confirm.
np.testing.assert_array_almost_equal(factors.factor, factors2.factor)
In [7]:
%%time
# Outer-join the industry table to the universe table on (trade_date, code) so
# that the universe membership flag can be used as a filter.
big_table = outerjoin(
    Industry,
    UniverseTable,
    and_(
        Industry.trade_date == UniverseTable.trade_date,
        Industry.code == UniverseTable.code,
    ),
)

# Level-1 industry name of every universe member under the chosen classification.
query = (
    select([Industry.code, Industry.industryName1])
    .select_from(big_table)
    .where(
        and_(
            Industry.trade_date == trade_date,
            Industry.industry == industry_name,
            getattr(UniverseTable, universe_name) == 1,
        )
    )
)
industry = pd.read_sql(query, con=con)
print(industry.head(5))
In [8]:
%%time
# Benchmark constituents and their weights on the trade date.
# Only IndexComponent is queried: rows are restricted by trade date and benchmark
# index code, so no join with the universe table is involved.
# IndexComponent.weight is divided by 100 and exposed as 'index_weight'
# (presumably the table stores percentages — confirm against the schema).
query = (
    select([IndexComponent.code, (IndexComponent.weight / 100.).label('index_weight')])
    .where(
        and_(
            IndexComponent.trade_date == trade_date,
            IndexComponent.indexCode == benchmark_code,
        )
    )
)
index_components = pd.read_sql(query, con=con)
print(index_components.head())
In [9]:
# Combine factors, industry labels and benchmark weights per stock code.
# Inner joins keep only stocks present in all three tables; rows with any
# missing value are dropped after each join.
df = pd.merge(
    pd.merge(factors, industry, on='code', how='inner').dropna(),
    index_components,
    on='code',
    how='inner',
).dropna()
将行业(categorical)数据转为dummy矩阵
In [10]:
# One indicator column per level-1 industry, one row per stock in df.
industry_dummy = pd.get_dummies(df['industryName1'])
使用行业dummy矩阵对因子做中性化,得到行业中性化后的因子:neutralized_factor
In [11]:
%%time
# Neutralize the composite factor against the industry dummy matrix.
industry_exposure = industry_dummy.values.astype(float)
raw_factor = df['factor'].values

# neutralize() output is flattened to 1-D before being stored as a column.
df['neutralized_factor'] = neutralize(industry_exposure, raw_factor).flatten()
print(df.loc[:, ['code', 'neutralized_factor']].head())
In [12]:
# Inputs for the linear optimizer; all weights here are active weights
# (relative to the benchmark).

# Objective coefficients: the industry-neutralized factor.
er = df['neutralized_factor'].values

# Per-stock bounds. The lower bound is the tighter of the single-stock cap and
# -index_weight, so index_weight + active weight can never go below zero.
lbound = np.maximum(-max_active_single_stock_exposure, -df['index_weight'].values)
ubound = max_active_single_stock_exposure

# Per-industry bounds on aggregate active weight (symmetric around zero).
industry_count = industry_dummy.shape[1]
unit_vector = np.ones(industry_count)
industry_low_bounds = -max_active_industry_exposure * unit_vector
industry_up_bounds = max_active_industry_exposure * unit_vector

# Constraint columns: the industry dummies plus a final column of ones whose
# target is [0, 0], i.e. the active weights must sum to zero.
budget_column = np.ones((len(er), 1))
risk_constraints = np.concatenate((industry_dummy, budget_column), axis=1)
risk_target = (
    np.concatenate((industry_low_bounds, [0.])),
    np.concatenate((industry_up_bounds, [0.])),
)
输出结果中:
status:优化状态;optimized_values:组合预期收益的负值;weights:组合中股票的主动权重
In [13]:
%%time
# Solve for the active weights under the stock-level bounds and the
# industry / budget constraints prepared in the previous cell.
status, optimized_values, weights = linear_builder(
    er, lbound, ubound, risk_constraints, risk_target
)
计算最终持仓:
portfolio_weight:组合权重;active_weight:主动权重
In [14]:
# Final holding = benchmark weight + optimized active weight.
benchmark_weight = df['index_weight']
df['portfolio_weight'] = benchmark_weight + weights
df['active_weight'] = weights
我们可以通过计算行业权重,并与指数的行业权重进行比较,验证行业暴露确实为0
In [15]:
# Compare benchmark and portfolio weight per industry.
# Only the two plotted weight columns are aggregated, so the sum does not touch
# unrelated columns (stock codes, factor values) or depend on them being summable.
df.groupby('industryName1')[['index_weight', 'portfolio_weight']].sum().plot.bar(figsize=(14, 7))
Out[15]:
我们也可以通过观察中性化后因子值,观察个股权重与因子值的关系
In [16]:
# Active weight of each stock against its industry-neutralized factor value.
df.plot.scatter(x='neutralized_factor', y='active_weight', figsize=(14, 7))
# Fix the visible window of the plot.
plt.xlim(-5, 5)
plt.ylim(-0.02, 0.025)
Out[16]:
In [17]:
# Input frame for the cross-sectional operator: indexed by stock code, with the
# neutralized factor exposed as 'er'.
data = (
    df[['code', 'neutralized_factor', 'industryName1']]
    .set_index('code')
    .rename(columns={'neutralized_factor': 'er'})
)
# Encode the industry name as a float category code for use as the grouping key.
data['industry'] = pd.Categorical(data['industryName1']).codes.astype(float)

# NOTE(review): presumably selects the top 2 'er' values within each 'industry'
# group — confirm against the PyFin CSTopN documentation.
oper = CSTopN('er', 2, groups='industry')
In [18]:
# Feed the operator one record per stock code, then read its result back as a Series.
records_by_code = data.to_dict(orient='index')
oper.push(records_by_code)
data['chosen'] = oper.value.to_pd_series()

# Keep only the selected stocks. The explicit comparison with True is kept on
# purpose: the dtype of the operator output is not visible here.
is_chosen = data['chosen'] == True
data = data[is_chosen]
所有的行业都选择了两只股票,如下图所示:
In [19]:
# Number of selected stocks per industry.
(
    data
    .groupby('industryName1')
    .count()
    .plot.bar(y=['chosen'], figsize=(14, 7))
)
Out[19]:
In [20]:
# (rows, columns) of the frame of selected stocks.
data.shape
Out[20]:
股票代码:
In [21]:
# The index holds the stock codes of the selected stocks (set via set_index('code')).
data.index
Out[21]:
In [ ]: