DB_URI points to the database; DATAYES_TOKEN serves as the login credential for DataYes (通联数据).
In [1]:
%matplotlib inline
import os
from matplotlib import pyplot as plt
import uqer
import numpy as np
import pandas as pd
from uqer import DataAPI as api
from alphamind.api import *
from alphamind.data.neutralize import neutralize
plt.style.use('ggplot')
In [2]:
_ = uqer.Client(token=os.environ['DATAYES_TOKEN'])
In [3]:
ref_date = '2017-06-23'
factor = 'EPS'
engine = SqlEngine(os.environ['DB_URI'])
universe = Universe('zz800')
The presumed formula for the neutralized residual $\bar{Res}$:

$$\bar{Res}_{i,k} = \bar{f}_{i,k} - \sum_{j}\beta_{j,k}\,\bar{Ex}_{i,j}$$

where $k$ is the industry classification, $i$ indexes the $i$-th stock within that industry, and $j$ indexes the $j$-th risk factor; $\bar{f}$ is the factor series and $\bar{Ex}$ is the risk exposure matrix. The coefficients $\beta_{j,k}$ are determined by OLS.
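A minimal sketch of this presumed per-industry OLS neutralization, using only NumPy. The names factor_values, exposure_matrix and industry_codes are hypothetical stand-ins for illustration; the notebook itself fetches the corresponding data from the engine below.

import numpy as np

def neutralize_by_industry(factor_values, exposure_matrix, industry_codes):
    # Hypothetical helper: within each industry, regress the factor on the
    # risk exposures and keep the OLS residuals.
    residuals = np.empty_like(factor_values, dtype=float)
    for k in np.unique(industry_codes):
        mask = industry_codes == k
        f_k = factor_values[mask]          # factor values of stocks in industry k
        ex_k = exposure_matrix[mask]       # their risk exposure rows
        beta, *_ = np.linalg.lstsq(ex_k, f_k, rcond=None)  # OLS betas for industry k
        residuals[mask] = f_k - ex_k @ beta
    return residuals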
In the sections below, we compare the three neutralize implementations:
UQER Neutralize
Compute the factor residuals with the UQER (优矿) SDK.
Alpha-Mind Neutralize
Compute the factor residuals with alpha-mind, which can be installed from:
https://github.com/wegamekinglc/alpha-mind
Direct Weighted Least Square Fit Implementation
Compute the factor residuals directly with a weighted least squares fit (statsmodels is used in the cells below).
In [4]:
codes = engine.fetch_codes(ref_date, universe)
factor_data = engine.fetch_factor(ref_date, factor, codes)
risk_cov, risk_exposure = engine.fetch_risk_model(ref_date, codes)
total_data = pd.merge(factor_data, risk_exposure, on=['code']).dropna()
In [5]:
total_data['ticker'] = total_data.code.apply(lambda x: '{0:06}'.format(x))
total_data.set_index('ticker', inplace=True)
In [6]:
len(total_data)
Out[6]:
In [7]:
%%timeit
neutralized_factor_uqer = uqer.neutralize(total_data[factor],
target_date=ref_date.replace('-', ''),
industry_type='short')
In [68]:
neutralized_factor_uqer = uqer.neutralize(total_data[factor],
target_date=ref_date.replace('-', ''),
industry_type='short').sort_index()
df = pd.DataFrame(neutralized_factor_uqer, columns=['uqer'])
df.head(10)
Out[68]:
In [69]:
len(neutralized_factor_uqer)
Out[69]:
In [70]:
risk_exposure_uqer = uqer.DataAPI.RMExposureDayGet(tradeDate=ref_date.replace('-', '')).set_index('ticker')
targeted_secs = risk_exposure_uqer.loc[neutralized_factor_uqer.index]
style_exposure = neutralized_factor_uqer.values @ targeted_secs[risk_styles].values
industry_exposure = neutralized_factor_uqer.values @ targeted_secs[industry_styles].values
exposure = pd.Series(np.concatenate([style_exposure, industry_exposure]), index=risk_styles+industry_styles)
exposure
Out[70]:
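If the uqer residuals are plain OLS residuals on these same exposures, every entry of exposure above should be zero up to numerical noise. A quick sanity check, assuming the variables from the previous cell (the tolerance is an arbitrary choice):

# each style/industry exposure of the neutralized factor should (nearly) vanish
assert np.allclose(exposure.values, 0.0, atol=1e-6)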
In [71]:
x = targeted_secs[risk_styles + industry_styles].values
y = total_data[factor].values
In [72]:
%%timeit
neutralized_factor_alphamind = neutralize(x, y, weights=np.ones(len(y)))
In [73]:
neutralized_factor_alphamind = neutralize(x, y, weights=np.ones(len(y)))
alphamind_series = pd.Series(neutralized_factor_alphamind.flatten(), index=total_data.index)
df['alpha-mind'] = alphamind_series
df.head()
Out[73]:
In [74]:
len(alphamind_series)
Out[74]:
In [77]:
style_exposure = targeted_secs[risk_styles].values.T @ neutralized_factor_alphamind
industry_exposure = targeted_secs[industry_styles].values.T @ neutralized_factor_alphamind
exposure = pd.Series(np.concatenate([style_exposure[:, 0], industry_exposure[:, 0]]), index=risk_styles+industry_styles)
exposure
Out[77]:
In [78]:
# codes present in the alpha-mind result but dropped by the uqer result
missed_codes = [c for c in alphamind_series.index if c not in neutralized_factor_uqer.index]
In [79]:
total_data.loc[missed_codes]
Out[79]:
In [80]:
import statsmodels.api as sm
In [81]:
mod = sm.WLS(y, x, weights=np.ones(len(y))).fit()
lg_series = pd.Series(mod.resid, index=total_data.index)
In [82]:
df['ols'] = lg_series
In [83]:
df['uqer - ols'] = df['uqer'] - df['ols']
df['alphamind - ols'] = df['alpha-mind'] - df['ols']
In [84]:
df[['uqer - ols', 'alphamind - ols']].plot(figsize=(14, 7), ylim=(-1e-4, 1e-4))
Out[84]:
In [85]:
df.head()
Out[85]: