In [1]:
#!/Tsan/bin/python
# -*- coding: utf-8 -*-
In [86]:
# Libraries to use
from __future__ import division
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime,time
import time
In [3]:
# Import My own library for factor testing
from SingleFactorTest import factorFilterFunctions as ff
from BackTestingEngine import backTestingEngine as bte
#from config import *
In [4]:
%matplotlib inline
%load_ext line_profiler
In [5]:
# Make sure that matplotlib and seaborn can render Chinese characters:
# both the sans-serif and serif font families are pointed at 'SimHei'.
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['font.serif'] = ['SimHei']
# seaborn keeps its own style dict, so the font list is passed to it as well.
sns.set_style("darkgrid",{"font.sans-serif":['simhei', 'Arial']})
In [6]:
path = ff.data_path
In [188]:
# Overall sample window: 2012-01-01 through 2017-02-28.
startTime = datetime.strptime('20120101', '%Y%m%d')
endTime = datetime.strptime('20170228', '%Y%m%d')
In [189]:
# [start, end] bounds of calendar year 2012; used below as `.loc[year2012[0]:year2012[1]]`.
year2012 = [datetime.strptime('20120101', '%Y%m%d'),datetime.strptime('20121231', '%Y%m%d')]
In [40]:
filenameIndex = 'LZ_CN_STKA_INDXQUOTE_CLOSE.h5' # index close prices
filenameIndexAmount = 'LZ_CN_STKA_INDXQUOTE_AMOUNT.h5' # index traded amount
In [438]:
# Industry classification
filenameInduFlag = 'LZ_CN_STKA_INDU_ZX.h5' # industry flag per stock
#
filenameZXExplanation = 'LZ_GPA_TMP_INDU_ZX.csv' # dictionary explaining the industry codes
In [192]:
filenameCSIWeight = 'LZ_CN_STKA_INDEX_CSI500WEIGHT.h5' # CSI 500 constituent weights
filenameHS300Weight = 'LZ_CN_STKA_INDEX_HS300WEIGHT.h5' # HS300 (CSI 300) constituent weights (benchmark)
In [ ]:
In [363]:
# A-share stock-level data files
filenameFshr = 'LZ_CN_STKA_VAL_FSHR.h5' # float share capital
filenameFcap = 'LZ_CN_STKA_VAL_A_FCAP.h5' # float market capitalisation
filenamePrft = 'LZ_CN_STKA_CRD_NET_PRFT_FTTM.h5' # net profit
filenameOprinc = 'LZ_CN_STKA_FIN_IND_QFA_OPRINC.h5' # net income (as labelled by the original author)
filenameEqy = 'LZ_CN_STKA_BAL_COMBO_TOTL_LIAB_SHRHLDR_EQY.h5' # shareholders' equity
filenameNetAsset = 'LZ_CN_STKA_VAL_NET_ASSET.h5' # net assets
filenameClose = 'LZ_CN_STKA_QUOTE_TCLOSE.h5' # close prices
In [302]:
# Wind All-A index (CSV with CLOSE and AMOUNT columns, read below)
filenameWindA = 'windA.csv'
In [441]:
#
# Column labels of the three indices compared in this notebook.
hs300,csi500,windA= '000300.SH','000905.SH','881001'
indexList = [hs300,csi500,windA]
In [485]:
# Load index close prices and index traded amounts through the project HDF5 reader.
indexData = ff.readh5data(path,filenameIndex)
tradeAmount = ff.readh5data(path,filenameIndexAmount)
# Parse the Wind All-A CSV once and take both columns from the same frame
# (previously the identical file was read and date-parsed twice).
windARaw = pd.read_csv(path+filenameWindA,infer_datetime_format=True,parse_dates=[0],index_col=0)
# Copies are taken so that renaming the series cannot touch the raw frame's columns.
windAindex = windARaw['CLOSE'].copy()
windAindex.name = windA
windAAmount = windARaw['AMOUNT'].copy()
windAAmount.name = windA
# Prepend the Wind All-A series as an extra column labelled with the `windA` code.
indexData = pd.concat([windAindex,indexData],axis=1)
tradeAmount = pd.concat([windAAmount,tradeAmount],axis=1)
In [444]:
# Correlation of index returns: Pearson correlation of the period-over-period
# percentage changes of the three indices within 2012, rounded to 4 decimals.
coeff_matrix = indexData[indexList].loc[year2012[0]:year2012[1]].pct_change().corr(method='pearson').round(4)
In [447]:
# Visualisation: annotated heatmap of the return-correlation matrix.
plt.figure(figsize=(12,8))
ax = plt.axes()
sns.heatmap(coeff_matrix ,ax=ax, annot=True)
ax.set_title('Correlationship Matrix',fontsize=18, fontweight='bold')
plt.show()
In [452]:
# Annualised return: net worth is each index level in 2012 divided by its first 2012 value.
networth = indexData[indexList].loc[year2012[0]:year2012[1]] / indexData[indexList].loc[year2012[0]:year2012[1]].iloc[0]
# Compound the final net worth to a 250-period year; the exponent relies on the
# true division enabled by `from __future__ import division` at the top of the notebook.
(networth.iloc[-1]) ** (250 / networth.shape[0]) - 1
Out[452]:
In [454]:
# Net-worth curves: every index divided by its first available value, over the whole sample.
(indexData[indexList] /indexData[indexList].iloc[0]).plot(figsize=(16,9))
Out[454]:
In [465]:
# Bar chart of traded amount: HS300 and CSI 500 amounts in 2012, summed per calendar month.
tradeAmount[[hs300,csi500]].loc[year2012[0]:year2012[1]].resample('M').sum().plot(kind='bar',figsize=(16,9))
plt.xticks(rotation=50)
Out[465]:
In [197]:
# Correlation between the CSI 500 and its benchmark.
# The original passed a bare name `benchmark`, which no cell of this notebook defines
# (it only exists as a function parameter), so the cell raised NameError on a fresh kernel.
# The HS300 weight file is annotated "(benchmark)" in the configuration cells, so the
# HS300 close series is used here. NOTE(review): confirm HS300 is the intended benchmark.
correlation = indexData[csi500].corr(indexData[hs300])
In [198]:
# Share-capital structure: average share of the CSI 500 constituents in the
# summed share capital of all stocks, over the dates of 2012.
csimember = ff.readh5data(path, filenameCSIWeight).loc[year2012[0]:year2012[1]]
fshr = ff.readh5data(path, filenameFshr).loc[year2012[0]:year2012[1]]  # share capital (read from filenameFshr)
csifshr = pd.DataFrame(index=fshr.index, columns=['fshr'], dtype=float)
for date in csimember.index:
    # Constituents on this date are the columns with a non-null weight.
    memberList = csimember.loc[date].dropna().index.tolist()
    fshrThatDay = fshr.loc[date]
    csifshr.loc[date] = fshrThatDay[memberList].sum()
# Divide by the all-stock total per date, then average over dates.
totalFshr = fshr.sum(axis=1)
fshrRatio = csifshr.divide(totalFshr, axis=0).squeeze().mean()
In [528]:
ff.readh5data(path,filenameFshr).iloc[-2]['601318.SH']
Out[528]:
In [199]:
fshrRatio
Out[199]:
In [217]:
# Market-cap coverage & industry coverage of the CSI 500 constituents in 2012
fcap = ff.readh5data(path, filenameFcap).loc[year2012[0]:year2012[1]]  # float market cap
indu = ff.readh5data(path, filenameInduFlag).loc[year2012[0]:year2012[1]]  # industry flags
csifcap = pd.DataFrame(index=fcap.index, columns=['fcap'], dtype=float)
# Holds the number of distinct industry flags per date (the column is named 'fcap' in the original too).
indudf = pd.DataFrame(index=indu.index, columns=['fcap'], dtype=float)
for date in csimember.index:
    memberList = csimember.loc[date].dropna().index.tolist()
    memberFcap = fcap.loc[date][memberList]
    memberIndu = indu.loc[date][memberList]
    csifcap.loc[date] = memberFcap.sum()
    indudf.loc[date] = memberIndu.drop_duplicates().shape[0]
totalFcap = fcap.sum(axis=1)
fcapRatio = csifcap.divide(totalFcap, axis=0).squeeze().mean()
# 29 is the number of industries the count is normalised by.
induRatio = (indudf / 29).squeeze().mean()
In [418]:
In [ ]:
In [439]:
In [ ]:
In [464]:
# Plot the industry distribution of the index constituents (CSI 500 vs HS300),
# using the last row of the 2012 slices.
# NOTE(review): `hsmember` is first assigned in a later cell (the valuation cell),
# so this cell fails on a fresh top-to-bottom run.
induExplanation = pd.read_csv(path+filenameZXExplanation,infer_datetime_format=True,parse_dates=[0],encoding='gb2312')
# Shift the default 0-based row index to start at 1 -- presumably so it lines up
# with the industry codes used as the group keys below; confirm against the CSV.
induExplanation.index = induExplanation.index.map(lambda x: x+1)
memberList = csimember.iloc[-1].dropna().index.tolist()
# Number of CSI 500 constituents per industry flag.
a = indu.iloc[-1][memberList].groupby(indu.iloc[-1][memberList]).count()
memberHs300 = hsmember.iloc[-1].dropna().index.tolist()
# Number of HS300 constituents per industry flag.
b = indu.iloc[-1][memberHs300].groupby(indu.iloc[-1][memberHs300]).count()
c = pd.concat([a,b,induExplanation],axis=1)
c.columns = ['csi500','hs300','indu']
c.index.name = 'industry'
# Use the industry description as the x-axis label of the bar chart.
c.set_index('indu',inplace= True)
c.plot(kind= 'bar',figsize=(16,9))
plt.xticks(rotation=50)
Out[464]:
In [ ]:
In [218]:
induRatio
Out[218]:
In [201]:
# Number of constituents, adjustment frequency, adjustment range
# (hard-coded values; not referenced by any other cell shown in this notebook).
adjNum , adjfrequency, adjRange = 500, 120, 100
In [ ]:
In [ ]:
In [ ]:
In [202]:
# Liquidity of the constituents (average traded amount, average turnover ratio, liquidity indicator).
# NOTE: this rebinds `tradeAmount` from the multi-index frame to the single CSI 500 series for 2012.
tradeAmount = ff.readh5data(path,filenameIndexAmount).loc[year2012[0]:year2012[1]][csi500]
# Turnover ratio = index traded amount / summed float market cap of the constituents.
turnoverRatiodf = tradeAmount.divide(csifcap.squeeze(),axis=0)
# Liquidity indicator = |log return of the index| / turnover ratio.
liquidIndicator = np.abs(np.log(indexData[csi500]) - np.log(indexData[csi500].shift(1))).divide(turnoverRatiodf,axis=0)
dailyTradeAmount = tradeAmount.mean() # average traded amount
dailyTurnoverRatio = turnoverRatiodf.mean() # average turnover ratio
dailyLiquidIndicator = liquidIndicator.mean() # liquidity indicator
In [203]:
# Standard deviation and Sharpe ratio of the CSI 500.
# NOTE(review): unlike the neighbouring cells, `indexData` is not sliced to 2012 here,
# so both figures use every row currently held in `indexData`.
std = indexData[csi500].pct_change().std() * np.sqrt(250)  # scaled by sqrt(250) periods
# Annualised return (250 periods per year) divided by the annualised std; no risk-free rate is subtracted.
sharpe = ((indexData[csi500].iloc[-1] / indexData[csi500].iloc[0]) ** (250/ indexData[csi500].shape[0]) -1) / std
In [204]:
# Profitability: aggregate EPS and ROE of the CSI 500 constituents, averaged over 2012 dates
prft = ff.readh5data(path, filenamePrft).loc[year2012[0]:year2012[1]]  # net profit
oprinc = ff.readh5data(path, filenameOprinc).loc[year2012[0]:year2012[1]]  # net income
eqy = ff.readh5data(path, filenameEqy).loc[year2012[0]:year2012[1]]  # shareholders' equity
csieps = pd.DataFrame(index=fcap.index, columns=['eps'], dtype=float)
csiroe = pd.DataFrame(index=indu.index, columns=['froe'], dtype=float)
for date in csimember.index:
    memberList = csimember.loc[date].dropna().index.tolist()
    # EPS = summed income / summed share capital of the constituents.
    incomeSum = oprinc.loc[date][memberList].sum()
    shareSum = fshr.loc[date][memberList].sum()
    csieps.loc[date] = incomeSum / shareSum
    # ROE = summed net profit / summed shareholders' equity of the constituents.
    profitSum = prft.loc[date][memberList].sum()
    equitySum = eqy.loc[date][memberList].sum()
    csiroe.loc[date] = profitSum / equitySum
# The frames are collapsed to their means over dates (the names now hold scalars).
csieps = csieps.squeeze().mean()
csiroe = csiroe.squeeze().mean()
In [210]:
# Valuation level: P/E and P/B of the CSI 500, absolute and relative to the HS300
hsmember = ff.readh5data(path, filenameHS300Weight).loc[year2012[0]:year2012[1]]  # HS300 constituents
netAsset = ff.readh5data(path, filenameNetAsset).loc[year2012[0]:year2012[1]]  # net assets
peAbs = pd.DataFrame(index=fcap.index, columns=['pe'], dtype=float)  # absolute P/E
peRel = pd.DataFrame(index=fcap.index, columns=['pe'], dtype=float)  # P/E relative to HS300
pbAbs = pd.DataFrame(index=fcap.index, columns=['pb'], dtype=float)  # absolute P/B
pbRel = pd.DataFrame(index=fcap.index, columns=['pb'], dtype=float)  # P/B relative to HS300
for date in csimember.index:
    memberList = csimember.loc[date].dropna().index.tolist()
    hsmemberList = hsmember.loc[date].dropna().index.tolist()
    # Aggregate float market cap of each constituent set on this date.
    csiFcapSum = fcap.loc[date][memberList].sum()
    hsFcapSum = fcap.loc[date][hsmemberList].sum()
    peAbs.loc[date] = csiFcapSum / prft.loc[date][memberList].sum()
    pbAbs.loc[date] = csiFcapSum / netAsset.loc[date][memberList].sum()
    # Relative values divide by the same ratio computed over the HS300 constituents.
    peRel.loc[date] = peAbs.loc[date] / (hsFcapSum / prft.loc[date][hsmemberList].sum())
    pbRel.loc[date] = pbAbs.loc[date] / (hsFcapSum / netAsset.loc[date][hsmemberList].sum())
# Collapse each frame to its mean over dates (the names now hold scalars).
peAbs = peAbs.squeeze().mean()
peRel = peRel.squeeze().mean()
pbAbs = pbAbs.squeeze().mean()
pbRel = pbRel.squeeze().mean()
In [382]:
In [211]:
peAbs
Out[211]:
In [227]:
# Row labels, values and column label of the 2012 indicator table.
# NOTE: this rebinds `indexList`, which earlier held the three index codes.
indexList = ['Cor','Structure','Fcap_Coverage','Indu_Coverage','Amount','Turnover','Liquidity','Std','Sharpe',
'Eps','Roe','PeAbs','PeRel','PbAbs','PbRel']
# Values are listed in the same order as the labels above.
data = [correlation,fshrRatio,fcapRatio,induRatio,dailyTradeAmount,dailyTurnoverRatio,dailyLiquidIndicator,
std,sharpe,csieps,csiroe,peAbs,peRel,pbAbs,pbRel]
columnNames = ['2012']
In [228]:
# One-column indicator table for 2012; the rounded copy is only displayed, `resultdf` keeps full precision.
resultdf = pd.DataFrame(index =indexList,data=data,columns = columnNames)
resultdf.round(4)
Out[228]:
In [226]:
resultdf.iloc[4]
Out[226]:
In [286]:
# Indicator table of the HS300, used as the benchmark when scoring.
# The original call passed six arguments with the weight file name repeated in the
# `timeperiod` slot; getIndexIndicators takes seven
# (index, weight file, benchmark, timeperiod, year, indexData, tradeAmount),
# the same form used by the other calls in this notebook.
# NOTE(review): getIndexIndicators is defined in a later cell, so that cell must run first.
benchmarkdf = getIndexIndicators(hs300, filenameHS300Weight, hs300, year2012, '2012', indexData, tradeAmount)
# The first four rows (Cor, Structure, Fcap_Coverage, Indu_Coverage) get a fixed reference value of 1.
benchmarkdf.iloc[:4] = 1
In [269]:
# Compute the score of one indicator
def calFinalScore(x, bench, intervalNum=6):
    """Score ``x`` by the equal-width band of ``[0, bench]`` that contains it.

    ``[0, bench]`` is split into ``intervalNum - 1`` bands with ``np.linspace``.
    A value in band ``i`` (0-based, left-closed, right-open) scores ``i + 1``.

    Any value outside ``[0, bench)`` scores 0: at or above the upper bound,
    below the lower bound, or NaN. The original left ``finalScore`` unbound for
    values below 0 or NaN and raised UnboundLocalError; those cases now return 0,
    matching the later definition of this function in the notebook.

    Parameters
    ----------
    x : scalar or one-element array -- the value to score.
    bench : scalar or one-element array -- upper bound of the scoring range.
    intervalNum : int -- number of band edges (default 6, i.e. five bands).

    Returns
    -------
    int -- the score, between 0 and ``intervalNum - 1``.
    """
    edges = np.linspace(0, bench, intervalNum)
    finalScore = 0  # default for everything that falls in no band
    # `range` instead of `xrange` so the function also runs on Python 3.
    for i in range(intervalNum - 1):
        if edges[i] <= x < edges[i + 1]:
            finalScore = i + 1
            break  # bands are disjoint, so the first hit is the only one
    return finalScore
In [278]:
calFinalScore(resultdf.iloc[7].values,1)
Out[278]:
In [288]:
# Score weights, one per row of the 15-row indicator table (same order); they sum to 1.
weightList = [0.06,0.06,0.06,0.06,0.05,0.05,0.08,0.1,0.16,0.06,0.1,0.04,0.04,0.04,0.04]
In [285]:
# Score every indicator row of the CSI 500 table against the same row of the benchmark table.
rankList = [
    calFinalScore(resultdf.loc[index].values, benchmarkdf.loc[index].values)
    for index in resultdf.index
]
In [287]:
rankList
Out[287]:
In [468]:
def calTotalScore(resultdf,benchmarkdf,weightList):
    """Weighted total of the per-indicator scores.

    Each row label of ``resultdf`` is scored with ``calFinalScore`` against the
    row with the same label in ``benchmarkdf``; the scores are multiplied
    element-wise by ``weightList`` (same order as ``resultdf.index``) and summed.

    Returns the weighted sum as a numpy scalar.
    """
    scores = [
        calFinalScore(resultdf.loc[label].values, benchmarkdf.loc[label].values)
        for label in resultdf.index
    ]
    weighted = np.array(scores) * np.array([weightList])
    return weighted.sum()
In [472]:
calTotalScore(resultdf,windAindicators,weightList)
Out[472]:
In [297]:
totalScore = (np.array(rankList) * np.array([weightList])).sum()
totalScore
Out[297]:
In [252]:
np.linspace(0,benchmarkdf.iloc[6].values,6)
Out[252]:
In [ ]:
In [378]:
getIndexIndicators(csi500,filenameCSIWeight,windA,year2012,'2012',indexData,tradeAmount)
Out[378]:
In [470]:
windAindicators = getIndexIndicators(windA,filenameClose,windA,year2012,'2012',indexData,tradeAmount)
In [376]:
def getIndexIndicators(csi500,filenameCSIWeight,benchmark,timeperiod,year,indexData,tradeAmount):
    """Build the 15-row indicator table of one index over one time window.

    Parameters
    ----------
    csi500 : column label of the evaluated index in ``indexData`` and ``tradeAmount``.
    filenameCSIWeight : HDF5 file name (under the notebook-level ``path``); the
        columns with a non-null value on a date are that date's constituents.
    benchmark : column label in ``indexData``; only used for the correlation.
    timeperiod : ``[start, end]`` used to slice every date-indexed table.
    year : label used as the single column name of the result.
    indexData : date-indexed frame of index levels.
    tradeAmount : date-indexed frame of index traded amounts.

    Returns
    -------
    One-column DataFrame rounded to 4 decimals, indexed by 'Cor', 'Structure',
    'Fcap_Coverage', 'Indu_Coverage', 'Amount', 'Turnover', 'Liquidity', 'Std',
    'Sharpe', 'Eps', 'Roe', 'PeAbs', 'PeRel', 'PbAbs', 'PbRel'.

    Note: the relative P/E and P/B are measured against all stocks with a
    non-null close price on the date, whatever ``benchmark`` is.
    """
    begin, end = timeperiod[0], timeperiod[1]

    def _window(filename):
        # Read one table through the project HDF5 reader and keep the requested dates.
        return ff.readh5data(path, filename).loc[begin:end]

    def _emptyColumn(index, label):
        # Single-column float frame, filled date by date in the loop below.
        return pd.DataFrame(index=index, columns=[label], dtype=float)

    # Index-level statistics
    indexData = indexData.loc[begin:end]
    levels = indexData[csi500]
    correlation = levels.corr(indexData[benchmark])
    # Std scaled by sqrt(250); Sharpe = annualised return (250 periods) / std, no risk-free rate.
    std = levels.pct_change().std() * np.sqrt(250)
    sharpe = ((levels.iloc[-1] / levels.iloc[0]) ** (250 / levels.shape[0]) - 1) / std

    # Stock-level tables (read in the same order as before)
    csimember = _window(filenameCSIWeight)
    fshr = _window(filenameFshr)          # share capital
    fcap = _window(filenameFcap)          # float market cap
    indu = _window(filenameInduFlag)      # industry flags
    prft = _window(filenamePrft)          # net profit
    oprinc = _window(filenameOprinc)      # net income
    eqy = _window(filenameEqy)            # shareholders' equity
    windAmember = _window(filenameClose)  # close prices; non-null entries define the comparison universe
    netAsset = _window(filenameNetAsset)  # net assets

    # Per-date result holders
    csifshr = _emptyColumn(fshr.index, 'fshr')
    csifcap = _emptyColumn(fcap.index, 'fcap')
    indudf = _emptyColumn(indu.index, 'fcap')
    csieps = _emptyColumn(fcap.index, 'eps')
    csiroe = _emptyColumn(indu.index, 'froe')
    peAbs = _emptyColumn(fcap.index, 'pe')  # absolute P/E
    peRel = _emptyColumn(fcap.index, 'pe')  # relative P/E
    pbAbs = _emptyColumn(fcap.index, 'pb')  # absolute P/B
    pbRel = _emptyColumn(fcap.index, 'pb')  # relative P/B

    # Per-date aggregation over the constituents
    for date in csimember.index:
        memberList = csimember.loc[date].dropna().index.tolist()
        hsmemberList = windAmember.loc[date].dropna().index.tolist()
        memberFshr = fshr.loc[date][memberList].sum()
        memberFcap = fcap.loc[date][memberList].sum()
        memberPrft = prft.loc[date][memberList].sum()
        universeFcap = fcap.loc[date][hsmemberList].sum()

        csifshr.loc[date] = memberFshr
        csifcap.loc[date] = memberFcap
        # Number of distinct industry flags among the constituents.
        indudf.loc[date] = indu.loc[date][memberList].drop_duplicates().shape[0]
        csieps.loc[date] = oprinc.loc[date][memberList].sum() / memberFshr
        csiroe.loc[date] = memberPrft / eqy.loc[date][memberList].sum()
        peAbs.loc[date] = memberFcap / memberPrft
        pbAbs.loc[date] = memberFcap / netAsset.loc[date][memberList].sum()
        peRel.loc[date] = peAbs.loc[date] / (universeFcap / prft.loc[date][hsmemberList].sum())
        pbRel.loc[date] = pbAbs.loc[date] / (universeFcap / netAsset.loc[date][hsmemberList].sum())

    # Collapse the per-date series to their means
    fshrRatio = csifshr.divide(fshr.sum(axis=1), axis=0).squeeze().mean()
    fcapRatio = csifcap.divide(fcap.sum(axis=1), axis=0).squeeze().mean()
    induRatio = (indudf / 29).squeeze().mean()  # normalised by 29 industries
    csieps = csieps.squeeze().mean()
    csiroe = csiroe.squeeze().mean()
    peAbs = peAbs.squeeze().mean()
    peRel = peRel.squeeze().mean()
    pbAbs = pbAbs.squeeze().mean()
    pbRel = pbRel.squeeze().mean()

    # Liquidity: average traded amount, average turnover ratio, liquidity indicator
    tradeAmount = tradeAmount.loc[begin:end][csi500]
    turnoverRatiodf = tradeAmount.divide(csifcap.squeeze(), axis=0)
    absLogReturn = np.abs(np.log(levels) - np.log(levels.shift(1)))
    liquidIndicator = absLogReturn.divide(turnoverRatiodf, axis=0)
    dailyTradeAmount = tradeAmount.mean()
    dailyTurnoverRatio = turnoverRatiodf.mean()
    dailyLiquidIndicator = liquidIndicator.mean()

    # Assemble the result frame
    indexList = ['Cor','Structure','Fcap_Coverage','Indu_Coverage','Amount','Turnover','Liquidity','Std','Sharpe',
                 'Eps','Roe','PeAbs','PeRel','PbAbs','PbRel']
    data = [correlation, fshrRatio, fcapRatio, induRatio, dailyTradeAmount, dailyTurnoverRatio,
            dailyLiquidIndicator, std, sharpe, csieps, csiroe, peAbs, peRel, pbAbs, pbRel]
    resultdf = pd.DataFrame(index=indexList, data=data, columns=[year])
    return resultdf.round(4)
In [491]:
# Period-over-period returns of every index in 2012 ...
a = indexData.loc[year2012[0]:year2012[1]].pct_change()
# ... and the HS300 / CSI 500 returns in excess of the Wind All-A return.
b = a[[hs300,csi500]].sub(a[windA],axis=0)
In [504]:
c =1 - (b[b>0].isnull().sum() / b.shape[0])
In [507]:
c.name = '2012'
c
Out[507]:
In [520]:
# 计算胜率
def calWinratio(indexData,indexlabel,benchmark,year,timeperiod):
index = indexData.loc[timeperiod[0]:timeperiod[1]].pct_change()
ret = index[indexlabel].sub(index[benchmark],axis=0)
winratio=1 - (ret[ret>0].isnull().sum() / ret.shape[0])
winratio.name = year
return winratio
In [521]:
calWinratio(indexData,[hs300,csi500],windA,'2012',year2012)
Out[521]:
In [522]:
# Calendar-year windows keyed by year label; each value is a [start, end] pair.
timedict = {'2014':[datetime.strptime('20140101', '%Y%m%d'), datetime.strptime('20141231', '%Y%m%d')],
'2015': [datetime.strptime('20150101', '%Y%m%d'), datetime.strptime('20151231', '%Y%m%d')],
'2016': [datetime.strptime('20160101', '%Y%m%d'), datetime.strptime('20161231', '%Y%m%d')],}
In [524]:
# Win ratio of HS300 and CSI 500 against Wind All-A, one column per year in `timedict`.
# The per-year Series are collected first and concatenated once, instead of growing a
# DataFrame inside the loop. `.items()` replaces the Python-2-only `.iteritems()`.
winratioByYear = [
    calWinratio(indexData, [hs300, csi500], windA, year, period)
    for year, period in timedict.items()
]
b = pd.concat(winratioByYear, axis=1)
b
Out[524]:
In [564]:
# Scratch cell, unrelated to the index analysis: builds a random binary series and a
# delayed copy of it, then folds both into `batch_size` rows.
exchi_step = 4  # not referenced by any other cell shown in this notebook
batch_size = 5
total_series_length = 50000
echo_step = 3
state_size = 4
# Random 0/1 values with equal probability (no seed is set, so every run differs).
x = np.array(np.random.choice(2, total_series_length, p=[0.5, 0.5]))
# y is x rolled forward by `echo_step` positions, with the wrapped-around head zeroed.
y = np.roll(x, echo_step)
y[0:echo_step] = 0
x = x.reshape((batch_size, -1)) # The first index changing slowest, subseries as rows
y = y.reshape((batch_size, -1))
x
Out[564]:
In [566]:
np.zeros((batch_size, state_size))
Out[566]:
In [568]:
x = np.array(np.random.choice(2, 10, p=[0.5, 0.5]))
y = np.roll(x, 2)
x
Out[568]:
In [567]:
y
Out[567]:
In [570]:
y = y.reshape((batch_size, -1))
Out[570]:
In [ ]:
In [13]:
# Root location of the data files, taken from the factor-testing library; file names
# below are appended to it by plain string concatenation.
path = ff.data_path
# File names
filenameIndex = 'LZ_CN_STKA_INDXQUOTE_CLOSE.h5' # index close prices
filenameIndexAmount = 'LZ_CN_STKA_INDXQUOTE_AMOUNT.h5' # index traded amount
# Industry classification
filenameInduFlag = 'LZ_CN_STKA_INDU_ZX.h5' # industry flag per stock
filenameZXExplanation = 'LZ_GPA_TMP_INDU_ZX.csv' # dictionary explaining the industry codes
filenameCSIWeight = 'LZ_CN_STKA_INDEX_CSI500WEIGHT.h5' # CSI 500 constituent weights
filenameHS300Weight = 'LZ_CN_STKA_INDEX_HS300WEIGHT.h5' # HS300 (CSI 300) constituent weights (benchmark)
# A-share stock-level data
filenameFshr = 'LZ_CN_STKA_VAL_FSHR.h5' # float share capital
filenameFcap = 'LZ_CN_STKA_VAL_A_FCAP.h5' # float market capitalisation
filenamePrft = 'LZ_CN_STKA_CRD_NET_PRFT_FTTM.h5' # net profit
filenameOprinc = 'LZ_CN_STKA_FIN_IND_QFA_OPRINC.h5' # net income (as labelled by the original author)
filenameEqy = 'LZ_CN_STKA_BAL_COMBO_TOTL_LIAB_SHRHLDR_EQY.h5' # shareholders' equity
filenameNetAsset = 'LZ_CN_STKA_VAL_NET_ASSET.h5' # net assets
filenameClose = 'LZ_CN_STKA_QUOTE_TCLOSE.h5' # close prices
# Ready-made per-stock ratios (only referenced from commented-out code in this notebook)
filenameEps = 'LZ_CN_STKA_PRF_PARENT_S_FA_EPS_BASIC.h5' # eps
filenameRoe = 'LZ_CN_STKA_FIN_IND_ROE.h5' # roe
filenamePb = 'LZ_CN_STKA_VAL_PB.h5' # pb
filenamePe = 'LZ_CN_STKA_VAL_PE.h5' # pe
# Wind All-A index
# NOTE(review): an earlier cell spells this 'windA.csv'; the two differ in case, which
# matters on case-sensitive file systems -- confirm the real file name.
filenameWindA = 'WindA.csv'
In [15]:
# [start, end] bounds of calendar year 2012.
year2012 = [datetime.strptime('20120101', '%Y%m%d'), datetime.strptime('20121231', '%Y%m%d')]
# Column labels of the three indices.
hs300, csi500, windA = '000300.SH', '000905.SH', '881001'
indexList = [hs300, csi500, windA]
weightList = [0.06, 0.06, 0.06, 0.06, 0.05, 0.05, 0.08, 0.1, 0.16, 0.06, 0.1, 0.04, 0.04, 0.04, 0.04] # score weights, one per indicator row
# Calendar-year windows keyed by year label; each value is a [start, end] pair.
timedict = {'2014': [datetime.strptime('20140101', '%Y%m%d'), datetime.strptime('20141231', '%Y%m%d')],
'2015': [datetime.strptime('20150101', '%Y%m%d'), datetime.strptime('20151231', '%Y%m%d')],
'2016': [datetime.strptime('20160101', '%Y%m%d'), datetime.strptime('20161231', '%Y%m%d')]}
In [16]:
# Data pre-processing: load every table once so the indicator function can slice
# in-memory frames instead of re-reading files on each call.
indexData = ff.readh5data(path, filenameIndex)
tradeAmount = ff.readh5data(path, filenameIndexAmount)
# Parse the Wind All-A CSV once and take both columns from the same frame
# (previously the identical file was read and date-parsed twice).
windARaw = pd.read_csv(path+filenameWindA, infer_datetime_format=True, parse_dates=[0], index_col=0)
# Copies are taken so that renaming the series cannot touch the raw frame's columns.
windAindex = windARaw['CLOSE'].copy()
windAindex.name = windA
windAAmount = windARaw['AMOUNT'].copy()
windAAmount.name = windA
# Prepend the Wind All-A series as an extra column labelled with the `windA` code.
indexData = pd.concat([windAindex, indexData], axis=1)
tradeAmount = pd.concat([windAAmount, tradeAmount], axis=1)
csimember1 = ff.readh5data(path, filenameCSIWeight)  # CSI 500 constituent weights
fshr1 = ff.readh5data(path, filenameFshr)  # share capital (read from filenameFshr)
fcap1 = ff.readh5data(path, filenameFcap)  # market cap (read from filenameFcap)
indu1 = ff.readh5data(path, filenameInduFlag)  # industry flags
prft1 = ff.readh5data(path, filenamePrft)  # net profit
oprinc1 = ff.readh5data(path, filenameOprinc)  # net income
eqy1 = ff.readh5data(path, filenameEqy)  # shareholders' equity
windAmember1 = ff.readh5data(path, filenameClose)  # close prices; non-null entries define the all-A universe
netAsset1 = ff.readh5data(path, filenameNetAsset)  # net assets
In [57]:
# Indicator retrieval function
def getIndexIndicators(csi500, memberList, benchmark, timeperiod, year, indexData, tradeAmount):
    """Build the 15-row indicator table of one index over one time window.

    Stock-level inputs come from the notebook-level frames loaded in the
    pre-processing cell (``fshr1``, ``fcap1``, ``indu1``, ``prft1``, ``oprinc1``,
    ``eqy1``, ``windAmember1``, ``netAsset1``).

    Parameters
    ----------
    csi500 : column label of the evaluated index in ``indexData`` and ``tradeAmount``.
    memberList : list of stock codes that make up the index. The same list is used
        for every date in the window. (The previous docstring called this a weight
        file name, which no longer matched the code.)
    benchmark : column label in ``indexData``; only used for the correlation.
    timeperiod : ``[start, end]`` used to slice every date-indexed table.
    year : label used as the single column name of the result.
    indexData : date-indexed frame of index levels.
    tradeAmount : date-indexed frame of index traded amounts.

    Returns
    -------
    One-column DataFrame rounded to 4 decimals, indexed by 'Cor', 'Structure',
    'Fcap_Coverage', 'Indu_Coverage', 'Amount', 'Turnover', 'Liquidity', 'Std',
    'Sharpe', 'Eps', 'Roe', 'PeAbs', 'PeRel', 'PbAbs', 'PbRel'.

    Note: the relative P/E and P/B are measured against all stocks with a
    non-null close price on the date, whatever ``benchmark`` is.
    """
    def _ratioOrNaN(numerator, denominator):
        # numpy floats return inf/nan on division by zero instead of raising, so the
        # old `except ZeroDivisionError` never ran. Test the denominator explicitly to
        # get the NaN the original code intended.
        if denominator == 0:
            return np.nan
        return numerator / denominator

    def _emptyColumn(index, label):
        # Single-column float frame, filled date by date in the loop below.
        return pd.DataFrame(index=index, columns=[label], dtype=float)

    begin, end = timeperiod[0], timeperiod[1]

    # Index-level statistics
    indexData = indexData.loc[begin:end]
    levels = indexData[csi500]
    correlation = levels.corr(indexData[benchmark])
    # Std scaled by sqrt(250); Sharpe = annualised return (250 periods) / std, no risk-free rate.
    std = levels.pct_change().std() * np.sqrt(250)
    sharpe = ((levels.iloc[-1] / levels.iloc[0]) ** (250 / levels.shape[0]) - 1) / std

    # Stock-level tables restricted to the window.
    # NOTE(review): the factor 10000 looks like a unit conversion -- confirm the units
    # of the underlying files.
    fshr = 10000 * fshr1.loc[begin:end]      # share capital
    fcap = 10000 * fcap1.loc[begin:end]      # float market cap
    indu = indu1.loc[begin:end]              # industry flags
    prft = prft1.loc[begin:end]              # net profit
    oprinc = oprinc1.loc[begin:end]          # net income
    eqy = eqy1.loc[begin:end]                # shareholders' equity
    windAmember = windAmember1.loc[begin:end]  # close prices; non-null entries define the comparison universe
    netAsset = netAsset1.loc[begin:end]      # net assets

    # Per-date result holders
    csifshr = _emptyColumn(fshr.index, 'fshr')
    csifcap = _emptyColumn(fcap.index, 'fcap')
    indudf = _emptyColumn(indu.index, 'fcap')
    csieps = _emptyColumn(fcap.index, 'eps')
    csiroe = _emptyColumn(indu.index, 'froe')
    peAbs = _emptyColumn(fcap.index, 'pe')  # absolute P/E
    peRel = _emptyColumn(fcap.index, 'pe')  # relative P/E
    pbAbs = _emptyColumn(fcap.index, 'pb')  # absolute P/B
    pbRel = _emptyColumn(fcap.index, 'pb')  # relative P/B

    # Per-date aggregation; every sum is computed once and reused.
    for date in indexData.index:
        memberFshr = fshr.loc[date][memberList].sum()
        memberFcap = fcap.loc[date][memberList].sum()
        memberPrft = prft.loc[date][memberList].sum()
        memberNetAsset = netAsset.loc[date][memberList].sum()

        csifshr.loc[date] = memberFshr
        csifcap.loc[date] = memberFcap
        # Number of distinct industry flags among the constituents.
        indudf.loc[date] = indu.loc[date][memberList].drop_duplicates().shape[0]
        csieps.loc[date] = oprinc.loc[date][memberList].sum() / memberFshr
        csiroe.loc[date] = memberPrft / eqy.loc[date][memberList].sum()

        hsmemberList = windAmember.loc[date].dropna().index.tolist()
        universeFcap = fcap.loc[date][hsmemberList].sum()
        universePrft = prft.loc[date][hsmemberList].sum()
        universeNetAsset = netAsset.loc[date][hsmemberList].sum()

        peValue = _ratioOrNaN(memberFcap, memberPrft)
        pbValue = memberFcap / memberNetAsset
        peAbs.loc[date] = peValue
        pbAbs.loc[date] = pbValue
        peRel.loc[date] = peValue / (universeFcap / universePrft)
        pbRel.loc[date] = pbValue / (universeFcap / universeNetAsset)

    # Collapse the per-date series to their means
    fshrRatio = csifshr.divide(fshr.sum(axis=1), axis=0).squeeze().mean()
    fcapRatio = csifcap.divide(fcap.sum(axis=1), axis=0).squeeze().mean()
    induRatio = (indudf / 29).squeeze().mean()  # normalised by 29 industries
    csieps = csieps.squeeze().mean()
    csiroe = csiroe.squeeze().mean()
    peAbs = peAbs.squeeze().mean()
    peRel = peRel.squeeze().mean()
    pbAbs = pbAbs.squeeze().mean()
    pbRel = pbRel.squeeze().mean()

    # Liquidity: average traded amount, average turnover ratio, liquidity indicator
    tradeAmount = tradeAmount.loc[begin:end][csi500]
    turnoverRatiodf = tradeAmount.divide(csifcap.squeeze(), axis=0)
    absLogReturn = np.abs(np.log(levels) - np.log(levels.shift(1)))
    liquidIndicator = absLogReturn.divide(turnoverRatiodf, axis=0)
    dailyTradeAmount = tradeAmount.mean()
    dailyTurnoverRatio = turnoverRatiodf.mean()
    dailyLiquidIndicator = liquidIndicator.mean()

    # Assemble the result frame
    indexList = ['Cor', 'Structure', 'Fcap_Coverage', 'Indu_Coverage', 'Amount', 'Turnover', 'Liquidity', 'Std',
                 'Sharpe', 'Eps', 'Roe', 'PeAbs', 'PeRel', 'PbAbs', 'PbRel']
    data = [correlation, fshrRatio, fcapRatio, induRatio, dailyTradeAmount, dailyTurnoverRatio,
            dailyLiquidIndicator, std, sharpe, csieps, csiroe, peAbs, peRel, pbAbs, pbRel]
    resultdf = pd.DataFrame(index=indexList, data=data, columns=[year])
    return resultdf.round(4)
In [25]:
# Compute the score of one indicator
def calFinalScore(x, bench, intervalNum=6):
    """Score ``x`` by the equal-width band of ``[0, bench]`` that contains it.

    ``[0, bench]`` is split into ``intervalNum - 1`` bands with ``np.linspace``.
    A value in band ``i`` (0-based, left-closed, right-open) scores ``i + 1``.
    Any value outside ``[0, bench)`` scores 0: at or above the upper bound,
    below the lower bound, or NaN.

    Parameters
    ----------
    x : scalar or one-element array -- the value to score.
    bench : scalar or one-element array -- upper bound of the scoring range.
    intervalNum : int -- number of band edges (default 6, i.e. five bands).

    Returns
    -------
    int -- the score, between 0 and ``intervalNum - 1``.
    """
    edges = np.linspace(0, bench, intervalNum)
    finalScore = 0
    # `range` instead of the Python-2-only `xrange`; the loop has only a handful of steps.
    for i in range(intervalNum - 1):
        if edges[i] <= x < edges[i + 1]:
            finalScore = i + 1
            break
    return finalScore
# Compute the total score
def calTotalScore(resultdf,benchmarkdf,weightList):
    """Weighted total of the per-indicator scores.

    Each row label of ``resultdf`` is scored with ``calFinalScore`` against the
    row with the same label in ``benchmarkdf``; the scores are multiplied
    element-wise by ``weightList`` (same order as ``resultdf.index``) and summed.

    Returns the weighted sum as a numpy scalar.
    """
    scores = [
        calFinalScore(resultdf.loc[label].values, benchmarkdf.loc[label].values)
        for label in resultdf.index
    ]
    weighted = np.array(scores) * np.array([weightList])
    return weighted.sum()
# 计算胜率
def calWinratio(indexData,indexlabel,benchmark,year,timeperiod):
index = indexData.loc[timeperiod[0]:timeperiod[1]].pct_change()
ret = index[indexlabel].sub(index[benchmark], axis=0)
winratio=1 - (ret[ret>0].isnull().sum() / ret.shape[0])
winratio.name = year
return winratio
In [30]:
# Back-test window for the industry rotation: 2015-01-01 through 2017-02-28.
startTime = datetime.strptime('20150101', '%Y%m%d')
endTime = datetime.strptime('20170228', '%Y%m%d')
# NOTE: rebinds `indexData` to the window; re-running this cell keeps narrowing the same frame.
indexData = indexData.loc[startTime:endTime]
In [31]:
# Last available date of every calendar month in `indexData`:
# monthly row counts -> cumulative sum minus 1 gives the position of each month's last row;
# `set` drops the repeated positions produced by months without rows, `sorted` restores order.
endOfMonthList = sorted(list(set(indexData .iloc[indexData .resample('M').size().cumsum().sub(1)].index)))
endOfMonthList
Out[31]:
In [ ]:
In [46]:
indexTotal = ff.readh5data(path,filenameIndex)
In [143]:
# Table header: labels of the 29 industry indices, 'CI005001.WI' ... 'CI005029.WI'.
# A list comprehension gives a real list on Python 2 and 3 (`map` is a one-shot iterator
# on Python 3, and this list is reused several times below). The zero-padded format
# replaces the two hand-written prefix branches.
indulabelList = ['CI0050%02d.WI' % induNumber for induNumber in range(1, 30)]
In [144]:
induRankdf = pd.DataFrame(index = endOfMonthList, columns = indulabelList, dtype= float)
In [146]:
start = time.time()
# Score the 29 industries on every month end, using the window [previous month end, month end].
# `endOfMonthList` is sorted and unique, so pairing neighbours with zip gives the same
# (previousdate, date) pairs as the old `list.index(date) - 1` lookup.
for previousdate, date in zip(endOfMonthList[:-1], endOfMonthList[1:]):
    scoreList = []
    induthatday = indu1.loc[date]
    # The all-A universe and its indicator table depend only on the window, not on the
    # industry, so they are computed once per date instead of once per industry.
    windAmember = windAmember1.loc[date].dropna().index.tolist()
    windAIndicators = getIndexIndicators(windA, windAmember, windA, [previousdate, date], '2015', indexData, tradeAmount)
    # The loop variable is `induCode`, not `indu`, so it no longer overwrites the
    # notebook-level `indu` frame.
    for induCode in range(1, 30):
        indumember = induthatday[induthatday == induCode].index.tolist()
        indexname = 'CI0050%02d.WI' % induCode
        # Progress output; single-argument print(...) behaves the same on Python 2 and 3.
        print(date)
        print(induCode)
        print(len(indumember))
        indicators = getIndexIndicators(indexname, indumember, windA, [previousdate, date], '2015', indexData, tradeAmount)
        score = calTotalScore(indicators, windAIndicators, weightList)
        scoreList.append(score)
    # One row per date, one column per industry label.
    induRankdf.loc[date] = scoreList
print('%s seconds elapsed' % (time.time() - start))
In [147]:
induRankdf
Out[147]:
In [148]:
induRankdf.columns = indulabelList
induRankdf.head()
Out[148]:
In [149]:
# Industry index levels on the ranking dates ...
induNetworth = indexData[indulabelList].loc[induRankdf.index[:]]
# ... normalised by the first row and turned into date-over-date returns
# (the first row has no return and is dropped).
induRet = (induNetworth / induNetworth.iloc[0]).pct_change().iloc[1:]
In [150]:
induRet.head()
Out[150]:
In [151]:
retList = []
# The score stored at `date` is built from the window [previous month end, date], which is
# the same window that `induRet.loc[date]` measures. Selecting on that score would use
# information only known at the end of the period (look-ahead bias). The ranking is
# therefore lagged by one rebalance date: each period holds the top five industries of
# the previous ranking.
laggedRank = induRankdf.shift(1)
for date in induRet.index:
    print(date)
    rankKnown = laggedRank.loc[date].dropna()
    if rankKnown.empty:
        # No ranking exists before the first scored date: hold nothing, return 0.
        retList.append(0.0)
        continue
    induSelected = rankKnown.sort_values(ascending=False).index[:5]
    # Equal-weighted return of the selected industries over the period ending at `date`.
    retList.append(induRet[induSelected].loc[date].mean())
In [152]:
# Strategy returns per rebalance date, compounded into a net-worth curve.
retdf = pd.Series(index = induRet.index,data = retList)
networth = (retdf + 1).cumprod()
networth.name = 'InduRotation'
In [153]:
# HS300 and CSI 500 levels on the same dates, normalised by their first row for comparison.
referenceIndex = indexData.loc[networth.index][[hs300,csi500]]
referenceIndex = referenceIndex / referenceIndex.iloc[0]
In [154]:
networth = pd.concat([networth,referenceIndex],axis=1)
In [ ]:
In [164]:
# Add a starting row of 1 on the first ranking date, then restore chronological order.
networth.loc[induRankdf.index[0]] = 1
networth = networth.sort_index()
fig = plt.figure(figsize=(14, 9))
# Add a subplot
ax = fig.add_subplot(111)
# Plot every column of `networth` on the shared axis.
networth.plot(figsize=(16, 9), ax=ax, color=sns.color_palette("Paired", 11),
fontsize=13, title='Networth Plot ')
ax.set_title(ax.get_title(), alpha=0.7, fontsize=30)
plt.xlabel('time', fontsize=20)
plt.ylabel('networth ', fontsize=20)
plt.show()
In [70]:
# Scratch cell: Cartesian product of two small lists.
# NOTE: rebinds the notebook-level names `a`, `b` and `c` used by earlier cells.
from itertools import product
c= [1,2,3,4]
b = [5,6,7,8]
a= [c,b]
list(product(*a))
Out[70]:
In [170]:
filename = 'newindex.xlsx'
In [242]:
newIndex = 'newIndex'
In [260]:
filenameAmount = 'LZ_CN_STKA_DERI_AmountAvg_1M.h5'
In [261]:
stkTradeAmount = ff.readh5data(path,filenameAmount)
In [ ]:
In [288]:
# Data pre-processing: reload the full (unsliced) index levels and traded amounts.
indexData = ff.readh5data(path, filenameIndex)
tradeAmount = ff.readh5data(path, filenameIndexAmount)
# Parse the Wind All-A CSV once and take both columns from the same frame
# (previously the identical file was read and date-parsed twice).
windARaw = pd.read_csv(path+filenameWindA, infer_datetime_format=True, parse_dates=[0], index_col=0)
# Copies are taken so that renaming the series cannot touch the raw frame's columns.
windAindex = windARaw['CLOSE'].copy()
windAindex.name = windA
windAAmount = windARaw['AMOUNT'].copy()
windAAmount.name = windA
# Prepend the Wind All-A series as an extra column labelled with the `windA` code.
indexData = pd.concat([windAindex, indexData], axis=1)
tradeAmount = pd.concat([windAAmount, tradeAmount], axis=1)
In [289]:
# Read the custom index workbook: one sheet with the constituents per date,
# one sheet with the index values.
xls = pd.ExcelFile(path+filename)
# Constituent sheet: no header row, first column parsed as the date index.
df1 = xls.parse(u'股票代码',header=None,index_col=0,parse_dates=True)
df1.index.name = 'time'
# Drop the first and last character of every cell's string form
# (presumably surrounding quote characters -- confirm against the workbook).
df1 = df1.applymap(lambda x: str(x)[1:-1])
# Index-value sheet: keep only its first data column and label it with `newIndex`.
df2 = xls.parse(u'不加手续费', parse_dates=True, index_col=0)
df2.index.name = 'time'
df2 = df2.iloc[:,0]
df2.name = newIndex
In [290]:
dateCountList = df1.index.tolist()
In [291]:
# Restrict the index levels to the span covered by the constituent sheet and
# add the custom index as an extra column.
startTime = df1.index[0]
endTime = df1.index[-1]
indexData = indexData.loc[startTime:endTime]
indexData = pd.concat([df2, indexData], axis=1)
In [292]:
stkTradeAmount = ff.readh5data(path,filenameAmount).loc[startTime:endTime]
In [293]:
# Summed traded amount of the custom index constituents, for every date that
# appears in both the amount table and the constituent sheet.
amountDict = {
    date: stkTradeAmount.loc[date][df1.loc[date].tolist()].sum()
    for date in stkTradeAmount.index
    if date in dateCountList
}
In [294]:
amountSeries = pd.Series(amountDict)
amountSeries.name = newIndex
In [295]:
tradeAmount = pd.concat([amountSeries, tradeAmount], axis=1).loc[startTime:endTime]
In [297]:
tradeAmount.tail()
Out[297]:
In [ ]:
In [299]:
newIndexRank = pd.DataFrame(index=df1.index,columns =['score'],dtype=float)
In [300]:
indexData.tail()
Out[300]:
In [301]:
start = time.time()
# Score the custom index on every date of the constituent sheet, using the window
# [previous date, date]. Pairing neighbouring dates with zip replaces the per-iteration
# `dateCountList.index(date) - 1` lookup.
# NOTE(review): this assumes the dates in `df1.index` are unique.
for previousdate, date in zip(dateCountList[:-1], dateCountList[1:]):
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(date)
    indumember = df1.loc[date].tolist()
    windAmember = windAmember1.loc[date].dropna().index.tolist()
    indicators = getIndexIndicators(newIndex, indumember, windA, [previousdate, date], '2015', indexData, tradeAmount)
    windAIndicators = getIndexIndicators(windA, windAmember, windA, [previousdate, date], '2015', indexData, tradeAmount)
    score = calTotalScore(indicators, windAIndicators, weightList)
    newIndexRank.loc[date] = score
print('%s seconds elapsed' % (time.time() - start))
In [302]:
newIndexRank
Out[302]:
In [303]:
# Plot the score of the custom index over time.
fig = plt.figure(figsize=(14, 9))
# Add a subplot
ax = fig.add_subplot(111)
newIndexRank.plot(figsize=(16, 9), ax=ax, color=sns.color_palette("Paired", 11),
fontsize=13, title='Score Plot ')
ax.set_title(ax.get_title(), alpha=0.7, fontsize=30)
plt.xlabel('time', fontsize=20)
plt.ylabel('score ', fontsize=20)
Out[303]:
In [306]:
import os
# `__file__` only exists when the code runs as a script or module; inside a notebook
# kernel it is undefined and the original line raised NameError. Fall back to the
# current working directory in that case.
# NOTE(review): this rebinds `path`, which earlier cells use as the data root and
# concatenate directly with file names -- confirm the override is intended.
try:
    path = os.path.abspath(os.path.dirname(__file__))
except NameError:
    path = os.getcwd()