In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
In [2]:
# Libraries to use
from __future__ import division
from __future__ import print_function

from datetime import datetime

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
In [3]:
# Import My own library for factor testing
from SingleFactorTest import factorFilterFunctions as ff
#from config import *
In [4]:
%matplotlib inline
In [5]:
# Constants
# Backtest window boundaries (inclusive), parsed from compact YYYYMMDD strings.
_DATE_FORMAT = '%Y%m%d'
startTime = datetime.strptime('20161201', _DATE_FORMAT)
endTime = datetime.strptime('20170429', _DATE_FORMAT)

path = ff.data_path        # root directory of the raw data CSVs
timeStampNum = 2500        # number of timestamps — TODO confirm exact meaning in ff
thresholdNum = 0.2         # missing-data threshold passed to ff.getData — TODO confirm
HS300Index = '000300.SH'   # HS300 index code
ZZ500Index = '000905.SH'   # ZZ500 index code
In [6]:
# Files to use
# Raw vendor data files (LZ_GPA_* naming convention); meanings inferred
# from the filenames — confirm against the data dictionary.
filenamePrice = 'Own_Factor_AdjustedPriceForward-1d.csv'  # forward-adjusted daily price
filenameST = 'LZ_GPA_SLCIND_ST_FLAG.csv'  # ST (special treatment) flag
filenameTradeday = 'LZ_GPA_SLCIND_TRADEDAYCOUNT.csv'  # trade-day count (used to detect new listings)
filenameStopFlag = 'LZ_GPA_SLCIND_STOP_FLAG.csv'  # trading-suspension flag
filenameIndu = 'LZ_GPA_INDU_ZX.csv'  # ZX industry classification
filenameFCAP = 'LZ_GPA_VAL_A_FCAP.csv'  # free-float market cap
filenameAdjustFactor = 'LZ_GPA_CMFTR_CUM_FACTOR.csv'  # cumulative price-adjustment factor (unused below)
filenameHS300 = 'LZ_GPA_INDXQUOTE_CLOSE.csv'  # index close quotes (holds both HS300 and ZZ500 columns)
filenameZXIndustry = 'LZ_GPA_INDU_ZX.csv'  # same file as filenameIndu
filenameZXExplanation = 'LZ_GPA_TMP_INDU_ZX.csv'  # ZX industry code descriptions (unused below)
In [7]:
# Own factor
filenameTOV20 = 'Own_Factor_Turnover_Volatility_20D.csv' # group0 is best: ~10% annualized return, very good monotonicity, strong long-short performance,
# but market cap decreases monotonically across groups — group0 has the largest caps, so the factor
# may effectively be picking SSE-50-style large caps (large-cap stocks trade steadily, hence low turnover volatility)
In [8]:
# Uqer factor
filenameUQAD20 = 'Uqer_factor_AD20.csv' # group9 is best with ~2% excess return; other groups show none. Strong long-short, good monotonicity (though it reverses — long-term group0 is smallest), suggesting momentum/price-type factors still work well
filenameUQATR6 = 'Uqer_factor_ATR6.csv' # group1 is best with ~0.2% excess return; other groups show none. Strong long-short, good monotonicity
In [9]:
# value
filenamePS = 'LZ_GPA_VAL_PS.csv' # group0 is best: ~5% annualized return, very good monotonicity, strong long-short
filenamePB ='LZ_GPA_VAL_PB.csv' # group0 is best: ~12% annualized return, very good monotonicity, strong long-short (top-tier factor)
filenamePE ='LZ_GPA_VAL_PE.csv' # group0 is best: ~10% annualized return; monotonicity not perfect but decent; strong long-short
In [10]:
# Factors selected for this run, keyed by the short name used in result frames.
filenameTest = {
    'PB': filenamePB,
    'PS': filenamePS,
    'TOV20': filenameTOV20,
    'UQAD20': filenameUQAD20,
    'UQATR6': filenameUQATR6,
}
filenameDict = filenameTest  # alias used throughout the rest of the notebook
In [11]:
def _load_flag(fname):
    """Read one raw CSV (dates in column 0 as index) clipped to the backtest window."""
    frame = pd.read_csv(path + fname, infer_datetime_format=True,
                        parse_dates=[0], index_col=0)
    return frame.loc[startTime:endTime]

stDF = _load_flag(filenameST)              # ST flags
tradeDayDF = _load_flag(filenameTradeday)  # trade-day counts
stopFlagDF = _load_flag(filenameStopFlag)  # suspension flags
In [12]:
# Adjusted prices and the benchmark series over the backtest window.
priceData = (pd.read_csv(path + filenamePrice, infer_datetime_format=True,
                         parse_dates=[0], index_col=0)
             .loc[startTime:endTime])
# NOTE: the index-quote file (named after HS300) holds many index columns;
# the ZZ500 column is selected as the benchmark here.
benchMarkData = (pd.read_csv(path + filenameHS300, infer_datetime_format=True,
                             parse_dates=[0], index_col=0)
                 .loc[startTime:endTime][ZZ500Index])
In [13]:
# Log10 of free-float market cap, loaded via ff.getData with the missing-data
# threshold; used below as the size factor for neutralization.
LFCAPDF = np.log10(ff.getData(thresholdNum, startTime, endTime,filename = filenameFCAP))
In [14]:
# Rebalance dates: the last trading day of each calendar week in LFCAPDF.
# resample('W').size() counts rows per week; cumsum()-1 yields the integer
# position of each week's final row, which iloc maps back to dates.
# An empty week repeats the previous position; set()/sorted() dedupes it.
endOfWeekList = sorted(list(set(LFCAPDF.iloc[LFCAPDF.resample('W').size().cumsum().sub(1)].index)))
In [15]:
# Weekly log active (benchmark-relative) returns; shiftVal=-1 presumably makes
# the value at date t the forward (next-period) return.
# NOTE(review): exact shift/return semantics live in ff.calcReturn — confirm.
activeReturnData = ff.calcReturn(priceData, endOfWeekList , benchmark = benchMarkData,activeReturn = True,logReturn = True,shiftVal = -1)
In [16]:
# Generate the unusable-stock list for each rebalance date:
# ST stocks, new listings, and suspended stocks are excluded that day.
# (Fixed Python-2-only `print i, x` statement; `print_function` is imported
# at the top of the file, so output is unchanged.)
filterdict = {}
for day in endOfWeekList:
    filterdict[day] = ff.GetSTNewSuspend(day, stDF, tradeDayDF, stopFlagDF)
    print(day, len(filterdict[day]))
In [17]:
# Industry classification (ZX scheme) sampled on the rebalance dates.
IndustryDF = pd.read_csv(path+filenameIndu,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[endOfWeekList]
# Winsorize and normalize the size factor, skipping filtered stocks.
normalizedLFCAP = ff.winsorAndnorm(LFCAPDF, filterdict, endOfWeekList )
In [18]:
# Create result frames: one row per rebalance date (last date dropped —
# its forward return is unavailable), one column per factor.
# Fixes vs. original: module-level `global` statements were no-ops and are
# removed; `columns=[filenameDict.keys()]` accidentally nested the key list
# (producing single-level MultiIndex columns) — a plain list is passed now.
factorNames = list(filenameDict.keys())
resultIndex = endOfWeekList[:-1]

returnofFactor = pd.DataFrame(index=resultIndex, columns=factorNames, dtype=float)
tValueofFactor = pd.DataFrame(index=resultIndex, columns=factorNames, dtype=float)
pValueofFactor = pd.DataFrame(index=resultIndex, columns=factorNames, dtype=float)
ICFactor = pd.DataFrame(index=resultIndex, columns=factorNames, dtype=float)
ICpValue = pd.DataFrame(index=resultIndex, columns=factorNames, dtype=float)
In [19]:
# Sanity-check the (still empty) IC p-value frame's index and columns.
ICpValue
Out[19]:
In [20]:
def modifyUQdata(filename):
    """Align a Uqer-format factor CSV to the column layout of the raw data.

    Uqer files carry ticker suffixes that differ from the local files, so
    columns are matched by the bare ticker (text before the first '.') and
    renamed to the local spelling; unmatched tickers keep the bare name, as
    in the original implementation.  Tickers missing from the Uqer file are
    added as all-NaN columns, then the frame is reordered to exactly match
    stDF's columns.

    Relies on module-level ``path`` and ``stDF``.

    :param filename: CSV file name (relative to ``path``) of the Uqer factor.
    :return: DataFrame with the same columns (and order) as ``stDF``.
    """
    sparedata = pd.read_csv(path + filename, infer_datetime_format=True,
                            parse_dates=[0], index_col=0)
    newcolumnList = stDF.columns.tolist()
    # Map bare ticker -> full local column name (e.g. '000001' -> '000001.SZ').
    columndict = {col.split('.')[0]: col for col in newcolumnList}
    # Rename to the local spelling where a match exists; otherwise keep the
    # bare ticker (removed the original's dead `else: pass` branch).
    sparedata.columns = [columndict.get(col.split('.')[0], col.split('.')[0])
                         for col in sparedata.columns]
    # Add all-NaN columns for tickers absent from the Uqer file.
    addNanColumns = list(set(newcolumnList) - set(sparedata.columns))
    addData = pd.DataFrame(index=sparedata.index, columns=addNanColumns, dtype=float)
    sparedata = pd.concat([sparedata, addData], axis=1)
    # Reorder to the canonical column order.
    return sparedata[newcolumnList]
In [21]:
# Run the full pipeline for every factor: load, winsorize/normalize,
# neutralize against size and industry, then record returns/t/p/IC values
# into the pre-allocated result frames (mutated in place by calReturnAndIC).
# Fixed Python-2-only idioms: `iteritems()` -> `items()`, `print i` -> `print(i)`.
for factorName, fname in filenameDict.items():
    print(factorName)
    if 'Uqer' in fname:
        # Uqer files need column alignment before use.
        factorData = modifyUQdata(fname).loc[startTime:endTime]
    else:
        factorData = ff.getData(thresholdNum, startTime, endTime, filename=fname)
    normalizedData = ff.winsorAndnorm(factorData, filterdict, endOfWeekList)
    neutralizedData = ff.neutralizeFactor(normalizedData, normalizedLFCAP, IndustryDF, endOfWeekList)
    ff.calReturnAndIC(returnofFactor, tValueofFactor, pValueofFactor,
                      ICFactor, ICpValue, neutralizedData, activeReturnData, factorName)
In [22]:
# Bar chart: per factor, the number of weeks whose |IC| exceeds the grand mean |IC|.
ICFactor[np.abs(ICFactor)>np.abs(ICFactor).mean().mean()].count().plot(figsize=(16,10),kind='bar',color=sns.color_palette("GnBu_d",10))
Out[22]:
In [23]:
# Summary stats of positive vs. negative weekly ICs per factor.
Above0df = ICFactor.astype(float)[ICFactor.astype(float)>0].describe()
Below0df = ICFactor.astype(float)[ICFactor.astype(float)<0].describe()
# show the amount of Negative IC and Positive IC in same figure
fig = plt.figure(figsize=(14,9))
ax = fig.add_subplot(111)
# Stacked bars: counts of positive/negative IC observations per factor.
totaldf = pd.DataFrame({ 'Above_0':Above0df.loc['count'].values, 'Below_0':Below0df.loc['count'].values},index = Above0df.columns)
totaldf.plot(kind='bar',ax=ax, stacked =True,alpha=0.84,title ='IC Distribution',fontsize =13)
ax.set_title(ax.get_title(),alpha=0.88, fontsize=30)
Out[23]:
In [ ]:
In [24]:
# simple cumulative return
# (arithmetic cumsum of weekly factor returns, one line per factor)
fig = plt.figure(figsize=(14,9))
# Add a subplot
ax = fig.add_subplot(111)
returnofFactor.cumsum().plot(figsize=(20,12),ax=ax,color=sns.color_palette("Paired",10),title ='Cumulative Return',fontsize =13)
ax.set_title(ax.get_title(),alpha=0.7, fontsize=30)
Out[24]:
In [25]:
# Net worth plot for each factor
# NOTE(review): np.abs() makes every period's contribution positive, so this
# "net worth" curve is monotonically non-decreasing — it reads like an
# absolute-return (long-short magnitude) compounding curve rather than a
# true equity curve; confirm this is intentional.
fig = plt.figure(figsize=(18,14))
# Add a subplot
ax = fig.add_subplot(111)
(np.abs(returnofFactor)+1).cumprod().plot(figsize=(22,14),ax=ax,color=sns.color_palette("Paired",10),title ='Net Worth',fontsize =13)
ax.set_title(ax.get_title(),alpha=0.7, fontsize=30, ha='right')
Out[25]:
In [26]:
# Fraction of weeks with statistically significant factor returns (p < 0.05).
color_set=sns.light_palette((210, 90, 60), input="husl")
(pValueofFactor[pValueofFactor<0.05].count()/len(pValueofFactor)).plot(figsize=(18,12),kind='bar',color=sns.color_palette(color_set,10))
Out[26]:
In [27]:
# Heatmap of per-period factor returns; dates flattened to YYYYMMDD integers
# for compact axis labels.
ReturnForPlot = returnofFactor.copy()
ReturnForPlot.index = ReturnForPlot.index.map(lambda x:10000*x.year+100*x.month+x.day)
ReturnForPlot = ReturnForPlot.astype(float)
plt.figure(figsize=(20,12))
ax = plt.axes()
# Only the most recent 40 periods are shown.
sns.heatmap(ReturnForPlot[-40:],ax=ax, annot=True)
# NOTE(review): title says "Monthly" but the index holds week-end dates — confirm.
ax.set_title('Monthly Return of Each Factor',fontsize=18, fontweight='bold')
plt.show()
In [28]:
# Cross-sectional L1-normalized IC and return weights, lagged one period
# (shift(1)) so weights at t use only information available before t.
ICFactorTosave = ICFactor.apply(lambda x : x/np.abs(x).sum(),axis=1).shift(1)
ReturnTosave = returnofFactor.apply(lambda x : x/np.abs(x).sum(),axis=1).shift(1)
# Exponential down-weighting by p-value: p=0.05 -> ~0.70, p=1 -> ~0.001.
# The decay constant 7 looks empirical — TODO confirm its choice.
pWeight = np.exp(-7*pValueofFactor.astype(float)).shift(1)
In [29]:
# Combined factor weight: (return weight + IC weight) scaled by the p-value weight.
totalWeighted = (ReturnTosave + ICFactorTosave) * pWeight
totalWeighted
Out[29]:
In [30]:
# NOTE(review): shift(0) is a no-op — this simply displays ReturnTosave.
ReturnTosave.shift(0)
Out[30]:
In [31]:
# Inspect the lagged, normalized IC weights.
ICFactorTosave
Out[31]:
In [32]:
# Cross-sectional rank of the most recent period's factor returns.
returnofFactor.iloc[-1].rank()
Out[32]:
In [33]:
# Final look at the populated IC p-value frame.
ICpValue
Out[33]:
In [ ]:
In [19]:
In [ ]: