In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
In [2]:
# Libraries to use
from __future__ import division
from __future__ import print_function

from datetime import datetime

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
In [3]:
# Import My own library for factor testing
from SingleFactorTest import factorFilterFunctions as ff
#from config import *
In [4]:
%matplotlib inline
In [5]:
# Constants
# Backtest window boundaries (inclusive), parsed from compact YYYYMMDD strings.
_DATE_FORMAT = '%Y%m%d'
startTime = datetime.strptime('20161201', _DATE_FORMAT)
endTime = datetime.strptime('20170429', _DATE_FORMAT)

path = ff.data_path        # root directory of the raw data CSVs
timeStampNum = 2500        # number of timestamps — TODO confirm exact meaning in ff
thresholdNum = 0.2         # missing-data threshold passed to ff.getData — TODO confirm
HS300Index = '000300.SH'   # HS300 index code
ZZ500Index = '000905.SH'   # ZZ500 index code
In [6]:
# Files to use
# Raw vendor data files (LZ_GPA_* naming convention); meanings inferred
# from the filenames — confirm against the data dictionary.
filenamePrice = 'Own_Factor_AdjustedPriceForward-1d.csv'  # forward-adjusted daily price
filenameST = 'LZ_GPA_SLCIND_ST_FLAG.csv'  # ST (special treatment) flag
filenameTradeday = 'LZ_GPA_SLCIND_TRADEDAYCOUNT.csv'  # trade-day count (used to detect new listings)
filenameStopFlag = 'LZ_GPA_SLCIND_STOP_FLAG.csv'  # trading-suspension flag
filenameIndu = 'LZ_GPA_INDU_ZX.csv'  # ZX industry classification
filenameFCAP = 'LZ_GPA_VAL_A_FCAP.csv'  # free-float market cap
filenameAdjustFactor = 'LZ_GPA_CMFTR_CUM_FACTOR.csv'  # cumulative price-adjustment factor (unused below)
filenameHS300 = 'LZ_GPA_INDXQUOTE_CLOSE.csv'  # index close quotes (holds both HS300 and ZZ500 columns)
filenameZXIndustry = 'LZ_GPA_INDU_ZX.csv'  # same file as filenameIndu
filenameZXExplanation = 'LZ_GPA_TMP_INDU_ZX.csv'  # ZX industry code descriptions (unused below)
In [7]:
# Own factor
filenameTOV20 = 'Own_Factor_Turnover_Volatility_20D.csv' # group0 is best: ~10% annualized return, very good monotonicity, strong long-short performance,
# but market cap decreases monotonically across groups — group0 has the largest caps, so the factor
# may effectively be picking SSE-50-style large caps (large-cap stocks trade steadily, hence low turnover volatility)
In [8]:
# Uqer factor
filenameUQAD20 = 'Uqer_factor_AD20.csv' # group9 is best with ~2% excess return; other groups show none. Strong long-short, good monotonicity (though it reverses — long-term group0 is smallest), suggesting momentum/price-type factors still work well
filenameUQATR6 = 'Uqer_factor_ATR6.csv' # group1 is best with ~0.2% excess return; other groups show none. Strong long-short, good monotonicity
In [9]:
# value
filenamePS = 'LZ_GPA_VAL_PS.csv' # group0 is best: ~5% annualized return, very good monotonicity, strong long-short
filenamePB ='LZ_GPA_VAL_PB.csv' # group0 is best: ~12% annualized return, very good monotonicity, strong long-short (top-tier factor)
filenamePE ='LZ_GPA_VAL_PE.csv' # group0 is best: ~10% annualized return; monotonicity not perfect but decent; strong long-short
In [10]:
# Factors selected for this run, keyed by the short name used in result frames.
filenameTest = {
    'PB': filenamePB,
    'PS': filenamePS,
    'TOV20': filenameTOV20,
    'UQAD20': filenameUQAD20,
    'UQATR6': filenameUQATR6,
}
filenameDict = filenameTest  # alias used throughout the rest of the notebook
In [11]:
def _load_flag(fname):
    """Read one raw CSV (dates in column 0 as index) clipped to the backtest window."""
    frame = pd.read_csv(path + fname, infer_datetime_format=True,
                        parse_dates=[0], index_col=0)
    return frame.loc[startTime:endTime]

stDF = _load_flag(filenameST)              # ST flags
tradeDayDF = _load_flag(filenameTradeday)  # trade-day counts
stopFlagDF = _load_flag(filenameStopFlag)  # suspension flags
In [12]:
# Adjusted prices and the benchmark series over the backtest window.
priceData = (pd.read_csv(path + filenamePrice, infer_datetime_format=True,
                         parse_dates=[0], index_col=0)
             .loc[startTime:endTime])
# NOTE: the index-quote file (named after HS300) holds many index columns;
# the ZZ500 column is selected as the benchmark here.
benchMarkData = (pd.read_csv(path + filenameHS300, infer_datetime_format=True,
                             parse_dates=[0], index_col=0)
                 .loc[startTime:endTime][ZZ500Index])
In [13]:
# Log10 of free-float market cap, loaded via ff.getData with the missing-data
# threshold; used below as the size factor for neutralization.
LFCAPDF = np.log10(ff.getData(thresholdNum, startTime, endTime,filename = filenameFCAP))
In [14]:
# Rebalance dates: the last trading day of each calendar week in LFCAPDF.
# resample('W').size() counts rows per week; cumsum()-1 yields the integer
# position of each week's final row, which iloc maps back to dates.
# An empty week repeats the previous position; set()/sorted() dedupes it.
endOfWeekList = sorted(list(set(LFCAPDF.iloc[LFCAPDF.resample('W').size().cumsum().sub(1)].index)))
In [15]:
# Weekly log active (benchmark-relative) returns; shiftVal=-1 presumably makes
# the value at date t the forward (next-period) return.
# NOTE(review): exact shift/return semantics live in ff.calcReturn — confirm.
activeReturnData = ff.calcReturn(priceData, endOfWeekList , benchmark = benchMarkData,activeReturn = True,logReturn = True,shiftVal = -1)
In [16]:
# Generate the unusable-stock list for each rebalance date:
# ST stocks, new listings, and suspended stocks are excluded that day.
# (Fixed Python-2-only `print i, x` statement; `print_function` is imported
# at the top of the file, so output is unchanged.)
filterdict = {}
for day in endOfWeekList:
    filterdict[day] = ff.GetSTNewSuspend(day, stDF, tradeDayDF, stopFlagDF)
    print(day, len(filterdict[day]))
In [17]:
# Industry classification (ZX scheme) sampled on the rebalance dates.
IndustryDF = pd.read_csv(path+filenameIndu,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[endOfWeekList]
# Winsorize and normalize the size factor, skipping filtered stocks.
normalizedLFCAP = ff.winsorAndnorm(LFCAPDF, filterdict, endOfWeekList )
In [18]:
# Create result frames: one row per rebalance date (last date dropped —
# its forward return is unavailable), one column per factor.
# Fixes vs. original: module-level `global` statements were no-ops and are
# removed; `columns=[filenameDict.keys()]` accidentally nested the key list
# (producing single-level MultiIndex columns) — a plain list is passed now.
factorNames = list(filenameDict.keys())
resultIndex = endOfWeekList[:-1]

returnofFactor = pd.DataFrame(index=resultIndex, columns=factorNames, dtype=float)
tValueofFactor = pd.DataFrame(index=resultIndex, columns=factorNames, dtype=float)
pValueofFactor = pd.DataFrame(index=resultIndex, columns=factorNames, dtype=float)
ICFactor = pd.DataFrame(index=resultIndex, columns=factorNames, dtype=float)
ICpValue = pd.DataFrame(index=resultIndex, columns=factorNames, dtype=float)
In [19]:
# Sanity-check the (still empty) IC p-value frame's index and columns.
ICpValue
Out[19]:
In [20]:
def modifyUQdata(filename):
    """Align a Uqer-format factor CSV to the column layout of the raw data.

    Uqer files carry ticker suffixes that differ from the local files, so
    columns are matched by the bare ticker (text before the first '.') and
    renamed to the local spelling; unmatched tickers keep the bare name, as
    in the original implementation.  Tickers missing from the Uqer file are
    added as all-NaN columns, then the frame is reordered to exactly match
    stDF's columns.

    Relies on module-level ``path`` and ``stDF``.

    :param filename: CSV file name (relative to ``path``) of the Uqer factor.
    :return: DataFrame with the same columns (and order) as ``stDF``.
    """
    sparedata = pd.read_csv(path + filename, infer_datetime_format=True,
                            parse_dates=[0], index_col=0)
    newcolumnList = stDF.columns.tolist()
    # Map bare ticker -> full local column name (e.g. '000001' -> '000001.SZ').
    columndict = {col.split('.')[0]: col for col in newcolumnList}
    # Rename to the local spelling where a match exists; otherwise keep the
    # bare ticker (removed the original's dead `else: pass` branch).
    sparedata.columns = [columndict.get(col.split('.')[0], col.split('.')[0])
                         for col in sparedata.columns]
    # Add all-NaN columns for tickers absent from the Uqer file.
    addNanColumns = list(set(newcolumnList) - set(sparedata.columns))
    addData = pd.DataFrame(index=sparedata.index, columns=addNanColumns, dtype=float)
    sparedata = pd.concat([sparedata, addData], axis=1)
    # Reorder to the canonical column order.
    return sparedata[newcolumnList]
In [21]:
# Run the full pipeline for every factor: load, winsorize/normalize,
# neutralize against size and industry, then record returns/t/p/IC values
# into the pre-allocated result frames (mutated in place by calReturnAndIC).
# Fixed Python-2-only idioms: `iteritems()` -> `items()`, `print i` -> `print(i)`.
for factorName, fname in filenameDict.items():
    print(factorName)
    if 'Uqer' in fname:
        # Uqer files need column alignment before use.
        factorData = modifyUQdata(fname).loc[startTime:endTime]
    else:
        factorData = ff.getData(thresholdNum, startTime, endTime, filename=fname)
    normalizedData = ff.winsorAndnorm(factorData, filterdict, endOfWeekList)
    neutralizedData = ff.neutralizeFactor(normalizedData, normalizedLFCAP, IndustryDF, endOfWeekList)
    ff.calReturnAndIC(returnofFactor, tValueofFactor, pValueofFactor,
                      ICFactor, ICpValue, neutralizedData, activeReturnData, factorName)
In [22]:
# Bar chart: per factor, the number of weeks whose |IC| exceeds the grand mean |IC|.
ICFactor[np.abs(ICFactor)>np.abs(ICFactor).mean().mean()].count().plot(figsize=(16,10),kind='bar',color=sns.color_palette("GnBu_d",10))
Out[22]:
In [23]:
# Summary stats of positive vs. negative weekly ICs per factor.
Above0df = ICFactor.astype(float)[ICFactor.astype(float)>0].describe()
Below0df = ICFactor.astype(float)[ICFactor.astype(float)<0].describe()
# show the amount of Negative IC and Positive IC in same figure
fig = plt.figure(figsize=(14,9))
ax = fig.add_subplot(111)
# Stacked bars: counts of positive/negative IC observations per factor.
totaldf = pd.DataFrame({ 'Above_0':Above0df.loc['count'].values, 'Below_0':Below0df.loc['count'].values},index = Above0df.columns)
totaldf.plot(kind='bar',ax=ax, stacked =True,alpha=0.84,title ='IC Distribution',fontsize =13)
ax.set_title(ax.get_title(),alpha=0.88, fontsize=30)
Out[23]:
In [ ]:
In [24]:
# simple cumulative return
# (arithmetic cumsum of weekly factor returns, one line per factor)
fig = plt.figure(figsize=(14,9))
# Add a subplot
ax = fig.add_subplot(111)
returnofFactor.cumsum().plot(figsize=(20,12),ax=ax,color=sns.color_palette("Paired",10),title ='Cumulative Return',fontsize =13)
ax.set_title(ax.get_title(),alpha=0.7, fontsize=30)
Out[24]:
In [25]:
# Net worth plot for each factor
# NOTE(review): np.abs() makes every period's contribution positive, so this
# "net worth" curve is monotonically non-decreasing — it reads like an
# absolute-return (long-short magnitude) compounding curve rather than a
# true equity curve; confirm this is intentional.
fig = plt.figure(figsize=(18,14))
# Add a subplot
ax = fig.add_subplot(111)
(np.abs(returnofFactor)+1).cumprod().plot(figsize=(22,14),ax=ax,color=sns.color_palette("Paired",10),title ='Net Worth',fontsize =13)
ax.set_title(ax.get_title(),alpha=0.7, fontsize=30, ha='right')
Out[25]:
In [26]:
# Fraction of weeks with statistically significant factor returns (p < 0.05).
color_set=sns.light_palette((210, 90, 60), input="husl")
(pValueofFactor[pValueofFactor<0.05].count()/len(pValueofFactor)).plot(figsize=(18,12),kind='bar',color=sns.color_palette(color_set,10))
Out[26]:
In [27]:
# Heatmap of per-period factor returns; dates flattened to YYYYMMDD integers
# for compact axis labels.
ReturnForPlot = returnofFactor.copy()
ReturnForPlot.index = ReturnForPlot.index.map(lambda x:10000*x.year+100*x.month+x.day)
ReturnForPlot = ReturnForPlot.astype(float)
plt.figure(figsize=(20,12))
ax = plt.axes()
# Only the most recent 40 periods are shown.
sns.heatmap(ReturnForPlot[-40:],ax=ax, annot=True)
# NOTE(review): title says "Monthly" but the index holds week-end dates — confirm.
ax.set_title('Monthly Return of Each Factor',fontsize=18, fontweight='bold')
plt.show()
In [28]:
# Cross-sectional L1-normalized IC and return weights, lagged one period
# (shift(1)) so weights at t use only information available before t.
ICFactorTosave = ICFactor.apply(lambda x : x/np.abs(x).sum(),axis=1).shift(1)
ReturnTosave = returnofFactor.apply(lambda x : x/np.abs(x).sum(),axis=1).shift(1)
# Exponential down-weighting by p-value: p=0.05 -> ~0.70, p=1 -> ~0.001.
# The decay constant 7 looks empirical — TODO confirm its choice.
pWeight = np.exp(-7*pValueofFactor.astype(float)).shift(1)
In [29]:
# Combined factor weight: (return weight + IC weight) scaled by the p-value weight.
totalWeighted = (ReturnTosave + ICFactorTosave) * pWeight
totalWeighted
Out[29]:
In [30]:
# NOTE(review): shift(0) is a no-op — this simply displays ReturnTosave.
ReturnTosave.shift(0)
Out[30]:
In [31]:
# Inspect the lagged, normalized IC weights.
ICFactorTosave
Out[31]:
In [32]:
# Cross-sectional rank of the most recent period's factor returns.
returnofFactor.iloc[-1].rank()
Out[32]:
In [33]:
# Final look at the populated IC p-value frame.
ICpValue
Out[33]:
In [ ]:
In [19]:
In [ ]: