In [239]:
#!/Tsan/bin/python
# -*- coding: utf-8 -*-
In [240]:
# Libraries to use
from __future__ import division
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.cluster import KMeans
In [241]:
# Import my own library for factor testing
from SingleFactorTest import factorFilterFunctions as ff
#from config import *
In [242]:
%matplotlib inline
In [243]:
%load_ext line_profiler
In [244]:
# make sure that matplotlib and seaborn can display Chinese characters
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['font.serif'] = ['SimHei']
sns.set_style("darkgrid",{"font.sans-serif":['simhei', 'Arial']})
In [245]:
# Files to use
filenamePrice = 'Own_Factor_AdjustedPriceForward-1d.csv'
filenameST = 'LZ_CN_STKA_SLCIND_ST_FLAG.h5'
filenameTradeday = 'LZ_CN_STKA_SLCIND_TRADEDAYCOUNT.h5'
filenameStopFlag = 'LZ_CN_STKA_SLCIND_STOP_FLAG.h5'
filenameIndu = 'LZ_GPA_INDU_ZX.csv'
filenameFCAP = 'LZ_CN_STKA_VAL_A_FCAP.h5'
filenameAdjustFactor = 'LZ_CN_STKA_CMFTR_CUM_FACTOR.h5'
filenameHS300 = 'LZ_CN_STKA_INDXQUOTE_CLOSE.h5'
filenameZXIndustry = 'LZ_CN_STKA_INDU_ZX.h5' # the industry each stock belongs to
filenameZXExplanation = 'LZ_GPA_TMP_INDU_ZX.csv'
In [246]:
# Factors
# Value factor
filenamePE = 'LZ_GPA_VAL_PE.csv' # P/E ratio 2012-2016 Cum return 0.9(group1),1.4(group8) monotonicity: Normal
filenamePB = 'LZ_GPA_VAL_PB.csv' # P/B ratio 2012-2016 Cum return 1.1(group2),0.1(group9) monotonicity: Good+
filenamePS = 'LZ_GPA_VAL_PS.csv' # P/S ratio 2012-2016 Cum return 0.9(group0),0.3(group9) monotonicity: Good+
filenamePCF = 'LZ_GPA_VAL_PC.csv' # price-to-cash-flow ratio 2012-2016 Cum return 0.95(group0),0.35(group8) monotonicity: Normal
filenameADJPB = 'Own_Factor_AdjustedPB-1d.csv' # market-cap-adjusted P/B 2012-2016 Cum return 1.1(group2),0.2(group9) monotonicity: Normal
# Growth factor
filenameYOYGR = 'LZ_GPA_FIN_IND_QFA_YOYGR.csv' # single-quarter YoY growth of total operating revenue (%) 2012-2016 Cum return 1.1(group8),0.41(group1) monotonicity: Good+
filenameYOYNETPROFIT = 'LZ_GPA_FIN_IND_QFA_YOYNETPROFIT.csv' # single-quarter YoY growth of net profit attributable to parent-company shareholders (%) 2012-2016 Cum return 1.2(group9),0.2(group1) monotonicity: Good
filenameYOYOCF = 'LZ_GPA_FIN_IND_YOYOCF.csv' # YoY growth of net cash flow from operating activities (%) 2012-2016 Cum return 0.98(group8),0.4(group2) monotonicity: Good
filenameYOYROE = 'LZ_GPA_FIN_IND_YOYROE.csv' # YoY growth of ROE (diluted) (%) 2012-2016 Cum return 1.17(group7),0.18(group1) monotonicity: Normal
filenameYOYBPS = 'LZ_GPA_FIN_IND_YOYBPS.csv' # growth of book value per share relative to the start of the year (%) 2012-2016 Cum return 0.85(group4),0.5(group0) monotonicity: Normal
# Financial factor
filenameCAPITALIZEDTODA = 'LZ_GPA_FIN_IND_CAPITALIZEDTODA.csv' # capital expenditure / depreciation and amortization 2012-2016 Cum return 0.95(group0),0.6(group9) monotonicity: Normal+
filenameCASHRATIO = 'LZ_GPA_FIN_IND_CASHRATIO.csv' # conservative quick ratio 2012-2016 Cum return 0.85(group0),0.53(group2) monotonicity: Normal+
filenameCASHTOLIQDEBT = 'LZ_GPA_FIN_IND_CASHTOLIQDEBT.csv' # cash / current liabilities 2012-2016 Cum return 0.8(group7),0.6(group8) monotonicity: Normal
filenameOCFTODEBT = 'LZ_GPA_FIN_IND_OCFTODEBT.csv' # net operating cash flow / total liabilities 2012-2016 Cum return 0.8(group0),0.55(group4) monotonicity: Normal
filenamePROFITTOOP = 'LZ_GPA_FIN_IND_PROFITTOOP.csv' # total profit / operating revenue 2012-2016 Cum return 0.84(group3),0.23(group9) monotonicity: Normal
filenamePROFITTOOPTTM = 'LZ_GPA_FIN_DERI_COMBO_EBTTOOR_TTM.csv' # total profit / operating revenue (TTM) 2012-2016 Cum return 0.72(group8),0.41(group1) monotonicity: Good
filenameBERYRATIO = 'LZ_GPA_DERI_BerryRatio.csv' # gross margin / operating expenses (Berry ratio) 2012-2016 monotonicity: bad, not useful at all
# Momentum factor
filenameTRUEMOM = 'LZ_GPA_USR_MOM.csv' # true momentum 2012-2016 Cum return 1.0(group3), -0.3(group9) monotonicity: Normal+
filenameMOM1M = 'LZ_GPA_DERI_Momentum_1M.csv' # 1-month reversal 2012-2016 Cum return 0.85(group0), -0.2(group9) monotonicity: Good
filenameMOM3M = 'LZ_GPA_TURNOVER_TurnoverAvg_3M.csv' # 3-month reversal 2012-2016 Cum return 0.65(group7), 0.4(group9) monotonicity: Normal
filenamePVO = 'LZ_GPA_DERI_PVO.csv' # 1-year skewness 2012-2016 Cum return 0.47(group0), -0.3(group9) monotonicity: Good
filenameABORMALVOLUME = 'LZ_GPA_DERI_NormalizedAbormalVolume.csv' # normalized abnormal volume 2012-2016 Cum return 0.7(group0), 0.1(group9) monotonicity: Normal
filenameSKEW = 'LZ_GPA_DERI_TSKEW.csv' # slope 2012-2016 Cum return 1.1(group0), 0.25(group8) monotonicity: Good+
filenameMACD = 'LZ_GPA_DERI_MACD.csv' # MACD 2012-2016 Cum return 0.8(group3), -0.4(group9) monotonicity: Normal
finenameBB = 'LZ_GPA_DERI_BB_20.csv' # Bollinger Bands 2012-2016 Cum return 0.75(group3), 0.05(group9) monotonicity: Normal
# Liquidity factor
filenameTURNOVER1M = 'LZ_GPA_TURNOVER_TurnoverAvg_1M.csv' # 1-month average turnover 2012-2016 Cum return 0.7(group4), 0.1(group9) monotonicity: Normal
filenameAMOUNTAVG1M = 'LZ_GPA_DERI_AmountAvg_1M.csv' # average daily trading value 2012-2016 Cum return 1.52(group0), -0.27(group9) monotonicity: Perfect
filenameILLIQ = 'LZ_GPA_DERI_ILLIQ.csv' # illiquidity factor 2012-2016 Cum return 1.5(group9), -0.07(group1) monotonicity: Perfect
filenameTURNOVER = 'LZ_GPA_VAL_TURN.csv' # turnover 2012-2016 Cum return 0.78(group6), -0.25(group9) monotonicity: Normal (groups roughly similar except group 9)
filenameOWNILLIQ = 'Own_Factor_ILLQ-1d.csv' # illiquidity 2012-2016 Cum return 1.5(group9), -0.25(group0) monotonicity: Perfect
filenameADJILLIQ = 'Own_Factor_ADJ_ILLQ_1D.csv' # illiquidity (market-cap adjusted)
filenameADJTURNOVER = 'LZ_GPA_DERI_adjustedTurnOver_20.csv' # market-cap-adjusted average daily trading value 2012-2016 Cum return 1.25(group0), -0.5(group9) monotonicity: Perfect
# Volatility factor
filenameRV1Y = 'LZ_GPA_DERI_RealizedVolatility_1Y.csv' # 1-year return volatility 2012-2016 Cum return 0.65(group8), 0.4(group1) monotonicity: Normal
filenameOwnVol = 'Own_Factor_Volatility_90d.csv' # 90-day return volatility 2012-2016 Cum return 0.65(group1), -0.26(group9) monotonicity: Good
filenameAbove20 = 'Own_Factor_Above20MA_20d.csv' # average of prices above the 20-day MA 2012-2016 Cum return 0.8(group1), -0.4(group9) monotonicity: Good
filenameTOV20 = 'Own_Factor_Turnover_Volatility_20D.csv' # 20-day turnover volatility
filenameADJTOV20 = 'Own_Factor_ADJ_Turnover_Volatility_20D.csv' # 20-day turnover volatility (market-cap adjusted) 2012-2016 Cum return 1.8(group0), -0.3(group9) monotonicity: Perfect
filenameTOVD20 = 'Own_Factor_Turnover_Volatility_deviation_20D.csv' # 20-day average turnover divided by 500-day average turnover, minus 1
filenameADJTOVD20 = 'Own_Factor_ADJ_Turnover_Volatility_Deviation_20D.csv' # 20-day average turnover divided by 500-day average turnover, minus 1 (market-cap adjusted) Cum return 0.7(group0), -0.2(group9) monotonicity: Good
In [247]:
#
#filenameSpecificVol = 'Own_Factor_Specific_Volatility.csv' # idiosyncratic volatility
filenameAroon = 'Aroon_Allstocks.csv'
filenameAdjTOTrue = 'Own_Factor_AdjustedTurnOver-1d.csv' # true market-cap-adjusted turnover 2012-2016 Cum return 1.0(group7), -0.4(group9) monotonicity: Normal
filenameDDA = 'Own_Factor_DDA-1d.csv' # daily trading value per stock (forward-adjusted) 2012-2016 Cum return 2.2(group0), -0.6(group9) monotonicity: Perfect
filennameQFAEPS = 'LZ_GPA_FIN_IND_QFA_EPS.csv' # single-quarter EPS
filenameDDA20 = 'Own_Factor_DDA-20d.csv' # 20-day mean of DDA 2012-2016 Cum return 2.2(group0), -0.6(group9) monotonicity: Perfect
filenameADJDDA20 = 'Own_Factor_ADJ_DDA_20D.csv' # DDA20 after neutralization (market cap removed only)
filenameIDIVOL = 'Own_Factor_Idiosyncratic_Volatility.csv' # idiosyncratic volatility 2012-2016 Cum return 0.9(group1), 0(group9) monotonicity: Good+
filenameOwnSkewness = 'Own_Factor_Skewness_250d.csv' # 250-day skewness
filenameOwnReturnSkew = 'Own_Factor_Return_Skew_250D.csv' # 250-day return skewness
filenamePPO = 'LZ_GPA_DERI_PPO.csv'
filename_5_20_deviation = 'Own_Factor_5_20_price_deviation_1D.csv' # 5-day average price divided by 20-day average price
filename_5_20_return_deviation = 'Own_Factor_5_20_return_deviation_1D.csv' # 5-day average return divided by 20-day average return
filenameSharpe = 'Own_Factor_sharpe_ratio_20D.csv' # 20-day Sharpe ratio
filenameDownsideRisk = 'Own_Factor_downside_risk_252D.csv' # 252-day downside volatility; not a great factor
filenameSortinoRatio = 'Own_Factor_sortino_ratio_20D.csv' # 20-day Sortino ratio; not a great factor
In [248]:
# Uqer factors
filenameUQAD20 = 'Uqer_factor_AD20.csv' # 20-day moving average of the Accumulation/Distribution Line
filenameUQADTM = 'Uqer_factor_ADTM.csv' # ADTM, a sentiment indicator that gauges buying/selling pressure from the gap between upward and downward swings relative to the open; a sentiment-type factor
filenameUQATR6 = 'Uqer_factor_ATR6.csv' # 6-day Average True Range: moving average of the price trading range over a fixed window (perfect!)
filenameUQAroon = 'Uqer_factor_Aroon.csv' # Aroon: counts the number of periods elapsed since the recent high and the recent low
filenameUQBias10 = 'Uqer_factor_BIAS10.csv' # 10-day BIAS (deviation rate), a moving-average-derived indicator measuring the percentage deviation of the price from its trend
filenameUQCCI10 = 'Uqer_factor_CCI10.csv' # 10-day Commodity Channel Index (CCI); measures whether the price has moved outside its normal range (results not great)
filenameUQKDJ_K = 'Uqer_factor_KDJ_K.csv' # KDJ stochastic K: combines momentum, relative strength and moving averages to measure how far the price has strayed from its normal range (not great)
filenameUQROC6 = 'Uqer_factor_ROC6.csv' # 6-day Price Rate of Change (ROC): compares today's close with the close N days ago
filenameUQRVI = 'Uqer_factor_RVI.csv' # Relative Volatility Index (RVI)
filenameUQCMO = 'Uqer_factor_CMO.csv' # Chande Momentum Oscillator (CMO); quite a good factor with very good monotonicity, especially within industries!
filenameUQRSI = 'Uqer_factor_RSI.csv' # Relative Strength Index (RSI); maybe usable?
filenameUQSkewness = 'Uqer_factor_Skewness.csv' # skewness of the stock price over the past 20 trading days; decent monotonicity within industries
filenameUQOBV20 = 'Uqer_factor_OBV20.csv' # 20-day On Balance Volume (OBV); an excellent factor (but too highly correlated with the size factor)
filenameUQMTM = 'Uqer_factor_MTM.csv' # Momentum Index (MTM); the middle groups (group4, group5) do best, perhaps usable as a screening factor?
filenameUQPVT6 = 'Uqer_factor_PVT6.csv' # Price and Volume Trend (PVT); the middle groups (group3, group4) do best, could serve as a screening factor
filenameUQREC = 'Uqer_factor_REC.csv' # analyst recommendation rating score; no monotonicity, group 9 is best, can serve as a stock-selection screen
filenameUQDAREC = 'Uqer_factor_DAREC.csv' # change in analyst recommendation rating vs. 60 trading days ago; no monotonicity, but group 9 is best
filenameUQGREC = 'Uqer_factor_GREC.csv' # trend of analyst rating changes: sum of DAREC signs over the past 60 trading days; group 5 is best, weak monotonicity
filenameUQREVS20 = 'Uqer_factor_REVS20.csv' # 20-day stock return; perfect, a very good factor, and not strongly correlated with the size factor!
filenameUQREVS5 = 'Uqer_factor_REVS5.csv' # 5-day stock return; decent, but not as good as the 20-day return
filenameUQMA10RegressCoeff12 = 'Uqer_factor_MA10RegressCoeff12.csv' # 12-day linear regression coefficient of the 10-day price moving average; decent monotonicity, roughly monotone from group 3 onward (group 3 is best)
filenameUQWVAD = 'Uqer_factor_WVAD.csv' # William's Variable Accumulation Distribution (WVAD), a volume-weighted price-volume indicator; quite a good factor
# monotone from group 1 onward (best groups are group2 and group3), especially within industries
filenameUQHurst = 'Uqer_factor_Hurst.csv' # Hurst exponent; a good factor (little correlation with size) with decent monotonicity (group9 is best), even clearer within industries
filenameUQMassIndex = 'Uqer_factor_MassIndex.csv' # Mass Index, an oscillator designed by Donald Dorsey that accumulates the width of the price range; mainly used to spot trend-reversal points of surging or extremely weak stocks; a common technical-indicator factor
# a volatility indicator with decent monotonicity: group1 on top, group9 at the bottom
filenameUQKlingerOscillator = 'Uqer_factor_KlingerOscillator.csv' # Klinger Oscillator (volume oscillator); so-so monotonicity, the two ends are worse than the middle, perhaps usable for stock selection
In [249]:
# Weaker factors that may still be useful
filenameOVERVOL = 'Over_Heat_Volume.csv'
filenameSIZE = 'LZ_GPA_VAL_A_FCAP.csv'
filenameExcessReturn = 'Own_Factor_excess_return_20D.csv'
filenameROE = 'LZ_GPA_FIN_IND_ROE.csv'
In [250]:
# ROE volatility over x days
filenameRoeVol60 = 'Own_Factor_ROE_Volatility_60D.csv' # does not match expectations
filenameRoeVol120 = 'Own_Factor_ROE_Volatility_120D.csv' # matches expectations, i.e. groups with low ROE volatility earn better returns, but monotonicity is weak
filenameRoeVol250 = 'Own_Factor_ROE_Volatility_250D.csv' # matches expectations with very good monotonicity; use this factor to describe ROE volatility! good+!
In [251]:
# Non-linear factors
filenameNLFCAP = 'Own_Factor_Non_Linear_Size.1D.csv' # works almost perfectly, but its correlation with the size factor is close to -1, so it is probably still driven by small caps?
filenameBeta = 'Own_Factor_Beta_1D.csv' # beta factor
In [252]:
# Consensus estimates
filenameNetProfitTTM = 'LZ_CN_STKA_CRD_NET_PRFT_FTTM.h5'
In [253]:
# h5 files here
filenameOwnFactorILLIQh5 = 'OwnFactorILLIQ.h5'
filenameOwnFactorADJILLIQh5 = 'OwnFactorADJILLIQ.h5' # a poor factor, works badly
filenameLZBPLR = 'LZ_CN_STKA_DERI_BP_LR.h5' # shareholders' equity to total market cap (total equity / total assets)
filenameOwnFactorSmartMoney = 'OwnfactorSmartMoney1min.h5'
filenameOwnFactorLogSmartMoney = 'OwnfactorlogSmartMoneyCSIall.h5'
filenameOwnMassIndex = 'OwnfactorMassIndex.h5' # Mass Index: a fairly strong factor, not very correlated with size; the first few groups (group1-group3) do not separate well, but the later groups separate nicely
filenameMassIndex800 = 'OwnfactorMassIndex800.h5'
filenameOwnNetIn = 'OwnfactorNetIn.h5' # net money inflow (built from minute bars); mediocre results, poor monotonicity
filenameOwnNetInRatio = 'OwnfactorNetInRatio.h5' # net money inflow / free-float market cap
filenameOwnNetInRatio5d = 'OwnfactorNetInRatio5d.h5' # 5-day average net money inflow / free-float market cap
filenameOwnNetInRatio20d = 'OwnfactorNetInRatio20d.h5' # 20-day average net money inflow / free-float market cap
filenameVolQuantileRatio = 'VolQuantileRatio.h5' # volume-distribution index; overall monotonicity is not that strong, but the first few groups are monotone, so it can be used for stock selection; tick data might work even better
filenameInduSpreadBias = 'OwnFactorInduSpreadBias.h5' # industry spread bias; a good reversal factor with strong monotonicity, mean and median IC of -0.07, but little excess return in 2017
In [254]:
# Adjusted prices
filenameAdjClose = 'OwnfactorAdjustedClose.h5'
In [255]:
# Factors not created by myself
filenameTrueSpreadBias = 'OthersSpreadBias222_1D.csv' # Orient Securities spread-volatility factor; mediocre in the plain test, very good after splitting by industry
filenameIdiosycraticVol = 'Othersid2_std_3m_1D.csv'
In [256]:
# Constants
startTime = datetime.strptime('20120504', '%Y%m%d')
endTime = datetime.strptime('20170228', '%Y%m%d')
path = ff.data_path
timeStampNum = 2500
thresholdNum = 0.2
HS300Index ='000300.SH' # HS300 index code
ZZ500Index = '000905.SH' # ZZ500 index code
In [257]:
# for csv file
'''
stDF = pd.read_csv(path+filenameST,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime:endTime]
tradeDayDF = pd.read_csv(path+filenameTradeday,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime:endTime]
stopFlagDF = pd.read_csv(path+filenameStopFlag,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime:endTime]
indusDF = pd.read_csv(path+filenameZXIndustry,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime:endTime]
induExplanation = pd.read_csv(path+filenameZXExplanation,infer_datetime_format=True,parse_dates=[0],encoding='gb2312')
'''
Out[257]:
In [ ]:
In [ ]:
In [258]:
# for h5 file
stDF = ff.readh5data(path,filenameST).loc[startTime:endTime]
tradeDayDF = ff.readh5data(path,filenameTradeday).loc[startTime:endTime]
stopFlagDF = ff.readh5data(path,filenameStopFlag).loc[startTime:endTime]
indusDF = ff.readh5data(path,filenameZXIndustry).loc[startTime:endTime]
induExplanation = pd.read_csv(path+filenameZXExplanation,infer_datetime_format=True,parse_dates=[0],encoding='gb2312')
In [ ]:
In [350]:
#
sololist = [filenameOwnMassIndex]
#
filenameDict = {'PE':filenamePE,'PB':filenamePB, 'PS':filenamePS, 'PCF':filenamePCF, 'YOYGR':filenameYOYGR,'YOYGRPROFIT':filenameYOYNETPROFIT, \
'TRUE_MOM':filenameTRUEMOM, 'MOM_1M':filenameMOM1M , 'TURNOVER_1M':filenameTURNOVER1M }
In [351]:
induExplanation.tail()
Out[351]:
In [352]:
explanationDict = induExplanation.iloc[:,0].to_dict()
In [353]:
explanationDict
Out[353]:
In [354]:
sololist[0]
Out[354]:
In [ ]:
In [355]:
if 'Uqer' in sololist[0] or 'Others' in sololist[0]:
sparedata = pd.read_csv(path+sololist[0],infer_datetime_format=True,parse_dates=[0],index_col=0)
else:
sparedata = ff.readh5data(path,sololist[0])
In [ ]:
In [356]:
# adjust the column names of the Uqer data to match the own-factor data
if 'Uqer' in sololist[0] or '1min' in sololist[0]:
uqercolumnList = sparedata.columns.tolist()
uqercolumnName = [x.split('.')[0] for x in uqercolumnList]
newcolumnList = stDF.columns.tolist()
newcolumnName = [x.split('.')[0] for x in newcolumnList]
columndict = dict(zip(newcolumnName,newcolumnList))
finalcolumns = []
for stk in uqercolumnName:
if stk in newcolumnName:
stk = columndict[stk]
else:
pass
finalcolumns.append(stk)
sparedata.columns = finalcolumns
addNanColumns = list(set(newcolumnList) - set(sparedata.columns))
addData = pd.DataFrame(index = sparedata.index,columns = addNanColumns,dtype =float)
sparedata = pd.concat([sparedata,addData],axis=1)
sparedata = sparedata[newcolumnList]
In [ ]:
In [ ]:
In [357]:
## easy way to calculate the last day of the month
#stDF['label'] = stDF.index.map(lambda x: (x.year,x.month))
#locationList=(stDF.groupby(['label']).size().cumsum()-1).values
#stDF.iloc[locationList].index
#stDF.groupby(['label'])
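In [ ]:
# A minimal, runnable sketch of the month-end trick described in the commented code above,
# assuming only stDF's DatetimeIndex (ff.getLastDayOfMonth below remains the method actually used).
# Grouping by (year, month) and taking the cumulative group sizes minus one gives the
# positional index of the last trading day of each month.
monthLabel = stDF.index.map(lambda x: (x.year, x.month))
lastDayLoc = (pd.Series(1, index=stDF.index).groupby(monthLabel).size().cumsum() - 1).values
print stDF.index[lastDayLoc][:5]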
In [358]:
# The commented line reads the adjusted price from csv with pd.read_csv; the active line below reads the adjusted close from h5
#priceData = pd.read_csv(path+ filenamePrice ,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime:endTime]
priceData = ff.readh5data(path,filenameAdjClose).loc[startTime:endTime]
#benchMarkData = pd.read_csv(path+filenameHS300,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime:endTime][ZZ500Index]
In [ ]:
In [359]:
# Read h5data since the benchmarkData is the h5 file
#priceData = ff.readh5data(path, filenamePrice).loc[startTime:endTime]
benchMarkData = ff.readh5data(path, filenameHS300).loc[startTime:endTime][ZZ500Index]
In [ ]:
In [360]:
LFCAPDF = np.log10(ff.getData(thresholdNum, startTime, endTime,filename = filenameFCAP))
In [ ]:
In [ ]:
In [361]:
#fs = np.log10(pd.read_csv(path+ filenameFCAP ,infer_datetime_format=True,parse_dates=[0],index_col=0))
#fs[fs>fs.quantile(0.9,axis=1)] = np.NaN
#fs.to_csv(path+'Own_factor_Nonlinear_FCAP.csv',na_rep='NaN',date_format='%Y%m%d')
In [362]:
#fs[fs<fs.quantile(0.4,axis=1)]
In [ ]:
In [363]:
endOfMonthList = ff.getLastDayOfMonth(LFCAPDF.index)[1]
In [364]:
#sorted(list(set(endOfMonthList) & set(endOfMonthList1)))
In [ ]:
In [365]:
# calculate correlation between two factors
#if 'Uqer' or '1min' in sololist[0]:
factor1 = sparedata.loc[startTime:endTime]
#else:
#factor1 = pd.read_csv(path+sololist[0],infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime:endTime]
#factor1 = ff.readh5data(path,sololist[0])
correlationDF = ff.showCorrelation(factor1,LFCAPDF, endOfMonthList, filterdic = None).astype(float)
correlationDF.describe()
Out[365]:
In [ ]:
In [ ]:
In [366]:
#%lprun -f ff.getLastDayOfMonth ff.getLastDayOfMonth(LFCAPDF.index)
In [367]:
activeReturnData = ff.calcReturn(priceData, endOfMonthList, benchmark = benchMarkData,activeReturn = True,logReturn = False)
In [ ]:
In [368]:
# Generate the list of stocks to exclude (ST, newly listed, or suspended)
filterdict={}
for i in endOfMonthList:
suspendList = ff.GetSTNewSuspend(i,stDF,tradeDayDF,stopFlagDF)
filterdict[i] = suspendList
print i, len(filterdict[i])
In [369]:
### Calculate the return of each group for the given (solo) factor
totalGroupDict ={}
factorData = ff.getData(thresholdNum, startTime, endTime,availableData = factor1)
for date in endOfMonthList:
factorDataTemp = factorData.loc[:date].tail()
factorDataTemp = factorDataTemp[list(set(factorDataTemp.columns.tolist())- set(filterdict[date]))]
#print factorDataTemp
totalGroupDict[date] = ff.getStockGroup(factorDataTemp,groupNum=10,Mean_Num=20)
In [370]:
#totalGroupDict
In [371]:
ReturnDF = pd.DataFrame(index=endOfMonthList[:-1],columns=totalGroupDict.values()[0].keys(),data=None, dtype =float)
sizeDistribution = pd.DataFrame(index=endOfMonthList[:-1],columns=totalGroupDict.values()[0].keys(),data=None, dtype =float)
for group in ReturnDF.columns.tolist():
for time in ReturnDF.index:
ReturnDF.loc[time, group] = activeReturnData.loc[time][totalGroupDict[time][group]].mean()
sizeDistribution.loc[time, group] = LFCAPDF.loc[time][totalGroupDict[time][group]].quantile()
ReturnDF.sort_index(axis=1,inplace=True)
sizeDistribution.sort_index(axis=1,inplace=True)
In [372]:
# show the size distribution of each group (median of log market cap)
fig = plt.figure(figsize=(16,10))
# Add a subplot
ax = fig.add_subplot(111)
sizeDistribution.median().plot(kind='bar',ax = ax, fontsize =13,title ='Size Distribution of each group',alpha =0.8)
ax.set_title(ax.get_title(),alpha=0.7, fontsize=25)
Out[372]:
In [373]:
sizeQuantile = sizeDistribution.quantile()
sizeMono = sizeQuantile.corr(pd.Series(index = sizeQuantile.index, data = range(len(sizeQuantile))), method = 'spearman')
sizeMono
Out[373]:
In [374]:
# Calc spearman correlation to investigate monotonicity
referSeries = pd.Series(index = ReturnDF.columns, data=range(len(ReturnDF.columns)))
monoDF = pd.DataFrame(index = ReturnDF.index, columns=['Spearman_Cor'], dtype = float)
for date in ReturnDF.index:
monoDF.loc[date] = ReturnDF.loc[date].corr(referSeries,method='spearman')
In [375]:
# Plot
fig = plt.figure(figsize=(18,14))
# Add a subplot
ax = fig.add_subplot(111)
monoDF.plot(figsize=(22,14),ax=ax,fontsize =13,title ='Monotonicity')
ax.set_title(ax.get_title(),alpha=0.7, fontsize=30)
Out[375]:
In [ ]:
In [376]:
### Method to calculate moving max drawdown
from numpy.lib.stride_tricks import as_strided
def windowed_view(x, window_size):
"""Creat a 2d windowed view of a 1d array.
`x` must be a 1d numpy array.
`numpy.lib.stride_tricks.as_strided` is used to create the view.
The data is not copied.
Example:
>>> x = np.array([1, 2, 3, 4, 5, 6])
>>> windowed_view(x, 3)
array([[1, 2, 3],
[2, 3, 4],
[3, 4, 5],
[4, 5, 6]])
"""
y = as_strided(x, shape=(x.size - window_size + 1, window_size),
strides=(x.strides[0], x.strides[0]))
return y
In [377]:
###
def rolling_max_dd(x, window_size, min_periods=1):
"""Compute the rolling maximum drawdown of `x`.
`x` must be a 1d numpy array.
`min_periods` should satisfy `1 <= min_periods <= window_size`.
Returns an 1d array with length `len(x) - min_periods + 1`.
"""
if min_periods < window_size:
pad = np.empty(window_size - min_periods)
pad.fill(x[0])
x = np.concatenate((pad, x))
y = windowed_view(x, window_size)
#print y
rolling_max_y = np.maximum.accumulate(y, axis=1)
#print rolling_max_y
dd = 1-y/rolling_max_y
return np.abs(dd).max(axis=1)
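In [ ]:
# Quick sanity check of rolling_max_dd on a toy series (hypothetical numbers, not from the data).
# For [1.0, 1.2, 0.9, 1.1] the running peaks are [1.0, 1.2, 1.2, 1.2], the drawdowns are
# [0, 0, 0.25, ~0.083], so the rolling maximum drawdown should print [0., 0., 0.25, 0.25].
print rolling_max_dd(np.array([1.0, 1.2, 0.9, 1.1]), window_size=4, min_periods=1)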
In [378]:
max(rolling_max_dd(ReturnDF['group_0'].values,4, min_periods=1))
Out[378]:
In [379]:
# long the top group, short the bottom group
sortGroups = ReturnDF[['group_0','group_9']].cumsum().iloc[-1].sort_values(ascending = False).index
top, bot = sortGroups[0], sortGroups[-1]
# Add another column
ReturnDF['top_bot_comb'] = ReturnDF[top] - ReturnDF[bot]
In [380]:
top, bot
Out[380]:
In [381]:
ReturnDF.tail(10)
Out[381]:
In [382]:
# simple cumulative return
fig = plt.figure(figsize=(18,14))
# Add a subplot
ax = fig.add_subplot(111)
ReturnDF.astype(float).cumsum().plot(figsize=(22,14),ax=ax, color=sns.color_palette("Paired",11),fontsize =13,title ='Cumulative Return')
ax.set_title(ax.get_title(),alpha=0.7, fontsize=30)
Out[382]:
In [ ]:
In [383]:
# Net worth plot
netWorthDF = (ReturnDF.astype(float)+1).cumprod()
fig = plt.figure(figsize=(18,14))
# Add a subplot
ax = fig.add_subplot(111)
netWorthDF.plot(figsize=(22,14),ax=ax,color=sns.color_palette("Paired",11),title ='Net Worth',fontsize =13)
ax.set_title(ax.get_title(),alpha=0.7, fontsize=30, ha='right')
Out[383]:
In [384]:
# Calc spearman correlation to investigate monotonicity
referSeries = pd.Series(index = netWorthDF.iloc[:,:-1].columns, data=range(len(netWorthDF.iloc[:,:-1].columns)))
monoDF = pd.DataFrame(index = netWorthDF.index, columns=['Spearman_Cor'], dtype = float)
for date in netWorthDF.index:
monoDF.loc[date] = netWorthDF.iloc[:,:-1].loc[date].corr(referSeries,method='spearman')
In [ ]:
In [385]:
# Plot
fig = plt.figure(figsize=(18,14))
# Add a subplot
ax = fig.add_subplot(111)
monoDF.plot(figsize=(22,14),ax=ax,fontsize =13,title ='Monotonicity')
ax.set_title(ax.get_title(),alpha=0.7, fontsize=30)
Out[385]:
In [386]:
# monotonicity information
print monoDF.std()
print monoDF.median()
In [387]:
# basic indicator
annualizedReturn = (1+ReturnDF.mean())**12 - 1
annualizedVol = ReturnDF.std()* np.sqrt(12)
sharpeRatio = annualizedReturn / annualizedVol
print 'Annual Return:','\n',annualizedReturn,'\n\n','Annual Volatility:','\n',annualizedVol,'\n\n','Sharpe Ratio:','\n',sharpeRatio
In [388]:
# downside risk: zero out above-mean returns, then take the annualized std of what remains
copyReturn = ReturnDF.copy()
copyReturn[copyReturn > copyReturn.mean()] = 0
downsideRisk = copyReturn.std(skipna = True) * np.sqrt(12)
downsideRisk
Out[388]:
In [ ]:
In [389]:
sortinoRatio = annualizedReturn / downsideRisk
sortinoRatio
Out[389]:
In [390]:
# Max drawdown
maxdd = netWorthDF.copy()
maxdd.iloc[0] = 0
for date in netWorthDF.index[1:]:
maxdd.loc[date] = 1-netWorthDF.loc[date]/netWorthDF.loc[:date].max()
maxddInfo = pd.concat([maxdd.max(),maxdd.idxmax()],axis=1)
maxddInfo.columns = ['Max_drawdown','Time']
maxddInfo
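In [ ]:
# Optional cross-check of the drawdown loop above (not part of the original analysis):
# the same running drawdown can be computed in a vectorized way, since
# netWorthDF.cummax() is the running peak of each net-worth series.
maxddCheck = 1 - netWorthDF / netWorthDF.cummax()
print (maxddCheck.max() - maxdd.max()).abs().max()  # should be ~0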
Out[390]:
In [391]:
# calmar Ratio
calmarRatio = annualizedReturn/ maxddInfo['Max_drawdown']
calmarRatio
Out[391]:
In [ ]:
In [ ]:
In [ ]:
In [392]:
ReturnForPlot =ReturnDF.copy()
In [393]:
ReturnForPlot.index = ReturnForPlot.index.map(lambda x:100*x.year+(1+x.month) if x.month < 12 else\
100*(x.year+1)+1) ### shift each monthly label to the following month, i.e. the month over which the return is realized
In [394]:
### The following part saves the return data of each factor into one DataFrame
In [395]:
#---------------- The following part shows the difference of a risk factor across different market capitalization sizes ----#
#------------- and across different industries ------#
In [396]:
# show the difference of a risk factor across different market capitalization sizes
# capdata should not contain NaN values
# Return: DICTIONARY, the KEY is the date and the VALUE is a tuple of the (small, mid, huge) cap groups
# Inputs:
# capdata: DATAFRAME, the log free-float cap (LFCAP) data
# datelist: LIST, the list of month-end dates
def getGroupsbyCap(capdata, datelist):
capdict ={}
for date in datelist:
capdataindice = capdata.loc[date]
lower = capdataindice.quantile(1/3)
upper = capdataindice.quantile(2/3)
smallcap = capdataindice[capdataindice<=lower].index
midcap = capdataindice[(lower<capdataindice) & (capdataindice<=upper)].index
hugecap = capdataindice[capdataindice>upper].index
capdict[date] = (smallcap,midcap,hugecap)
return capdict
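In [ ]:
# Tiny illustration of getGroupsbyCap with made-up data (hypothetical tickers and a single date),
# just to show the returned structure: {date: (small-cap index, mid-cap index, large-cap index)}.
demoDates = pd.to_datetime(['2012-05-31'])
demoCap = pd.DataFrame(np.arange(9.0).reshape(1, 9), index=demoDates,
                       columns=['stk%d' % i for i in range(9)])
print getGroupsbyCap(demoCap, demoDates)[demoDates[0]]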
In [397]:
# randomly pick 10 of the 29 industry codes
grouplabel = np.random.choice(29,10,replace=False)
grouplabel
Out[397]:
In [398]:
# show the difference of a risk factor across different industries
# industryDF should not contain NaN values
# Return: DICTIONARY, the KEY is the date and the VALUE is a DICTIONARY mapping each industry label to its stocks on that day
# Inputs:
# datelist: LIST, the list of month-end dates
# grouplabel: LIST, the industry labels; usually 3 of them are fixed and the other 3 are chosen at random, e.g. [2,3,6,15,18,25]
# industryDF: DATAFRAME, the industry classification dataframe
def getIndustryDict(datelist,grouplabel,industryDF):
industrydict = {}
industryDF = industryDF.loc[datelist]
for date in datelist:
industryDFindice = industryDF.loc[date]
industrydict[date] = {label:industryDFindice[industryDFindice == label].index for label in grouplabel}
return industrydict
In [ ]:
In [399]:
#------------------------------ Following part is to group stocks within industry --------------------------
In [400]:
wholeIndList = np.array(range(29))
wholeIndDict = getIndustryDict(endOfMonthList,wholeIndList,indusDF)
In [401]:
multindexList = [endOfMonthList,wholeIndList]
induReturnDF = pd.DataFrame(data=None, columns=totalGroupDict.values()[0].keys(),\
index=pd.MultiIndex.from_product(multindexList,names=['time','industry']),dtype=float)
In [402]:
# Group the stocks
# To-do: add a weighting option that uses the same industry weights as the benchmark (i.e. industry-neutralized); see the sketch after this cell
groupNumberThrottle = 10
for i,j in wholeIndDict.iteritems():
print i
factorIndice = factorData.loc[:i].tail()
factorIndice = factorIndice[list(set(factorIndice.columns.tolist())- set(filterdict[i]))] # Remove ST, newly listed and suspended stocks
for ind, stk in j.iteritems():
intersection = list(set(factorIndice.columns.tolist()) & set(stk))
if len(intersection) < groupNumberThrottle:
induReturnDF.loc[i,ind] = 0
continue
else:
stkgroup = ff.getStockGroup(factorIndice[intersection], groupNum=10, Mean_Num=20)
for p,q in stkgroup.iteritems():
try:
induReturnDF.loc[(i, ind), p] = activeReturnData.loc[i][q].mean()
except:
induReturnDF.loc[(i, ind), p] = np.NaN
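In [ ]:
# Hedged sketch of the to-do above: instead of the plain mean over industries taken in the next cell,
# weight each industry's group return by the benchmark's industry weight.
# benchIndWeight is only a placeholder here (equal weights); the real benchmark industry weights
# are not loaded in this notebook.
benchIndWeight = pd.Series(1.0 / len(wholeIndList), index=wholeIndList)
induNeutralReturn = induReturnDF.mul(benchIndWeight, axis=0, level='industry').sum(level='time')
induNeutralReturn.tail()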
In [ ]:
In [403]:
finalReturn = induReturnDF.mean(level = 'time')
finalReturn.sort_index(axis=1,inplace=True)
In [404]:
sortGroups = finalReturn[['group_0','group_9']].cumsum().iloc[-1].sort_values(ascending = False).index
top, bot = sortGroups[0], sortGroups[-1]
# Add another column
finalReturn['top_bot_comb'] = finalReturn[top] - finalReturn[bot]
In [405]:
indNetWorth = (finalReturn+1).cumprod()
fig = plt.figure(figsize=(14,9))
# Add a subplot
ax = fig.add_subplot(111)
indNetWorth.plot(figsize=(22,14),ax=ax,color=sns.color_palette("Paired",11),title ='Net Worth',fontsize =13)
ax.set_title(ax.get_title(),alpha=0.7, fontsize=30, ha='right')
Out[405]:
In [406]:
# basic indicator
annualizedReturnNew = (1+finalReturn.mean())**12 - 1
annualizedVolNew = finalReturn.std()* np.sqrt(12)
sharpeRatioNew = annualizedReturnNew / annualizedVolNew
print 'Annual Return:','\n',annualizedReturnNew,'\n\n','Annual Volatility:','\n',annualizedVolNew,'\n\n','Sharpe Ratio:','\n',sharpeRatioNew
In [407]:
copyReturn = finalReturn.copy()
copyReturn[copyReturn > copyReturn.mean()] = 0
downsideRiskNew = copyReturn.std(skipna = True) * np.sqrt(12)
downsideRiskNew
Out[407]:
In [ ]:
In [408]:
sortinoRatioNew = annualizedReturnNew / downsideRiskNew
sortinoRatioNew
Out[408]:
In [409]:
# Max drawdown
maxdd1 = indNetWorth.copy()
maxdd1.iloc[0] = 0
for date in indNetWorth.index[1:]:
maxdd1.loc[date] = 1-indNetWorth.loc[date]/indNetWorth.loc[:date].max()
maxddInfo1 = pd.concat([maxdd1.max(),maxdd1.idxmax()],axis=1)
maxddInfo1.columns = ['Max_drawdown','Time']
maxddInfo1
Out[409]:
In [ ]:
In [410]:
# calmar Ratio
calmarRatioNew = annualizedReturnNew/ maxddInfo1['Max_drawdown']
calmarRatioNew
Out[410]:
In [411]:
def str_to_datetime_format(string):
return '%Y/%m/%d' if '/' in string else '%Y-%m-%d'
In [412]:
print downsideRisk['top_bot_comb'],sortinoRatio['top_bot_comb']
In [413]:
# save factor performance summary into csv file
if np.isnan(monoDF.median().values[0]) and np.isnan(sharpeRatio['top_bot_comb']):
raise Exception( 'Error! Please check the original data!')
savepath = 'C:/Users/LZJF_02/Desktop/myownliarbry'
infodata = np.array([[startTime,endTime,monoDF.median().values[0],monoDF.std().values[0],annualizedReturn['top_bot_comb'],annualizedVol['top_bot_comb'],sharpeRatio['top_bot_comb'],\
downsideRisk['top_bot_comb'],sortinoRatio['top_bot_comb'], maxddInfo['Max_drawdown'].loc['top_bot_comb'],calmarRatio['top_bot_comb']]])
totalInfo = pd.DataFrame(index =[sololist[0].split('.')[0]],columns = ['Start_time','End_time','Mono_median','Mono_std','Annualized_return','Annualized_volatility','Sharpe_ratio','Downside_Risk',
'Sortino_ratio','Max_drawdown','Calmar_Ratio'],data= infodata)
totalInfo.index.name = 'Factor_Name'
try:
readfacInfo = pd.read_csv(savepath+'/'+'factorInfo.csv',infer_datetime_format=True,parse_dates=[0],index_col=0)
except:
readfacInfo = totalInfo
readfacInfo.to_csv(savepath+'/'+'factorInfo.csv',na_rep='NaN')
factorName = sololist[0].split('.')[0]
if factorName in readfacInfo.index:
print factorName+ ' '+'already in the file!'
try:
begin = datetime.strptime(readfacInfo.loc[factorName]['Start_time'].split()[0],str_to_datetime_format(readfacInfo.loc[factorName]['Start_time']))
end = datetime.strptime(readfacInfo.loc[factorName]['End_time'].split()[0],str_to_datetime_format(readfacInfo.loc[factorName]['End_time']))
except:
print 'No conversion needed! The time type is already a python datetime!'
begin = readfacInfo.loc[factorName]['Start_time']
end = readfacInfo.loc[factorName]['End_time']
print begin,startTime, end, endTime
if begin > startTime or end < endTime:
print 'Update needed'
print readfacInfo.loc[factorName].values
print infodata.flatten()
readfacInfo.loc[factorName] = infodata.flatten()
updatedInfo = readfacInfo
else:
updatedInfo = pd.concat([readfacInfo,totalInfo])
updatedInfo = updatedInfo.rename(columns = {'End_Time': 'End_time'})
updatedInfo.to_csv(savepath+'/'+'factorInfo.csv',na_rep='NaN')
In [414]:
updatedInfo
Out[414]:
In [ ]:
In [415]:
'''Define the risk-preference weights before performing k-means clustering'''
Out[415]:
In [416]:
updatedInfo['Mono_median'] = np.abs(updatedInfo['Mono_median'])
mat =updatedInfo.dropna(axis=0).iloc[:,2:].as_matrix()
# Using sklearn
km = KMeans(n_clusters=4)
result = km.fit(mat)
# Get cluster assignment labels
labels = km.labels_
# Format results as a DataFrame
results = pd.DataFrame(data=labels, columns=['cluster'],index = updatedInfo.index)
In [417]:
results.groupby(['cluster'])
Out[417]:
In [418]:
for name,group in results.groupby(['cluster']):
print group
In [419]:
(maxddInfo1-maxddInfo)
Out[419]:
In [420]:
#-------------------------------------- Section End -------------------------------------
In [421]:
industryDict = getIndustryDict(endOfMonthList,grouplabel,indusDF)
In [422]:
capDict=getGroupsbyCap(LFCAPDF,endOfMonthList)
In [423]:
templist=[endOfMonthList,['mean','median','std']]
capGroupsDF = pd.DataFrame(data=None, columns=['small','mid','huge'],index=pd.MultiIndex.from_product(templist,names=['time','stats']),dtype=float)
indusGroupDF = pd.DataFrame(data=None, columns=grouplabel,index=pd.MultiIndex.from_product(templist,names=['time','stats']),dtype=float)
In [424]:
for date in endOfMonthList:
factorindice = factorData.loc[date]
smallindice = factorindice.loc[list(set(factorindice.index) & set(capDict[date][0]))]
midindice = factorindice.loc[list(set(factorindice.index) & set(capDict[date][1]))]
hugeindice = factorindice.loc[list(set(factorindice.index) & set(capDict[date][2]))]
capGroupsDF.loc[date,'small'] = [smallindice.mean(),smallindice.median(),smallindice.std()]
capGroupsDF.loc[date,'mid'] = [midindice.mean(),midindice.median(),midindice.std()]
capGroupsDF.loc[date,'huge'] = [hugeindice.mean(),hugeindice.median(),hugeindice.std()]
for i in grouplabel:
#print grouplabel
inDFIndice = factorindice.loc[list(set(factorindice.index) & set(industryDict[date][i]))]
indusGroupDF.loc[date,i] = [inDFIndice.mean(),inDFIndice.median(),inDFIndice.std()]
indusGroupDF.rename(columns = {i:u''+explanationDict[i]+'' for i in grouplabel},inplace=True)
In [425]:
indusGroupDF
Out[425]:
In [426]:
capGroupsDF.head()
Out[426]:
In [ ]:
In [ ]:
In [ ]:
In [427]:
newstack = capGroupsDF.stack().unstack('stats').reset_index()
newstack = newstack.rename(columns = {'level_1':'cap'})
newstack = newstack.pivot_table(index='time',columns='cap')
In [428]:
newstack.head()
Out[428]:
In [429]:
fig, axs = plt.subplots(3,1, figsize=(16, 10), facecolor='w', edgecolor='k',sharex=True)
fig.subplots_adjust(hspace = .5, wspace=.001)
for label,num in zip(newstack.columns.levels[0],range(len(newstack.columns.levels[0]))):
newstack[label].plot(ax=axs[num],legend=False)
axs[num].set_title(label)
axs[0].legend()
Out[429]:
In [430]:
newstackInd = indusGroupDF.stack().unstack('stats').reset_index()
newstackInd = newstackInd.rename(columns = {'level_1':'industry'})
newstackInd = newstackInd.pivot_table(index='time',columns='industry')
In [431]:
fig, axs = plt.subplots(3,1, figsize=(22, 16), facecolor='w', edgecolor='k',sharex=True)
fig.subplots_adjust(hspace = .5, wspace=.001)
for label,num in zip(newstackInd.columns.levels[0],range(len(newstackInd.columns.levels[0]))):
newstackInd[label].plot(ax=axs[num],color=sns.color_palette("Paired",10),legend=False)
axs[num].set_title(label)
axs[0].legend()
Out[431]:
In [432]:
capcopy=capGroupsDF.copy()
copydata=capcopy.reset_index()
In [433]:
copydata.pivot_table(index='time',columns='stats').head()
Out[433]:
In [434]:
fig, axs = plt.subplots(3,1, figsize=(16, 10), facecolor='w', edgecolor='k',sharex=True)
fig.subplots_adjust(hspace = .5, wspace=.001)
for label,num in zip(set(copydata['stats']),range(len(set(copydata['stats'])))):
#print label,num
dataslice = copydata[copydata['stats']==label]
dataslice.plot(ax=axs[num])
axs[num].set_title(label)
In [ ]: