In [856]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
In [1759]:
# Libraries to use
from __future__ import division
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import statsmodels
import cvxopt as cv
from cvxopt import solvers
In [858]:
# Import My own library for factor testing
import factorFilterFunctions as ff
#from config import *
In [859]:
%matplotlib inline
In [860]:
# basic stock pool
ZZ500Weight = 'LZ_GPA_INDEX_CSI500WEIGHT.csv'
HS300Weight = 'LZ_GPA_INDEX_HS300WEIGHT.csv'
In [861]:
filenamePrice ='LZ_GPA_DERI_AdjustedPriceForward_20.csv'
In [862]:
# Factors
# Value factor
filenamePE='LZ_GPA_VAL_PE.csv' # 市盈率
filenamePB='LZ_GPA_VAL_PB.csv' # 市净率
filenamePS = 'LZ_GPA_VAL_PS.csv' # 市销率
filenamePCF = 'LZ_GPA_VAL_PC.csv' # 市现率
# Growth factor
filenameYOYGR = 'LZ_GPA_FIN_IND_QFA_YOYGR.csv' # 单季度.营业总收入同比增长率(%)
filenameYOYNETPROFIT = 'LZ_GPA_FIN_IND_QFA_YOYNETPROFIT.csv' # 单季度.归属母公司股东的净利润同比增长率(%)
filenameYOYOCF = 'LZ_GPA_FIN_IND_YOYOCF.csv' # 同比增长率-经营活动产生的现金流量净额(%)
filenameYOYROE = 'LZ_GPA_FIN_IND_YOYROE.csv' # 同比增长率-净资产收益率(摊薄)(%)
filenameYOYBPS = 'LZ_GPA_FIN_IND_YOYBPS.csv' # 相对年初增长率-每股净资产(%)
# Financial factor
filenameCAPITALIZEDTODA = 'LZ_GPA_FIN_IND_CAPITALIZEDTODA.csv' # 资本支出/折旧和摊销
filenameCASHRATIO = 'LZ_GPA_FIN_IND_CASHRATIO.csv' # 保守速动比率
filenameCASHTOLIQDEBT = 'LZ_GPA_FIN_IND_CASHTOLIQDEBT.csv' # 货币资金/流动负债
filenameOCFTODEBT = 'LZ_GPA_FIN_IND_OCFTODEBT.csv' # 经营活动产生的现金流量净额/负债合计
filenamePROFITTOOP = 'LZ_GPA_FIN_IND_PROFITTOOP.csv' # 利润总额/营业收入
# Momentum factor
filenameTRUEMOM = 'LZ_GPA_USR_MOM.csv' # 真动量
filenameMOM1M = 'LZ_GPA_DERI_Momentum_1M.csv' # 一月反转
filenameMOM3M = 'LZ_GPA_TURNOVER_TurnoverAvg_3M.csv' # 三月反转
filenamePVO = 'LZ_GPA_DERI_PVO.csv' # 一年偏度
filenameRV1Y = 'LZ_GPA_DERI_RealizedVolatility_1Y.csv' # 一年收益波动
filenameABNORMALVOLUME = 'LZ_GPA_DERI_NormalizedAbormalVolume.csv' # 归一化异常交易量
filenameSKEW = 'LZ_GPA_DERI_TSKEW.csv'# 偏度
# Volatility factor
# Liquidity factor
filenameTURNOVER1M = 'LZ_GPA_TURNOVER_TurnoverAvg_1M.csv' # 一月换手率均值
filenameAMOUNTAVG1M = 'LZ_GPA_DERI_AmountAvg_1M.csv' # 日均成交量
filenameILLIQ = 'LZ_GPA_DERI_ILLIQ.csv' # 非流动性因子
filenameVOLUME = 'LZ_GPA_QUOTE_TVOLUME.csv' # 成交量
In [863]:
filenameFCAP = 'LZ_GPA_VAL_A_FCAP.csv'
filenameBENCH = 'LZ_GPA_INDXQUOTE_CLOSE.csv'
In [864]:
# some useful parameters
startTime = datetime.strptime('20120101', '%Y%m%d')
endTime = datetime.strptime('20161231', '%Y%m%d')
facNum = 5 # factor Num
path = ff.data_path # path
timeStampNum = 2500 # Number of time period
thresholdNum = 0.05 # thresholdNum to filter stocks by Nan's amount
HS300Index ='000300.SH' # HS300 index code
ZZ500Index = '000905.SH' # ZZ500 index code
In [865]:
stockPool = pd.read_csv(path+ZZ500Weight,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime:endTime]
#stockPool = stockPool.iloc[-1].dropna(how='any').index.tolist() # get today's ZX500 stock pool
stockPool1 = pd.read_csv(path+HS300Weight,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime:endTime]
In [866]:
stockPool = stockPool.dropna(how = 'all',axis =1).columns.tolist()
stockPool1 = stockPool1.dropna(how = 'all',axis =1).columns.tolist()
In [867]:
stockPool = list((set(stockPool1)|set(stockPool)))
In [868]:
len(stockPool)
Out[868]:
In [869]:
class dataCleaning():
    """Restrict a factor DataFrame to the stock pool / date window and
    cross-sectionally standardize it.

    NOTE(review): relies on the module-level globals ``stockPool``,
    ``startTime`` and ``endTime`` being defined before ``narrowData``
    is called — confirm cell execution order on a fresh kernel.
    """

    def __init__(self, data):
        # Raw factor frame: index = trade dates, columns = stock codes.
        self.__data = data

    def narrowData(self):
        """Return the data restricted to the global stock pool and date range."""
        return self.__data[stockPool].loc[startTime:endTime]

    def Normalize(self, narrowedData):
        """Cross-sectional z-score per date: subtract the mean over stocks
        and divide by the sample std over stocks, row by row.

        The original implementation computed a MAD term
        (1.483 * |x - median|) but never used it, so no winsorization was
        ever applied; that dead computation is removed here.
        NOTE(review): if MAD winsorization was intended, values should be
        clipped (e.g. at 3*MAD) before standardizing — confirm with author.
        """
        transposed = narrowedData.T  # rows = stocks, columns = dates
        standardized = (transposed - transposed.mean(axis=0, skipna=True)) \
            / transposed.std(axis=0, skipna=True)
        return standardized.T
In [870]:
PriceDF = pd.read_csv(path+filenamePrice,infer_datetime_format=True,parse_dates=[0],index_col=0)[stockPool].loc[startTime:endTime]
In [871]:
LFCAPData = np.log10(pd.read_csv(path+filenameFCAP,infer_datetime_format=True,parse_dates=[0],index_col=0))
PBData = pd.read_csv(path+filenamePB,infer_datetime_format=True,parse_dates=[0],index_col=0)
YOYBPSData = pd.read_csv(path+filenameYOYBPS,infer_datetime_format=True,parse_dates=[0],index_col=0)
AMOUNTAVG1MData = pd.read_csv(path+filenameAMOUNTAVG1M,infer_datetime_format=True,parse_dates=[0],index_col=0)
TURNOVER1MData = pd.read_csv(path+filenameTURNOVER1M,infer_datetime_format=True,parse_dates=[0],index_col=0)
PROFITOOPData = pd.read_csv(path+filenamePROFITTOOP,infer_datetime_format=True,parse_dates=[0],index_col=0)
benchmarkData = pd.read_csv(path+filenameBENCH,infer_datetime_format=True,parse_dates=[0],index_col=0)[ZZ500Index].loc[startTime:endTime]
In [872]:
#isinstance(dataCleaning(PBData).narrowData(),dataCleaning)
In [873]:
PBData = dataCleaning(PBData).Normalize(dataCleaning(PBData).narrowData())
LFCAPData = dataCleaning(LFCAPData ).Normalize(dataCleaning(LFCAPData ).narrowData())
YOYBPSData = dataCleaning(YOYBPSData).Normalize(dataCleaning(YOYBPSData).narrowData())
AMOUNTAVG1MData = dataCleaning(AMOUNTAVG1MData).Normalize(dataCleaning(AMOUNTAVG1MData).narrowData())
TURNOVER1MData = dataCleaning(TURNOVER1MData).Normalize(dataCleaning(TURNOVER1MData).narrowData())
PROFITOOPData = dataCleaning(PROFITOOPData).Normalize(dataCleaning(PROFITOOPData).narrowData())
In [874]:
print PBData.shape[1], LFCAPData.shape[1], YOYBPSData.shape[1], AMOUNTAVG1MData.shape[1], TURNOVER1MData.shape[1], PROFITOOPData.shape[1]
In [875]:
#PROFITOOPData
In [991]:
# independentfactor should be a list contains of dataframe
def orthoFactor(factordf, independentfactor):
    '''
    Orthogonalize a factor against one or more other factors, date by date.

    For every date a cross-sectional OLS regression of ``factordf`` on the
    independent factors is fitted and the residual (the orthogonalized
    exposure) is stored for that date.

    output: DataFrame, the orthogonalized result of factordf
    input:
        factordf: DataFrame, factor to be orthogonalized
                  (index = dates, columns = stocks)
        independentfactor: list of DataFrames used as regressors; all must
                  share factordf's index and columns
    raises:
        ValueError: if ``independentfactor`` is empty
    '''
    residualDF = pd.DataFrame(index=factordf.index, columns=factordf.columns,
                              data=None, dtype=float)
    dfNum = len(independentfactor)
    if dfNum == 0:
        # Raise with the message attached instead of the original
        # print-then-raise (which also used a Python-2-only print statement).
        raise ValueError('Input is an empty list!')
    for date in factordf.index:
        factordfSlice = factordf.loc[date]
        # Cross-sectional slice of every regressor for this date.
        slices = [x.loc[date] for x in independentfactor]
        if dfNum > 1:
            totaldf = pd.concat(slices, axis=1)
        else:
            totaldf = slices[0]
        # No intercept: factors are assumed already standardized upstream.
        result = sm.OLS(factordfSlice.T, totaldf).fit()
        residualDF.loc[date] = result.resid
    return residualDF
In [ ]:
In [876]:
PBData = PBData .loc[:, PBData .isnull().sum() < (len(PBData) * thresholdNum)]
LFCAPData = LFCAPData .loc[:, LFCAPData .isnull().sum() < (len(LFCAPData) * thresholdNum)]
YOYBPSData = YOYBPSData .loc[:, YOYBPSData .isnull().sum() < (len(YOYBPSData) * thresholdNum)]
AMOUNTAVG1MData = AMOUNTAVG1MData .loc[:, AMOUNTAVG1MData .isnull().sum() < (len(AMOUNTAVG1MData) * thresholdNum)]
TURNOVER1MData = TURNOVER1MData .loc[:, TURNOVER1MData .isnull().sum() < (len(TURNOVER1MData) * thresholdNum)]
PROFITOOPData = PROFITOOPData .loc[:, PROFITOOPData .isnull().sum() < (len(PROFITOOPData ) * thresholdNum)]
PriceDF = PriceDF.loc[:, (PriceDF.isnull().sum() < (len(PriceDF) * thresholdNum))]
In [877]:
print PBData.shape[1], LFCAPData.shape[1], YOYBPSData.shape[1] ,AMOUNTAVG1MData.shape[1], TURNOVER1MData.shape[1], PROFITOOPData.shape[1]
In [878]:
len(list((DFcolumns(PBData)) & (DFcolumns(YOYBPSData))))
Out[878]:
In [879]:
def DFcolumns(df):
    """Return the frame's column labels as a set (used to intersect pools)."""
    return set(df.columns)
In [880]:
##
stkPool = list((DFcolumns(PBData)) & (DFcolumns(YOYBPSData)) & (DFcolumns(AMOUNTAVG1MData)) & (DFcolumns(TURNOVER1MData)) & (DFcolumns(PROFITOOPData))\
& (DFcolumns(PriceDF)))
len(stkPool)
Out[880]:
In [881]:
#
def fillData(df, stkPool):
    """Keep only the columns listed in ``stkPool``; fill gaps forward in
    time, then backward so leading NaNs are covered as well."""
    selected = df[stkPool]
    return selected.ffill().bfill()
In [882]:
PBData = fillData(PBData,stkPool)
LFCAPData = fillData(LFCAPData,stkPool)
YOYBPSData = fillData(YOYBPSData,stkPool)
AMOUNTAVG1MData = fillData(AMOUNTAVG1MData,stkPool)
TURNOVER1MData = fillData(TURNOVER1MData,stkPool)
PROFITOOPData = fillData(PROFITOOPData,stkPool)
PriceDF = fillData(PriceDF,stkPool)
In [932]:
LFCAPData.shape
Out[932]:
In [962]:
orthorizedDF = orthoFactor(YOYBPSData,[PROFITOOPData])
In [963]:
# NOTE(review): the result is assigned to ``correlationDF1`` but the next
# line describes ``correlationDF``, which is only defined in a later cell —
# likely a typo; on a fresh kernel (Restart & Run All) this raises
# NameError. Confirm which variable was intended.
correlationDF1 = ff.showCorrelation(orthorizedDF ,PROFITOOPData, orthorizedDF.index, filterdic = None).astype(float)
correlationDF.describe()
Out[963]:
In [965]:
correlationDF = ff.showCorrelation(YOYBPSData ,PROFITOOPData, orthorizedDF.index, filterdic = None).astype(float)
correlationDF.describe()
Out[965]:
In [924]:
orthoFactor(PBData,[LFCAPData,YOYBPSData])
Out[924]:
In [884]:
returnDF = PriceDF.pct_change()[1:]
returnOfBench = benchmarkData.pct_change()[1:]
In [885]:
activeReturnDF = returnDF .apply(lambda x: x - returnOfBench )
activeReturnDF.head().iloc[:,2].values
Out[885]:
In [886]:
factorReturn = pd.DataFrame(index = returnDF.index, columns = ['LFCA', 'PB', 'YOYBPS', 'AMOUNTAVG1MData', 'TURNOVER1M' ,\
'PROFITOOP'],data= None ,dtype =float)
factorPvalue = pd.DataFrame(index = returnDF.index, columns = ['LFCA', 'PB', 'YOYBPS', 'AMOUNTAVG1MData', 'TURNOVER1M' ,\
'PROFITOOP'],data= None ,dtype =float)
In [887]:
specificReturn = pd.DataFrame(index = returnDF.index, columns = LFCAPData.columns ,dtype =float )
In [1655]:
# construct the multiple factor structural risk model
def multiFactorReg(returndf,factorDict,WLS =False, weightdf = None):
'''
Muti variable regression for return.
returndf and dataframes in factorDict should have same index and same columns
output: 3 Dataframe, respectively idiosyncratic return for each stock, factor Return and factor P-value
input:
returndf: Dataframe, can either be return or acticve return
factorDict: Dictionary, the keys are the names of factors and the values are the corresponding factor dataframe(all \
with same columns and index)
WLS: True to use WLS , False to use OLS. If True, then weightdf should not be none.
weightdf: Dataframe , which has no nan and the shape is same as dataframes in factorDict
'''
specificReturn = pd.DataFrame(index = returndf.index, columns =returndf.columns , data= None, dtype =float)
factorReturn = pd.DataFrame(index = returndf.index, columns =factorDict.keys(), data= None, dtype =float )
factorPvalue = pd.DataFrame(index = returndf.index, columns =factorDict.keys(), data= None, dtype =float )
dfNum = len(factorDict.keys())
if dfNum == 0:
print 'Input is an empty list!'
raise ValueError
for date in returndf.index:
returndfSlice = returndf.loc[date]
mapfunction = map(lambda x: x.loc[date], factorDict.values())
if dfNum > 1:
totaldf = pd.concat(mapfunction, axis=1)
else:
totaldf = factorDict.values()[0].loc[date]
if WLS:
w = weightdf.loc[date]
result = sm.WLS(returndfSlice.T,totaldf,weights = 1/w ).fit()
else:
result = sm.OLS(returndfSlice.T,totaldf ).fit()
specificReturn .loc[date] = result.resid
factorReturn .loc[date] = result.params.values
factorPvalue . loc[date] = result.pvalues.values
return specificReturn, factorReturn, factorPvalue
In [1609]:
factordic = {'LFCA':LFCAPData, 'PB':PBData, 'YOYBPS':YOYBPSData, 'AMOUNTAVG1MData':AMOUNTAVG1MData, 'TURNOVER1M' :TURNOVER1MData,\
'PROFITOOP':PROFITOOPData}
In [1630]:
LFCAPData.isnull().sum().sum()
Out[1630]:
In [1637]:
# set weight for WLS
weightdf = pd.read_csv(path+filenameFCAP,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime:endTime]
weightdf = fillData(weightdf,stkPool)
In [1638]:
# the square root is the weight
weight = np.sqrt(weightdf)
In [1658]:
weight.head()
Out[1658]:
In [1657]:
multiFactorReg(activeReturnDF,factordic,True,weight)[1].tail()
Out[1657]:
In [1656]:
multiFactorReg(activeReturnDF,factordic)[1].tail()
Out[1656]:
In [983]:
for date in returnDF.index:
activeReturnSlice = activeReturnDF .loc[date]
LFCAPDataSlice = LFCAPData.loc[date]
PBDataSlice = PBData.loc[date]
YOYBPSDataSlice = YOYBPSData.loc[date]
AMOUNTAVG1MDataSlice = AMOUNTAVG1MData .loc[date]
TURNOVER1MDataSlice = TURNOVER1MData.loc[date]
PROFITOOPDataSlice = PROFITOOPData.loc[date]
totalDF = pd.concat([LFCAPDataSlice,PBDataSlice,YOYBPSDataSlice,AMOUNTAVG1MDataSlice,TURNOVER1MDataSlice,PROFITOOPDataSlice],axis=1)
#print totalDF,PBDataSlice
result = sm.OLS(activeReturnSlice.T,totalDF ).fit()
factorReturn . loc[date] = result.params.values
factorPvalue . loc[date] = result.pvalues.values
specificReturn .loc[date] = result.resid
result.summary()
Out[983]:
In [1287]:
factorReturn.head()
Out[1287]:
In [1278]:
def calEWMcovariance(facRetdf, decay=0.94):
    '''
    Exponentially weighted covariance matrix of the given factor returns.

    Observation i (0-based, oldest first) is scaled by
    sqrt(decay**(m-1-i)) before the cross-product, so the most recent row
    carries the largest weight; the matrix is then normalized by
    (1-decay)/(1-decay**m) so the weights sum to one.

    output: DataFrame, the EWM covariance matrix of the factors
    input:
        facRetdf: DataFrame, factor returns (index = dates, columns = factors)
        decay: float, decay factor
        Decay factors were set at:
            - 0.94 (1-day) from 112 days of data;
            - 0.97 (1-month) from 227 days of data.
    '''
    m = len(facRetdf)
    demeaned = facRetdf - facRetdf.mean()
    # Vectorized weighting replaces the original per-row Python loop,
    # which also relied on the Python-2-only ``xrange``.
    weights = np.sqrt(decay ** np.arange(m - 1, -1, -1))
    weighted = demeaned.mul(weights, axis=0)
    return weighted.T.dot(weighted) * (1 - decay) / (1 - decay ** m)
In [1345]:
compara = 1/0.94 - 1
ewmMatrix = factorReturn.ewm(ignore_na=True, min_periods=0, com=compara).cov(pairwise = True)[-200:].iloc[-1]
In [1274]:
calEWMcovariance(factorReturn.iloc[-200:], decay=0.94)
Out[1274]:
In [1350]:
# calculate monthly scaled variance forecast for the market index by DEWIV
alphaS = 21*returnOfBench.ewm(ignore_na=True, min_periods=0, com=compara).cov(pairwise = True)[-200:].iloc[-1]
In [1351]:
alphaS
Out[1351]:
In [1307]:
benchmarkWeight = pd.read_csv(path+ZZ500Weight,infer_datetime_format=True,parse_dates=[0],index_col=0)[stkPool].loc[startTime:endTime].fillna(0)
In [1308]:
benchmarkWeight
Out[1308]:
In [1310]:
riskExposureDF = pd.concat(map(lambda x: x.iloc[-1], factordic .values()), axis=1)
In [1344]:
factordic .values()[3].iloc[-1].head()
Out[1344]:
In [1328]:
# note that dictionary is not used in the given order, so make sure keys pair the values!
factordic.keys()
Out[1328]:
In [1341]:
riskExposureDF.columns= factordic.keys()
In [1359]:
riskExposureDF.head()
Out[1359]:
In [1708]:
## Only conserve the diagonal element
# Build a diagonal-only specific-risk matrix: copy the EWM covariance,
# zero the copy's diagonal, and subtract, leaving only the diagonal.
spEwmMatrix = calEWMcovariance(specificReturn[-200:],decay=0.94)
tempo = spEwmMatrix .copy()
# NOTE(review): ``digVal`` is never used afterwards — dead assignment.
digVal = np.diag(spEwmMatrix)
# NOTE(review): np.fill_diagonal mutates ``tempo.values`` in place and
# returns None, so ``a`` is always None — the assignment is superfluous.
a = np.fill_diagonal(tempo.values ,0)
finalSpMat = spEwmMatrix -tempo
In [1709]:
finalSpMat.head()
Out[1709]:
In [1734]:
# calculate monthly specific risk of the market index
alphaSP = reduce(lambda x,y: x.dot(y),[benchmarkWeight.iloc[-1], finalSpMat, benchmarkWeight.iloc[-1].T ])
In [1735]:
alphaSP
Out[1735]:
In [1736]:
# calculate total variance of the market index
alphaM = reduce(lambda x,y: x.dot(y),[benchmarkWeight.iloc[-1], riskExposureDF, ewmMatrix, riskExposureDF.T, benchmarkWeight.iloc[-1].T ])+alphaSP
In [1737]:
alphaM
Out[1737]:
In [1738]:
ewmMatrix
Out[1738]:
In [1739]:
# can not use np.dot on two series to construct a matrix
benchmarkWeightDF = pd.DataFrame(benchmarkWeight.iloc[-1])
In [1741]:
lastPart = reduce(lambda x,y: x.dot(y),[ewmMatrix, riskExposureDF.T, benchmarkWeightDF, \
benchmarkWeightDF.T, riskExposureDF,ewmMatrix ])
lastPart
Out[1741]:
In [1758]:
finalCovMatrix = ewmMatrix + ((alphaS - alphaM)/(alphaS - alphaSP)) * lastPart
finalCovMatrix
Out[1758]:
In [1813]:
stocks = pd.read_csv(path+ZZ500Weight,infer_datetime_format=True,parse_dates=[0],index_col=0)[stkPool].loc[startTime:endTime].iloc[-1].dropna()
optStkPool = stocks.index.tolist()
riskExposureDFSlice = riskExposureDF.loc[optStkPool]
In [1814]:
# final covriance to optimize
toOptCov = reduce(lambda x,y: x.dot(y),[riskExposureDFSlice,finalCovMatrix,riskExposureDFSlice.T])
In [1815]:
toOptCov
Out[1815]:
In [1858]:
# optimize
factorNum = riskExposureDFSlice.shape[1]
stkNum = toOptCov.shape[1]
P = cv.matrix(toOptCov.values)
q = cv.matrix(0.0, (stkNum, 1))
G = cv.matrix(np.concatenate((riskExposureDFSlice.T.values,np.diag(np.ones(stkNum)), - np.diag(np.ones(stkNum)))))
h = cv.matrix(reduce(lambda x,y : np.append(x,y),[0.01 * np.ones(factorNum),0.005 * np.ones(stkNum), np.zeros(stkNum)]))
A = cv.matrix(np.ones(stkNum)).T
b = cv.matrix(1.0).T
sol = solvers.qp(P, q, G, h, A, b)
In [1893]:
print sol['x']
In [1838]:
min(sol['x'])
Out[1838]:
In [1743]:
####
dates = [pd.Timestamp('2012-05-01'), pd.Timestamp('2012-05-02'), pd.Timestamp('2012-05-03')]
s = pd.DataFrame(index = dates ,data =[[5,6,7],[8,6,0],[1,2,3]],columns=[1,2,3] )
d = pd.DataFrame(index = dates,data =[[1,2,3,1],[2,3,1,2],[3,1,2,3]],columns=['hs','hc','hv','hg'] )
In [1744]:
dd =d.copy()
for i in dates:
sSlice = s.loc[i]
dSlice = d.loc[i]
dd.loc[i] = sSlice.loc[dSlice].values
print dd
In [ ]:
In [1745]:
#d.apply(lambda x: s[x.values] ,axis=0)
In [1746]:
color_set=sns.light_palette((210, 90, 60), input="husl")
(factorPvalue[factorPvalue < 0.05].count()/len(factorPvalue)).plot(figsize=(18,12),kind='bar',color=sns.color_palette(color_set,10))
Out[1746]:
In [1747]:
factorReturn.cumsum().plot(figsize=(20,14))
Out[1747]:
In [1748]:
factorReturn.describe()
Out[1748]:
In [1749]:
result2 = sm.OLS(factorReturn['PB'],factorReturn['LFCA']).fit()
In [1750]:
result2.summary()
Out[1750]:
In [1751]:
specificReturn
Out[1751]:
In [ ]:
In [1752]:
result1 = sm.OLS(returnOfBench, factorReturn).fit()
In [1753]:
result1 .summary()
Out[1753]:
In [1754]:
result1.resid.head(10)
Out[1754]:
In [ ]:
In [1755]:
print PBData.shape,YOYBPSData.shape,AMOUNTAVG1MData .shape, TURNOVER1MData.shape,PROFITOOPData.shape
In [ ]:
In [ ]:
In [1756]:
#
#stDF = pd.read_csv(path+filenameST,infer_datetime_format=True,parse_dates=[0],index_col=0)[-timeStampNum-1:-5]
#tradeDayDF = pd.read_csv(path+filenameTradeday,infer_datetime_format=True,parse_dates=[0],index_col=0)[-timeStampNum-1:-5]
#stopFlagDF = pd.read_csv(path+filenameStopFlag,infer_datetime_format=True,parse_dates=[0],index_col=0)[-timeStampNum-1:-5]
In [1890]:
# calculate my own illq factor
filenameClose = 'LZ_GPA_QUOTE_TCLOSE.csv'
filenameOpen = 'LZ_GPA_QUOTE_TOPEN.csv'
filenameVolume = 'LZ_GPA_QUOTE_TVOLUME.csv'
def calcILLQ():
    """Compute a 20-day rolling illiquidity-style factor,
    |close - open| / open / volume smoothed with a 20-day mean, and
    persist it to ``LZ_GPA_DERI_OWNILLIQ_20.csv`` under ``path``.

    Reads the open/close/volume CSVs named by the module-level filename
    constants. Returns the rolling-mean DataFrame on success; when the
    open and close frames disagree in shape it only prints a diagnostic
    and implicitly returns None (callers like ``calcILLQ().tail()`` would
    then fail).
    """
    openPrice = pd.read_csv(path+filenameOpen,infer_datetime_format=True,parse_dates=[0],index_col=0)
    # Drop the last 3 close rows and align columns with the open frame.
    # NOTE(review): the reason for ``.iloc[:-3]`` is not visible here —
    # presumably trailing rows are incomplete; confirm with the data source.
    closePrice = pd.read_csv(path+filenameClose,infer_datetime_format=True,parse_dates=[0],index_col=0).iloc[:-3][openPrice.columns]
    volume = pd.read_csv(path+filenameVolume,infer_datetime_format=True,parse_dates=[0],index_col=0)
    if openPrice.shape != closePrice.shape:
        print openPrice.shape, closePrice.shape
        print 'data shape is not equal!'
    else:
        # Daily open-to-close price impact per unit volume, averaged over
        # a 20-day window (NaN until 20 observations are available).
        newdf = np.abs((closePrice - openPrice)/openPrice)/volume
        newdf = newdf.rolling(min_periods=20,window=20,center=False).mean()
        newdf.index.name = 'LZ_GPA_DERI_OWNILLIQ_20-d'
        newdf.to_csv(path+'LZ_GPA_DERI_OWNILLIQ_20.csv',na_rep='NaN',date_format='%Y%m%d')
        return newdf
In [1892]:
calcILLQ().tail()
Out[1892]: