In [2]:
#!/Tsan/bin/python
# -*- coding: utf-8 -*-
In [3]:
# Libraries to use
from __future__ import division
import os
import time
from datetime import datetime, date
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import h5py
import talib
In [5]:
# Import My own library for factor testing
from SingleFactorTest import factorFilterFunctions as ff
#from config import *
from SingleFactorTest.calcOwnFactors import CalOwnFactor
In [6]:
%load_ext line_profiler
In [7]:
%matplotlib inline
In [8]:
path = ff.data_path  # root directory of the factor data files
In [ ]:
In [9]:
# Constants
startTime = datetime.strptime('20120104', '%Y%m%d')
endTime = datetime.strptime('20170928', '%Y%m%d')
In [ ]:
###### Note: all factor DataFrames used together must have the same length; the sliceData(startTime, endTime) method of the CalOwnFactor class can slice them to a common date range.
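In [ ]:
# A minimal sketch of the slicing workflow described above, assuming the two
# filenames below exist under `path`; addData/sliceData/datadict follow the
# CalOwnFactor usage shown in the cells further down.
demo = CalOwnFactor(path)
demo.addData('close', 'LZ_CN_STKA_QUOTE_TCLOSE.h5')
demo.addData('fcap', 'LZ_CN_STKA_VAL_A_FCAP.h5')
demo.sliceData(startTime, endTime)  # trims every DataFrame in datadict to [startTime, endTime]
assert demo.datadict['close'].shape[0] == demo.datadict['fcap'].shape[0]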
In [6]:
# --------------------------------------- Global function definitions ---------------------------------- #
In [ ]:
# --------------------------------------- neutralize class ---------------------------------- #
In [10]:
class neutralize(CalOwnFactor):
classname = 'neutralize'
    def __init__(self, path, filenameFCAP, factorData):
        super(neutralize, self).__init__(path)  # arguments in super method should be exactly same as the args in parent class
        self.logFCAP = np.log10(ff.readh5data(path, filenameFCAP))
        self.factorData = factorData
    def simpleNormalize(self, factorData):
        # Cross-sectional winsorize at median +/- 3 * 1.483 * MAD, then z-score standardize
        dataTrans = factorData.copy().T
        median = dataTrans.median(skipna=True)
        MAD = 1.483 * (dataTrans - median).abs().median(skipna=True)
        dataTrans = dataTrans.clip(lower=median - 3 * MAD, upper=median + 3 * MAD, axis=1)
        return ((dataTrans - dataTrans.mean(axis=0, skipna=True)) /
                dataTrans.std(axis=0, skipna=True)).T
    def neutralizeFactor(self, datelist):
        normalizedLFCAPDF = self.simpleNormalize(self.logFCAP)
        normalizedFactor = self.simpleNormalize(self.factorData)
        factorNeutralized = pd.DataFrame(index=normalizedFactor.index, columns=normalizedFactor.columns, dtype=float)
        for date in datelist:
            LFCAPIndice = normalizedLFCAPDF.loc[date].dropna()
            factorIndice = normalizedFactor.loc[date].dropna()
            intersectionStocks = list(set(LFCAPIndice.index) & set(factorIndice.index))
            #dummy_Matrix = pd.get_dummies(IndustryDF.loc[date]).T.iloc[:-1]
            #dummy_Matrix = dummy_Matrix[intersectionStocks].append(LFCAPIndice.loc[intersectionStocks])
            try:
                # Regress the factor on log market cap; the residual is the size-neutralized factor
                result = sm.OLS(factorIndice.loc[intersectionStocks], LFCAPIndice.loc[intersectionStocks]).fit()
                factorNeutralized.loc[date, intersectionStocks] = result.resid
            except Exception:
                factorNeutralized.loc[date] = np.NaN
        self.factorNeutralized = factorNeutralized.round(4)
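In [ ]:
# A minimal usage sketch for the class above: `someFactorDF` stands for any
# factor DataFrame (dates as index, stock codes as columns) and is a
# placeholder, not a variable defined in this notebook.
neu = neutralize(path, 'LZ_CN_STKA_VAL_A_FCAP.h5', someFactorDF)
neu.neutralizeFactor(someFactorDF.index)
neutralizedDF = neu.factorNeutralized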
In [ ]:
In [ ]:
In [ ]:
In [12]:
# top functions
# Winsorize-and-standardize function; a usage example appears below
def simpleNormalize(narrowedData):
    # Cross-sectional winsorize at median +/- 3 * 1.483 * MAD, then z-score standardize
    dataTrans = narrowedData.copy().T
    median = dataTrans.median(skipna=True)
    MAD = 1.483 * (dataTrans - median).abs().median(skipna=True)
    dataTrans = dataTrans.clip(lower=median - 3 * MAD, upper=median + 3 * MAD, axis=1)
    return ((dataTrans - dataTrans.mean(axis=0, skipna=True)) /
            dataTrans.std(axis=0, skipna=True)).T
In [13]:
# top function
# Size-neutralization function; a usage example appears below
def neutralizeFactor(normalizedFactorDF, normalizedLFCAPDF, datelist):
    factorNeutralized = pd.DataFrame(index=normalizedFactorDF.index, columns=normalizedFactorDF.columns, dtype=float)
    for date in datelist:
        LFCAPIndice = normalizedLFCAPDF.loc[date].dropna()
        factorIndice = normalizedFactorDF.loc[date].dropna()
        intersectionStocks = list(set(LFCAPIndice.index) & set(factorIndice.index))
        #dummy_Matrix = pd.get_dummies(IndustryDF.loc[date]).T.iloc[:-1]
        #dummy_Matrix = dummy_Matrix[intersectionStocks].append(LFCAPIndice.loc[intersectionStocks])
        try:
            # Regress the factor on log market cap; the residual is the size-neutralized factor
            result = sm.OLS(factorIndice.loc[intersectionStocks], LFCAPIndice.loc[intersectionStocks]).fit()
            factorNeutralized.loc[date, intersectionStocks] = result.resid
        except Exception:
            factorNeutralized.loc[date] = np.NaN
return factorNeutralized
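In [ ]:
# A quick self-contained check of the two functions above on synthetic data
# (illustrative only): after neutralization the factor should be roughly
# uncorrelated with log market cap on each date.
np.random.seed(0)
dates = pd.date_range('2017-01-02', periods=3)
stocks = ['s%d' % i for i in range(50)]
fakeSize = pd.DataFrame(np.random.randn(3, 50), index=dates, columns=stocks)
fakeFactor = 0.6 * fakeSize + 0.4 * pd.DataFrame(np.random.randn(3, 50), index=dates, columns=stocks)
neuDemo = neutralizeFactor(simpleNormalize(fakeFactor), simpleNormalize(fakeSize), dates)
print neuDemo.iloc[0].corr(fakeSize.iloc[0])  # close to 0 after neutralization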
In [ ]:
In [9]:
# --------------------------------------- Function Section End ---------------------------------- #
In [ ]:
In [36]:
# Winsorize and standardize the log market cap
filenameFCAP = 'LZ_CN_STKA_VAL_A_FCAP.h5'
# Data prepared for neutralization
FCAP1 = np.log10(ff.readh5data(path,filenameFCAP))
NormalizedFCAP = simpleNormalize(FCAP1)
In [ ]:
In [ ]:
# --------------------------------------- AdjustedPrice ---------------------------------- #
In [ ]:
# Forward-adjusted close price calculation
In [155]:
filenameAdjustFactor = 'LZ_CN_STKA_CMFTR_CUM_FACTOR.h5'
filenameClose = 'LZ_CN_STKA_QUOTE_TCLOSE.h5'
filenameOpen = 'LZ_CN_STKA_QUOTE_TOPEN.h5'
In [152]:
# First, calculate the forward-adjusted price
class AdjustedPrice(CalOwnFactor):
classname = 'AdjustedPrice'
def __init__(self,path):
super(AdjustedPrice,self).__init__(path) # arguments in super method should be exactly same as the args in parent class
#self.setting = setting
    def calAdjustedPrice(self, adjFacBackward, originalPrice):
        # Rescale the cumulative (backward) adjustment factor by its latest value to get the forward factor
        AdjFacforward = adjFacBackward / adjFacBackward.iloc[-1]
        self.adjustedPrice = (AdjFacforward * originalPrice).round(3)
In [153]:
# Forward-adjusted close price
adjc = AdjustedPrice(path)
adjc.addData('adjf',filenameAdjustFactor)
adjc.addData('close',filenameClose)
adjc.calAdjustedPrice(adjc.datadict['adjf'],adjc.datadict['close'])
adjc.saveData(adjc.adjustedPrice,'OwnfactorAdjustedClose')
In [156]:
# Forward-adjusted open price
adjo = AdjustedPrice(path)
adjo.addData('adjf', filenameAdjustFactor)
adjo.addData('open', filenameOpen)
adjo.calAdjustedPrice(adjo.datadict['adjf'], adjo.datadict['open'])
adjo.saveData(adjo.adjustedPrice, 'OwnfactorAdjustedOpen')
In [ ]:
# --------------------------------------- end AdjustedPrice section ---------------------------------- #
In [12]:
# --------------------------------------- Mass Index ---------------------------------- #
In [ ]:
# Mass Index: 9-day EMA of the high-low range divided by the 9-day EMA of that EMA, then the 25-day rolling sum
In [71]:
filenameOpen = 'LZ_CN_STKA_QUOTE_TOPEN.h5'  # open price
filenameClose = 'LZ_CN_STKA_QUOTE_TCLOSE.h5'  # close price
filenameHigh = 'LZ_CN_STKA_QUOTE_THIGH.h5'  # high price
filenameLow = 'LZ_CN_STKA_QUOTE_TLOW.h5'  # low price
#
filenameVolume = 'LZ_CN_STKA_QUOTE_TVOLUME.h5'
In [14]:
class MassIndex(CalOwnFactor):
classname = 'MassIndex'
def __init__(self,path):
super(MassIndex,self).__init__(path) # arguments in super method should be exactly same as the args in parent class
#self.setting = setting
    def calEma(self, data, window=9):
        Ema = data.copy()
        NanCount = 0
        for name in data.columns:
            try:
                Ema[name] = talib.EMA(data[name].values, timeperiod=window)
            except Exception as e:
                # talib raises when a column is entirely NaN; record it and move on
                assert str(e) == 'inputs are all NaN'
                Ema[name] = np.NaN
                NanCount += 1
        return Ema
    def calMassIndex(self, hlRange, rolling_window=25):
        self.singleEMA = self.calEma(hlRange)
        self.doubleEMA = self.calEma(self.singleEMA)
        self.MassIndex = (self.singleEMA / self.doubleEMA).rolling(window=rolling_window, min_periods=rolling_window).sum().round(3)
In [15]:
# cal massindex
massindex = MassIndex(path)
massindex.addData('high',filenameHigh)
massindex.addData('low',filenameLow)
massindex.calMassIndex(massindex.datadict['high'] - massindex.datadict['low'])
massindex.saveData(massindex.MassIndex,'OwnfactorMassIndex')
In [16]:
# --------------------------------------- end Mass Index section ---------------------------------- #
In [ ]:
In [ ]:
# --------------------------------------- Calculate Daily Deal Amount in yuan (DDA) ---------------------------------- #
In [ ]:
# DDA: 20-day rolling mean of (forward-adjusted close * volume)
In [14]:
filenameVolume = 'LZ_CN_STKA_QUOTE_TVOLUME.h5'
filenameAdjClose = 'OwnfactorAdjustedClose.h5'
In [15]:
class DDA(CalOwnFactor):
classname = 'DDA'
def __init__(self,path):
super(DDA,self).__init__(path) # arguments in super method should be exactly same as the args in parent class
#self.setting = setting
    def calDDA(self, volume, adjPrice, rolling_window=20):
        self.DDA = (volume * adjPrice).rolling(window=rolling_window, min_periods=rolling_window).mean().round(3)
In [17]:
# cal DDA
dda = DDA(path)
dda.addData('volume', filenameVolume)
dda.addData('adjprice', filenameAdjClose)
dda.calDDA(dda.datadict['volume'], dda.datadict['adjprice'])
dda.saveData(dda.DDA, 'OwnfactorDDA20D')
In [18]:
# --------------------------------------- end dda section ---------------------------------- #
In [ ]:
In [ ]:
# --------------------------------------- Calculate x-day return skew ---------------------------------- #
In [ ]:
# 250-day rolling skew of stock returns
In [19]:
class ReturnSkew(CalOwnFactor):
classname = 'ReturnSkew'
def __init__(self,path):
super(ReturnSkew,self).__init__(path) # arguments in super method should be exactly same as the args in parent class
#self.setting = setting
    def calReturnSkew(self, adjPrice, rolling_window=250):
        self.ReturnSkew = adjPrice.pct_change().rolling(window=rolling_window, min_periods=rolling_window).skew().round(3)
In [21]:
# cal return skew
rs= ReturnSkew(path)
rs.addData('adjprice', filenameAdjClose)
rs.calReturnSkew(rs.datadict['adjprice'])
rs.saveData(rs.ReturnSkew,'OwnfactorReturnSkew250D')
In [ ]:
# --------------------------------------- end return Skew section---------------------------------- #
In [ ]:
In [ ]:
# --------------------------------------- Calculate SortinoRatio ---------------------------------- #
In [ ]:
# N-day Sortino ratio of a stock (annualized return / downside volatility)
In [28]:
class SortinoRatio(CalOwnFactor):
classname = 'SortinoRatio'
def __init__(self,path):
super(SortinoRatio,self).__init__(path) # arguments in super method should be exactly same as the args in parent class
#self.setting = setting
    def calDownsideRisk(self, adjPrice, rolling_window=20, period=120):
        returndf = adjPrice.pct_change()
        averageReturn = returndf.rolling(window=rolling_window, min_periods=rolling_window).mean().round(3)
        # Rolling std of the below-average returns only, scaled by sqrt(252/period)
        self.downsideRisk = (returndf[returndf < averageReturn].rolling(min_periods=period, window=period, center=False).std()) \
            * np.sqrt(252 / period)
    def calSortinoRatio(self, adjPrice, period=252):
        annualReturn = (adjPrice.pct_change().rolling(window=period, min_periods=20).mean().round(3) + 1) ** (252 / period) - 1
        self.SortinoRatio = annualReturn / self.downsideRisk
In [29]:
# cal Sortino ratio
sr= SortinoRatio(path)
sr.addData('adjprice', filenameAdjClose)
sr.calDownsideRisk(sr.datadict['adjprice'])
sr.calSortinoRatio(sr.datadict['adjprice'])
sr.saveData(sr.SortinoRatio,'OwnfactorSortinoRatio252D')
In [30]:
# --------------------------------------- End SortinoRatio---------------------------------- #
In [ ]:
In [ ]:
# --------------------------------------- Calculate TurnOver Rate Volatility ---------------------------------- #
In [ ]:
# Std of the 20-day turnover rate, plus a size-neutralized version of the same
In [40]:
filenameTOR = 'LZ_CN_STKA_VAL_TURN.h5'  # turnover rate
In [31]:
# the turnover-rate volatility factor
class TORV(CalOwnFactor):
classname = 'TORV'
def __init__(self,path):
super(TORV,self).__init__(path) # arguments in super method should be exactly same as the args in parent class
#self.setting = setting
    def calTORV(self, turnoverdf, period=20):
        self.TORV = turnoverdf.rolling(min_periods=period, window=period, center=False).std()
In [35]:
# cal turnOverRate std
torv = TORV(path)
torv.addData('turnOverRate', filenameTOR)
torv.calTORV(torv.datadict['turnOverRate'])
torv.saveData(torv.TORV,'OwnFactorTurnoverVolatility20D')
In [ ]:
# Neutralize TORV against size
NormalizedTORV = simpleNormalize(torv.TORV)
neutralizedTORV = neutralizeFactor(NormalizedTORV, NormalizedFCAP, NormalizedTORV.index)
ff.saveh5data(neutralizedTORV,path,'OwnFactorADJTORV')
In [ ]:
# --------------------------------------- End TurnOver Rate Volatility Section---------------------------------- #
In [ ]:
In [ ]:
# --------------------------------------- ILLIQ Factor (5-day average) ---------------------------------- #
In [ ]:
# Illiquidity: |close - open| / open / volume, then the N-day rolling mean of that ratio
In [81]:
# the illiquid factor
class ILLIQ(CalOwnFactor):
classname = 'ILLIQ'
def __init__(self,path):
super(ILLIQ,self).__init__(path) # arguments in super method should be exactly same as the args in parent class
#self.setting = setting
    def calILLIQ(self, openPrice, closePrice, volume):
        if openPrice.shape != closePrice.shape:
            print openPrice.shape, closePrice.shape
            print 'data shape is not equal!'
        else:
            newdf = np.abs((closePrice - openPrice) / openPrice) / volume
            # scale factor of 1e7 applied before rounding
            self.ILLIQ = (newdf.rolling(min_periods=5, window=5, center=False).mean() * 10000000).round(4)
In [82]:
# cal ILLIQ
illiq = ILLIQ(path)
illiq.addData('open', filenameOpen)
illiq.addData('close', filenameClose)
illiq.addData('volume', filenameVolume)
illiq.calILLIQ(illiq.datadict['open'], illiq.datadict['close'], illiq.datadict['volume'])
illiq.saveData(illiq.ILLIQ, 'OwnFactorILLIQ')
In [ ]:
In [99]:
# Neutralize ILLIQ against size
NormalizedILLIQ = simpleNormalize(illiq.ILLIQ)
neutralizedILLIQ = neutralizeFactor(NormalizedILLIQ, NormalizedFCAP, NormalizedILLIQ.index)
#neutralizedILLIQ.index.name = 'Own_Factor_ADJ_ILLQ_1D'
ff.saveh5data(neutralizedILLIQ, path, 'OwnFactorADJILLIQ')
In [ ]:
datasample = ff.readh5data(path,'OwnFactorADJILLIQ.h5')
In [84]:
# --------------------------------------- end ILLIQ Factor section ---------------------------------- #
In [ ]:
In [ ]:
# --------------------------------------- spread bias factor ---------------------------------- #
In [ ]:
# Stock-industry spread = log(stock price / the price of the stock's industry index)
# Spread bias = (spread - N-day rolling mean of the spread) / N-day rolling std of the spread
In [30]:
filenameINDUClass ='LZ_CN_STKA_INDU_ZX.h5'
filenameINDUIndex = 'LZ_CN_STKA_INDXQUOTE_CLOSE.h5'
filenameAdjClose = 'OwnfactorAdjustedClose.h5'
In [33]:
# the industry spread bias factor
class InduSpreadBias(CalOwnFactor):
classname = 'InduSpreadBias'
def __init__(self,path):
super(InduSpreadBias,self).__init__(path) # arguments in super method should be exactly same as the args in parent class
#self.setting = setting
    @staticmethod
    def fullName(x):
        # Map the numeric CITIC industry code to its full Wind index name
        if np.isnan(x):
            return np.NaN
        elif x < 10:
            return 'CI00500' + str(int(x)) + '.WI'
        else:
            return 'CI0050' + str(int(x)) + '.WI'
    def fillname(self, Induclassdf):
        self.stkindumap = Induclassdf.applymap(self.fullName)
    def mapStkInduIndex(self, indusIndex):
        # Replace each stock's industry name with that industry index's price on the same date
        for date in self.stkindumap.index:
            indusIndexSlice = indusIndex.loc[date]
            induRow = self.stkindumap.loc[date]
            stkList = induRow.dropna().index.tolist()
            self.stkindumap.loc[date, stkList] = indusIndexSlice.loc[induRow.dropna()].values
    def calInduSpreadBias(self, closePrice):
        self.InduSpreadBias = np.log((closePrice / self.stkindumap).astype(float))
        self.InduSpreadBias = (self.InduSpreadBias - self.InduSpreadBias.rolling(min_periods=60, window=60, center=False).mean()) / \
            self.InduSpreadBias.rolling(min_periods=60, window=60, center=False).std()
In [35]:
induspreadbias = InduSpreadBias(path)
induspreadbias.addData('induclass', filenameINDUClass)
induspreadbias.addData('induindex', filenameINDUIndex)
induspreadbias.addData('adjprice', filenameAdjClose)
induspreadbias.sliceData(startTime, endTime)
induspreadbias.fillname(induspreadbias.datadict['induclass'])
induspreadbias.mapStkInduIndex(induspreadbias.datadict['induindex'])
induspreadbias.calInduSpreadBias(induspreadbias.datadict['adjprice'])
induspreadbias.saveData(induspreadbias.InduSpreadBias.round(4), 'OwnFactorInduSpreadBias')
In [ ]:
# --------------------------------------- end spread bias factor ---------------------------------- #
In [ ]:
In [1]:
# --------------------------------------- Hilbert Transform Factor ---------------------------------- #
In [ ]:
# Hilbert transform factor, computed with talib (HT_TRENDLINE)
In [2]:
filenameAdjClose = 'OwnfactorAdjustedClose.h5'
In [34]:
class HibertTransform(CalOwnFactor):
classname = 'HibertTransform'
def __init__(self,path):
super(HibertTransform,self).__init__(path) # arguments in super method should be exactly same as the args in parent class
#self.setting = setting
    def calHB(self, data):
        hb = data.copy()
        NanCount = 0
        datamodi = np.array(data.values, dtype='f8')  # talib requires float64 input
        for name in data.columns:
            nameindex = data.columns.tolist().index(name)
            try:
                hb[name] = talib.HT_TRENDLINE(datamodi[:, nameindex])
            except Exception as e:
                # talib raises when a column is entirely NaN; record it and move on
                assert str(e) == 'inputs are all NaN'
                hb[name] = np.NaN
                NanCount += 1
        return hb
    def calHBRatio(self, adjClose, rolling_window=20):
        self.HBRatio = (self.calHB(adjClose) / adjClose).rolling(window=rolling_window, min_periods=rolling_window).mean().round(4)
In [35]:
# cal Hilbert transform factor
hiberttransform = HibertTransform(path)
hiberttransform.addData('adjprice', filenameAdjClose)
hiberttransform.calHBRatio(hiberttransform.datadict['adjprice'])
hiberttransform.saveData(hiberttransform.HBRatio, 'OwnfactorHibertTransform20D')
In [36]:
hiberttransform.HBRatio.tail(5)
Out[36]:
In [ ]:
# --------------------------------------- End Hilbert Transform Factor ---------------------------------- #
In [ ]:
# --------------------------------------- research based on 1-min data ---------------------------------- #
In [69]:
savepath = 'C:/Users/LZJF_02/Desktop/myownliarbry/stkdata1m/'
desktop = 'C:/Users/LZJF_02/Desktop/'
stkname = '600050'
data = ff.readh5data(savepath, stkname + '.h5')
#data['time'] = [datetime.strptime(str(int(t)),'%H%M%S').time() for t in data['time']]
#data1 = ff.readh5data(savepath,stkname+'.h5')
datelist = sorted(set(data.index))  # needed by the slicing cells below
In [278]:
dataslice = data[data.index == datelist[0]]
In [284]:
posPnl = dataslice[dataslice['close'] > dataslice['open']]
negPnl = dataslice[dataslice['close'] < dataslice['open']]
posPnl['volume']
Out[284]:
In [298]:
def calVolQuantileRatio(stk):
    # Spread between the 0.8 volume quantiles of up-minutes and down-minutes, scaled by daily volume
    data = ff.readh5data(savepath, stk + '.h5')
    data['time'] = [datetime.strptime(str(int(t)), '%H%M%S').time() for t in data['time']]
    dataPos = data[data['close'] > data['open']]['volume']
    dataNeg = data[data['close'] < data['open']]['volume']
    datelist = sorted(set(data.index))
    Qdf = pd.DataFrame(index=datelist, columns=[stk], dtype=float)
    for date in datelist:
        try:
            posSlice = dataPos[dataPos.index == date]
            negSlice = dataNeg[dataNeg.index == date]
        except Exception:
            print 'error slicing %s on %s' % (stk, date)
            Qdf.loc[date] = np.NaN
            continue
        totalVolume = data[data.index == date]['volume'].sum()
        # Require at least 5 up (down) minutes, otherwise treat that side as 0
        posVol = posSlice.quantile(0.8) if posSlice.shape[0] > 4 else 0
        negVol = negSlice.quantile(0.8) if negSlice.shape[0] > 4 else 0
        try:
            Qdf.loc[date] = 1000 * (posVol - negVol) / totalVolume
        except Exception:
            Qdf.loc[date] = np.NaN
    return Qdf.round(3)
In [310]:
stkname = '603999'
c = calVolQuantileRatio(stkname)
In [216]:
negPnl['volume'].hist()
Out[216]:
In [191]:
dataNoneZero = data[data.volume >1]
dataNoneZero
Out[191]:
In [90]:
datelist = sorted(set(data.index))
Qdf = pd.DataFrame(index=datelist, columns=[stkname], dtype=float)
for date in datelist:
    breakdate = date
    try:
        dataslice = dataNoneZero[dataNoneZero.index == date]
    except Exception:
        Qdf.loc[date] = np.NaN
        continue
    # Require at least 60 nonzero-volume minutes and fewer than half flat (high == low) minutes
    if dataslice.shape[0] >= 60 and dataslice.loc[dataslice['high'] == dataslice['low']].shape[0] < 0.5 \
            * dataslice.shape[0]:
        dataslice['impact'] = np.abs(dataslice['close'] - dataslice['open']) / np.log10(dataslice['volume']) / dataslice['open']
        datanew = dataslice.sort_values(['impact'], ascending=False)
        try:
            # VWAP of the top-impact minutes (first 30% of volume) over the day's VWAP
            q1 = datanew.loc[datanew.volume.cumsum() <= datanew.volume.sum() * 0.3][['amount', 'volume']]
            Qdf.loc[date] = (q1.amount.sum() / q1.volume.sum()) / (datanew.amount.sum() / datanew.volume.sum())
        except ZeroDivisionError:
            Qdf.loc[date] = np.NaN
    else:
        Qdf.loc[date] = np.NaN
In [84]:
filenameHS300Member = 'LZ_CN_STKA_INDEX_HS300MEMBER.h5'
filenameCSI500Member = 'LZ_CN_STKA_INDEX_CSI500MEMBER.h5'
In [89]:
def getMemberList(path, filename):
    # Index membership flags: the latest row marks current members with 1
    df = ff.readh5data(path, filename).iloc[-1]
    return df.loc[df == 1].index.tolist()
hs300MemberList = getMemberList(path, filenameHS300Member)
csi500MemberList = getMemberList(path, filenameCSI500Member)
finalList = hs300MemberList + csi500MemberList
finalList
Out[89]:
In [66]:
dataslice
Out[66]:
In [42]:
dataslice = dataNoneZero[dataNoneZero.index == datelist[6]]
In [56]:
dataslice['impact'] = 100*np.abs(dataslice['close'] - dataslice['open']) / np.log10(dataslice['volume'])/dataslice['open']
Out[56]:
In [312]:
fileList = []
for filename in os.listdir(savepath):
if filename.endswith('.h5') and 'VolQuantileRatio' in filename:
fileList.append(filename.split('.')[0])
#print(os.path.join(savepath, file))
In [313]:
fileList
Out[313]:
In [123]:
from multiprocessing import Pool, cpu_count
In [126]:
def calNetIn(stk):
    # Daily net money flow: sum of minute amounts, signed by close-vs-open direction
    data = ff.readh5data(savepath, stk + '.h5')[['amount', 'open', 'close', 'time']]
    data['time'] = [datetime.strptime(str(int(t)), '%H%M%S').time() for t in data['time']]
    datelist = sorted(set(data.index))
    Qdf = pd.DataFrame(index=datelist, columns=[stk], dtype=float)
    for date in datelist:
        try:
            dataslice = data[data.index == date]
            Qdf.loc[date] = (dataslice['amount'] * np.sign(dataslice['close'] - dataslice['open'])).sum()
        except Exception:
            Qdf.loc[date] = np.NaN
    return Qdf
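In [ ]:
# Pool/cpu_count are imported above but never used in this notebook; this is a
# minimal sketch (an assumption, not the original pipeline) of running calNetIn
# over many stocks in parallel. `stkList` is a hypothetical list of stock codes,
# e.g. ['600050', '603999']; note that on Windows the worker function generally
# has to live in an importable module rather than the notebook itself.
pool = Pool(cpu_count())
netInFrames = pool.map(calNetIn, stkList)
pool.close()
pool.join()
netInDF = pd.concat(netInFrames, axis=1)  # one column per stock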
In [127]:
%lprun -f calNetIn calNetIn(stkname)
In [ ]:
In [20]:
def calSmartMoney(stk):
    # "Smart money" factor: VWAP of the highest-impact minutes over the day's VWAP
    data = ff.readh5data(savepath, stk + '.h5')
    data['time'] = [datetime.strptime(str(int(t)), '%H%M%S').time() for t in data['time']]
    datelist = sorted(set(data.index))
    Qdf = pd.DataFrame(index=datelist, columns=[stk], dtype=float)
    for date in datelist:
        dataslice = data[data.index == date]
        dataNoneZero = dataslice.loc[dataslice.volume > 0]
        # Require at least 60 nonzero-volume minutes and fewer than half flat (high == low) minutes
        if len(dataNoneZero) >= 60 and dataNoneZero.loc[dataNoneZero['high'] == dataNoneZero['low']].shape[0] < \
                0.5 * dataNoneZero.shape[0]:
            dataNoneZero['impact'] = np.abs(dataNoneZero['close'] - dataNoneZero['open']) / dataNoneZero['volume'] / dataNoneZero['open']
            datanew = dataNoneZero.sort_values(['impact'], ascending=False)
            q1 = datanew.loc[datanew.volume.cumsum() <= datanew.volume.sum() * 0.2]
            Qdf.loc[date] = (q1.amount.sum() / q1.volume.sum()) / (datanew.amount.sum() / datanew.volume.sum())
        else:
            Qdf.loc[date] = np.NaN
    return Qdf
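In [ ]:
# A minimal usage sketch for calSmartMoney, assuming the 1-min file for this
# stock exists under `savepath` (as it does for '600050' above).
sm600050 = calSmartMoney('600050')
sm600050.tail()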
In [21]:
finaldf = pd.DataFrame()
for stk in fileList:
    print stk
    temp = ff.readh5data(savepath, stk + '.h5')
    finaldf = pd.concat([finaldf, temp], axis=1)
In [167]:
resultlist = []
for filename in os.listdir(savepath):
if filename.endswith('.h5') and 'OwnfactorNetIn' in filename:
resultlist.append(filename.split('.')[0])
#print(os.path.join(savepath, file))
resultlist
Out[167]:
In [315]:
resultlist = []
for filename in os.listdir(savepath):
if filename.endswith('.h5') and 'VolQuantileRatio' in filename:
resultlist.append(filename.split('.')[0])
#print(os.path.join(savepath, file))
resultlist
Out[315]:
In [316]:
resultdf = pd.DataFrame()
for stk in resultlist:
    temp = ff.readh5data(savepath, stk + '.h5')
    resultdf = pd.concat([resultdf, temp], axis=1)
In [355]:
ff.saveh5data(resultdf,path,'VolQuantileRatio')
In [356]:
resultdf.tail()
Out[356]:
In [178]:
resultdf = resultdf.rolling(min_periods=20,window=20,center=False).mean()
resultdf
Out[178]:
In [179]:
lcap = ff.readh5data(path,filenameFCAP)
codelcap = [x.split('.')[0] for x in lcap.columns]
lcap.columns = codelcap
codedf = resultdf.columns
finalstklist = list(set(codedf) & set(codelcap))
finalindex = list(set(resultdf.index.tolist()) & set(lcap.index.tolist()))
In [180]:
newdf = (resultdf.loc[finalindex ][finalstklist] / lcap.loc[finalindex][finalstklist]).round(4)
In [181]:
newdf = newdf.sort_index()
In [ ]:
In [17]:
Qdf = pd.DataFrame(index=datelist, columns=[stk], dtype=float)
condition1 = 0
condition2 = 0
for date in datelist:
    print date
    dataslice = data[data.index == date]
    dataNoneZero = dataslice.loc[dataslice.volume > 0]
    # Require at least 60 nonzero-volume minutes and fewer than half flat (high == low) minutes
    if len(dataNoneZero) >= 60 and dataNoneZero.loc[dataNoneZero['high'] == dataNoneZero['low']].shape[0] < 0.5 * dataNoneZero.shape[0]:
        #condition1+=1
        #condition2+=1
        dataNoneZero['impact'] = np.abs(dataNoneZero['close'] - dataNoneZero['open']) / dataNoneZero['volume'] / dataNoneZero['open']
        datanew = dataNoneZero.sort_values(['impact'], ascending=False)
        q1 = datanew.loc[datanew.volume.cumsum() <= datanew.volume.sum() * 0.2]
        Qdf.loc[date] = (q1.amount.sum() / q1.volume.sum()) / (datanew.amount.sum() / datanew.volume.sum())
    else:
        Qdf.loc[date] = np.NaN