In [1]:
#!/Tsan/bin/python
# -*- coding: utf-8 -*-
In [94]:
# Libraries to use
from __future__ import division
import talib as tb
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import cvxopt as cv
from cvxopt import solvers
In [7]:
# Import My own library for factor testing
from SingleFactorTest import factorFilterFunctions as ff
#from config import *
In [3]:
%matplotlib inline
In [4]:
# Files to use
filenameHS300 = 'LZ_GPA_INDXQUOTE_CLOSE.csv'
filenameIndexVolume = 'LZ_GPA_INDXQUOTE_VOLUME.csv'
filenameHSOpen = 'LZ_GPA_INDXQUOTE_OPEN.csv'
filenameHS300Weight = 'LZ_GPA_INDEX_HS300WEIGHT.csv'
filanemaZX500Weight = 'LZ_GPA_INDEX_CSI500WEIGHT.csv'
In [5]:
# some useful parameters
startTime = datetime.strptime('20120101', '%Y%m%d')
endTime = datetime.strptime('20161231', '%Y%m%d')
timeStampNum = 2500 # Number of time period
#thresholdNum = 0.05 # thresholdNum to filter stocks by Nan's amount
HS300Index ='000300.SH' # HS300 index code
ZZ500Index = '000905.SH' # ZZ500 index code
In [57]:
# Constants
path = ff.data_path
timeStampNum = 1000
thresholdNum = 0.2
HSIndex='000300.SH'
sampleNum = 500
In [72]:
targetAsset = pd.read_csv(path+filenameHS300,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime :endTime].iloc[:,:4]
tANetWorth = targetAsset /targetAsset.iloc[0]
tACovMatrix = tANetWorth.ewm(ignore_na=True, min_periods=0, halflife = 60).cov(pairwise = True).iloc[-1]
endOfMonthList = ff.getLastDayOfMonth(tANetWorth.index)[1]
positionSheet = pd.DataFrame(index=tANetWorth.index, columns = tANetWorth.columns, data= None, dtype =float)
positionSheet
Out[72]:
In [24]:
tANetWorth
t1 = tANetWorth .pct_change().dropna().iloc[1:]
originalShapeRatio = t1.mean()/t1.std()
originalShapeRatio.name = 'Shape_Ratio'
In [43]:
t1 .mean()
Out[43]:
In [20]:
# Drawdown of every asset at every date: 1 - (net worth / running peak so far).
# cummax() yields the running peak in one pass, replacing the per-date loop that
# re-sliced and re-maximised the whole history (quadratic in the number of rows).
origiMaxdd = 1 - tANetWorth / tANetWorth.cummax()
# The first row has no earlier history, so its drawdown is pinned to 0.
origiMaxdd.iloc[0] = 0
origiMaxdd.max()
Out[20]:
In [30]:
calmaRatio = t1.mean()/origiMaxdd.max()
calmaRatio.name = 'Calmar_Ratio'
In [47]:
basePoint = 0.005
InfoDF = pd.concat([originalShapeRatio,calmaRatio],axis=1)
InfoDF.rank()
InfoWeight = np.array([0.6,0.4])
InfoDF = InfoDF.rank() * InfoWeight
InfoDF = InfoDF.sum(axis=1) - InfoDF.sum(axis=1).mean()
(0.3 + InfoDF*basePoint ).values
Out[47]:
In [48]:
InfoDF*basePoint
Out[48]:
In [83]:
# Monthly re-optimisation of the asset weights.
# On every month-end date with at least 120 rows of history, each asset gets an
# upper weight bound of 0.3 + score * basePoint, where the score is the
# mean-centred weighted sum of its mean/std rank and its mean/max-drawdown rank.
# A QP then minimises w' * Cov * w subject to 0 <= w <= bound and sum(w) == 1.
basePoint = 0.1
returnDF = tANetWorth.pct_change()
InfoWeight = np.array([0.6, 0.4])  # customized: weight of the two rank columns
for date in tANetWorth.index:
    tempdf = returnDF.loc[:date]
    tempNWdf = tANetWorth.loc[:date]
    if date in endOfMonthList and tempdf.shape[0] >= 120:
        # Adjusted threshold for each asset
        originalShapeRatio = tempdf.mean() / tempdf.std()
        originalShapeRatio.name = 'Shape_Ratio'

        # Drawdown history up to `date`: 1 - net worth / running peak.
        # cummax() replaces an inner per-row loop that recomputed the peak
        # from scratch for every row.
        origiMaxdd = 1 - tempNWdf / tempNWdf.cummax()
        origiMaxdd.iloc[0] = 0
        calmaRatio = tempdf.mean() / origiMaxdd.max()
        calmaRatio.name = 'Calmar_Ratio'

        InfoDF = pd.concat([originalShapeRatio, calmaRatio], axis=1)
        InfoDF = InfoDF.rank() * InfoWeight
        InfoDF = InfoDF.sum(axis=1) - InfoDF.sum(axis=1).mean()

        # Exponentially weighted covariance over the last 120 rows; the last
        # entry of the pairwise result is used as the covariance matrix.
        # NOTE(review): `.iloc[-1]` assumes the pairwise result is indexed by
        # date first (older pandas) - confirm against the installed version.
        temCovMatrix = tempdf.iloc[-120:].ewm(ignore_na=True, min_periods=0, halflife=60).cov(pairwise=True).iloc[-1]

        # Optimize
        stkNum = temCovMatrix.shape[1]
        P = cv.matrix(temCovMatrix.values)
        q = cv.matrix(0.0, (stkNum, 1))
        # G w <= h encodes  w <= upper bound  (first half)  and  -w <= 0  (second half).
        G = cv.matrix(np.concatenate((np.eye(stkNum), -np.eye(stkNum))))
        h = cv.matrix(np.append((0.3 + InfoDF * basePoint).values, np.zeros(stkNum)))
        # A w == b forces the weights to sum to 1.
        A = cv.matrix(np.ones(stkNum)).T
        b = cv.matrix(1.0).T
        sol = solvers.qp(P, q, G, h, A, b)
        positionSheet.loc[date] = np.array(list(sol['x']))
In [84]:
positionSheet.dropna()
Out[84]:
In [85]:
positionSheet1 = positionSheet.fillna(method='ffill').fillna(0)
positionSheet
Out[85]:
In [86]:
# Net worth of the dynamically re-weighted portfolio.
# Summing weight * net-worth *levels* is only a valid NAV while the weights never
# change: with monthly re-weighting it jumps at every rebalance, is 0 before the
# first position exists, and applies weights to the same day they were computed on.
# Instead, compound the daily portfolio return earned by the weights held
# coming into each day.
assetReturns = tANetWorth.pct_change().fillna(0)   # first row has no prior value
heldWeights = positionSheet1.shift(1).fillna(0)    # weights set on day t earn from t+1
totalNW = (1 + (heldWeights * assetReturns).sum(axis=1)).cumprod()

# Average of the asset net-worth curves; the divisor follows the number of
# columns instead of a hard-coded 4.
equalWeighted = tANetWorth.sum(axis=1) / tANetWorth.shape[1]
equalWeighted.name = 'Equal_Weighed'
totalNW.name = 'Dynamic_Optimize'

compareNW = pd.concat([totalNW, equalWeighted], axis=1)
compareNW = pd.concat([compareNW, tANetWorth], axis=1)
compareNW.index.name = 'time'
compareNW.plot(figsize=(22, 14), fontsize=13, legend=True)
Out[86]:
In [87]:
compareNW
Out[87]:
In [88]:
t1 = compareNW.pct_change().dropna().iloc[1:]
shapeRatio1 = t1.mean()/t1.std()
shapeRatio1
Out[88]:
In [89]:
compareNW.dropna().pct_change().dropna()
Out[89]:
In [90]:
# Drawdown of every curve in compareNW: 1 - value / running peak.
# cummax() gives the running peak directly, so the history is no longer
# re-sliced and re-maximised for every single date.
maxdd = 1 - compareNW / compareNW.cummax()
# No earlier history exists for the first row; its drawdown is defined as 0.
maxdd.iloc[0] = 0
# Largest drawdown per curve and the date on which it occurred.
maxddInfo = pd.concat([maxdd.max(), maxdd.idxmax()], axis=1)
maxddInfo.columns = ['Max_drawdown', 'Time']
maxddInfo
Out[90]:
In [ ]:
In [93]:
fig = plt.figure(figsize=(18,14))
# Add a subplot
totalNW.plot.area(figsize=(22,14),fontsize =13,color='g',legend =True)
#equatWeighted.plot(figsize=(22,14),fontsize =13,color='r',legend = True)
Out[93]:
In [ ]:
In [ ]:
#positionSheet.dropna(axis=0)
In [ ]:
positionSheet
In [ ]:
HS300WeightDF = pd.read_csv(path+filanemaZX500Weight,infer_datetime_format=True,parse_dates=[0],index_col=0).iloc[-2000:]
In [ ]:
HS300WeightDF
In [ ]:
# Walk through consecutive rows of the weight table and print the index label of
# every row whose non-NaN columns, united with those of the previous row, do not
# number exactly 500. The count of non-NaN columns is printed for every row.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
HS300WeightYes = HS300WeightDF.iloc[0].dropna().index.tolist()
for i in range(1, len(HS300WeightDF)):
    HS300WeightToday = HS300WeightDF.iloc[i].dropna().index.tolist()
    print(len(HS300WeightToday))
    if len(set(HS300WeightToday) | set(HS300WeightYes)) != 500:
        print(HS300WeightDF.index[i])
    # Today's member list is the next iteration's "yesterday"; reusing it avoids
    # a second dropna() on the same row.
    HS300WeightYes = HS300WeightToday
In [ ]:
HS300WeightDF
In [ ]:
benchMarkClose = pd.read_csv(path+filenameHS300,infer_datetime_format=True,parse_dates=[0],index_col=0)[-timeStampNum-1:-5][HSIndex]
benchMarkOpen = pd.read_csv(path+filenameHSOpen,infer_datetime_format=True,parse_dates=[0],index_col=0)[-timeStampNum-1:-5][HSIndex]
benchMarkClose.head()
In [ ]:
def getSVD(inputArray, t, m):
    """Return a low-rank SVD reconstruction of a 1-D series.

    Steps:
      1. Build a matrix of overlapping length-``m`` windows with
         ``getNewMatrix(inputArray, t, m)``.
      2. Decompose it with ``np.linalg.svd``.
      3. Keep one component when the first singular value accounts for more
         than 99% of the sum of singular values, otherwise keep two.
      4. Rebuild the matrix from the kept components and turn it back into a
         series with ``recreateArray``.

    Parameters
    ----------
    inputArray : sequence of numbers; the first ``t`` values are used.
    t : int, number of samples.
    m : int, window length.

    Returns
    -------
    numpy.ndarray : whatever ``recreateArray`` returns for the rebuilt matrix.
    """
    inputmatrix = getNewMatrix(inputArray, t, m)
    # Only the leading one or two singular vectors are needed, so the reduced
    # decomposition is sufficient; the full one would build a square `u` with
    # one row and column per window.
    u, s, v = np.linalg.svd(inputmatrix, full_matrices=False)
    eviNum = 1 if s[0] / s.sum() > 0.99 else 2
    # Scaling the kept columns of `u` by their singular values is the same as
    # multiplying by diag(s[:eviNum]).
    matrixForts = np.dot(u[:, :eviNum] * s[:eviNum], v[:eviNum])
    return recreateArray(matrixForts, t, m)
In [ ]:
### Method to calculate moving max drawdown
from numpy.lib.stride_tricks import as_strided
def windowed_view(x, window_size):
    """Create a 2d windowed view of a 1d array.

    `x` must be a 1d numpy array.

    `numpy.lib.stride_tricks.as_strided` is used to create the view.
    The data is not copied, so writing to the result writes to `x`.

    Raises ValueError if `x` is not 1d or if `window_size` is not between
    1 and `x.size`; with such arguments the strided view would not describe
    valid windows of `x`.

    Example:

    >>> x = np.array([1, 2, 3, 4, 5, 6])
    >>> windowed_view(x, 3)
    array([[1, 2, 3],
           [2, 3, 4],
           [3, 4, 5],
           [4, 5, 6]])
    """
    if x.ndim != 1:
        raise ValueError("windowed_view expects a 1d array, got %d dimensions" % x.ndim)
    if not 1 <= window_size <= x.size:
        raise ValueError(
            "window_size must be between 1 and %d, got %r" % (x.size, window_size))
    step = x.strides[0]
    # Row k starts one element after row k-1, and columns advance one element
    # at a time, hence the same stride on both axes.
    return as_strided(x, shape=(x.size - window_size + 1, window_size),
                      strides=(step, step))
In [ ]:
In [ ]:
def getNewMatrix(inputArray, t, m):
    """Stack the overlapping length-``m`` windows of a series into a matrix.

    Row ``i`` of the result is ``inputArray[i:i + m]`` for
    ``i = 0 .. t - m``, giving a ``(t - m + 1, m)`` array.

    Parameters
    ----------
    inputArray : 1-D sequence of numbers with at least ``t`` elements.
    t : int, number of leading samples to use.
    m : int, window length, ``1 <= m <= t``.

    Raises
    ------
    ValueError
        If the window does not fit (``m < 1`` or ``m > t``) or the input has
        fewer than ``t`` elements. Without this check the result would be an
        empty or ragged array.
    """
    series = np.asarray(inputArray)
    n = t - m + 1
    if m < 1 or n < 1:
        raise ValueError("need 1 <= m <= t, got t=%r, m=%r" % (t, m))
    if t > len(series):
        raise ValueError(
            "inputArray has %d elements but t=%r were requested" % (len(series), t))
    # Element (i, j) of the result is series[i + j]; broadcasting the row
    # starts against the in-window offsets builds all indices at once.
    rowStarts = np.arange(n)[:, None]
    offsets = np.arange(m)
    return series[rowStarts + offsets]
In [ ]:
benchMarkOpen[-15:]
In [ ]:
inputmatrix = getNewMatrix(benchMarkClose[0:sampleNum].values,sampleNum,10)
len(inputmatrix)
In [ ]:
def calSVD(newMatrix):
    """Run ``np.linalg.svd`` on ``newMatrix`` and return its three results.

    Returns
    -------
    tuple : ``(u, s, v)`` in the order ``np.linalg.svd`` produces them.
    """
    factors = np.linalg.svd(newMatrix)
    return factors[0], factors[1], factors[2]
In [ ]:
inputmatrix = getNewMatrix(benchMarkClose[0:sampleNum].values,sampleNum,10)
u,s,v=calSVD(inputmatrix)
eviNum = 1 if s[0]/s.sum() > 0.99 else 2
In [ ]:
#u[:,eviNum].reshape(7,1)
In [ ]:
sNew = np.zeros((eviNum,eviNum))
In [ ]:
np.fill_diagonal(sNew,s[:eviNum])
In [ ]:
v[:eviNum]
In [ ]:
#np.array([[1,2],[6,3]]).dot(np.array([[1,2],[2,3],[3,4]]))
In [ ]:
def recreateArray(newMatrix, t, m):
    """Collapse a ``(t - m + 1, m)`` window matrix back into a length-``t`` series.

    Output element ``p`` is the average of all entries ``newMatrix[i][j]`` with
    ``i + j == p`` (0-based), i.e. of every matrix cell that corresponds to
    position ``p`` of the series.

    The divisor is the number of entries actually summed. A closed-form
    divisor of ``p`` / ``m`` / ``t - p + 1`` (1-based ``p``) is only right when
    ``m <= t - m + 1``; for longer windows it over-counts the middle positions.

    Parameters
    ----------
    newMatrix : 2-D indexable (``newMatrix[i][j]``) with at least
        ``t - m + 1`` rows and ``m`` columns.
    t : int, length of the series to rebuild.
    m : int, window length, ``1 <= m <= t``.

    Returns
    -------
    numpy.ndarray of length ``t``.

    Raises
    ------
    ValueError
        If ``m < 1`` or ``m > t`` (no valid window matrix exists).
    """
    n = t - m + 1
    if m < 1 or n < 1:
        raise ValueError("need 1 <= m <= t, got t=%r, m=%r" % (t, m))
    ret = []
    for p in range(t):
        sigma = 0
        count = 0
        # Visit columns in increasing order; the matching row is p - j.
        for j in range(m):
            i = p - j
            if 0 <= i < n:
                sigma += newMatrix[i][j]
                count += 1
        # float() keeps this a true division even without
        # `from __future__ import division` on Python 2.
        ret.append(sigma / float(count))
    return np.array(ret)
In [ ]:
np.dot(u[:,:eviNum].reshape(u.shape[0],eviNum),sNew)
In [ ]:
#np.dot(u[:,eviNum].reshape(u.shape[0],eviNum),v[:eviNum])
In [ ]:
np.concatenate((np.diag(np.ones(60)),-np.diag(np.ones(60))))
In [ ]:
a=pd.Series(data=[-1,2,3,4],index = ['a','b','c','d'])
b = pd .Series(data=[4,6,7,8],index = ['a','b','c','d'])
c = pd.Series(data= None ,index =['a','b','c','d','e'] )
a[a>0].index
In [ ]:
matrixForts = np.dot(np.dot(u[:,:eviNum].reshape(u.shape[0],eviNum),sNew),v[:eviNum])
In [ ]:
matrixForts
In [ ]:
ma = benchMarkClose[0:sampleNum].rolling(10,min_periods=1).mean()
In [ ]:
benchMarkClose[0:sampleNum].head()
In [ ]:
ma.tail()
In [ ]:
newts = recreateArray(matrixForts,sampleNum,10)
In [ ]:
getSVD(benchMarkClose[0:sampleNum].values,sampleNum,20)
In [ ]:
plt.figure(figsize=(24,16))
plt.plot(benchMarkClose[0:sampleNum].values, 'blue')
plt.plot(getSVD(benchMarkClose[0:sampleNum].values,sampleNum,5),'red')
#plt.plot(getSVD(benchMarkData[0:sampleNum].values,sampleNum,15),'green')
plt.plot(getSVD(benchMarkClose[0:sampleNum].values,sampleNum,20),'orange')
plt.title('SVD')
In [ ]:
newDF=pd.DataFrame(index = benchMarkClose[0:sampleNum].index,data = None,columns =['SVD20','SVD5','PRICE'],dtype=float)
In [ ]:
newDF['SVD20'] = getSVD(benchMarkClose[0:sampleNum].values,sampleNum,20)
newDF['SVD5'] = getSVD(benchMarkClose[0:sampleNum].values,sampleNum,5)
newDF ['PRICE'] = benchMarkClose[0:sampleNum].values
In [ ]:
# Crossover hit counts between the two smoothed series.
newDF['DIFF'] = newDF['SVD5'] - newDF['SVD20']
newDF['Price_change'] = newDF['PRICE'].diff()

diffNow = newDF['DIFF']
diffPrev = diffNow.shift(1)          # NaN on the first row, so it never counts
priceChange = newDF['Price_change']

# Upward crossover (DIFF turns from negative to positive) on a day the price rose,
# and the mirrored downward case. Boolean masks replace a row-by-row loop.
upHits = (diffNow > 0) & (diffPrev < 0) & (priceChange > 0)
downHits = (diffNow < 0) & (diffPrev > 0) & (priceChange < 0)

# Kept as lists of +1 / -1 markers, one per hit.
upAccuracy = [1] * int(upHits.sum())
downAccuracy = [-1] * int(downHits.sum())

# Hits divided by the number of rows with a positive / negative price change.
a, b = len(upAccuracy) / len(newDF[priceChange > 0]), len(downAccuracy) / len(newDF[priceChange < 0])
In [ ]:
newDF[newDF['Price_change']>0]
In [ ]:
In [ ]:
# Back-test of the SVD-smoothed crossover rule, plus shared state for the
# moving-average back-test that follows.
m = 7    # window length passed to getSVD
t = 15   # number of trailing closes fed to getSVD each day
dateList = benchMarkOpen.index
assetSSA = []
assetMA = []
holdingFlag = False
tradinglogSSA = {}
tradinglogMA = {}
portfolioValue = {i: 0 for i in benchMarkClose.index[:t]}
ma_strategy = {i: 0 for i in benchMarkClose.index[:t]}

# Plain arrays make every lookup explicitly positional; integer keys on a
# date-indexed Series only work through pandas' deprecated positional fallback.
closeValues = benchMarkClose.values
openValues = benchMarkOpen.values

for i in range(t, len(benchMarkClose)):
    todayDate = dateList[i]
    yesterDayDate = dateList[i - 1]
    # The t closes up to and including yesterday; today's close is not used.
    svdSignal = closeValues[i - t:i]
    newTS = getSVD(svdSignal, t, m)

    wasAbove = closeValues[i - 2] > newTS[-2]
    wasBelow = closeValues[i - 2] < newTS[-2]
    nowAbove = closeValues[i - 1] > newTS[-1]
    nowBelow = closeValues[i - 1] < newTS[-1]

    if len(assetSSA) != 0 and wasAbove and nowBelow:  # sell signal
        sellprice = openValues[i]
        tradinglogSSA[todayDate] = (sellprice, -1)
        # Realised simple return of the closed trade is added to the running total.
        portfolioValue[todayDate] = (sellprice - assetSSA[0]) / assetSSA[0] + portfolioValue[yesterDayDate]
        assetSSA = []
    elif len(assetSSA) == 0 and wasBelow and nowAbove:  # buy signal
        buyprice = openValues[i]
        assetSSA.append(buyprice)
        tradinglogSSA[todayDate] = (buyprice, 1)
        portfolioValue[todayDate] = portfolioValue[yesterDayDate]
    else:
        portfolioValue[todayDate] = portfolioValue[yesterDayDate]
In [ ]:
# Back-test of the moving-average crossover rule.
# Relies on m, t, dateList, assetMA, tradinglogMA and ma_strategy already being
# initialised; re-running this alone continues from whatever state they hold.

# A rolling mean at a given row depends only on that row and earlier ones, so it
# is computed once for the whole series instead of once per loop iteration.
ma = benchMarkClose.rolling(m, min_periods=1).mean()
maValues = ma.values
# Plain arrays make every lookup explicitly positional; integer keys on a
# date-indexed Series only work through pandas' deprecated positional fallback.
closeValues = benchMarkClose.values
openValues = benchMarkOpen.values

for i in range(t, len(benchMarkClose)):
    todayDate = dateList[i]
    yesterDayDate = dateList[i - 1]

    ### For MA strategy
    wasAbove = closeValues[i - 2] > maValues[i - 2]
    wasBelow = closeValues[i - 2] < maValues[i - 2]
    nowAbove = closeValues[i - 1] > maValues[i - 1]
    nowBelow = closeValues[i - 1] < maValues[i - 1]

    if len(assetMA) != 0 and wasAbove and nowBelow:  # sell signal
        sellprice = openValues[i]
        tradinglogMA[todayDate] = (sellprice, -1)
        # Realised simple return of the closed trade is added to the running total.
        ma_strategy[todayDate] = (sellprice - assetMA[0]) / assetMA[0] + ma_strategy[yesterDayDate]
        assetMA = []
    elif len(assetMA) == 0 and wasBelow and nowAbove:  # buy signal
        buyprice = openValues[i]
        assetMA.append(buyprice)
        tradinglogMA[todayDate] = (buyprice, 1)
        ma_strategy[todayDate] = ma_strategy[yesterDayDate]
    else:
        ma_strategy[todayDate] = ma_strategy[yesterDayDate]
In [ ]:
ma_strategy
In [ ]:
portfolioValue
In [ ]:
strategy_cmp = pd.DataFrame()
strategy_cmp['strategy_ssa5'] = pd.Series(portfolioValue)
strategy_cmp['strategy_ma5'] = pd.Series(ma_strategy)
strategy_cmp.plot(figsize=(20,10))
In [ ]:
benchMarkOpen.rolling(m,min_periods =1).mean()
In [ ]:
tb.MACD(benchMarkOpen.values)
In [ ]:
##-------Index trading volume analysis-------------
In [ ]:
IndexVolumeDF = pd.read_csv(path+filenameIndexVolume,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime:endTime]
In [ ]:
IndexCloseDF = pd.read_csv(path+filenameHS300,infer_datetime_format=True,parse_dates=[0],index_col=0).loc[startTime:endTime]
In [ ]:
IndexVolumeDF[ZZ500Index].corr(IndexCloseDF[ZZ500Index],method='spearman')
In [ ]:
IndexVolumeDF[ZZ500Index].corr(IndexCloseDF[ZZ500Index],method='pearson')
In [ ]:
svdVolume = pd.Series(index = IndexVolumeDF[ZZ500Index].index, data= getSVD(IndexVolumeDF[ZZ500Index].values,len(IndexVolumeDF),12))
svdIndexPrice= pd.Series(index = IndexVolumeDF[ZZ500Index].index, data= getSVD(IndexCloseDF[ZZ500Index].values,len(IndexVolumeDF),12))
In [ ]:
fig, axs = plt.subplots(4,1, figsize=(20, 14), facecolor='w', edgecolor='k',sharex=True)
IndexVolumeDF[ZZ500Index].plot(ax=axs[0])
axs[0].set_title('Volume')
IndexCloseDF [ZZ500Index].plot(ax=axs[1])
axs[1].set_title('Close')
svdVolume.plot(ax=axs[2])
axs[2].set_title('SVD_Volume')
svdIndexPrice.plot(ax=axs[3])
axs[3].set_title('SVD_Index')
In [ ]:
IndexCloseDF[ZZ500Index].plot(figsize=(20,14))