In [1]:
filename = '/home/octo/Dropbox' + '/SPY29Dec.csv'
In [16]:
# Load the tick CSV (columns: askPrice, askSize, bidPrice, bidSize; timestamp index)
def get_csv_pd(path):
    spy_pd = pd.read_csv(path, sep=',',
                         dtype={'askPrice': np.float32, 'askSize': np.float32,
                                'bidPrice': np.float32, 'bidSize': np.float32},
                         index_col=0, parse_dates=True)
    return spy_pd

# Forward-fill zero quotes (missing ticks) in the bid/ask prices and sizes
def BA(df):
    for col in ['bidPrice', 'bidSize', 'askPrice', 'askSize']:
        df[col] = df[col].replace(0, np.nan).ffill()
    return df.dropna()

def preprocessing(df):
    df = df.dropna()
    # Drop outlier quotes more than one standard deviation below the mean
    df = df[df['bidPrice'] > df.bidPrice.mean() - df.bidPrice.std()]
    df = df[df['askPrice'] > df.askPrice.mean() - df.askPrice.std()]
    df['mid'] = (df.askPrice + df.bidPrice) / 2
    # Size-weighted quote price
    df['vwap'] = ((df.bidPrice * df.bidSize) + (df.askPrice * df.askSize)) / (df.bidSize + df.askSize)
    # Rolling 60-tick high/low channel
    df['high'] = df.askPrice.rolling(60).max()
    df['low'] = df.bidPrice.rolling(60).min()
    return df.dropna()
In [17]:
import numpy as np
import pandas as pd
In [18]:
data = get_csv_pd(filename)
data = BA(data)
data = preprocessing(data)
In [46]:
len(data)
Out[46]:
In [19]:
data.head()
Out[19]:
In [20]:
# Import a Kalman filter and plotting libraries
from pykalman import KalmanFilter
import matplotlib.pyplot as plt
In [48]:
def kalman_ma(data):
    x = data.bidPrice
    # Construct a 1-D Kalman filter; the initial state mean is seeded
    # near the prevailing SPY price level
    kf = KalmanFilter(transition_matrices=[1],
                      observation_matrices=[1],
                      initial_state_mean=246,
                      initial_state_covariance=1,
                      observation_covariance=1,
                      transition_covariance=.01)
    # Filter the observed prices to get an adaptive rolling mean
    state_means, _ = kf.filter(x.values)
    data['km'] = pd.Series(state_means.flatten(), index=x.index)
    return data.dropna()
In [49]:
dataset = kalman_ma(data)
In [50]:
dataset.tail()
Out[50]:
In [51]:
len(dataset)
Out[51]:
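As a quick visual check (illustrative, not part of the original notebook), the Kalman estimate should track the bid price while stripping most of the tick noise, behaving like an adaptive short moving average.
In [ ]:
# Illustrative check: Kalman estimate vs. the raw bid and a 30-tick mean
tail = dataset.tail(500)
tail.bidPrice.plot(alpha=0.3)
tail.bidPrice.rolling(30).mean().plot(alpha=0.7)
tail.km.plot()
plt.legend(['bidPrice', '30-tick mean', 'Kalman'])
plt.ylabel('Price')
plt.show()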
The Hurst exponent helps test whether a time series is:
(1) a random walk (H ~ 0.5),
(2) trending (H > 0.5), or
(3) mean reverting (H < 0.5).
https://www.quantopian.com/posts/hurst-exponent
https://www.quantopian.com/posts/neural-network-that-tests-for-mean-reversion-or-momentum-trending
In [52]:
def hurst(data):
    data = np.asarray(data)  # accept a Series or an array
    tau, lagvec = [], []
    # Step through the different lags
    for lag in range(2, 20):
        # Price difference at the given lag
        pp = np.subtract(data[lag:], data[:-lag])
        lagvec.append(lag)
        # Spread of the lagged differences
        tau.append(np.sqrt(np.std(pp)))
    # Linear fit on a log-log plot gives the power-law exponent
    m = np.polyfit(np.log10(lagvec), np.log10(tau), 1)
    # The Hurst exponent is twice the fitted slope
    return m[0] * 2
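Before applying it to the data, a quick sanity check on synthetic series (illustrative, not from the original notebook): the estimate should sit near 0.5 for a random walk, above it for persistent increments, and below it for white noise.
In [ ]:
np.random.seed(42)
noise = np.random.randn(2000)
rw = np.cumsum(noise)                   # random walk -> H ~ 0.5
ar = np.zeros(2000)
for i in range(1, 2000):
    ar[i] = 0.9 * ar[i - 1] + noise[i]  # AR(1) increments, phi = 0.9
trending = np.cumsum(ar)                # persistent increments -> H > 0.5
print(hurst(rw), hurst(trending), hurst(noise))  # white noise -> H < 0.5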
In [55]:
H = hurst(dataset.mid.tail(100))
In [56]:
H
Out[56]:
In [57]:
import scipy.stats as stats
import scipy.spatial.distance as distance
In [63]:
short_mavg = dataset.mid.rolling(5).mean()
long_mavg = dataset.mid.rolling(15).mean()
dataset.mid.tail(100).plot(alpha=0.5)
dataset.high.tail(100).plot()
dataset.low.tail(100).plot()
dataset.vwap.tail(100).plot()
dataset.km.tail(100).plot()
short_mavg.tail(100).plot()
long_mavg.tail(100).plot(alpha=0.5)
plt.ylabel('Price')
plt.show()
In [70]:
asset = dataset.mid.tail(200)
asset.plot(alpha=0.5)
rolling_means = {}
# Rolling means over window lengths 10, 20, ..., 100
for i in np.linspace(10, 100, 10):
    X = asset.rolling(int(i)).mean()
    rolling_means[i] = X
    X.plot(alpha=0.7)
rolling_means = pd.DataFrame(rolling_means).dropna()
plt.show()
In [78]:
asset = dataset.mid.tail(50)
scores = pd.Series(index=asset.index, dtype=float)
for date in rolling_means.index:
    mavg_values = rolling_means.loc[date]
    # Rank the moving averages; a perfectly ordered ranking signals a clean trend
    ranking = stats.rankdata(mavg_values.values)
    d = distance.hamming(ranking, range(1, 11))
    scores[date] = d
# Scale the score so it shows up next to the price
(10 * scores).plot();
asset.plot()
plt.legend(['Signal', 'Asset Price']);
plt.show()
In [81]:
asset = dataset.mid.tail(100)
scores = pd.Series(index=asset.index, dtype=float)
for date in rolling_means.index:
    mavg_values = rolling_means.loc[date]
    ranking = stats.rankdata(mavg_values.values)
    # Spearman correlation of the ranking against a perfect ordering:
    # near +/-1 for a clean trend, near 0 when the averages are mixed
    d, _ = stats.spearmanr(ranking, range(1, 11))
    scores[date] = d
# Scale the score so it shows up next to the price
(10 * scores).plot();
asset.plot()
plt.legend(['Signal', 'Asset Price']);
plt.show()
In [87]:
asset = dataset.km.tail(100)
scores = pd.Series(index=asset.index, dtype=float)
for date in rolling_means.index:
    mavg_values = rolling_means.loc[date]
    # Spread between the highest and lowest moving average
    d = np.max(mavg_values) - np.min(mavg_values)
    scores[date] = d
# Scale the score so it shows up next to the price
(10 * scores).plot();
asset.plot()
plt.legend(['Signal', 'Asset Price']);
plt.show()
In [101]:
k = 30
pricing = dataset.mid.tail(1000)
x = np.log(pricing)
v = x.diff()
# Average quote size, normalized by its maximum, used as a weight
m = (0.5 * (dataset.askSize.tail(1000) + dataset.bidSize.tail(1000))
     / (0.5 * (dataset.askSize.max() + dataset.bidSize.max())))
p0 = v.rolling(k).sum()
p1 = (m * v).rolling(k).sum()
p2 = p1 / m.rolling(k).sum()
p3 = v.rolling(k).mean() / v.rolling(k).std()
In [102]:
f, (ax1, ax2) = plt.subplots(2,1)
ax1.plot(p0)
ax1.plot(p1)
ax1.plot(p2)
ax1.plot(p3)
ax1.set_title('Momentum of SPY')
ax1.legend(['p(0)', 'p(1)', 'p(2)', 'p(3)'], bbox_to_anchor=(1.1, 1))
ax2.plot(p0)
ax2.plot(p1)
ax2.plot(p2)
ax2.plot(p3)
#ax2.axis([0, 300, -0.005, 0.005])
ax2.set_xlabel('Time');
plt.show()
In [103]:
def get_p(prices, m, d, k):
    """Return the dth-degree rolling momentum of prices using a lookback window of length k."""
    x = np.log(prices)
    v = x.diff()
    m = pd.Series(np.asarray(m), index=v.index)
    if d == 0:
        return v.rolling(k).sum()
    elif d == 1:
        return (m * v).rolling(k).sum()
    elif d == 2:
        return (m * v).rolling(k).sum() / m.rolling(k).sum()
    elif d == 3:
        return v.rolling(k).mean() / v.rolling(k).std()

def backtest_get_p(prices, m, d):
    """Return the dth-degree momentum of prices over the whole window."""
    v = np.diff(np.log(prices))
    # Drop the first weight so m lines up with the differenced series
    m = np.asarray(m)[1:]
    if d == 0:
        return np.sum(v)
    elif d == 1:
        return np.sum(m * v)
    elif d == 2:
        return np.sum(m * v) / np.sum(m)
    elif d == 3:
        return np.mean(v) / np.std(v)
In [104]:
k = 30
d = 3
prices = dataset.mid.tail(1000)
x = np.log(prices)
v = x.diff()
m = (0.5 * (dataset.askSize.tail(1000) + dataset.bidSize.tail(1000))
     / (0.5 * (dataset.askSize.max() + dataset.bidSize.max())))
p0 = v.rolling(k).sum()
p1 = (m * v).rolling(k).sum()
p2 = p1 / m.rolling(k).sum()
p3 = v.rolling(k).mean() / v.rolling(k).std()
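The helper from In [103] reproduces these series directly; the cross-check below is illustrative and not part of the original notebook.
In [ ]:
# Illustrative cross-check: get_p should match the manual computation above
p3_fn = get_p(prices, m, d=3, k=k)
print(np.allclose(p3_fn.dropna(), p3.dropna()))
# Single-number momentum over the whole window
print(backtest_get_p(prices.values, m.values, d=3))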