BS: the buy/sell signal can be predicted with roughly 85% accuracy.
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import csv
import glob
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_model import ARIMAResults
import pickle
#from sklearn.cross_validation import train_test_split
from sklearn import linear_model
from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
In [2]:
# load the quotes CSV (askPrice, askSize, bidPrice, bidSize indexed by timestamp)
def get_csv_pd(path):
    spy_pd = pd.read_csv(path, sep=',',
                         dtype={'askPrice': np.float32, 'askSize': np.float32,
                                'bidPrice': np.float32, 'bidSize': np.float32},
                         index_col=0, parse_dates=True)
    return spy_pd
def preprocessing(df):
    # forward-fill zero quotes, then drop rows that are still empty
    df.bidPrice = df.loc[:, 'bidPrice'].replace(to_replace=0, method='ffill')
    df.bidSize = df.loc[:, 'bidSize'].replace(to_replace=0, method='ffill')
    df.askPrice = df.loc[:, 'askPrice'].replace(to_replace=0, method='ffill')
    df.askSize = df.loc[:, 'askSize'].replace(to_replace=0, method='ffill')
    df = df.dropna()
    # exclude outliers more than one standard deviation below the mean (filters stray zeros)
    df = df[df['bidPrice'] > df.bidPrice.mean() - df.bidPrice.std()]
    df = df[df['askPrice'] > df.askPrice.mean() - df.askPrice.std()]
    # price features
    df['mid'] = (df.askPrice + df.bidPrice) / 2
    df['vwap'] = ((df.loc[:, 'bidPrice'] * df.loc[:, 'bidSize'])
                  + (df.loc[:, 'askPrice'] * df.loc[:, 'askSize'])) / (df.loc[:, 'bidSize'] + df.loc[:, 'askSize'])
    df['spread'] = df.vwap - df.mid
    # v: 60-tick mid change; mom: 12-tick change scaled by v
    # (np.where still evaluates the division, so zero denominators may emit warnings before being masked)
    df['v'] = df.mid - df.mid.shift(60)
    df['mom'] = np.where(np.logical_and((df.mid - df.mid.shift(12)) != 0, df.v != 0),
                         (df.mid - df.mid.shift(12)) / df.v, 0)
    df['return'] = (df.askPrice / df.bidPrice.shift(1)) - 1
    df['sigma'] = df.spread.rolling(60).std()
    df['high'] = df.askPrice.rolling(5).max()
    df['low'] = df.bidPrice.rolling(5).min()
    return df
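To make the new columns concrete, here is a toy two-row quote frame (made-up numbers) showing how mid, vwap and spread come out:
In [ ]:
# hypothetical quotes, only to illustrate the feature definitions above
q = pd.DataFrame({'askPrice': [100.02, 100.03], 'askSize': [300., 200.],
                  'bidPrice': [100.00, 100.01], 'bidSize': [100., 400.]})
mid = (q.askPrice + q.bidPrice) / 2
vwap = (q.bidPrice * q.bidSize + q.askPrice * q.askSize) / (q.bidSize + q.askSize)
print(mid.values, vwap.values, (vwap - mid).values)  # spread = vwap - mid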
In [ ]:
'''
def normalise(df, window_length=60):
    dfn = (df - df.rolling(window_length).min()) / (df.rolling(window_length).max() - df.rolling(window_length).min())
    return dfn

def de_normalise(data, df, window_length=60):
    # invert the rolling min-max normalisation above
    dn = (df * (data.rolling(window_length).max() - data.rolling(window_length).min())) + data.rolling(window_length).min()
    return dn
# https://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]
##### ARIMA (ARIMA/ARIMAResults are already imported in the first cell)
### ARIMA preprocessing: log prices and their 60-tick differences
def arima_processing(df):
    df = df.dropna()
    df['Lvwap'] = np.log(df.vwap)
    df['Lmid'] = np.log(df.mid)
    df['LDvwap'] = df.Lvwap - df.Lvwap.shift(60)
    df['LDmid'] = df.Lmid - df.Lmid.shift(60)
    df = df.dropna()
    return df
### Models were saved from "/Dropbox/DataScience/ARIMA_model_saving.ipynb"; loaded here
### and written into the shared feature frame "df_ml"
def ARIMA_(data):
    data = data.dropna()
    predictions_mid = ARIMA_mid(data.LDmid)
    predictions_vwap = ARIMA_vwap(data.LDvwap)
    # undo the 60-tick log-difference transform
    vwap_arima = np.exp(predictions_vwap + data.Lvwap.shift(60))
    mid_arima = np.exp(predictions_mid + data.Lmid.shift(60))
    df_ml['arima'] = data.mid + vwap_arima - mid_arima

def ARIMA_mid(data):
    mid_arima_loaded = ARIMAResults.load('mid_arima.pkl')
    return mid_arima_loaded.predict()

def ARIMA_vwap(data):
    vwap_arima_loaded = ARIMAResults.load('vwap_arima.pkl')
    return vwap_arima_loaded.predict()
#### Kalman-filter moving average
# https://github.com/pykalman/pykalman
from pykalman import KalmanFilter

def kalman_ma(data):
    x = data.mid
    # construct a simple local-level Kalman filter
    kf = KalmanFilter(transition_matrices=[1],
                      observation_matrices=[1],
                      initial_state_mean=248,
                      initial_state_covariance=1,
                      observation_covariance=1,
                      transition_covariance=.01)
    # use the observed mid prices to get a smoothed rolling mean
    state_means, _ = kf.filter(x.values)
    df_ml['km'] = pd.Series(state_means.flatten(), index=x.index)
### Linear regression and SVR (sklearn and pickle are already imported in the first cell)
## models saved from /Dropbox/DataScience/REG_model_saving.ipynb
filename_rgr = 'rgr.sav'
filename_svr = 'svr.sav'
# load the fitted models from disk
loaded_rgr_model = pickle.load(open(filename_rgr, 'rb'))
loaded_svr_model = pickle.load(open(filename_svr, 'rb'))
def strat_lr(data, df):
    df = df.dropna()
    data = data.dropna()
    X = df[['askPrice', 'askSize', 'bidPrice', 'bidSize', 'vwap', 'spread', 'v', 'return', 'sigma']]
    df['predict_regr'] = loaded_rgr_model.predict(X)
    df['predict_svr'] = loaded_svr_model.predict(X)
    df_ml['REG'] = de_normalise(data.mid, df.predict_regr)
    df_ml['SVR'] = de_normalise(data.mid, df.predict_svr)
#### classification models saved from /Dropbox/DataScience/ML_20Sep
filename_svm_model_up = 'svm_model_up.sav'
filename_lm_model_up = 'lm_model_up.sav'
filename_svm_model_dn = 'svm_model_dn.sav'
filename_lm_model_dn = 'lm_model_dn.sav'
# load the models from disk
loaded_svm_up_model = pickle.load(open(filename_svm_model_up, 'rb'))
loaded_lm_up_model = pickle.load(open(filename_lm_model_up, 'rb'))
loaded_svm_dn_model = pickle.load(open(filename_svm_model_dn, 'rb'))
loaded_lm_dn_model = pickle.load(open(filename_lm_model_dn, 'rb'))

def classification_up_dn(data):
    X = data[['askPrice', 'askSize', 'bidPrice', 'bidSize', 'vwap', 'spread', 'v', 'return', 'sigma']]
    data['predict_svm_up'] = loaded_svm_up_model.predict(X)
    data['predict_lm_up'] = loaded_lm_up_model.predict(X)
    data['predict_svm_dn'] = loaded_svm_dn_model.predict(X)
    data['predict_lm_dn'] = loaded_lm_dn_model.predict(X)
    data['predict_svm'] = data.predict_svm_up + data.predict_svm_dn
    data['predict_lm'] = data.predict_lm_up + data.predict_lm_dn
    # +1 only when both models say up, -1 only when both say down
    data['UD'] = np.where(np.logical_and(data.predict_svm > 0, data.predict_lm > 0), 1,
                          np.where(np.logical_and(data.predict_svm < 0, data.predict_lm < 0), -1, 0))
    df_ml['UD'] = data.UD
### LSTM
def lstm_processing(df):
    df = df.dropna()
    df_price = df[['mid', 'vwap', 'arima', 'km', 'REG', 'SVR']]
    # normalise the price columns over a 12-tick window
    dfn = normalise(df_price, 12)
    dfn['UD'] = df.UD
    return dfn
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error
from keras.models import load_model
model = load_model('21sep.h5')
# convert an array of values into a dataset matrix of look_back windows
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - 1):
        # columns 1..6 (vwap, arima, km, REG, SVR, UD) are the inputs
        b = dataset[i:(i + look_back), 1]
        c = dataset[i:(i + look_back), 2]
        d = dataset[i:(i + look_back), 3]
        e = dataset[i:(i + look_back), 4]
        f = dataset[i:(i + look_back), 5]
        g = dataset[i:(i + look_back), 6]
        dataX.append(np.c_[b, c, d, e, f, g])
        # column 0 (mid) one step ahead is the target
        dataY.append(dataset[i + look_back, 0])
    return np.array(dataX), np.array(dataY)
def strat_LSTM(df_ml):
    # normalise and drop NaNs
    df_lstm = lstm_processing(df_ml).dropna()
    dataset = df_lstm.values.astype('float32')
    # reshape into X=t and Y=t+1
    look_back = 3
    X_, Y_ = create_dataset(dataset, look_back)
    # reshape input to [samples, time steps, features]
    X_ = np.reshape(X_, (X_.shape[0], X_.shape[1], X_.shape[2]))
    # make predictions
    predict = model.predict(X_)
    df_lstm = df_lstm.tail(len(predict))
    df_lstm['LSTM'] = predict
    # map the normalised prediction back to price space
    df_lstm['pred'] = de_normalise(df_ml.mid, df_lstm.LSTM, window_length=12)
    df_lstm = df_lstm.dropna()
    df_lstm = df_lstm.tail(len(df_ml))
    df_ml['LSTM'] = df_lstm.pred
'''
In [3]:
filename = '/home/octo/Dropbox'+ '/SPY7Dec.csv'
In [4]:
data=get_csv_pd(filename)
In [5]:
data=preprocessing(data)
In [6]:
df=data.dropna()
df=df[['mid','vwap','spread','v','mom','return','sigma','high','low',]]
In [7]:
# split into train and test sets (chronological 80/20 split; no shuffling)
train_size = int(len(df) * 0.80)
test_size = len(df) - train_size
train= df[0:train_size]
test= df[train_size:len(df)]
print(len(train), len(test))
In [8]:
train_X=train[['mid','vwap','spread','v','return','sigma','high','low',]]
train_y=train['mom']
test_X=test[['mid','vwap','spread','v','return','sigma','high','low',]]
test_y=test['mom']
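Note that sklearn's train_test_split (imported in the first cell) shuffles rows by default, which would leak future ticks into the training set; the manual slice above keeps time order. An equivalent sketch with shuffle=False (the *_2 names are only illustrative):
In [ ]:
train_X2, test_X2, train_y2, test_y2 = train_test_split(
    df[['mid','vwap','spread','v','return','sigma','high','low']], df['mom'],
    test_size=0.20, shuffle=False)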
In [9]:
train_X.head()
Out[9]:
In [10]:
test_y.head()
Out[10]:
In [11]:
from sklearn import linear_model
regr = linear_model.LinearRegression()
regr.fit(train_X, train_y)
predict = regr.predict(test_X)
dt = test[['mid']]
dt['predict'] = predict
# 'mom' was defined as (mid - mid.shift(12)) / v, so invert that to get a price forecast
dt['predict'] = dt.predict * test.v + test.mid.shift(12)
pdf = test
pdf['pREG'] = dt.predict
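Since mom = (mid - mid.shift(12)) / v, multiplying a predicted mom by v and adding mid.shift(12) maps it back to a price level. A quick sanity check of that inversion on the true target (ticks where the zero-denominator guard in preprocessing forced mom to 0 will not reconstruct exactly):
In [ ]:
reconstructed = test_y * test['v'] + test['mid'].shift(12)
print((reconstructed - test['mid']).abs().max())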
In [12]:
pdf.tail()
Out[12]:
In [13]:
from sklearn.svm import SVR
# fit an RBF-kernel support vector regression (kernel='linear' or 'poly' are alternatives)
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.9)
predict_svr = svr_rbf.fit(train_X, train_y).predict(test_X)
dt1 = test[['mid']]
dt1['predict'] = predict_svr
dt1['predict'] = dt1.predict * test.v + test.mid.shift(12)
pdf['pSVR'] = dt1.predict
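The RBF-kernel SVR is sensitive to feature scale, and MinMaxScaler is imported in the first cell but never used; rescaling the inputs is worth comparing. A sketch (predict_svr_s is an illustrative name):
In [ ]:
scaler = MinMaxScaler()
train_Xs = scaler.fit_transform(train_X)
test_Xs = scaler.transform(test_X)
predict_svr_s = SVR(kernel='rbf', C=1e3, gamma=0.9).fit(train_Xs, train_y).predict(test_Xs)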
In [14]:
pdf.dropna().head()
Out[14]:
In [15]:
pdf[['mid','high','low','pREG','pSVR']].tail(300).plot(figsize=(15,9))
plt.show()
In [16]:
# look at the results (plt.hold was removed from matplotlib; axes accumulate artists by default)
plt.scatter(pdf['mid'], test_y, c='k', label='data')
plt.plot(pdf['pREG'], test_y, c='g', label='pREG')
plt.xlabel('data')
plt.ylabel('target')
plt.title('Linear regression predictions vs. target')
plt.legend()
plt.show()
In [17]:
# look at the results
plt.scatter(pdf['mid'], pdf['pREG'], c='k', label='pREG')
plt.plot(pdf['mid'], pdf['pSVR'], c='g', label='pSVR')
plt.plot(pdf['mid'], pdf['high'], c='b', label='high')
plt.xlabel('mid')
plt.ylabel('prediction')
plt.title('Regression and SVR predictions vs. mid')
plt.legend()
plt.show()
https://www.quantopian.com/posts/some-code-from-ernie-chans-new-book-implemented-in-python
http://auquan.com/cointegration-stationarity/
https://www.analyticsvidhya.com/blog/2016/02/time-series-forecasting-codes-python/
http://machinelearningmastery.com/time-series-forecast-case-study-python-monthly-armed-robberies-boston/
https://www.quantstart.com/articles/Basics-of-Statistical-Mean-Reversion-Testing-Part-II
https://datascience.ibm.com/exchange/public/entry/view/815137c868b916821dec777bdc23013c
http://machinelearningmastery.com/time-series-data-stationary-python/
In [18]:
X=df[['mid','vwap','spread','v','return','sigma','high','low',]]
y=df['mom']
In [19]:
len(df)
Out[19]:
In [20]:
from sklearn import linear_model
regr = linear_model.LinearRegression()
# df was already dropna'd above; fitting X,y directly avoids misaligned separate dropna() calls
regr.fit(X, y)
predict = regr.predict(X)
dt = df[['mid']]
dt['predict'] = predict
# invert the 'mom' definition to recover a price-level forecast (in-sample)
dt['predict'] = dt.predict * df.v + df.mid.shift(12)
classify_df = df
classify_df['pREG'] = dt.predict
from sklearn.svm import SVR
# fit the same RBF-kernel SVR, also in-sample
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.9)
predict_svr = svr_rbf.fit(X, y).predict(X)
dt1 = df[['mid']]
dt1['predict'] = predict_svr
dt1['predict'] = dt1.predict * df.v + df.mid.shift(12)
classify_df['pSVR'] = dt1.predict
In [21]:
def classification(df):
    # mid1: midpoint of the rolling high/low band
    mid1 = (df.high + df.low) / 2
    # +1 when mid is above both regression forecasts, -1 when below both
    flagUD = np.where(np.logical_and(df.mid > df.pREG, df.mid > df.pSVR), 1,
                      np.where(np.logical_and(df.mid < df.pREG, df.mid < df.pSVR), -1, 0))
    # require agreement with the high/low band midpoint as well
    UD = np.where(np.logical_and(df.mid > mid1, flagUD == 1), 1,
                  np.where(np.logical_and(df.mid < mid1, flagUD == -1), -1, 0))
    df['U'] = np.where(UD == 1, 1, 0)
    df['D'] = np.where(UD == -1, -1, 0)
    df['UD'] = df.U + df.D
    return df
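A tiny synthetic check of the labelling rule (quote values are made up): row 0 has mid above both forecasts and above the high/low midpoint, row 1 is below all three.
In [ ]:
toy = pd.DataFrame({'mid': [10.2, 9.8], 'pREG': [10.0, 10.0], 'pSVR': [10.1, 10.0],
                    'high': [10.3, 10.3], 'low': [9.9, 9.9]})
print(classification(toy)[['U', 'D', 'UD']])  # expect U/D/UD = 1/0/1 then 0/-1/-1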
In [22]:
data_class=classification(classify_df)
data_class=data_class.dropna()
df=df.dropna()
# both df and data_class have U,D,UD
In [23]:
data_class.head()
Out[23]:
In [24]:
df.head()
Out[24]:
In [25]:
# split into train and test sets (chronological 80/20 split)
train_size = int(len(data_class) * 0.80)
test_size = len(data_class) - train_size
train= data_class[0:train_size]
test= data_class[train_size:len(data_class)]
print(len(train), len(test))
In [26]:
train_X=train[['mid','vwap','spread','v','return','sigma','high','low','mom','pREG','pSVR']]
train_y=train['UD']
test_X=test[['mid','vwap','spread','v','return','sigma','high','low','mom','pREG','pSVR']]
test_y=test['UD']
train_U=train['U']
test_U=test['U']
train_D=train['D']
test_D=test['D']
In [27]:
print(len(train_U), len(test_U))
In [28]:
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
In [29]:
model = LogisticRegression()
model.fit(train_X,train_U)
print(model)
In [30]:
# make predictions
expected =test_U
predicted = model.predict(test_X)
In [31]:
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
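U fires only when several conditions line up, so the classes are likely imbalanced; the accuracy figures here should be read against a majority-class baseline (sketch):
In [ ]:
# the share of the majority class equals the accuracy of always predicting it
print(test_U.value_counts(normalize=True))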
In [32]:
model = LogisticRegression()
model.fit(train_X,train_D)
print(model)
In [33]:
# make predictions
expected =test_D
predicted = model.predict(test_X)
In [34]:
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
In [35]:
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
In [36]:
# fit a CART model to the data
model = DecisionTreeClassifier()
model.fit(train_X,train_U)
print(model)
In [37]:
# make predictions
expected =test_U
predicted = model.predict(test_X)
In [38]:
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
In [39]:
accuracy =model.score(test_X,test_U)
accuracy
Out[39]:
In [40]:
# fit a CART model to the data
model = DecisionTreeClassifier()
model.fit(train_X,train_D)
print(model)
In [41]:
# make predictions
expected =test_D
predicted = model.predict(test_X)
In [42]:
# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
In [43]:
accuracy =model.score(test_X,test_D)
accuracy
Out[43]:
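A single chronological split can be lucky or unlucky; sklearn's TimeSeriesSplit gives several forward-chaining folds as a cheap robustness check. A sketch reusing the CART model on the U label:
In [ ]:
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
tscv = TimeSeriesSplit(n_splits=5)
print(cross_val_score(DecisionTreeClassifier(), data_class[train_X.columns],
                      data_class['U'], cv=tscv))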
A random forest can capture nonlinearities in the data that a linear regression cannot. Note that the next cell fits RandomForestRegressor to the binary U label, so its score() below is R², not classification accuracy; the RandomForestClassifier that follows reports true accuracy.
In [44]:
# Import the random forest model.
from sklearn.ensemble import RandomForestRegressor
In [45]:
# Initialize the model with some parameters.
model = RandomForestRegressor(n_estimators=100, min_samples_leaf=10, random_state=1)
# Fit the model to the data.
model.fit(train_X,train_U)
print(model)
In [46]:
# Make predictions.
expected=test_U
predicted = model.predict(test_X)
In [47]:
accuracy = model.score(test_X, test_U)  # R² for a regressor, not classification accuracy
accuracy
Out[47]:
In [48]:
from sklearn.ensemble import RandomForestClassifier
In [49]:
clf = RandomForestClassifier(n_estimators=100, n_jobs=-1)
clf.fit(train_X,train_U)
accuracy = clf.score(test_X,test_U)
print(accuracy)
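A side benefit of the fitted forest is its feature importances, which hint at which inputs drive the up-tick prediction (sketch):
In [ ]:
importances = pd.Series(clf.feature_importances_, index=train_X.columns)
print(importances.sort_values(ascending=False))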
In [50]:
from sklearn import neighbors
In [51]:
clf = neighbors.KNeighborsClassifier()
clf.fit(train_X,train_U)
Out[51]:
In [52]:
accuracy = clf.score(test_X,test_U)
print(accuracy)
In [53]:
from sklearn.ensemble import AdaBoostClassifier
In [54]:
clf = AdaBoostClassifier()
clf.fit(train_X,train_U)
accuracy = clf.score(test_X,test_U)
print(accuracy)
In [55]:
from sklearn.ensemble import GradientBoostingClassifier
In [56]:
clf = GradientBoostingClassifier(n_estimators=100)
clf.fit(train_X,train_U)
accuracy = clf.score(test_X,test_U)
print(accuracy)
In [57]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA  # sklearn.qda was removed
In [58]:
clf = QDA()
clf.fit(train_X,train_U)
accuracy = clf.score(test_X,test_U)
print(accuracy)
SVMs (support vector machines) are among the most popular machine-learning algorithms, used mainly for classification. Like logistic regression, they extend to multi-class problems; scikit-learn's SVC handles this internally (one-vs-one), while LinearSVC and LogisticRegression use one-vs-rest.
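Since SVC handles multi-class targets internally, the three-way UD label could also be fit with a single model instead of separate up/down classifiers; a sketch (clf_ud is an illustrative name):
In [ ]:
from sklearn.svm import SVC
clf_ud = SVC()
clf_ud.fit(train_X, train_y)  # train_y holds the -1/0/+1 UD label
print(clf_ud.score(test_X, test_y))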
In [59]:
from sklearn import metrics
from sklearn.svm import SVC
In [60]:
# fit a SVM model to the data
model = SVC()
model.fit(train_X,train_U)
print(model)
In [61]:
# Make predictions.
expected=test_U
predicted = model.predict(test_X)
In [62]:
accuracy =model.score(test_X,test_U)
accuracy
Out[62]:
In [63]:
# fit a SVM model to the data
model = SVC()
model.fit(train_X,train_D)
print(model)
In [64]:
# Make predictions.
expected=test_D
predicted = model.predict(test_X)
In [65]:
accuracy =model.score(test_X,test_D)
accuracy
Out[65]:
In [ ]:
# sketch: persist a fitted model; `savemodel` and `fout` are assumed to be defined upstream
import pickle
from datetime import datetime
if savemodel == True:
    fname_out = '{}-{}.pickle'.format(fout, datetime.now())
    with open(fname_out, 'wb') as f:
        pickle.dump(clf, f, -1)  # pickle replaces Python 2's cPickle
In [66]:
# plotting
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
%matplotlib inline
#plt.rcParams['figure.figsize'] = 8,6
In [67]:
test.boxplot(column='v')
Out[67]:
In [68]:
test.boxplot(by='v')
plt.ylim(245,248)
Out[68]:
In [69]:
test.boxplot(by='UD')
Out[69]:
In [70]:
#some descriptive statistics
test.describe()
Out[70]:
In [71]:
test['v'].plot(kind='hist', grid=True, title='velocity')
Out[71]:
In [72]:
test['UD'].plot(kind='hist', grid=True, title='up-down')
Out[72]:
In [73]:
test['v'].plot(kind='line', grid=True, title='velocity')
Out[73]:
In [74]:
test['UD'].plot(kind='line', grid=True, title='up-down')
Out[74]:
In [75]:
# 12, 60 and 360 tick moving averages (roughly 1, 5 and 30 minute rolling windows)
spy_12 = test.rolling(window=12).mean()
spy_60 = test.rolling(window=60).mean()
spy_360 = test.rolling(window=360).mean()
fig = plt.figure()
fig.autofmt_xdate()
ax = fig.add_subplot(1,1,1)
ax.plot(test.index,test, label='SPY')
ax.plot(spy_12.index, spy_12, label='1 min rolling')
ax.plot(spy_60.index, spy_60, label='5 min rolling')
ax.plot(spy_360.index,spy_360, label='30 min rolling')
ax.grid()
ax.legend(loc=2)
ax.set_xlabel('Date')
plt.title('SPY Closes & Rolling Averages')
plt.show()
In [76]:
#frequency
round(test['mom']).value_counts()
Out[76]:
In [77]:
round(test['vwap'],1).hist(bins=50)
Out[77]:
In [78]:
test.boxplot(column='mid')
Out[78]:
In [79]:
# signal frame for the buy/sell bands (note: this aliases df rather than copying it)
signal = df
time = signal.index.strftime('%H:%M:%S')
In [80]:
# typical price P and 60-tick upper/lower bands UT/DT
P = (signal.high + signal.low + signal.mid) / 3
signal['UT'] = P + signal.high.rolling(60).max() - signal.low.rolling(60).max()
signal['DT'] = P - signal.high.rolling(60).min() + signal.low.rolling(60).min()
# B (buy) at or below the lower band, S (sell) at or above the upper band, H (hold) otherwise
signal['BS'] = np.where(signal.mid <= signal.DT, "B", np.where(signal.mid >= signal.UT, "S", "H"))
signal = signal.dropna()
In [81]:
signal.head()
Out[81]:
In [82]:
df[['UT','DT','mid','high','low','pREG','pSVR']].tail(100).plot(figsize=(16, 10))
plt.show()
In [83]:
signal.boxplot(column='mid',by ='BS')
Out[83]:
In [84]:
temp1 = round(signal['UD']).value_counts(ascending=True)
# mean of the numeric UD label within each B/S/H bucket
# (the original string-keyed map returned NaN for the numeric UD values)
temp2 = signal.pivot_table(values='UD', index=['BS'], aggfunc='mean')
In [85]:
print('Frequency table for UD:')
print(temp1)
print('\nMean UD by B/S/H signal:')
print(temp2.tail())
In [86]:
temp3 = pd.crosstab(round(signal['UD']),signal['BS'])
temp3.plot(kind='bar', stacked=True, color=['red', 'gray', 'blue'], grid=False)  # one colour per B/H/S column
Out[86]:
In [87]:
# count missing values per column (isnull() marks null entries as True)
signal.apply(lambda x: sum(x.isnull()),axis=0)
Out[87]:
In [88]:
signal['BS'].value_counts()
Out[88]:
In [89]:
signal['UD'].value_counts()
Out[89]:
In [90]:
table = signal.pivot_table(values='v', index='BS' ,columns='UD', aggfunc=np.median)
print(table)
In [91]:
#Boolean indexing
signal.loc[(signal['v']<0) & (signal["BS"]=="B") & (signal["DT"]>signal["mid"]), ['mid',"spread","BS","DT"]].head()
Out[91]:
In [101]:
train_X.head()
Out[101]:
In [93]:
# Create first network with Keras
from keras.models import Sequential
from keras.layers import Dense
import numpy
In [102]:
# create the model (11 inputs match the 11 feature columns in train_X)
model = Sequential()
model.add(Dense(12, input_dim=11, kernel_initializer='uniform', activation='relu'))
model.add(Dense(8, kernel_initializer='uniform', activation='relu'))
model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
# compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# fit the model (epochs/kernel_initializer replace the old nb_epoch/init arguments)
model.fit(train_X, train_U, epochs=11, batch_size=10)
# evaluate the model
scores = model.evaluate(test_X, test_U)
print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
In [ ]: