In [65]:
import category_encoders as ce
import numpy as np
import pandas as pd
from sklearn.linear_model import PassiveAggressiveClassifier,PassiveAggressiveRegressor
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingRegressor, AdaBoostClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
In [66]:
def positive_negative(v):
    """Binarise a price change: 1 when ``float(v)`` is >= 0, otherwise 0.

    ``v`` may be anything ``float()`` accepts (number or numeric string).
    NaN compares false against 0 and therefore maps to 0.
    """
    is_non_negative = float(v) >= 0
    return 1 if is_non_negative else 0
In [103]:
# Training data is parsed once. The regression frame keeps the raw 'Pchange'
# values; the classification frame is a copy whose 'Pchange' column is
# binarised with positive_negative (1 for >= 0, else 0).
TRAINING_CSV = 'GapUp8_jones_Training.csv'
df_data_reg = pd.read_csv(TRAINING_CSV, header=0, encoding='ascii', engine='python')
# .assign returns a new frame, so df_data_reg is left untouched.
df_data = df_data_reg.assign(Pchange=df_data_reg['Pchange'].map(positive_negative))
In [104]:
# Quick look at both frames. Only the last bare expression of a cell is
# displayed, so the first length has to be printed explicitly or it is lost.
print(df_data.head())
print(len(df_data))
print(df_data_reg.head())
len(df_data_reg)
Out[104]:
In [105]:
# Classification design matrix: every column except the binarised target.
# `columns=` is used because pandas 2.0 no longer accepts `axis` positionally
# (the old `drop(['Pchange'], 1)` form raises TypeError there).
X = np.array(df_data.drop(columns=['Pchange']))
y = np.array(df_data['Pchange'])
# Fixed seed so the hold-out score is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
In [106]:
import numpy as np
import matplotlib.pyplot as plt

# ExtraTrees, step 1: fit on the training split and report hold-out accuracy.
clf = ExtraTreesClassifier(n_estimators=250, random_state=0)
clf.fit(X_train, y_train)
print('classification score: %s' % (clf.score(X_test, y_test)))

# ExtraTrees, step 2: refit on every row; this is the model that gets pickled
# and whose feature importances are plotted.
clfExtra = ExtraTreesClassifier(n_estimators=250, random_state=0)
clfExtra.fit(X, y)
importances = clfExtra.feature_importances_
# Spread of each feature's importance across the individual trees (error bars).
std = np.array([tree.feature_importances_ for tree in clfExtra.estimators_]).std(axis=0)
# Feature positions ordered from least to most important.
indices = importances.argsort()
In [107]:
# Horizontal bar chart of the forest's feature importances, sorted ascending,
# with the per-tree standard deviation as error bars.
n_features = X.shape[1]
bar_positions = range(n_features)
plt.figure()
plt.title("Feature importances")
plt.barh(bar_positions, importances[indices], color="r", xerr=std[indices], align="center")
# Tick labels are the column positions in X; swap `indices` for a list of
# names on the next line to show custom labels instead.
plt.yticks(bar_positions, indices)
plt.ylim([-1, n_features])
plt.show()
# Column order per training set:
#FRD: Mkt_Cap Float Gap0 pC_1 pC_2 pC_3 V_P V1_P V2_P V3_P
#Gap7: Mkt_Cap Float Gap0 Gap1 pC_1 V1_P V_P
#Gap8: Mkt_Cap Float JonesP Gap0 Gap1 pC_1 V1_P V_P
#GapUpDown: Mcap Float Gap0 Gap1 Gap2 Pchange1 Pchange2 Pchange3 V_1_Float V_2_Float V_3_Float V_Float Pchange
In [70]:
# Horizontal bar chart of the forest's feature importances, sorted ascending,
# with the per-tree standard deviation as error bars.
# NOTE(review): this cell repeats the plotting code of the cell before it and
# carries an older execution count; it looks like a leftover from an earlier
# run and is a candidate for removal.
feature_count = X.shape[1]
tick_positions = range(feature_count)
plt.figure()
plt.title("Feature importances")
plt.barh(tick_positions, importances[indices], color="r", xerr=std[indices], align="center")
# Tick labels are the column positions in X; swap `indices` for a list of
# names on the next line to show custom labels instead.
plt.yticks(tick_positions, indices)
plt.ylim([-1, feature_count])
plt.show()
# Column order per training set:
#FRD: Mkt_Cap Float Gap0 pC_1 pC_2 pC_3 V_P V1_P V2_P V3_P
#Gap: Mkt_Cap Float Gap0 pC_1 V_P V1_P
#GapUpDown: Mcap Float Gap0 Gap1 Gap2 Pchange1 Pchange2 Pchange3 V_1_Float V_2_Float V_3_Float V_Float Pchange
In [84]:
# Gradient boosting classifier: score on the hold-out split, then refit the
# same configuration on all rows (clfP is the model persisted later).
gb_params = {'learning_rate': 0.1, 'n_estimators': 250, 'max_depth': 11}
clf = GradientBoostingClassifier(**gb_params).fit(X_train, y_train)
print('classification score: %s' % (clf.score(X_test, y_test)))
clfP = GradientBoostingClassifier(**gb_params).fit(X, y)
In [85]:
# Regression prep: same features, but the target is the raw (un-binarised)
# 'Pchange' from df_data_reg.
# NOTE: this rebinds X, y and the train/test split, so any classification cell
# re-run after this one will silently see the regression target.
# `columns=` replaces the positional axis argument that pandas 2.0 removed.
X = np.array(df_data_reg.drop(columns=['Pchange']))
y = np.array(df_data_reg['Pchange'])
# Fixed seed so the reported score is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Gradient boosting regressor: score() here is R^2 on the hold-out split.
clf = GradientBoostingRegressor(learning_rate=0.1, n_estimators=250, max_depth=11).fit(X_train, y_train)
print('regression score: %s' % (clf.score(X_test, y_test)))
# Refit on every row; this is the regressor that gets pickled.
clfP_reg = GradientBoostingRegressor(learning_rate=0.1, n_estimators=250, max_depth=11).fit(X, y)
In [86]:
# Persist the three full-data models so a fresh kernel can predict without
# retraining.
import pickle as pk

for pickle_path, fitted_model in (
    ('GAP7_GB_Classifier.p', clfP),
    ('GAP7_ET_Classifier.p', clfExtra),
    ('GAP7_GB_Regressor.p', clfP_reg),
):
    with open(pickle_path, 'wb') as f:
        pk.dump(fitted_model, f)
In [1]:
# Restore the persisted predictors (lets the prediction cells run in a fresh
# kernel without retraining).
# SECURITY: pickle.load executes arbitrary code embedded in the file; only
# load pickles produced by this notebook on a trusted machine.
import pickle as pk

# Context managers close each file handle; the previous pk.load(open(...))
# form left them open until garbage collection.
with open('GAP7_GB_Classifier.p', 'rb') as f:
    clfP = pk.load(f)
with open('GAP7_ET_Classifier.p', 'rb') as f:
    clfExtra = pk.load(f)
with open('GAP7_GB_Regressor.p', 'rb') as f:
    clfP_reg = pk.load(f)
In [27]:
import datetime
import time
import requests
import json
import pandas as pd
import numpy as np
from datetime import date, timedelta
def prev_weekday(adate):
    '''Return the closest Monday-Friday date strictly before ``adate``.

    Steps back one day, then keeps stepping back while the result lands on a
    Saturday or Sunday. Only the day of the week is checked.
    '''
    one_day = timedelta(days=1)
    candidate = adate - one_day
    while candidate.weekday() >= 5:  # 5 = Saturday, 6 = Sunday
        candidate = candidate - one_day
    return candidate
def _fetch_yahoo_quote(symbol, days, days_ago, interval, mfi_period):
    '''Download one Yahoo Finance chart payload.

    Returns a ``(timestamps, quote)`` tuple taken from the first chart result,
    or None when the response is not HTTP 200 or does not contain the expected
    fields. Raises SystemExit if the HTTP request itself fails.
    '''
    now_unix = int(time.time())
    # The window is [now - (days + days_ago), now - days_ago], in seconds.
    url = ('https://query1.finance.yahoo.com/v8/finance/chart/' + symbol
           + '?period1=' + str(now_unix - 86400 * (days + days_ago))
           + '&period2=' + str(now_unix - 86400 * days_ago)
           + '&interval=' + interval
           + '&indicators=quote%7Csma~60%7Cmfi~' + str(mfi_period)
           + '&includePrePost=true&events=div%7Csplit%7Cearn'
           # '&region' had been mangled into the (R) sign, which corrupted the
           # 'lang' value and dropped the 'region' parameter altogether.
           + '&lang=en-CA&region=CA&corsDomain=ca.finance.yahoo.com')
    hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36',
           'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
           'Connection': 'keep-alive'}
    try:
        # Without a timeout a stalled connection would block the cell forever.
        resp = requests.get(url, headers=hdr, timeout=30)
    except requests.exceptions.RequestException as e:
        raise SystemExit(e)
    if resp.status_code != 200:
        # Something went wrong; report the status and give up on this symbol
        # instead of trying to unpack an error payload.
        print(resp.status_code)
        return None
    try:
        result = json.loads(resp.text)['chart']['result'][0]
        return result['timestamp'], result['indicators']['quote'][0]
    except (ValueError, KeyError, IndexError, TypeError):
        print('No usable chart data returned for {}'.format(symbol))
        return None


def _quote_frame(quote, index_labels, extra_columns=None):
    '''Build the quote DataFrame, drop NaN/infinite rows; None if nothing is left.'''
    stock_df = pd.DataFrame(quote)
    stock_df.index = index_labels
    for column_name, values in (extra_columns or {}).items():
        stock_df[column_name] = values
    stock_df = stock_df.dropna()
    # Reject rows holding +inf or -inf in any column.
    stock_df = stock_df[~stock_df.isin([np.inf, -np.inf]).any(axis=1)]
    if len(stock_df) != 0:
        return stock_df
    return None


def symbol_downloader(symbol, days=10, days_ago=0):
    '''Return daily quote data for ``symbol`` from Yahoo Finance.

    symbol   -- ticker string appended to the chart URL
    days     -- length of the requested window, in days
    days_ago -- how many days before now the window ends

    Returns a DataFrame of the quote fields indexed by 'YYYYMMDD' strings
    (local time), with NaN/infinite rows removed, or None when no usable data
    came back. Raises SystemExit if the HTTP request fails.
    '''
    payload = _fetch_yahoo_quote(symbol, days, days_ago, '1d', 14)
    if payload is None:
        return None
    timestamps, quote = payload
    day_labels = [datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d') for ts in timestamps]
    return _quote_frame(quote, day_labels)


def symbol_downloader_intraday(symbol, days=5, days_ago=0):
    '''Return 5-minute intraday quote data for ``symbol`` from Yahoo Finance.

    Same parameters and failure behaviour as symbol_downloader. The frame is
    indexed by 'YYYYMMDD' strings (so one label repeats for every bar of that
    day) and carries two extra string columns, 'hh' and 'mm', holding the
    zero-padded local hour and minute of each bar.
    '''
    payload = _fetch_yahoo_quote(symbol, days, days_ago, '5m', 8)
    if payload is None:
        return None
    timestamps, quote = payload
    # Convert each epoch once and reuse it for the date, hour and minute labels.
    moments = [datetime.datetime.fromtimestamp(ts) for ts in timestamps]
    day_labels = [m.strftime('%Y%m%d') for m in moments]
    clock_columns = {
        'hh': [m.strftime('%H') for m in moments],
        'mm': [m.strftime('%M') for m in moments],
    }
    return _quote_frame(quote, day_labels, clock_columns)
def txt_to_float(amount):
    '''
    Turn a number written as text, optionally ending in a magnitude suffix,
    into a float.

    Recognised suffixes (case-insensitive): K = 1e3, M = 1e6, B = 1e9, T = 1e12.
    Text without a suffix is parsed as a plain number.

    Returns 0 when ``amount`` is not a string or cannot be parsed
    (for example '' or '-').
    '''
    multipliers = {'T': 1e12, 'B': 1e9, 'M': 1e6, 'K': 1e3}
    try:
        text = amount.strip()
        suffix = text[-1].upper()
        if suffix in multipliers:
            return float(text[:-1]) * multipliers[suffix]
        # No suffix: parse the whole string. The last character must not be
        # stripped here, otherwise '123' would come back as 12.0.
        return float(text)
    except (AttributeError, IndexError, TypeError, ValueError):
        # Best-effort contract kept from the original: unparseable input is 0.
        return 0
In [54]:
# Smoke test: show the ten most recent intraday rows for one ticker.
boxl_intraday = symbol_downloader_intraday('BOXL')
print(boxl_intraday.tail(10))
In [63]:
import finviz
import gc
gc.collect()

#date variables ('YYYYMMDD' strings, matching the downloader frame index)
# prev_weekday only skips Saturdays and Sundays, so market holidays are not
# accounted for here.
today = date.today()
dateTday = today.strftime('%Y%m%d')
dateYday = prev_weekday(today).strftime('%Y%m%d')
date2day = prev_weekday(prev_weekday(today)).strftime('%Y%m%d')

#stock variables
symbol_list = ['BOXL']

for s in symbol_list:
    #stock dataframes
    symbol_df = symbol_downloader(s)
    intraday_symbol_df = symbol_downloader_intraday(s)
    # Both downloaders return None when no usable rows came back.
    if symbol_df is None or intraday_symbol_df is None:
        print('No price data for {}; skipping'.format(s))
        continue
    tday_df = intraday_symbol_df.loc[dateTday]
    yday_df = intraday_symbol_df.loc[dateYday]

    #daily data
    vol_yday = symbol_df.loc[dateYday, 'volume'] / 1000000
    yday_close = symbol_df.loc[dateYday, 'close']
    yday_open = symbol_df.loc[dateYday, 'open']
    pChange_yday = (yday_close - yday_open) / yday_open

    #Pre market volume today (all bars whose local hour is 07, 08 or 09)
    Pre_Mkt_Vol = tday_df.loc[tday_df['hh'].isin(['07', '08', '09']), 'volume'].sum()
    #get latest pre mkt price today after 0900 (last bar between 09:00 and 09:30)
    tday_open = list(tday_df.loc[(tday_df['hh'].isin(['09']) & tday_df['mm'].isin(['00', '05', '10', '15', '20', '25', '30'])), 'open'])[-1]

    # Fundamentals are fetched once per ticker. The lookup must use the loop
    # variable `s`; the previous code referenced an undefined `symbol`.
    fundamentals = finviz.get_stock(s)
    #current market cap (millions)
    MarketCap = txt_to_float(fundamentals['Market Cap']) / 1000000
    #Current float (millions)
    Float = txt_to_float(fundamentals['Shs Float']) / 1000000

    #yesterday's Gapup
    gapUp1 = (symbol_df.loc[dateYday, 'open'] - symbol_df.loc[date2day, 'close']) / symbol_df.loc[date2day, 'close']
    #price increase yesterday - open to close
    pC_1 = pChange_yday
    #volume yesterday as a % of Float
    V1_P = vol_yday / Float
    #gap up from close yesterday
    gapUp = (tday_open - yday_close) / yday_close
    #volume expected today as a % of Float
    # NOTE(review): the x10 looks like a pre-market -> full-day volume
    # extrapolation factor; confirm where it comes from.
    V_P = Pre_Mkt_Vol * 10 / 1000000 / Float
    #gapUp = (symbol_df.loc[dateTday,'open']-symbol_df.loc[dateYday,'close'])/symbol_df.loc[dateYday,'close'] #for use intraday
    #V_P = symbol_df.loc[dateTday,'volume']/1000000/Float #for after market use

    print ('Mkt Cap of {} is: {}'.format(s, MarketCap))
    print ('Float of {} is: {}'.format(s, Float))
    print ('Gap up today of {} is: {}'.format(s, gapUp))
    print ('Gap up yesterday of {} is: {}'.format(s, gapUp1))
    print ('Price change yesterday of {} is: {}'.format(s, pC_1))
    print ('V yesterday of {} is: {}'.format(s, V1_P))
    print ('V today prediction of {} is: {}'.format(s, V_P))

    predict_list = [MarketCap, Float, gapUp, gapUp1, pC_1, V1_P, V_P]
    #Gap7: Mkt_Cap Float Gap0 Gap1 pC_1 V1_P V_P
    #Float(M), MktCap(M), Yday_v_float, Tday_v_float
    # NOTE(review): this row has 7 features, while the training cell reads
    # 'GapUp8_jones_Training.csv'; confirm the pickled models were fitted on
    # the 7-column Gap7 layout, otherwise predict() will reject the row.
    feature_rows = [predict_list]
    print('Gradiant Boosting classification: {}, probabilities: {}'.format(clfP.predict(feature_rows), clfP.predict_proba(feature_rows)))
    print('Extra Tree classification: {}, probabilities: {}'.format(clfExtra.predict(feature_rows), clfExtra.predict_proba(feature_rows)))
    print('Gradiant Boosting regression: {}'.format(clfP_reg.predict(feature_rows)))
    print('-----------------------------------------------------------------------------')
In [6]:
'''
Backlog:
-Enable automatic logging of predictions with a timestamp
-Enable automatic scraping of up-to-date training data
-automate morning screener: https://www.chartmill.com/chartmill-rest/screener/?sort=((preMarketLast/close)%20-%201)%20*%20100&sorting=DESC&conditions=freeFloat%3C50000000,shares*close%3C300000000,shares*close%3E50000000,smaVolume50%20%3E%20200000,etf=0,(preMarketVolume%3E100000)&exchanges=125,119,3&start=0
'''
Out[6]:
In [ ]: