In [1]:
from yahoo_finance import Share
from matplotlib import pyplot as plt
import numpy as np
import get_historical as gh
from datetime import date as dt
from datetime import timedelta
import normalize as scale
import trading_day as td
In [2]:
def get_dates(num_days):
    """Return the ISO-formatted bounds of the look-back window.

    The window starts ``num_days`` days before today and ends yesterday.

    Parameters
    ----------
    num_days : int
        How many days before today the window should start.

    Returns
    -------
    tuple of (str, str)
        ``(start, end)`` as ``YYYY-MM-DD`` strings.
    """
    now = dt.today()
    window_start = (now - timedelta(days=num_days)).isoformat()
    window_end = (now - timedelta(days=1)).isoformat()
    return window_start, window_end
# Ask which stock to analyse and how much history to download.
ticker = raw_input("Enter company ticker: ")
# Use raw_input here, not input(): under Python 2, input() evaluates whatever
# the user types as a Python expression, which is unsafe and unnecessary for
# reading a plain integer.
num_days = int(raw_input("Enter number of days: "))
company = Share(ticker)
day1, day2 = get_dates(num_days)
historical = company.get_historical(day1, day2)
# Later cells address the most recent days with negative indices
# (historical[-index]), so the download is reversed in place here.
# NOTE(review): presumably the API returns newest-first - confirm.
historical.reverse()
# Number of records actually returned (not the same as num_days).
print(len(historical))
Get training data
In [3]:
# Raw opening prices come first because the scaler is built from them; that
# single scaler is then handed to every helper below.
opening = gh.get_unscaled_opening(historical)
scaler = scale.get_scaler(opening)
# Each helper is unpacked into two values; judging by the names these are the
# unscaled and scaled versions of the series - TODO confirm in get_historical.
# The first value of the opening call goes to the throwaway name `_opening`
# because `opening` is already bound above.
_opening, scaled_opening = gh.get_historical_opening(historical, scaler)
closing, scaled_closing = gh.get_historical_closing(historical, scaler)
high, scaled_high = gh.get_historical_high(historical, scaler)
low, scaled_low = gh.get_historical_low(historical, scaler)
change, scaled_change = gh.get_change(historical, scaler)
# The volume helper also receives the Share object and yields four values.
volume, avg_vol, scaled_volume, scaled_avg_vol = gh.get_historical_volume(historical, company, scaler)
# Only the first entry of avg_vol is kept.
# NOTE(review): presumably every entry holds the same average - confirm.
average_volume = avg_vol[0]
In [ ]:
In [4]:
from sklearn import svm
In [5]:
# Back-test horizon: the functions below iterate range(bt) and address
# historical[-bt] .. historical[-1], i.e. the last `bt` records.
bt = 10
def predictions(clf):
    """Walk-forward back-test of ``clf`` over the last ``bt`` records.

    For each of the last ``bt`` entries of ``historical`` (oldest first) the
    estimator is re-fitted on a sliding slice of the scaled training data and
    asked to predict from that day's scaled features; the result is mapped
    back through ``scaler.inverse_transform``.

    Relies on the notebook globals ``bt``, ``historical``, ``scaler``,
    ``scaled_training`` and ``scaled_target``.

    Parameters
    ----------
    clf : estimator
        Object exposing ``fit(X, y)`` (returning something with ``predict``).

    Returns
    -------
    predict : list
        One inverse-transformed prediction per back-tested day.
    full : list of list
        One ``[prediction[0], date, open, adjusted close]`` row per day.
    """
    predict, full = [], []
    for offset in range(bt):
        # Distance of the day being predicted from the end of the history.
        days_back = bt - offset
        day = historical[-days_back]
        features = get_back_trading_day(historical, days_back, scaler)

        # Sliding window: its start advances with `offset`, its end stops
        # `days_back` rows before the end of the data.
        train_x = scaled_training[offset:len(scaled_training) - days_back]
        train_y = scaled_target[offset:len(scaled_target) - days_back]
        fitted = clf.fit(train_x, train_y)
        pre = scaler.inverse_transform(fitted.predict(features))

        predict.append(pre)
        full.append([
            pre[0],
            day['Date'],
            float(day["Open"]),
            float(day["Adj_Close"]),
        ])
    return predict, full
def get_back_trading_day(historical, index, scaler):
    """Build the scaled (open, high, low) feature vector for one past day.

    Parameters
    ----------
    historical : sequence of mappings
        Per-day records whose "Open", "High" and "Low" values can be
        converted with ``float``.
    index : int
        Offset from the end of ``historical``; the record used is
        ``historical[-index]`` (so 1 selects the last record).
    scaler : object
        Passed through unchanged to ``scale.scale``.

    Returns
    -------
    Whatever ``scale.scale`` returns for ``np.array((open, high, low))``.
    """
    day = historical[-index]
    # The original also computed an unused high-low spread; it never reached
    # the feature vector, so it has been dropped.
    features = np.array((
        float(day["Open"]),
        float(day['High']),
        float(day['Low']),
    ))
    return scale.scale(features, scaler)
def plot(predict, clf, company_label="The Coca Cola Co. (KO)"):
    """Plot actual close, predicted close and open for the back-tested days.

    Reads the notebook globals ``historical``, ``bt``, ``closing`` and
    ``opening``.

    Parameters
    ----------
    predict : sequence
        One prediction per back-tested day (``bt`` values).
    clf : estimator
        Only ``clf.kernel`` (a string) is read, for the title.
    company_label : str, optional
        First line of the figure title. The default keeps the original
        hard-coded label; pass another value when a different ticker was
        entered, otherwise the figure is mislabelled.
    """
    # Day numbers 1..bt (fewer if the history is shorter than bt).
    days = np.arange(0 + 1, len(historical) + 1)[:bt]
    plt.plot(days, closing[-bt:], label='Actual Closing')
    plt.plot(days, predict, label='Predicted Closing')
    plt.plot(days, opening[-bt:], label='Opening')
    plt.xlabel("Day")
    plt.ylabel("Price ($)")
    plt.legend(loc='best')
    plt.title(company_label + "\n Features: Default Kernel: " + clf.kernel)
    plt.show()
def print_correct(full):
    """Print how many rows predicted the direction of the day's move.

    Each row is indexed as ``row[0]`` = prediction, ``row[2]`` = open and
    ``row[3]`` = close. A row counts as correct when the actual move
    (close - open) and the predicted move (prediction - open) are both
    strictly positive or both strictly negative; a zero move on either side
    never counts.

    Parameters
    ----------
    full : sequence of sequences
        Rows as described above.
    """
    hits = 0
    for row in full:
        actual_move = row[3] - row[2]
        predicted_move = row[0] - row[2]
        both_up = actual_move > 0 and predicted_move > 0
        both_down = actual_move < 0 and predicted_move < 0
        if both_up or both_down:
            hits += 1
    print(hits)
Getting training data
In [6]:
# Build the model inputs from the downloaded history. Going by the names, the
# four results are unscaled/scaled features and unscaled/scaled targets -
# TODO confirm against get_historical.training_data.
# NOTE(review): the meaning of the two trailing boolean flags is not visible
# here. A later cell passes (False, True) together with a Volume feature, so
# the last flag presumably toggles volume - confirm.
training, target, scaled_training, scaled_target = gh.training_data(historical, company, scaler, False, False)
# Same flags for the current trading day so its features match the training set.
this_day, scaled_today = td.get_trading_day(company, scaler, False, False)
Using linear kernel
In [8]:
# Support vector regressor with a linear kernel.
# NOTE(review): per the scikit-learn docs gamma is the coefficient for the
# rbf/poly/sigmoid kernels, so it should have no effect here.
clf1 = svm.SVR(gamma=0.001,C=1e3, kernel='linear')
#print historical[-10]["Date"]
# Back-test over the last `bt` days, plot the result, then print how many
# days had the direction of the move predicted correctly.
predict1, full1 = predictions(clf1)
plot(predict1, clf1)
print_correct(full1)
Using non-linear (RBF) kernel
In [9]:
# Same back-test with an RBF kernel; C and gamma are unchanged from the
# linear run so only the kernel differs.
clf2 = svm.SVR(gamma=0.001,C=1e3, kernel='rbf')
#print historical[-10]["Date"]
# Back-test, plot, and print the count of correctly predicted directions.
predict2, full2 = predictions(clf2)
plot(predict2, clf2)
print_correct(full2)
In [ ]:
In [9]:
# Rebuild the training data with the last flag switched to True. This
# overwrites the globals produced by the earlier (False, False) call, so any
# model fitted after this cell sees the new data.
# NOTE(review): the flag presumably adds volume as a feature (the cell below
# adds a Volume column to the per-day vector) - confirm in
# get_historical.training_data.
training, target, scaled_training, scaled_target = gh.training_data(historical, company, scaler, False, True)
this_day, scaled_today = td.get_trading_day(company, scaler, False, True)
In [10]:
def get_back_trading_day(historical, index, scaler):
    """Build the scaled (open, high, low, volume) feature vector for one past day.

    This definition replaces the earlier three-feature function of the same
    name: once this cell has run, every caller that looks the name up at call
    time (including ``predictions``) receives four features.

    Parameters
    ----------
    historical : sequence of mappings
        Per-day records whose "Open", "High", "Low" and "Volume" values can
        be converted with ``float``.
    index : int
        Offset from the end of ``historical``; the record used is
        ``historical[-index]`` (so 1 selects the last record).
    scaler : object
        Passed through unchanged to ``scale.scale``.

    Returns
    -------
    Whatever ``scale.scale`` returns for ``np.array((open, high, low, volume))``.
    """
    day = historical[-index]
    # The original also computed an unused high-low spread; it never reached
    # the feature vector, so it has been dropped.
    # NOTE(review): volume goes through the same scaler as the prices, and the
    # notebook builds that scaler from opening prices - confirm this matches
    # how the training data scales volume.
    features = np.array((
        float(day["Open"]),
        float(day['High']),
        float(day['Low']),
        float(day['Volume']),
    ))
    return scale.scale(features, scaler)
In [ ]:
# Back-test horizon (number of most recent records to predict).
bt = 10
# Linear SVR with a much smaller C than the earlier runs.
clf = svm.SVR(gamma=0.001,C=0.01, kernel='linear')
print(clf)
#print historical[-10]["Date"]
# The walk-forward loop that used to be written out here was a line-for-line
# copy of the body of predictions(); calling the function yields the same
# `predict` and `full` lists. predictions() resolves get_back_trading_day and
# the scaled training data at call time, so it uses whatever definitions and
# data are current when this cell runs.
predict, full = predictions(clf)
#print predict, full
In [ ]:
# Day numbers for the x-axis. This has to be defined here: `x_axis` is
# otherwise only a local inside plot() and a global created by a later cell,
# so on a fresh kernel this cell would fail with a NameError.
x_axis = np.arange(1, len(historical) + 1)
plt.plot(x_axis[:bt], closing[-bt:], label='Actual Closing')
plt.plot(x_axis[:bt], predict, label='Predicted Closing')
plt.plot(x_axis[:bt], opening[-bt:], label='Opening')
plt.xlabel("Day")
plt.ylabel("Price ($)")
plt.legend(loc='best')
plt.title("The Coca Cola Co. (KO)\n Default Features")
#plt.text(0, 0, "C=0.1, gamma=0.001, kernel = linear")
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Regression
In [29]:
# Fit an RBF support vector regressor on the full scaled training set and
# predict on that same set, so the curve below is an in-sample fit.
clf = svm.SVR(gamma=0.1,C=10000,kernel='rbf')
fit = clf.fit(scaled_training, scaled_target)
predict = fit.predict(scaled_training)
# Map the predictions back through the scaler before plotting.
predict = scaler.inverse_transform(predict)

# One x position per historical record, numbered from 1.
x_axis = np.arange(0+1, len(historical)+1)
plt.scatter(x_axis, opening, c='g', label='Opening')
plt.scatter(x_axis, closing, c='r', label='Closing')
plt.scatter(x_axis, high, c='b', label='High')
plt.scatter(x_axis, low, c='y', label='Low')
plt.plot(x_axis, predict, c ='b', label = 'Predicted Closing')
plt.xlabel('Days')
plt.xlim(0, )
plt.ylabel('Price ($)')
plt.title("Regression for stock price data")
# The legend is built only after every series is drawn: legend() collects the
# labelled artists that exist when it is called, so calling it before
# plt.plot left 'Predicted Closing' out of the legend.
plt.legend(loc='best')
plt.show()