In [1]:
from yahoo_finance import Share
from matplotlib import pyplot as plt
import numpy as np
import get_historical as gh
from datetime import date as dt
from datetime import timedelta
import normalize as scale
import trading_day as td

In [2]:
def get_dates(num_days):
    """Return the ISO date strings bounding the historical lookup window.

    The window starts ``num_days`` days before today and ends yesterday.

    Args:
        num_days: number of days before today at which the window starts.

    Returns:
        A ``(start, end)`` tuple of ``YYYY-MM-DD`` strings.
    """
    current = dt.today()
    window_start = current - timedelta(days=num_days)
    window_end = current - timedelta(days=1)
    return window_start.isoformat(), window_end.isoformat()

# Ask for the ticker symbol and how many days back the window should start.
ticker = raw_input("Enter company ticker: ")
# Read the number as text and convert it with int(): Python 2's bare input()
# would eval() whatever the user types.
num_days = int(raw_input("Enter number of days: "))

company = Share(ticker)

day1, day2 = get_dates(num_days)

historical = company.get_historical(day1, day2)

# NOTE(review): presumably the API returns newest-first, so reversing puts the
# oldest day first and historical[-1] is the most recent day — confirm.
historical.reverse()

# Number of records actually returned for the requested window.
print(len(historical))


Enter company ticker: KO
Enter number of days: 30
21

Get training data


In [3]:
# Build the scaler from the unscaled opening prices; this one scaler object is
# then passed to every helper below.
opening = gh.get_unscaled_opening(historical)
scaler = scale.get_scaler(opening)

# Each helper's result is unpacked into two names: a plain series and a
# "scaled_" one (semantics assumed from the names — confirm in get_historical).
_opening, scaled_opening = gh.get_historical_opening(historical, scaler)

closing, scaled_closing = gh.get_historical_closing(historical, scaler)

high, scaled_high = gh.get_historical_high(historical, scaler)

low, scaled_low = gh.get_historical_low(historical, scaler)

change, scaled_change = gh.get_change(historical, scaler)

# NOTE(review): volume is handed the scaler that was built from opening
# prices — confirm get_historical_volume copes with the different magnitude.
volume, avg_vol, scaled_volume, scaled_avg_vol = gh.get_historical_volume(historical, company, scaler)

# Keep only the first element of avg_vol.
average_volume = avg_vol[0]

In [ ]:


In [4]:
from sklearn import svm

Using default features

Features include opening price, daily high and daily low


In [5]:
# Number of most recent records to back-test over.
bt = 10
def predictions(clf):
    """Back-test ``clf`` over the last ``bt`` records of ``historical``.

    For step ``i`` (0 .. bt-1) the target day is ``historical[-(bt - i)]``.
    The model is refit on the slice ``[i : len - (bt - i)]`` of the scaled
    training data and targets (a fixed-length window that advances one row
    per step) and then asked to predict from that day's scaled features.
    ``clf`` itself is refit on every step.

    Reads the notebook globals ``bt``, ``historical``, ``scaler``,
    ``scaled_training`` and ``scaled_target``.

    Returns:
        ``(predict, full)`` where ``predict`` holds each inverse-transformed
        prediction and ``full`` holds one row per day of
        ``[predicted value, date, open, adjusted close]``.
    """
    predict, full = [], []

    for step in range(bt):
        days_back = bt - step
        day_features = get_back_trading_day(historical, days_back, scaler)

        # Training window for this step.
        train_x = scaled_training[step:len(scaled_training) - days_back]
        train_y = scaled_target[step:len(scaled_target) - days_back]
        fitted = clf.fit(train_x, train_y)
        scaled_prediction = fitted.predict(day_features)

        # Convert the prediction back to the unscaled range.
        pre = scaler.inverse_transform(scaled_prediction)

        row = [
            pre[0],
            historical[-days_back]['Date'],
            float(historical[-days_back]["Open"]),
            float(historical[-days_back]["Adj_Close"]),
        ]
        predict.append(pre)
        full.append(row)

    return predict, full

def get_back_trading_day(historical, index, scaler):
    """Return the scaled (open, high, low) features of a past trading day.

    Args:
        historical: sequence of per-day records whose "Open", "High" and
            "Low" entries can be converted with ``float``.
        index: distance from the end of ``historical``; ``1`` selects the
            last record.
        scaler: passed straight through to ``scale.scale``.

    Returns:
        Whatever ``scale.scale`` returns for ``np.array((open, high, low))``.
    """
    day = historical[-index]
    features = np.array((
        float(day["Open"]),
        float(day['High']),
        float(day['Low']),
    ))
    return scale.scale(features, scaler)

def plot(predict, clf, company_label="The Coca Cola Co. (KO)"):
    """Plot actual close, predicted close and open for the last ``bt`` days.

    Reads the notebook globals ``bt``, ``historical``, ``closing`` and
    ``opening``.

    Args:
        predict: predicted closing values, one per back-tested day.
        clf: the estimator used; its ``kernel`` attribute is shown in the title.
        company_label: first line of the plot title. The default keeps the
            original Coca-Cola title; pass another label when a different
            ticker was entered.
    """
    # Days are numbered from 1; only the first ``bt`` positions are drawn.
    days = np.arange(1, len(historical) + 1)[:bt]

    plt.plot(days, closing[-bt:], label='Actual Closing')
    plt.plot(days, predict, label='Predicted Closing')
    plt.plot(days, opening[-bt:], label='Opening')
    plt.xlabel("Day")
    plt.ylabel("Price ($)")
    plt.legend(loc='best')
    plt.title(company_label + "\n Features: Default   Kernel: " + clf.kernel)
    plt.show()
    
def print_correct(full):
    """Print how many predictions got the direction of the day right.

    Each row of ``full`` is read as ``row[0]`` = predicted close,
    ``row[2]`` = open and ``row[3]`` = actual close. A row counts as correct
    when the predicted close and the actual close lie strictly on the same
    side of the open; a day with no change never counts.

    Args:
        full: iterable of rows indexable at positions 0, 2 and 3.
    """
    correct = 0
    for row in full:
        predicted, opened, closed = row[0], row[2], row[3]
        actual_move = closed - opened
        predicted_move = predicted - opened

        both_up = actual_move > 0 and predicted_move > 0
        both_down = actual_move < 0 and predicted_move < 0
        if both_up or both_down:
            correct += 1

    # The parenthesised form prints the same text under Python 2 and also
    # parses under Python 3.
    print(correct)

Getting training data


In [6]:
# Build the training inputs/targets and today's feature row with the default
# feature set.
# NOTE(review): the two trailing booleans presumably toggle optional features
# (the later "Using Volumes" cell passes False, True) — confirm in
# get_historical.training_data / trading_day.get_trading_day.
training, target, scaled_training, scaled_target = gh.training_data(historical, company, scaler, False, False)
this_day, scaled_today = td.get_trading_day(company, scaler, False, False)

Using a linear kernel


In [8]:
# Support vector regressor with a linear kernel.
# NOTE(review): per the scikit-learn docs gamma is the coefficient for the
# rbf/poly/sigmoid kernels, so it should have no effect here — confirm.
clf1 = svm.SVR(gamma=0.001,C=1e3, kernel='linear')
#print historical[-10]["Date"]

# Back-test over the last bt days, refitting clf1 at every step.
predict1, full1 = predictions(clf1)

plot(predict1, clf1)

# Prints the number of days whose up/down direction was predicted correctly.
print_correct(full1)


4

Using a non-linear (RBF) kernel


In [9]:
# Same settings as the linear run, but with the RBF kernel.
clf2 = svm.SVR(gamma=0.001,C=1e3, kernel='rbf')
#print historical[-10]["Date"]

# Back-test over the last bt days, refitting clf2 at every step.
predict2, full2 = predictions(clf2)

plot(predict2, clf2)

# Prints the number of days whose up/down direction was predicted correctly.
print_correct(full2)


4

In [ ]:

Using Volumes


In [9]:
# Rebuild the training data and today's row; this overwrites the globals
# produced by the earlier default-feature cell.
# NOTE(review): the final True presumably adds volume as a feature (it is the
# only argument that differs from the earlier call) — confirm in
# get_historical.training_data / trading_day.get_trading_day.
training, target, scaled_training, scaled_target = gh.training_data(historical, company, scaler, False, True)
this_day, scaled_today = td.get_trading_day(company, scaler, False, True)

In [10]:
def get_back_trading_day(historical, index, scaler):
    """Return the scaled (open, high, low, volume) features of a past day.

    This definition reuses the name of the earlier three-feature helper in
    this notebook, so once this cell has run every caller (including
    ``predictions``) gets the four-feature vector.

    Args:
        historical: sequence of per-day records whose "Open", "High", "Low"
            and "Volume" entries can be converted with ``float``.
        index: distance from the end of ``historical``; ``1`` selects the
            last record.
        scaler: passed straight through to ``scale.scale``.

    Returns:
        Whatever ``scale.scale`` returns for
        ``np.array((open, high, low, volume))``.
    """
    day = historical[-index]
    features = np.array((
        float(day["Open"]),
        float(day['High']),
        float(day['Low']),
        float(day['Volume']),
    ))
    return scale.scale(features, scaler)

In [ ]:
# Number of most recent records to back-test over (read by predictions()).
bt = 10
clf = svm.SVR(gamma=0.001,C=0.01, kernel='linear')
print(clf)

# Run the back-test through predictions(), defined earlier in this notebook,
# rather than repeating its loop inline. It reads the current
# scaled_training / scaled_target and the current get_back_trading_day.
predict, full = predictions(clf)

In [ ]:
# Day numbers starting at 1. Defined here because x_axis is otherwise only
# assigned in the later regression cell, which breaks a top-to-bottom run.
x_axis = np.arange(1, len(historical) + 1)

plt.plot(x_axis[:bt], closing[-bt:], label='Actual Closing')
plt.plot(x_axis[:bt], predict, label='Predicted Closing')
plt.plot(x_axis[:bt], opening[-bt:], label='Opening')
plt.xlabel("Day")
plt.ylabel("Price ($)")
plt.legend(loc='best')
# This run includes volume alongside open/high/low, so the title says so.
plt.title("The Coca Cola Co. (KO)\n Features: Default + Volume")
plt.show()

In [ ]:


In [ ]:


In [ ]:


In [ ]:

Regression


In [29]:
# Fit an RBF support vector regressor on the whole scaled training set and
# predict on that same data (an in-sample fit, not a forecast).
clf = svm.SVR(gamma=0.1,C=10000,kernel='rbf')

fit = clf.fit(scaled_training, scaled_target)

predict = fit.predict(scaled_training)

# Convert the predictions back to the unscaled range.
predict = scaler.inverse_transform(predict)

# Day numbers starting at 1.
x_axis = np.arange(1, len(historical)+1)

plt.scatter(x_axis, opening, c='g', label='Opening')
plt.scatter(x_axis, closing, c='r', label='Closing')
plt.scatter(x_axis, high, c='b', label='High')
plt.scatter(x_axis, low, c='y', label='Low')
plt.plot(x_axis, predict, c ='b', label = 'Predicted Closing')
# Create the legend only after every labelled artist has been drawn, so the
# predicted line is listed too.
plt.legend(loc='best')
plt.xlabel('Days')
plt.xlim(0, )
plt.ylabel('Price ($)')
plt.title("Regression for stock price data")
plt.show()