In [569]:
import numpy as np
import pandas as pd
import datetime
import Quandl
from matplotlib import pyplot as plt
from sklearn.linear_model import BayesianRidge
%matplotlib inline
In [570]:
# Reference: downloading Google intraday historical data with Python
# http://trading.cheno.net/downloading-google-intraday-historical-data-with-python/
In [571]:
# Example data request (query: q=TNXP, i=300, p=10d, fields f=d,o,h,l,c,v):
# http://www.google.com/finance/getprices?q=TNXP&i=300&p=10d&f=d,o,h,l,c,v
In [572]:
# List the contents of the current working directory.
!ls
In [573]:
# Show the first lines of a raw CSV file.
# NOTE(review): this file name differs from the "tnxp_100days.csv" that is loaded
# into stock_df in the following cell - confirm which file is intended.
!head tnxp_goog_5min_10day_fromJun18.csv
In [574]:
# Load the CSV while dropping file lines 0-3, 5 and 6, so that line 4 is the
# first line read and therefore serves as the column header.
rows_to_skip = [0, 1, 2, 3, 5, 6]
stock_df = pd.read_csv("tnxp_100days.csv", skiprows=rows_to_skip)
In [575]:
# Report the frame's (rows, columns) and preview its first rows.
print(stock_df.shape)
stock_df.head()
Out[575]:
In [576]:
# Give the first column (read as 'COLUMNS=DATE') the name TIMEINDEX.
stock_df = stock_df.rename(columns={'COLUMNS=DATE': 'TIMEINDEX'})
In [577]:
# Remove every 'a' character from the TIMEINDEX column (regex replacement),
# leaving values that the following cells convert with int().
timeindex_cleanup = {'TIMEINDEX': {'a': ''}}
stock_df = stock_df.replace(to_replace=timeindex_cleanup, regex=True)
In [578]:
# Preview the first rows after the TIMEINDEX rename and 'a' removal.
stock_df.head()
Out[578]:
In [579]:
# Pull the TIMEINDEX column out as a plain array for element-wise processing.
time_indices = stock_df.loc[:, 'TIMEINDEX'].values
In [580]:
# Convert every entry to an integer so entries can be compared numerically.
time_indices = list(map(int, time_indices))
In [581]:
# Expand the mixed timestamp list into absolute values, in place.
# An entry that is not smaller than the running anchor is treated as an absolute
# timestamp and becomes the new anchor; a smaller entry is treated as a step
# count from the anchor and is rewritten as anchor + steps * BAR_SECONDS.
BAR_SECONDS = 300  # seconds per step (the i=300 interval of the data request)
last_timestamp = time_indices[0]
for i, raw_value in enumerate(time_indices):
    if raw_value < last_timestamp:
        time_indices[i] = last_timestamp + (raw_value * BAR_SECONDS)
    else:
        last_timestamp = raw_value
print(time_indices[:5])
# Show the true last five entries; the earlier [-6:-1] slice left out the final one.
print(time_indices[-5:])
In [582]:
# Render each Unix timestamp as a 'YYYY-MM-DD HH:MM:SS' string.
# NOTE: datetime.fromtimestamp() without a tz argument converts to the local
# timezone of the machine running the notebook, so the clock times produced
# here depend on that setting.
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
time_indices = [
    datetime.datetime.fromtimestamp(stamp).strftime(TIMESTAMP_FORMAT)
    for stamp in time_indices
]
print(time_indices[:5])
# Show the true last five entries; the earlier [-6:-1] slice left out the final one.
print(time_indices[-5:])
In [583]:
def _clock_to_float(stamp):
    """Return the HH:MM part of a '... HH:MM:SS' string as the float HH.MM."""
    hours_minutes = stamp[-8:-3]
    return float(hours_minutes.replace(':', '.'))


# Keep only the time of day, encoded as HH.MM (e.g. '09:35:00' becomes 9.35).
# NOTE(review): this encoding is not linear in time (9.59 is followed by 10.0);
# confirm that is acceptable for a regression feature.
time_indices = [_clock_to_float(stamp) for stamp in time_indices]
In [584]:
# Overwrite the TIMEINDEX column with the processed time_indices values.
stock_df['TIMEINDEX'] = time_indices
In [585]:
# Per-row median of the four price columns, stored as MEDIAN_PRICE.
price_columns = ['CLOSE', 'HIGH', 'LOW', 'OPEN']
stock_df['MEDIAN_PRICE'] = stock_df[price_columns].median(axis=1)
In [586]:
# Summary statistics for each numeric column.
stock_df.describe()
Out[586]:
In [587]:
# Line plot of the CLOSE column against the row index.
close_series = stock_df['CLOSE']
close_series.plot()
plt.show()
In [588]:
# Preview the first rows, now including the MEDIAN_PRICE column.
stock_df.head()
Out[588]:
In [589]:
# For every existing column, add a "PREV<name>" column holding the value from
# the row before. The first row has no predecessor, so it repeats its own value.
existing_columns = list(stock_df.columns)
for col in existing_columns:
    column_values = stock_df[col].values
    first_value = stock_df[col][0]
    stock_df["PREV" + col] = np.concatenate(([first_value], column_values[:-1]))
In [590]:
# Report the new dimensions and preview the first rows with the PREV* columns.
print(stock_df.shape)
stock_df.head()
Out[590]:
In [591]:
# Preview the last rows to check the PREV* columns at the end of the frame.
stock_df.tail()
Out[591]:
In [592]:
# Print the column names as a plain list.
print(list(stock_df.columns))
In [593]:
# Regression target: MEDIAN_PRICE from 24 rows later (labelled as 2 hours ahead
# by the column name). The final 24 rows have no such future value and are
# filled with zeros as placeholders.
horizon_rows = 24
shifted_prices = stock_df['MEDIAN_PRICE'].values[horizon_rows:]
stock_df["FUTURE(2HRS)MED_PRICE"] = np.concatenate((shifted_prices, np.zeros(horizon_rows)))
In [594]:
# Report the dimensions and preview the first 25 rows, enough to compare row 0's
# future target with the MEDIAN_PRICE found 24 rows later.
print(stock_df.shape)
stock_df.head(25)
Out[594]:
In [595]:
# Preview the last rows, where the future-price target is zero-filled.
stock_df.tail()
Out[595]:
In [596]:
# Training split: every row except the final 78, which are held out for testing.
# .iloc slices by position with an exclusive end, so the row at position n - 78
# (the first test row) is not included. The former .ix[:n-78] slice was
# label-based and end-inclusive on the integer index, which placed that row in
# both the training and the test split.
n_holdout = 78
train_df = stock_df.iloc[:stock_df.shape[0] - n_holdout]
print(train_df.shape)
In [597]:
# Test split: the final 78 rows, selected by position with .iloc.
# (.ix, used here before, is deprecated and no longer exists in pandas >= 1.0;
# .iloc selects the same rows and keeps their original index labels.)
n_holdout = 78
test_df = stock_df.iloc[stock_df.shape[0] - n_holdout:]
test_df.shape
Out[597]:
In [598]:
# Print the training frame's column names as a plain list.
print(list(train_df.columns))
In [599]:
# Feature matrix: the seven current-row columns followed by their PREV* twins,
# in that order. Target vector: the future median price as a flat 1-D array.
base_features = ['TIMEINDEX', 'CLOSE', 'HIGH', 'LOW', 'OPEN', 'VOLUME', 'MEDIAN_PRICE']
feature_columns = base_features + ['PREV' + name for name in base_features]
X_train = train_df[feature_columns]
print(X_train.shape)
y_train = train_df['FUTURE(2HRS)MED_PRICE'].values.flatten()
print(y_train.shape)
In [600]:
# Same feature columns and target as the training split, taken from the test rows.
base_features = ['TIMEINDEX', 'CLOSE', 'HIGH', 'LOW', 'OPEN', 'VOLUME', 'MEDIAN_PRICE']
feature_columns = base_features + ['PREV' + name for name in base_features]
X_test = test_df[feature_columns]
print(X_test.shape)
y_test = test_df['FUTURE(2HRS)MED_PRICE'].values.flatten()
print(y_test.shape)
In [601]:
# Fit a Bayesian ridge regression on the training features and targets.
# compute_score=True is passed so the estimator also records its objective
# value during fitting.
clf = BayesianRidge(compute_score=True)
clf.fit(X=X_train, y=y_train)
Out[601]:
In [602]:
# Predict for a single test row (the first one) and show the row for comparison.
test_index = stock_df.shape[0] - 78
# Look the row up by its index label; .loc replaces .ix, which is deprecated and
# no longer exists in pandas >= 1.0.
sample_row = test_df.loc[test_index]
# Drop the last value (the FUTURE target) and reshape to (1, n_features):
# predict() expects a 2-D array, and a bare 1-D sample is rejected by current
# scikit-learn versions.
# NOTE(review): relies on the target being the last column and the remaining
# columns being in the same order as the training features - confirm.
sample_features = sample_row.values[:-1].reshape(1, -1)
print("Prediction: " + str(clf.predict(sample_features)[0]) + " \n")
print(sample_row)
In [603]:
# Predict for a single test row (30 rows before the end) and show the row.
test_index = stock_df.shape[0] - 30
# Look the row up by its index label; .loc replaces .ix, which is deprecated and
# no longer exists in pandas >= 1.0.
sample_row = test_df.loc[test_index]
# Drop the last value (the FUTURE target) and reshape to (1, n_features):
# predict() expects a 2-D array, and a bare 1-D sample is rejected by current
# scikit-learn versions.
# NOTE(review): relies on the target being the last column and the remaining
# columns being in the same order as the training features - confirm.
sample_features = sample_row.values[:-1].reshape(1, -1)
print("Prediction: " + str(clf.predict(sample_features)[0]) + " \n")
print(sample_row)
In [604]:
# Predict every test row and round each prediction to three decimals.
predictions = []
for predicted_price in clf.predict(X_test):
    predictions.append(round(predicted_price, 3))
print(predictions)
In [605]:
# Round the test targets to three decimals so they line up with the predictions.
y_test = list(map(lambda actual_price: round(actual_price, 3), y_test))
print(y_test)
In [606]:
# Prediction minus target for each test row, rounded to three decimals.
# Only the entries before the last 24 are printed.
prediction_errors = np.array(predictions) - np.array(y_test)
diff = [round(error, 3) for error in prediction_errors]
print(diff[:-24])
In [607]:
# Count how many predictions fall below the target price.
# The last 24 targets are zero placeholders rather than real future prices, so
# those rows are left out; previously they were included in the total even
# though their differences carry no information.
valid_diff = diff[:-24]
lower = sum(1 for val in valid_diff if val < 0)
print("%d %d" % (len(valid_diff), lower))
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: