In [120]:
import pandas as pd
from datetime import datetime
import datetime as dt
from pandas_datareader import data, wb
import matplotlib.pyplot as plt
from matplotlib import style
In [121]:
# Select matplotlib's 'ggplot' style sheet for the figures drawn in this notebook.
style.use('ggplot')
def next_biz_day(d):
    """Return the first Monday-Friday date strictly after ``d``.

    Only weekends are skipped; holidays are not taken into account.
    ``d`` is a ``date``/``datetime``; the result has the same type.
    """
    one_day = dt.timedelta(days=1)
    candidate = d + one_day
    # weekday() is 0-4 for Monday-Friday and 5-6 for Saturday-Sunday.
    while candidate.weekday() > 4:
        candidate = candidate + one_day
    return candidate
def prev_biz_day(d):
    """Return the last Monday-Friday date strictly before ``d``.

    Only weekends are skipped; holidays are not taken into account.
    ``d`` is a ``date``/``datetime``; the result has the same type.
    """
    one_day = dt.timedelta(days=1)
    # The local is deliberately not called ``pd``: that name is the pandas
    # alias at module level and shadowing it here invites confusion.
    candidate = d - one_day
    # weekday() is 0-4 for Monday-Friday and 5-6 for Saturday-Sunday.
    while candidate.weekday() > 4:
        candidate = candidate - one_day
    return candidate
In [122]:
# Tickers used to build the feature matrix, and the ticker whose change is predicted.
stock_list = 'BIDU,MSFT,GOOG,AAPL,AMZN,INTC'
target = 'BIDU'
# Number of trailing rows held out of training and used for prediction.
nop = 20

# Snap each window boundary onto a weekday: stepping one business day forward
# and then one back leaves a Monday-Friday date unchanged and moves a weekend
# date to the preceding Friday.
st, ed = (
    prev_biz_day(next_biz_day(boundary))
    for boundary in (datetime(2010, 4, 5), datetime(2017, 4, 10))
)
print(st, ed)
In [123]:
stocks = stock_list.split(',')

# Download every ticker exactly once; the row-to-row differences are derived
# locally instead of issuing a second, identical request per ticker.
raw_frames = [data.DataReader(s, 'yahoo', st, ed) for s in stocks]
dfs = raw_frames + [frame.diff() for frame in raw_frames]

# Suffix each frame's columns with its position in `dfs` so that column names
# stay unique after the side-by-side concat (raw frames come first, followed
# by the diff frames in the same ticker order).
mdfs = [d.add_suffix(str(i)) for i, d in enumerate(dfs)]
ds = pd.concat(mdfs, axis=1)
In [124]:
# Move both window boundaries forward by one business day for the target series.
# NOTE(review): later cells pair rows of `ds` and `tdf` by position, which
# presumably makes each target row the change following the feature row -
# confirm that both frames cover the same number of trading days.
tst, ted = next_biz_day(st), next_biz_day(ed)
print(tst, ted)
# Row-to-row differences of every column returned for the target ticker.
tdf = data.DataReader(target, data_source='yahoo', start=tst, end=ted).diff()
In [125]:
# Plot the row-to-row differences of every target column; the title lets the
# figure stand on its own when the notebook is skimmed.
tdf.plot(title='{}: row-to-row change of each column'.format(target))
plt.show()
In [126]:
# Import only the submodule that is used; a star import hides where names
# come from and can silently overwrite names defined in earlier cells.
from sklearn import tree

# A fixed random_state makes the fitted tree reproducible across re-runs
# (the estimator otherwise breaks ties between equally good splits randomly).
lnr = tree.DecisionTreeRegressor(random_state=0)
In [127]:
# Sanity check: the fit cell slices `ds` and `tdf` with the same positional
# bounds, so the two row counts printed here are expected to match.
print (len(ds),len(tdf))
Drop the first 2 rows, which contain NaN values introduced by `diff()`.
In [132]:
# Train on every row except the first two (skipped because of the NaN values
# left by diff()) and the last `nop`, which are held out for prediction.
# Features and target are paired purely by row position.
lnr.fit(ds.iloc[2:-nop], tdf['Close'].iloc[2:-nop])
Out[132]:
In [133]:
# Predict the held-out tail: the last `nop` feature rows.
predicted = lnr.predict(ds.iloc[-nop:])
# Label the predictions with the index of the matching target rows so they
# line up with the actual values in the combined frame.
out = pd.Series(predicted, index=tdf.index[-nop:], name='Predicted')
results = pd.concat([tdf['Close'].iloc[-nop:], out], axis=1)
In [134]:
# Actual ('Close') vs. predicted change over the held-out rows; the title lets
# the figure stand on its own when the notebook is skimmed.
results.plot(title='{}: actual vs. predicted Close change (last {} rows)'.format(target, nop))
plt.show()
Plot the day-to-day change in the actual and predicted values.
In [135]:
# Row-to-row change of both result columns; the title lets the figure stand
# on its own when the notebook is skimmed.
results.diff().plot(title='{}: day-to-day change of actual and predicted values'.format(target))
plt.show()
In [ ]: