In [1]:
%matplotlib inline  
import matplotlib.pyplot as plt
import numpy as np
import theano
import theano.tensor as T

Getting the data


In [38]:
# NOTE(review): `pandas.io.data` is the legacy remote-data module; newer pandas
# releases dropped it in favor of the separate `pandas-datareader` package --
# confirm against the installed pandas version before re-running.
import pandas.io.data as web
import datetime
# Training window: 2013-01-01 through 2013-01-27.
start = datetime.datetime(2013, 1, 1)
end = datetime.datetime(2013, 1, 27)
# Fetch quotes for ticker "F" from the 'yahoo' source over that window
# (presumably a network call -- TODO confirm the endpoint is still served).
f = web.DataReader("F", 'yahoo', start, end)

In [39]:
f


Out[39]:
Open High Low Close Volume Adj Close
Date
2013-01-02 13.23 13.28 13.00 13.20 75274700 12.215207
2013-01-03 13.24 13.70 13.05 13.46 121284700 12.455810
2013-01-04 13.51 13.61 13.35 13.57 54669900 12.557603
2013-01-07 13.52 13.58 13.35 13.43 43482400 12.428048
2013-01-08 13.38 13.43 13.20 13.35 46336200 12.354017
2013-01-09 13.40 13.60 13.39 13.47 36973900 12.465064
2013-01-10 13.78 13.94 13.68 13.83 84884300 12.798206
2013-01-11 14.00 14.07 13.90 14.00 67641200 12.955523
2013-01-14 14.04 14.08 13.86 13.99 46224800 12.946268
2013-01-15 13.91 14.30 13.89 14.30 55425000 13.233141
2013-01-16 14.16 14.25 14.01 14.22 51453400 13.159110
2013-01-17 14.27 14.30 14.07 14.22 44299300 13.159110
2013-01-18 14.10 14.13 13.97 14.11 45716100 13.057316
2013-01-22 14.06 14.19 14.00 14.17 35467900 13.112840
2013-01-23 14.00 14.02 13.79 13.88 58122200 12.844475
2013-01-24 13.82 13.98 13.81 13.87 42532700 12.835221
2013-01-25 13.83 13.84 13.64 13.68 53405400 12.659397

Explore data


In [3]:
# Plot the Close column against the frame's index.
f.Close.plot()


Out[3]:
<matplotlib.axes._subplots.AxesSubplot at 0x113a174e0>

In [4]:
# Plot the Volume column against the frame's index.
f.Volume.plot()


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x1144f7f60>

In [5]:
# Scatter of Open (y axis) against Close (x axis), one point per row.
f.plot(kind='scatter', x='Close', y='Open')


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x1146024e0>

In [6]:
# `scatter_matrix` is exposed as `pandas.plotting.scatter_matrix` in newer
# pandas; fall back to the legacy `pandas.tools.plotting` location so the cell
# runs on either generation of the library.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix
# Pairwise scatter plots of every column, with KDE curves on the diagonal.
scatter_matrix(f, alpha=0.2, figsize=(6, 6), diagonal='kde');



In [7]:
f.describe()


Out[7]:
Open High Low Close Volume Adj Close
count 17.000000 17.000000 17.000000 17.000000 1.700000e+01 17.000000
mean 13.779412 13.900000 13.644706 13.808824 5.665848e+07 12.778609
std 0.334449 0.315099 0.351196 0.345866 2.118857e+07 0.320063
min 13.230000 13.280000 13.000000 13.200000 3.546790e+07 12.215207
25% 13.510000 13.610000 13.350000 13.470000 4.429930e+07 12.465064
50% 13.830000 13.980000 13.790000 13.870000 5.145340e+07 12.835221
75% 14.040000 14.130000 13.900000 14.110000 5.812220e+07 13.057316
max 14.270000 14.300000 14.070000 14.300000 1.212847e+08 13.233141

Data Cleaning


In [8]:
# Feature matrix: the Open, Low and High columns (in that order), cast to float32.
Xtr = f[['Open','Low','High']].values.astype(np.float32)
# Trailing expression echoes the array as the cell output.
Xtr


Out[8]:
array([[ 13.22999954,  13.        ,  13.27999973],
       [ 13.23999977,  13.05000019,  13.69999981],
       [ 13.51000023,  13.35000038,  13.60999966],
       [ 13.52000046,  13.35000038,  13.57999992],
       [ 13.38000011,  13.19999981,  13.43000031],
       [ 13.39999962,  13.39000034,  13.60000038],
       [ 13.77999973,  13.68000031,  13.93999958],
       [ 14.        ,  13.89999962,  14.06999969],
       [ 14.03999996,  13.85999966,  14.07999992],
       [ 13.90999985,  13.89000034,  14.30000019],
       [ 14.15999985,  14.01000023,  14.25      ],
       [ 14.27000046,  14.06999969,  14.30000019],
       [ 14.10000038,  13.97000027,  14.13000011],
       [ 14.06000042,  14.        ,  14.18999958],
       [ 14.        ,  13.78999996,  14.02000046],
       [ 13.81999969,  13.81000042,  13.97999954],
       [ 13.82999992,  13.64000034,  13.84000015]], dtype=float32)

In [9]:
# Regression target: the Close column, cast to float32.
Ytr = f.Close.values.astype(np.float32)
# Trailing expression echoes the array as the cell output.
Ytr


Out[9]:
array([ 13.19999981,  13.46000004,  13.56999969,  13.43000031,
        13.35000038,  13.47000027,  13.82999992,  14.        ,
        13.98999977,  14.30000019,  14.22000027,  14.22000027,
        14.10999966,  14.17000008,  13.88000011,  13.86999989,  13.68000031], dtype=float32)

Modeling


In [10]:
# Seeded generator so the random initial weights (and therefore the fitted
# model) are the same on every Restart & Run All. `RandomState` exposes the
# same `.randn` method that the unseeded `np.random` module did.
rng = np.random.RandomState(0)

In [11]:
def define_fit_model(dataset_x, dataset_y, learning_rate = 1e-4, training_steps = 10000):
    """Fit a linear model ``Hx = x . w + b`` by full-batch gradient descent.

    Builds a Theano graph for the mean squared error between ``Hx`` and ``y``,
    compiles a training step that moves ``w`` and ``b`` against the gradient of
    that cost, and runs the step ``training_steps`` times on the whole dataset.

    Parameters
    ----------
    dataset_x : 2-D array-like, one row per sample and one column per feature.
    dataset_y : 1-D array-like holding one target value per row of ``dataset_x``.
    learning_rate : factor applied to each gradient in the update rule.
    training_steps : number of full-batch updates to run.

    Returns
    -------
    (predict, w, b)
        ``predict`` is a compiled Theano function mapping a feature matrix to
        ``Hx``; ``w`` and ``b`` are the Theano shared variables holding the
        fitted weights and bias (read them with ``get_value()``).

    Raises
    ------
    ValueError
        If ``dataset_x`` is not a non-empty 2-D array, or ``dataset_y`` does not
        hold exactly one value per row of ``dataset_x``.
    """
    # Validate up front: an empty dataset used to surface as an IndexError from
    # `len(dataset_x[0])`, and a length mismatch only failed inside the
    # compiled graph.
    dataset_x = np.asarray(dataset_x)
    dataset_y = np.asarray(dataset_y)
    if dataset_x.ndim != 2 or dataset_x.shape[0] == 0:
        raise ValueError(
            "dataset_x must be a non-empty 2-D array of shape "
            "(n_samples, n_features); got shape {}".format(dataset_x.shape))
    if dataset_y.shape != (dataset_x.shape[0],):
        raise ValueError(
            "dataset_y must be 1-D with one value per row of dataset_x; "
            "got shape {} for {} rows".format(dataset_y.shape, dataset_x.shape[0]))

    N = dataset_x.shape[1]

    # Symbolic inputs and trainable parameters.
    x = T.matrix("x")
    y = T.vector("y")
    # Initial weights come from the module-level `rng`; the bias starts at 0.
    w = theano.shared(rng.randn(N), name="w")
    b = theano.shared(0., name="b")

    # Linear hypothesis and mean-squared-error cost.
    Hx = T.dot(x,w) + b
    cost = T.mean(T.sqr(Hx - y))
    gw, gb = T.grad(cost=cost, wrt=[w, b])

    # One call to `train` performs one gradient-descent update of w and b.
    train = theano.function(
        inputs=[x,y],
        outputs=[Hx,cost],
        updates=((w, w - learning_rate * gw), (b, b - learning_rate * gb)))
    predict = theano.function(inputs=[x], outputs=Hx)

    # The per-step prediction and cost are not needed, so they are discarded.
    for _ in range(training_steps):
        train(dataset_x, dataset_y)
    return predict, w, b

In [12]:
# Fit on the training arrays using the function's defaults
# (learning_rate=1e-4, training_steps=10000).
predict, w, b = define_fit_model(Xtr,Ytr)

In [13]:
"Model that was fit from data:\n {}, {}".format( w.get_value(), b.get_value() )


Out[13]:
'Model that was fit from data:\n [-1.0897295   1.04515748  1.04634064], 0.019895761707419128'

Visualize, Summarize, Apply

Predict on the training data


In [23]:
# In-sample predictions: the model is applied to the same rows it was fitted on.
Ypredict = predict(Xtr)

In [24]:
# In-sample check: actual closes vs. the model's predictions on the training
# rows. The title and axis labels let the figure stand on its own.
plt.plot(Ytr, label='real')
plt.plot(Ypredict, label='predict')
plt.xlabel('Row index')
plt.ylabel('Close')
plt.title('Training data: actual vs. predicted close')
plt.legend();


Testing on out-of-sample data


In [34]:
# Test window: 2015-05-01 through 2015-05-30, same ticker and source as the
# training download.
# NOTE: this rebinds `start`, `end` and `f`; after this cell runs, `f` no
# longer refers to the January 2013 training frame.
start = datetime.datetime(2015, 5, 1)
end = datetime.datetime(2015, 5, 30)
f = web.DataReader("F", 'yahoo', start, end)

In [35]:
# Test features and target, built the same way as the training arrays:
# columns Open, Low, High (in that order) and the Close column, all float32.
Xts = f[['Open','Low','High']].values.astype(np.float32)
Yts = f.Close.values.astype(np.float32)

In [36]:
# Apply the already-fitted model to the test features; no refitting happens here.
Ytest_predict = predict(Xts)

In [37]:
# Out-of-sample check: actual closes from the test window vs. the model's
# predictions. The title and axis labels let the figure stand on its own.
plt.plot(Yts, label='test')
plt.plot(Ytest_predict, label='predict')
plt.xlabel('Row index')
plt.ylabel('Close')
plt.title('Test data: actual vs. predicted close')
plt.legend();



In [ ]: