In [4]:
import numpy as np
import pandas as pd
import pymc3 as pm
from pymc3.distributions.timeseries import GaussianRandomWalk

from scipy.sparse import csc_matrix
from scipy import optimize

%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [7]:
stock_df = pd.read_csv("/Users/excalibur/py/quant/quandl_data/PGNX.csv")
print stock_df.shape
stock_df.head()


(4458, 6)
Out[7]:
Open High Low Close Volume Adjusted Close
0 8.2500 8.2500 8.00 8.0625 637000 8.0625
1 8.0625 8.0625 8.00 8.0625 238400 8.0625
2 8.0625 8.0625 8.00 8.0000 51600 8.0000
3 8.0000 8.5000 8.00 8.5000 311700 8.5000
4 8.5000 8.5000 8.25 8.5000 10500 8.5000

In [9]:
stock_df['return'] = (stock_df['Close'] / stock_df['Open']) - 1
stock_df.head()


Out[9]:
Open High Low Close Volume Adjusted Close return
0 8.2500 8.2500 8.00 8.0625 637000 8.0625 -0.022727
1 8.0625 8.0625 8.00 8.0625 238400 8.0625 0.000000
2 8.0625 8.0625 8.00 8.0000 51600 8.0000 -0.007752
3 8.0000 8.5000 8.00 8.5000 311700 8.5000 0.062500
4 8.5000 8.5000 8.25 8.5000 10500 8.5000 0.000000

In [11]:
n = 400
returns = stock_df['return'].values[-n:]
plt.plot(returns)
plt.show()



In [12]:
model = pm.Model()
with model:
    sigma = pm.Exponential('sigma', 1.0/0.02, testval=0.1)
    
    nu = pm.Exponential('nu', 1.0/10)
    s = GaussianRandomWalk('s', sigma**-2, shape=n)
    
    r = pm.T('r', nu, lam=pm.exp(-2*s), observed=returns)

In [13]:
with model:
    start = pm.find_MAP(vars=[s], fmin=optimize.fmin_l_bfgs_b)

In [14]:
with model:
    step = pm.NUTS(vars=[s, nu, sigma], scaling=start, gamma=0.25)
    start2 = pm.sample(100, step, start=start)[-1]
    
    # start next run at the last sampled position
    step = pm.NUTS(vars=[s, nu, sigma], scaling=start2, gamma=0.55)
    trace = pm.sample(2000, step, start=start2)


 [-----------------100%-----------------] 2000 of 2000 complete in 45.8 sec
/usr/local/lib/python2.7/site-packages/theano/scan_module/scan_perform_ext.py:133: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility
  from scan_perform.scan_perform import *

In [15]:
figsize(12, 6)
pm.traceplot(trace, model.vars[:-1])
plt.show()



In [16]:
figsize(12, 6)
title(str(s))
plot(trace[s][::10].T, 'b', alpha=0.03)
xlabel('time')
ylabel('log volatility')
plt.show()



In [17]:
plot(returns)
plot(np.exp(trace[s][::10].T), 'r', alpha=0.03)
sd = np.exp(trace[s].T)
plot(-np.exp(trace[s][::10].T), 'r', alpha=0.03)
xlabel('time')
ylabel('returns')
plt.show()