In [62]:
    
import os.path
import numpy as np
import pandas as pd
import pymc3 as pm
from scipy.stats import norm
import theano.tensor as tt
%matplotlib inline
    
So far this is just a AR1 process evaluation
In [19]:
    
# skiprows=4, header=None, 
df = pd.read_csv('jgra20797-sup-0004-ds02.txt', delim_whitespace=True, error_bad_lines=False)
df.head()
    
    Out[19]:
In [27]:
    
df['Date'] = pd.DatetimeIndex(df['Date'])
df.index=df['Date']
    
In [28]:
    
df.head()
    
    Out[28]:
In [30]:
    
df['E1-0.0241'].plot(logy=True, figsize=(12,6))
    
    Out[30]:
    
In [31]:
    
df2 = df['E1-0.0241'].loc['1995']
df2.plot(logy=True, figsize=(12,6))
    
    Out[31]:
    
In [34]:
    
df2.head()
    
    Out[34]:
In [54]:
    
tau=1.0
y = df2.dropna().values
with pm.Model() as armodel:
    beta = pm.Normal('beta', mu=0, sd=tau)
    data = pm.AR('y', beta, sd=1.0, observed=y)
    trace = pm.sample(5000, cores=4, tune=1500)
    
    
In [55]:
    
pm.traceplot(trace);
    
    
    
In [56]:
    
mup = ((y[:-1]**2).sum() + tau**-2)**-1 * np.dot(y[:-1],y[1:])
Vp =  ((y[:-1]**2).sum() + tau**-2)**-1
print('Mean: {:5.3f} (exact = {:5.3f})'.format(trace['beta'].mean(), mup))
print('Std: {:5.3f} (exact = {:5.3f})'.format(trace['beta'].std(), np.sqrt(Vp)))
    
    
In [57]:
    
ax=pd.Series(trace['beta']).plot(kind='kde')
xgrid = np.linspace(0.4, 1.2, 1000)
fgrid = norm(loc=mup, scale=np.sqrt(Vp)).pdf(xgrid)
ax.plot(xgrid,fgrid);
    
    
In [63]:
    
def linear_regression(X, y):
    with pm.Model() as model:
        w = pm.Flat('w', shape=X.shape[1])
        σ = pm.HalfCauchy('σ', beta=10.)
        y_obs = pm.Normal('y', mu=tt.dot(X, w), sd=σ, observed=y.squeeze())
    return model
    
In [64]:
    
X = df2.dropna().index.to_julian_date()
with linear_regression(X, y):
    linear_trace = pm.sample(2000)
    
    
In [ ]:
    
    
In [ ]: