In [3]:
# http://pymc-devs.github.io/pymc3/getting_started/#a-motivating-example-linear-regression
import numpy as np
import pylab as pyl
%matplotlib inline
In [2]:
# Initialize NumPy's global random number generator so the data
# simulated below (np.random.randn) is reproducible across runs.
np.random.seed(123)
In [10]:
# True parameter values used to simulate the data.
alpha, sigma = 1, 1
beta = [1, 2.5]

# Number of simulated observations.
size = 100

# Predictor variables: evenly spaced grids over [0, 1] and [0, 0.2].
X1 = np.linspace(0, 1, num=size)
X2 = np.linspace(0, 0.2, num=size)

# Outcome: linear predictor plus standard-normal noise scaled by sigma.
noise = np.random.randn(size) * sigma
Y = alpha + beta[0] * X1 + beta[1] * X2 + noise
In [13]:
# Scatter the simulated outcome against each predictor, side by side.
fig, (ax_left, ax_right) = pyl.subplots(1, 2, sharex=True, figsize=(10, 4))
ax_left.scatter(X1, Y)
ax_left.set_xlabel('X1')
ax_left.set_ylabel('Y')
ax_right.scatter(X2, Y)
ax_right.set_xlabel('X2')
pyl.show()
In [32]:
from pymc3 import Model, Normal, HalfNormal
from pymc3 import find_MAP, NUTS, sample, traceplot, summary
from scipy import optimize
In [16]:
# Define the model: Bayesian linear regression with two predictors.
# NOTE: the notebook export stripped the indentation of the `with` body;
# it is restored here. The names alpha/beta/sigma are rebound from the
# true simulation values to PyMC3 random variables inside this context.
basic_model = Model()

with basic_model:
    # Priors for the unknown parameters.
    alpha = Normal('alpha', mu=0, sd=10)
    beta = Normal('beta', mu=0, sd=10, shape=2)
    sigma = HalfNormal('sigma', sd=1)

    # Expected value of the outcome (deterministic linear predictor).
    mu = alpha + beta[0]*X1 + beta[1]*X2

    # Likelihood (sampling distribution) of the observed data.
    Y_obs = Normal('Y_obs', mu=mu, sd=sigma, observed=Y)
In [20]:
# Maximum a posteriori (MAP) point estimate of the model parameters,
# using find_MAP's default optimizer (BFGS).
map_estimate = find_MAP(model=basic_model)  # default optimizer: BFGS
print(map_estimate)
In [22]:
# Re-estimate the MAP with Powell's method, a gradient-free optimizer
# from scipy, for comparison with the BFGS result above.
map_estimate = find_MAP(model=basic_model, fmin=optimize.fmin_powell)
print(map_estimate)
In [24]:
# Draw posterior samples with NUTS, initialized at the MAP estimate.
# NOTE: the notebook export stripped the indentation of the `with` body;
# it is restored here.
with basic_model:
    # Starting values: MAP found via Powell's (gradient-free) method.
    start = find_MAP(fmin=optimize.fmin_powell)

    # NUTS sampler, with its scaling taken from the MAP point.
    step = NUTS(scaling=start)

    # Draw 500 posterior samples.
    trace = sample(500, step, start=start)
In [29]:
# Inspect the last five posterior samples of alpha from the trace.
trace['alpha'][-5:]
Out[29]:
In [31]:
# Visualize the posterior samples for each model variable.
traceplot(trace)
pyl.show()
In [33]:
# Print a text summary of the posterior samples.
summary(trace)
In [ ]:
# NOTE(review): start of an unexecuted cell (`In [ ]`); `n` is not used
# anywhere in the code visible above — confirm whether a later part of
# the notebook depends on it before removing.
n = 400