This notebook demonstrates the basics of Bayesian estimation of the general linear model. It is based on material from http://twiecki.github.io/blog/2013/08/12/bayesian-glms-1/. First, let's generate some data for a simple blocked design.


In [ ]:
import os,sys
import numpy
%matplotlib inline
import matplotlib.pyplot as plt
sys.path.insert(0,'../')
from utils.mkdesign import create_design_singlecondition
from nipy.modalities.fmri.hemodynamic_models import spm_hrf,compute_regressor
from statsmodels.tsa.arima_process import arma_generate_sample
import scipy.stats

tslength=300
# create a blocked single-condition design and convolve it with the SPM HRF
d,design=create_design_singlecondition(blockiness=1.0,deslength=tslength,
                                       blocklength=20,offset=20)
regressor,_=compute_regressor(design,'spm',numpy.arange(0,tslength))

# generate AR(1) noise using statsmodels (lag polynomial [1, 0.3])
ar1_noise=arma_generate_sample([1,0.3],[1,0.],len(regressor))

# design matrix: task regressor plus an intercept column
X=numpy.hstack((regressor,numpy.ones((len(regressor),1))))
beta=numpy.array([4,100])   # true effect size and baseline
noise_sd=10
data = X.dot(beta) + ar1_noise*noise_sd  # simulated data: signal plus scaled AR(1) noise
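
As an optional sanity check, we can plot the simulated data against the noise-free signal implied by the design matrix and the true betas:


In [ ]:
plt.figure(figsize=(10, 4))
plt.plot(data, label='simulated data')            # task signal plus scaled AR(1) noise
plt.plot(X.dot(beta), label='noise-free signal')  # design matrix times the true betas
plt.legend()
plt.xlabel('timepoint')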

First, estimate the model using ordinary least squares (OLS).
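
For reference, the next cell computes the closed-form OLS solution and a t statistic for the contrast $c$:

$$\hat{\beta} = (X^\top X)^{-1} X^\top y, \qquad \hat{\sigma}^2 = \frac{e^\top e}{N - p}, \qquad t = \frac{c^\top \hat{\beta}}{\sqrt{c^\top (X^\top X)^{-1} c \, \hat{\sigma}^2}}$$

where $e$ are the residuals, $N$ is the number of timepoints, and $p$ is the number of columns of $X$.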


In [ ]:
beta_hat=numpy.linalg.inv(X.T.dot(X)).dot(X.T).dot(data)  # OLS estimate via the normal equations
resid=data - X.dot(beta_hat)
sigma2hat=(resid.dot(resid))/(X.shape[0] - X.shape[1])    # residual variance estimate
c=numpy.array([1,0])  # contrast selecting the task regressor
t=c.dot(beta_hat)/numpy.sqrt(c.dot(numpy.linalg.inv(X.T.dot(X)).dot(c))*sigma2hat)
print(beta_hat)
print(t, 1.0 - scipy.stats.t.cdf(t,X.shape[0] - X.shape[1]))

Now let's estimate the same model using Bayesian estimation with PyMC3.
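
In the Bayesian setting we place priors on the regression coefficients and on the noise parameters, and characterize the full posterior distribution rather than a single point estimate:

$$p(\beta, \sigma \mid y, X) \propto p(y \mid X, \beta, \sigma)\, p(\beta)\, p(\sigma)$$

Because this posterior has no convenient closed form, we approximate it by drawing samples with MCMC (here, the NUTS sampler).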


In [ ]:
import pymc3
datadict = dict(x=X[:,0], y=data)

with pymc3.Model() as model:
    # specify the GLM and pass in the data. The resulting linear model, its likelihood,
    # and all of its parameters are automatically added to our model.
    family = pymc3.glm.families.T(link=pymc3.glm.links.Identity,
        priors={'nu': 1.5,'lam': ('sigma', pymc3.Uniform.dist(0, 20))})
    pymc3.glm.glm('y ~ x', datadict,family=family)
    start = pymc3.find_MAP()
    step = pymc3.NUTS(scaling=start) # Instantiate MCMC sampling algorithm
    trace = pymc3.sample(2000, step, start=start, progressbar=False) # draw 2000 posterior samples using NUTS sampling
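
To inspect the result of this model, we can summarize the sampled parameters, discarding the first 100 samples as burn-in (the burn-in length here simply mirrors the cells further below and is otherwise an arbitrary choice):


In [ ]:
# posterior summaries for all parameters created by the glm helper
pymc3.plots.summary(trace[100:])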

In [ ]:
with pymc3.Model() as model: # model specifications in PyMC3 are wrapped in a with-statement
    # Define priors
    sigma = pymc3.HalfCauchy('sigma', beta=10, testval=1.)
    intercept = pymc3.Normal('Intercept', 0, sd=20)
    x_coeff = pymc3.Normal('x', 0, sd=20)
    
    # Define likelihood
    likelihood = pymc3.Normal('y', mu=intercept + x_coeff * X[:,0], 
                        sd=sigma, observed=data)
    
    # Inference!
    start = pymc3.find_MAP() # Find starting value by optimization
    step = pymc3.NUTS(scaling=start) # Instantiate MCMC sampling algorithm
    trace = pymc3.sample(2000, step, start=start, progressbar=False) # draw 2000 posterior samples using NUTS sampling

In [ ]:
start  # display the MAP estimates found by the optimization above

In [ ]:
print(numpy.mean(trace.get_values('x'),0))  # posterior mean of the slope
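
Beyond the posterior mean, the samples also give a direct interval estimate for the slope; for example, a 95% credible interval (again discarding the first 100 samples as burn-in):


In [ ]:
x_samples = trace.get_values('x')[100:]          # posterior samples of the slope, after burn-in
print(numpy.percentile(x_samples, [2.5, 97.5]))  # 95% credible interval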

In [ ]:
plt.figure(figsize=(7, 7))
pymc3.traceplot(trace[100:], ['x'])           # trace plot for the slope, after 100 burn-in samples
plt.tight_layout();
pymc3.plots.summary(trace[100:], vars=['x'])  # posterior summary for the slope
