In [1]:
import copy
import numpy as np
import matplotlib.pyplot as plt
import functions as f
import scipy.stats as stats
import statsmodels.api as sm
from pandas import DataFrame
%matplotlib inline
This notebook attempts to implement the pseudo-code from the paper "Bayesian Auxiliary Variable Models for Binary and Multinomial Regression" by Chris C. Holmes and Leonhard Held from Bayesian Analysis, 2006.
http://ba.stat.cmu.edu/journal/2006/vol01/issue01/held.pdf
Corrections to the paper are given here:
http://ba.stat.cmu.edu/journal/2011/vol06/issue02/vanderlans.pdf
Consider the Bayesian binary regression model,
\begin{eqnarray} y_i &\sim& \mbox{Bernoulli}(g^{-1}(\eta_i)) \\ \eta_i &=& x_i \beta \\ \beta &\sim& \pi(\beta) \end{eqnarray}
where $y_i \in \{0,1\}$, $i=1,\dots,n$, is a binary response variable for a collection of $n$ objects, with $p$ covariate measurements $x_i = (x_{i1}, \dots, x_{ip})$. Here $g(\cdot)$ is a link function, $\eta_i$ denotes the linear predictor, and $\beta$ is a $(p\times1)$ column vector of regression coefficients which a priori follow some distribution $\pi(\cdot)$.
See also Albert and Chib (1993): http://www.stat.cmu.edu/~brian/905-2009/all-papers/albert-chib-1993.pdf
For the probit link, $g^{-1}(u) = \Phi(u)$, where $\Phi(\cdot)$ denotes the cumulative distribution function of a standard normal random variable, the auxiliary variable representation of the binary regression problem is as follows:
\begin{eqnarray} y_i &=& \left\{ \begin{array}{ll} 1 & \mbox{if } z_i > 0 \\ 0 & \mbox{otherwise} \end{array} \right. \\ z_i &=& x_i \beta + \epsilon_i \\ \epsilon_i &\sim& N(0,1) \\ \beta &\sim& \pi(\beta) \end{eqnarray}
Now $y_i$ is determined by the sign of the auxiliary variable $z_i$.
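To make this representation concrete, here is a minimal simulation of the generative process; the sample size and "true" coefficients below are made up purely for illustration:

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(0)
n_sim = 500                                  # illustrative sample size
X_sim = np.column_stack((np.ones(n_sim), rng.normal(size=n_sim)))
beta_true = np.array([-0.5, 1.0])            # arbitrary coefficients

z_sim = X_sim.dot(beta_true) + rng.normal(size=n_sim)  # z_i = x_i beta + eps_i
y_sim = (z_sim > 0).astype(int)                        # y_i = 1 iff z_i > 0

# Under this construction P(y_i = 1 | x_i) = Phi(x_i beta), so the empirical
# rate should be close to the average of Phi(x_i beta):
print(y_sim.mean(), norm.cdf(X_sim.dot(beta_true)).mean())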
For testing purposes we will use the data outlined in the Albert and Chib paper for the probit model: specifically, the Finney data from 1947, where the probit model of interest is
\begin{eqnarray*} \Phi^{-1}(p_i) = \beta_0 + \beta_1 x_{1i} + \beta_2 x_{2i}, \qquad i=1,\dots,39, \end{eqnarray*}
where $x_{1i}$ is the volume of air inspired, $x_{2i}$ is the rate of air inspired, and the binary outcome observed is the occurrence or non-occurrence of a transient vasoconstriction on the skin of the digits.
In [2]:
finney47 = np.genfromtxt('data/finney1947.csv', dtype=None, delimiter=',', names=True)
DataFrame(finney47).tail(n=5)
Out[2]:
This section also assumes that the prior on $\beta$ is $\pi(\beta) = N(0, v)$ and that the design matrix $X$ has dimension $(n\times p)$. Taking a hint from the Holmes and Held paper, we set up the prior as $\pi(\beta) = N(0, 100 I_p)$.
In [3]:
# Set up the design matrix X and the prior for beta
yi = finney47['Y']
X = np.vstack((np.ones(len(finney47)), finney47['Volume'], finney47['Rate'])).transpose()
b = np.zeros(3)           # prior mean of beta
v = 100 * np.identity(3)  # prior covariance of beta: 100 * I_p
n_para = X.shape[1]
n_obs = len(yi)
In [4]:
# First record the constants that are unaltered within the MCMC loop
V = np.linalg.pinv(np.dot(X.transpose(), X) + np.linalg.pinv(v))  # posterior covariance of beta given Z
L = np.linalg.cholesky(V)     # lower triangular Cholesky factor of V
S = np.dot(V, X.transpose())  # maps Z to the conditional mean of beta: B = S Z
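A quick check that the factorization behaves as expected; this is not part of the algorithm, just a sanity assertion:

assert np.allclose(np.dot(L, L.transpose()), V)  # L L' should reconstruct V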
In [6]:
# For j = 1 to the number of observations, compute
# H[j] = x_j V x_j', the j-th diagonal element of the hat matrix X V X'
H = np.empty(n_obs)
for i in np.arange(n_obs):
    H[i] = np.dot(X[i,:], S[:,i])
W = H / (1 - H)
Q = W + 1  # conditional variance of z_j in the leave-one-out update below
H
Out[6]:
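As an aside, the same diagonal can be computed without the explicit loop; this vectorized form is equivalent (a convenience, not something from the paper):

H_vec = np.einsum('ij,ji->i', X, S)  # picks out diag(X S) = diag(X V X')
assert np.allclose(H_vec, H)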
In [8]:
# Initialise the latent variable Z from a truncated standard normal,
# with the truncation region determined by the observed Y
Z = np.empty(n_obs)
for i, y in enumerate(yi):
    if y:
        Z[i] = stats.truncnorm.rvs(0., float('inf'), loc=0., scale=1.)
    else:
        Z[i] = stats.truncnorm.rvs(float('-inf'), 0., loc=0., scale=1.)
### Holmes and Held say to initialise Z ~ N(0, I_n)Ind(Y,Z).
### Instead of sampling from a multivariate truncated normal we can
### sample each Z_i separately: with the identity matrix as the
### covariance the components are independent and the truncation
### region is a product of half-lines, so the joint truncated normal
### factorises into the n univariate truncated normals drawn above.
B = np.dot(S, Z)  # B denotes the conditional mean of beta given Z
B.shape
Out[8]:
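One scipy detail worth making explicit, since the sampler below relies on it: stats.truncnorm takes its truncation bounds in standardised units, i.e. relative to loc and scale. A small helper (hypothetical, purely for illustration) spells out the convention:

def truncated_normal(lower, upper, loc, scale):
    # scipy wants the bounds expressed as (bound - loc) / scale
    a = (lower - loc) / scale
    b = (upper - loc) / scale
    return stats.truncnorm.rvs(a, b, loc=loc, scale=scale)

# e.g. a draw from N(2, 0.5^2) truncated to (0, inf):
truncated_normal(0., float('inf'), 2., 0.5)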
In [7]:
n_iter = 10000
low, mid, high = float('-inf'), 0., float('inf')
betas = np.empty((n_iter, n_para))
for i in np.arange(n_iter):
    if (i + 1) % 1000 == 0:
        f.progress(i + 1, n_iter)
    z_old = copy.copy(Z)
    for j in np.arange(n_obs):
        # Leave-one-out mean: m_j = x_j B - w_j (z_j - x_j B)
        m = np.dot(X[j,:], B)
        m = m - W[j] * (Z[j] - m)
        # Q[j] = w_j + 1 is the conditional *variance* of z_j, so the
        # scale (standard deviation) handed to truncnorm is its square root
        sd = np.sqrt(Q[j])
        if yi[j]:
            Z[j] = stats.truncnorm.rvs((mid - m) / sd, (high - m) / sd, loc=m, scale=sd)
        else:
            Z[j] = stats.truncnorm.rvs((low - m) / sd, (mid - m) / sd, loc=m, scale=sd)
        # Rank-one update of the conditional mean of beta
        B = B + (Z[j] - z_old[j]) * S[:,j]
    # Draw beta | Z ~ N(B, V) via the Cholesky factor: beta = B + L T, T ~ N(0, I)
    T = np.random.standard_normal(n_para)
    betas[i,:] = B + np.dot(L, T)
print("\n{0} Simulations complete".format(n_iter))
print(betas[0:10,:])
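As a sanity check on the Gibbs output we can compare the posterior means against the classical probit maximum-likelihood fit (statsmodels is already imported above); with a prior as diffuse as $N(0, 100I_p)$ the two should be close:

probit_mle = sm.Probit(yi, X).fit()
print(probit_mle.params)            # MLE of (beta_0, beta_1, beta_2)
print(betas[1000:,:].mean(axis=0))  # posterior means after a 1000-draw burn-in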
In [8]:
F = plt.figure()
plt.hist(betas[1000:,0], 50, density=True)  # posterior draws of beta_0 after burn-in
F.set_size_inches(10,5)
plt.show()
In [9]:
np.mean(betas[1000:,0])
Out[9]:
In [10]:
F = plt.figure()
plt.hist(betas[1000:,1], 50, density=True)  # posterior draws of beta_1 after burn-in
F.set_size_inches(10,5)
plt.show()
In [11]:
np.mean(betas[1000:,1])
Out[11]:
In [12]:
F = plt.figure()
plt.hist(betas[1000:,2], 50, density=True)  # posterior draws of beta_2 after burn-in
F.set_size_inches(10,5)
plt.show()
In [13]:
np.mean(betas[1000:,2])
Out[13]:
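Finally, a compact summary of all three coefficients at once; the 95% intervals are plain posterior quantiles of the retained draws, using the same ad-hoc burn-in of 1000 as above:

post = betas[1000:,:]  # discard burn-in
for k in range(n_para):
    lo_k, hi_k = np.percentile(post[:,k], [2.5, 97.5])
    print("beta_{0}: mean = {1:.3f}, 95% interval = ({2:.3f}, {3:.3f})".format(
        k, post[:,k].mean(), lo_k, hi_k))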