Bayesian data analysis

Chapter 11, demo 1

Gibbs sampling demonstration


In [1]:
from __future__ import division
import numpy as np
import scipy.io # For importing a matlab file
from scipy import linalg, stats

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

In [2]:
# edit default plot settings (colours from colorbrewer2.org)
plt.rc('font', size=14)
plt.rc('lines', color='#377eb8', linewidth=1.5, markeredgewidth=1.5,
       markersize=8)
plt.rc('axes', color_cycle=('#377eb8','#e41a1c','#4daf4a',
                            '#984ea3','#ff7f00','#ffff33'))

In [3]:
# Parameters of a Normal distribution used as a toy target distribution
y1 = 0
y2 = 0
r = 0.8
S = np.array([[1.0, r], [r, 1.0]])

# Starting value of the chain
t1 = -2.5
t2 = 2.5
# Number of iterations.
M = 2*1000
# N.B. In this implementation one iteration updates only one parameter and one
# complete iteration updating both parameters takes two basic iterations. This
# implementation was used to make plotting of Gibbs sampler's zig-zagging. In
# plots You can implement this also by saving only the final state of complete
# iteration updating all parameters.

In [4]:
# Gibbs sampling here

# Allocate memory for the samples
tt = np.empty((M,2))
tt[0] = [t1, t2]    # Save starting point

# For demonstration load pre-computed values
# Replace this with your algorithm!
# tt is a M x 2 array, with M samples of both theta_1 and theta_2
res_path = '../utilities_and_data/demo11_2.mat'
res = scipy.io.loadmat(res_path)
''' Content information of the precalculated results:
>>> scipy.io.whosmat(res_path)
[('tt', (2001, 2), 'double')]
'''
tt = res['tt']

The rest is just for illustration


In [5]:
# Plotting grid
Y1 = np.linspace(-4.5, 4.5, 150)
Y2 = np.linspace(-4.5, 4.5, 150)

# Number of samples to discard from the begining
burnin = 50

# Create legend instances
el_legend = mpl.lines.Line2D([], [], color='#e41a1c', linewidth=1)
samp_legend = mpl.lines.Line2D(
    [], [], linestyle='', marker='o',
    markerfacecolor='none', markeredgecolor='#377eb8')
samp_new_legend = mpl.lines.Line2D(
    [], [], linestyle='', marker='x',
    markerfacecolor='none', markeredgecolor='#377eb8')
pdfline_legend = mpl.lines.Line2D([], [], color='#377eb8')
chain_legend = mpl.lines.Line2D(
    [], [], color='#377eb8', marker='o',
    markerfacecolor='none', markeredgecolor='#377eb8')
burnchain_legend = mpl.lines.Line2D(
    [], [], color='m', marker='o',
    markerfacecolor='none', markeredgecolor='m')

# Plot 90% HPD.
# In 2d-case contour for 90% HPD is an ellipse, whose semimajor
# axes can be computed from the eigenvalues of the covariance
# matrix scaled by a value selected to get ellipse match the
# density at the edge of 90% HPD. Angle of the ellipse could be 
# computed from the eigenvectors, but since marginals are same
# we know that angle is 45 degrees.
q = np.sort(np.sqrt(linalg.eigh(S, eigvals_only=True)) * 2.147)

def fig90hpd(ax):
    """Plot 90hpd region into the given axis"""
    el = mpl.patches.Ellipse(
        xy = (y1,y2),
        width = 2 * q[1],
        height = 2 * q[0],
        angle = 45,
        facecolor = 'none',
        edgecolor = '#e41a1c'
    )
    ax.add_artist(el)

In [6]:
spshape = (5,3)
fig, axes = plt.subplots(spshape[0], spshape[1], sharex=True, sharey=True,
                         figsize=(16,27), subplot_kw=dict(aspect='equal'))
fig.subplots_adjust(hspace=0.09, wspace=0.09)
axes[0,0].set_xlim([-4.5, 4.5])
axes[0,0].set_ylim([-4.5, 4.5])
for i in range(spshape[0]):
    axes[i,0].set_ylabel(r'$\theta_2$', fontsize=18)
for j in range(spshape[1]):
    axes[-1,j].set_xlabel(r'$\theta_1$', fontsize=18)
axes[0,0].legend(
    (   el_legend, 
        samp_legend,
        samp_new_legend,
        pdfline_legend,
        chain_legend,
        burnchain_legend
    ),
    (   '90% HPD',
        'samples',
        'new sample',
       r'conditional density given $\theta_{1/2}$',
        'Markov chain',
        'burn-in'
    ),
    numpoints=1,
    loc='lower left',
    bbox_to_anchor=(0., 1.02, 1., .102)
)

ax = axes[0,0]
fig90hpd(ax)
ax.plot(tt[0,0], tt[0,1], 'o',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.annotate(
    'starting point',
    (tt[0,0], tt[0,1]),
    (-1.2, 3.5),
    arrowprops=dict(
        facecolor='black',
        shrink=0.2,
        width=1,
        headwidth=8,
        frac=0.2
    )
)

ax = axes[0,1]
fig90hpd(ax)
i = 0
ax.plot(tt[:i+1,0], tt[:i+1,1], 'o',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.axhline(y=tt[i,1], linestyle='--', color='k', linewidth=1)
ax.plot(
    Y1,
    tt[i,1] + stats.norm.pdf(
        Y1,
        loc = y1 + r*(tt[i,1] - y2),
        scale = np.sqrt((1 - r**2))
    ),
    color = '#377eb8'
)

ax = axes[0,2]
fig90hpd(ax)
i = 0
ax.plot(tt[:i+1,0], tt[:i+1,1], 'o',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.plot(tt[i+1,0], tt[i+1,1], 'x',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.axhline(y=tt[i,1], linestyle='--', color='k', linewidth=1)
ax.plot(
    Y1,
    tt[i,1] + stats.norm.pdf(
        Y1,
        loc = y1 + r*(tt[i,1] - y2),
        scale = np.sqrt((1 - r**2))
    ),
    color = '#377eb8'
)

ax = axes[1,0]
fig90hpd(ax)
i = 1
ax.plot(tt[:i+1,0], tt[:i+1,1], 'o',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.axvline(x=tt[i,0], linestyle='--', color='k')
ax.plot(
    tt[i,0] + stats.norm.pdf(
        Y2,
        loc = y2 + r*(tt[i,0] - y1),
        scale = np.sqrt((1 - r**2))
    ),
    Y2,
    color = '#377eb8'
)

ax = axes[1,1]
fig90hpd(ax)
i = 1
ax.plot(tt[:i+1,0], tt[:i+1,1], 'o',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.plot(tt[i+1,0], tt[i+1,1], 'x',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.axvline(x=tt[i,0], linestyle='--', color='k')
ax.plot(
    tt[i,0] + stats.norm.pdf(
        Y2,
        loc = y2 + r*(tt[i,0] - y1),
        scale = np.sqrt((1 - r**2))
    ),
    Y2,
    color = '#377eb8'
)

ax = axes[1,2]
fig90hpd(ax)
i = 2
ax.plot(tt[:i+1,0], tt[:i+1,1], 'o',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.plot(tt[i+1,0], tt[i+1,1], 'x',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.axhline(y=tt[i,1], linestyle='--', color='k', linewidth=1)
ax.plot(
    Y1,
    tt[i,1] + stats.norm.pdf(
        Y1,
        loc = y1 + r*(tt[i,1] - y2),
        scale = np.sqrt((1 - r**2))
    ),
    color = '#377eb8'
)

ax = axes[2,0]
fig90hpd(ax)
i = 3
ax.plot(tt[:i+1,0], tt[:i+1,1], 'o',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.plot(tt[i+1,0], tt[i+1,1], 'x',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.axvline(x=tt[i,0], linestyle='--', color='k')
ax.plot(
    tt[i,0] + stats.norm.pdf(
        Y2,
        loc = y2 + r*(tt[i,0] - y1),
        scale = np.sqrt((1 - r**2))
    ),
    Y2,
    color = '#377eb8'
)

ax = axes[2,1]
fig90hpd(ax)
i = 4
ax.plot(tt[:i+1,0], tt[:i+1,1], 'o',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.plot(tt[i+1,0], tt[i+1,1], 'x',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.axhline(y=tt[i,1], linestyle='--', color='k', linewidth=1)
ax.plot(
    Y1,
    tt[i,1] + stats.norm.pdf(
        Y1,
        loc = y1 + r*(tt[i,1] - y2),
        scale = np.sqrt((1 - r**2))
    ),
    color = '#377eb8'
)

ax = axes[2,2]
fig90hpd(ax)
i = 5
ax.plot(tt[:i+1,0], tt[:i+1,1], 'o',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.plot(tt[i+1,0], tt[i+1,1], 'x',
        markerfacecolor='none', markeredgecolor='#377eb8')
ax.axvline(x=tt[i,0], linestyle='--', color='k')
ax.plot(
    tt[i,0] + stats.norm.pdf(
        Y2,
        loc = y2 + r*(tt[i,0] - y1),
        scale = np.sqrt((1 - r**2))
    ),
    Y2,
    color = '#377eb8'
)

ax = axes[3,0]
fig90hpd(ax)
i = 6
line, = ax.plot(tt[:i+1,0], tt[:i+1,1], color='#377eb8')
line, = ax.plot(
    tt[:i+1:2,0], tt[:i+1:2,1],
    'o', markerfacecolor='none', markeredgecolor='#377eb8')
ax.annotate(
    'every second sample discarded',
    (tt[1,0], tt[1,1]),
    (-3.5, 3.5),
    arrowprops=dict(
        facecolor='black',
        shrink=0.2,
        width=1,
        headwidth=8,
        frac=0.2
    )
)

ax = axes[3,1]
fig90hpd(ax)
i = 26
ax.plot(tt[:i+1,0], tt[:i+1,1], color='#377eb8')
ax.plot(tt[:i+1:2,0], tt[:i+1:2,1],
        'o', markerfacecolor='none', markeredgecolor='#377eb8')
ax.text(-3.5, 3.5, '14 samples')

ax = axes[3,2]
fig90hpd(ax)
i = 198
ax.plot(tt[:i+1,0], tt[:i+1,1], color='#377eb8', alpha=0.5)
ax.scatter(tt[:i+1:2,0], tt[:i+1:2,1], 18, color='#377eb8')
ax.text(-3.5, 3.5, '100 samples')

ax = axes[4,0]
fig90hpd(ax)
i = 198
ax.plot(tt[:burnin,0], tt[:burnin,1], color='m', alpha=0.5)
ax.scatter(tt[:burnin:2,0], tt[:burnin:2,1], 18, color='m')
ax.plot(tt[burnin:i+1,0], tt[burnin:i+1,1], color='#377eb8',
        alpha=0.5)
ax.scatter(tt[burnin:i+1:2,0], tt[burnin:i+1:2,1], 18,
           color='#377eb8')
ax.text(-3.5, 3.5, 'burn-in')

ax = axes[4,1]
fig90hpd(ax)
i = 198
ax.scatter(tt[burnin:i+1:2,0], tt[burnin:i+1:2,1], 20,
           color='#377eb8')
ax.text(-3.5, 3.5, '100 samples with burn-in removed')

ax = axes[4,2]
fig90hpd(ax)
ax.scatter(tt[burnin::2,0], tt[burnin::2,1], 20, color='#377eb8',
           alpha=0.3)
ax.text(-3.5, 3.5, '950 samples');



In [7]:
fig = plt.figure(figsize=(8,16))

indexes = np.arange(burnin,M,2)
samps = tt[indexes]

ax1 = fig.add_subplot(3,1,1)
ax1.axhline(y=0, linewidth=1, color='gray')
line1, line2, = ax1.plot(indexes/2, samps, linewidth=1)
ax1.legend((line1, line2), (r'$\theta_1$', r'$\theta_2$'))
ax1.set_xlabel('iteration')
ax1.set_title('trends')
ax1.set_xlim([burnin/2, 1000])

ax2 = fig.add_subplot(3,1,2)
ax2.axhline(y=0, linewidth=1, color='gray')
ax2.plot(
    indexes/2,
    np.cumsum(samps, axis=0)/np.arange(1,len(samps)+1)[:,None],
    linewidth=1.5
)
ax2.set_xlabel('iteration')
ax2.set_title('cumulative average')
ax2.set_xlim([burnin/2, 1000])

ax3 = fig.add_subplot(3,1,3)
maxlag = 20
sampsc = samps - np.mean(samps, axis=0)
acorlags = np.arange(maxlag+1)
ax3.axhline(y=0, linewidth=1, color='gray')
for i in [0,1]:
    t = np.correlate(sampsc[:,i], sampsc[:,i], 'full')
    t = t[-len(sampsc):-len(sampsc)+maxlag+1] / t[-len(sampsc)]
    ax3.plot(acorlags, t)
ax3.set_xlabel('lag')
ax3.set_title('estimate of the autocorrelation function')
fig.subplots_adjust(hspace=0.3);



In [8]:
fig = plt.figure(figsize=(8,8))

indexes = np.arange(burnin,M,2)
samps = tt[indexes]
nsamps = np.arange(1,len(samps)+1)

ax1 = fig.add_subplot(1,1,1)
ax1.axhline(y=0, linewidth=1, color='gray')
line1, line2, = ax1.plot(
    indexes/2,
    np.cumsum(samps, axis=0)/nsamps[:,None],
    linewidth=1.5
)
er1, = ax1.plot(
    indexes/2, 1.96/np.sqrt(nsamps/4), 'k--', linewidth=1)
ax1.plot(indexes/2, -1.96/np.sqrt(nsamps/4), 'k--', linewidth=1)
er2, = ax1.plot(
    indexes/2, 1.96/np.sqrt(nsamps), 'k:', linewidth=1)
ax1.plot(indexes/2, -1.96/np.sqrt(nsamps), 'k:', linewidth=1)
ax1.set_xlabel('iteration')
ax1.set_title('cumulative average')
ax1.legend(
    (line1, line2, er1, er2),
    (r'$\theta_1$', r'$\theta_2$',
      '95% interval for MCMC error',
      '95% interval for independent MC'
    )
)
ax1.set_xlim([burnin/2, 1000])
ax1.set_ylim([-1.5, 1.5]);