In this notebook, we illustrate hierarchical Bayesian linear regression on a toy 1D dataset, comparing an unpooled model (separate parameters per group) with centered and non-centered parameterizations of a hierarchical model.
The code is based on chapter 3 of Bayesian Analysis with Python and a blog post by Thomas Wiecki.
In [3]:
%matplotlib inline
import sklearn
import scipy.stats as stats
import scipy.optimize
import matplotlib.pyplot as plt
import seaborn as sns
import time
import numpy as np
import os
import pandas as pd
In [1]:
!pip install pymc3==3.8
import pymc3 as pm
pm.__version__
!pip install arviz
import arviz as az
In [11]:
N = 10  # num samples per group
M = 8  # num groups
idx = np.repeat(range(M-1), N)  # N samples for groups 0-6
idx = np.append(idx, 7)  # 1 sample for the 7th group
np.random.seed(123)
#alpha_real = np.random.normal(2.5, 0.5, size=M)
#beta_real = np.random.beta(6, 1, size=M)
#eps_real = np.random.normal(0, 0.5, size=len(idx))
alpha_real = np.random.normal(2.5, 0.5, size=M)
beta_real = np.random.beta(1, 1, size=M) # slope is closer to 0
eps_real = np.random.normal(0, 0.5, size=len(idx))
print(beta_real)
y_m = np.zeros(len(idx))
x_m = np.random.normal(10, 1, len(idx))
y_m = alpha_real[idx] + beta_real[idx] * x_m + eps_real
x_centered = x_m - x_m.mean()
_, ax = plt.subplots(2, 4, figsize=(10, 5), sharex=True, sharey=True)
ax = np.ravel(ax)
j, k = 0, N
for i in range(M):
    ax[i].scatter(x_m[j:k], y_m[j:k])
    ax[i].set_xlabel(f'x_{i}')
    ax[i].set_ylabel(f'y_{i}', rotation=0, labelpad=15)
    #ax[i].set_xlim(6, 15)
    #ax[i].set_ylim(1, 18)
    j += N
    k += N
plt.tight_layout()
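As a sanity check on the simulated data (a minimal sketch, using only numpy), we can count how many observations landed in each group:

# Expect 10 observations for groups 0-6 and a single one for group 7.
print(np.bincount(idx))  # [10 10 10 10 10 10 10  1]
print(x_m.shape, y_m.shape)  # (71,) (71,)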
In [5]:
# Fit separate models per group (no pooling)
with pm.Model() as unpooled_model:
    α = pm.Normal('α', mu=0, sd=10, shape=M)
    β = pm.Normal('β', mu=0, sd=10, shape=M)
    ϵ = pm.HalfCauchy('ϵ', 5)
    y_pred = pm.Normal('y_pred', mu=α[idx] + β[idx] * x_m,
                       sd=ϵ, observed=y_m)
    trace_up = pm.sample(1000)
az.summary(trace_up)
Out[5]:
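Before inspecting the fitted lines, it is worth eyeballing the chains; a minimal diagnostic sketch using ArviZ (plot_trace shows the marginal posterior and the sampled chains for each variable):

# Visual convergence check on the per-group intercepts and slopes.
az.plot_trace(trace_up, var_names=['α', 'β'])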
In [0]:
def plot_post_pred_samples(trace, nsamples=20):
    """Plot the data for each group, the posterior-mean regression line,
    and nsamples regression lines drawn from the posterior."""
    _, ax = plt.subplots(2, 4, figsize=(10, 5), sharex=True, sharey=True,
                         constrained_layout=True)
    ax = np.ravel(ax)
    j, k = 0, N
    x_range = np.linspace(x_m.min(), x_m.max(), 10)
    X = x_range[:, np.newaxis]
    for i in range(M):
        ax[i].scatter(x_m[j:k], y_m[j:k])
        ax[i].set_xlabel(f'x_{i}')
        ax[i].set_ylabel(f'y_{i}', labelpad=17, rotation=0)
        alpha_m = trace['α'][:, i].mean()
        beta_m = trace['β'][:, i].mean()
        ax[i].plot(x_range, alpha_m + beta_m * x_range, c='r', lw=3,
                   label=f'y = {alpha_m:.2f} + {beta_m:.2f} * x')
        plt.xlim(x_m.min()-1, x_m.max()+1)
        plt.ylim(y_m.min()-1, y_m.max()+1)
        alpha_samples = trace['α'][:, i]
        beta_samples = trace['β'][:, i]
        ndx = np.random.choice(np.arange(len(alpha_samples)), nsamples)
        alpha_samples_thinned = alpha_samples[ndx]
        beta_samples_thinned = beta_samples[ndx]
        ax[i].plot(x_range, alpha_samples_thinned + beta_samples_thinned * X,
                   c='gray', alpha=0.5)
        j += N
        k += N
In [7]:
plot_post_pred_samples(trace_up)
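Note that the last group receives only a single observation, so its unpooled fit is highly uncertain: with one data point, the intercept and slope are essentially unidentifiable and the posterior is dominated by the vague priors. This is exactly the situation that the hierarchical pooling below is meant to improve.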
In [8]:
# Fit a hierarchical (centered) model to the raw data
with pm.Model() as model_centered:
    # hyper-priors
    μ_α = pm.Normal('μ_α', mu=0, sd=10)
    σ_α = pm.HalfNormal('σ_α', sd=10)
    μ_β = pm.Normal('μ_β', mu=0, sd=10)
    σ_β = pm.HalfNormal('σ_β', sd=10)
    # priors
    α = pm.Normal('α', mu=μ_α, sd=σ_α, shape=M)
    β = pm.Normal('β', mu=μ_β, sd=σ_β, shape=M)
    ϵ = pm.HalfCauchy('ϵ', 5)
    y_pred = pm.Normal('y_pred', mu=α[idx] + β[idx] * x_m,
                       sd=ϵ, observed=y_m)
    trace_centered = pm.sample(1000)
az.summary(trace_centered)
Out[8]:
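The centered parameterization of a hierarchical model is prone to divergent transitions, since the posterior develops a funnel shape as σ_α and σ_β approach zero. As a quick check (this assumes the default NUTS sampler, which records a 'diverging' statistic):

# Count divergent transitions across all chains; a nonzero count
# suggests the sampler struggled with the funnel geometry.
diverging = trace_centered.get_sampler_stats('diverging')
print('Divergent transitions (centered):', diverging.sum())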
In [9]:
plot_post_pred_samples(trace_centered)
In [10]:
# Fit the non-centered model to the raw data
with pm.Model() as model_noncentered:
    # hyper-priors
    μ_α = pm.Normal('μ_α', mu=0, sd=10)
    σ_α = pm.HalfNormal('σ_α', sd=10)
    μ_β = pm.Normal('μ_β', mu=0, sd=10)
    σ_β = pm.HalfNormal('σ_β', sd=10)
    # priors: sample standardized offsets, then shift and scale,
    # so NUTS explores a geometry that does not depend on σ_α, σ_β
    α_offset = pm.Normal('α_offset', mu=0, sd=1, shape=M)
    α = pm.Deterministic('α', μ_α + σ_α * α_offset)
    β_offset = pm.Normal('β_offset', mu=0, sd=1, shape=M)
    β = pm.Deterministic('β', μ_β + σ_β * β_offset)
    ϵ = pm.HalfCauchy('ϵ', 5)
    y_pred = pm.Normal('y_pred', mu=α[idx] + β[idx] * x_m,
                       sd=ϵ, observed=y_m)
    trace_noncentered = pm.sample(1000)
az.summary(trace_noncentered)
Out[10]:
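For comparison, we can count divergences under the non-centered parameterization and overlay the per-group posteriors from both models; this is a sketch using ArviZ's forest plot:

diverging_nc = trace_noncentered.get_sampler_stats('diverging')
print('Divergent transitions (non-centered):', diverging_nc.sum())

# Side-by-side credible intervals for the group-level intercepts and slopes.
az.plot_forest([trace_centered, trace_noncentered],
               model_names=['centered', 'non-centered'],
               var_names=['α', 'β'], combined=True)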
In [12]:
plot_post_pred_samples(trace_noncentered)
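To see where the centered model ran into trouble, one can plot the divergent draws in the (σ_β, β) plane; divergences clustering near σ_β ≈ 0 are the classic funnel signature. This is a sketch: the dimension name 'β_dim_0' is the one ArviZ auto-generates when converting a PyMC3 trace, so it may differ across versions.

# Convert inside the model context so sampler stats (divergences) are kept.
with model_centered:
    idata_centered = az.from_pymc3(trace_centered)
# Pair plot of the group-level scale vs the first group's slope,
# with divergent draws highlighted.
az.plot_pair(idata_centered, var_names=['σ_β', 'β'],
             coords={'β_dim_0': [0]}, divergences=True)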