In [4]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns
import theano
sns.set_style('whitegrid')
np.random.seed(123)
data = pd.read_csv('data/radon.csv')
data['log_radon'] = data['log_radon'].astype(theano.config.floatX)
county_names = data.county.unique()
county_index = data.county_code.values
n_counties = len(data.county.unique())
In [5]:
with pm.Model() as hierarchical_model_centered:
# Hyperpriors for group nodes
mu_a = pm.Normal('mu_a', mu=0., sd=100**2)
sigma_a = pm.HalfCauchy('sigma_a', 5)
mu_b = pm.Normal('mu_b', mu=0., sd=100**2)
sigma_b = pm.HalfCauchy('sigma_b', 5)
# Intercept for each county, distributed around group mean mu_a
# Above we just set mu and sd to a fixed value while here we
# plug in a common group distribution for all a and b (which are
# vectors of length n_counties).
a = pm.Normal('a', mu=mu_a, sd=sigma_a, shape=n_counties)
# Intercept for each county, distributed around group mean mu_a
b = pm.Normal('b', mu=mu_b, sd=sigma_b, shape=n_counties)
# Model error
eps = pm.HalfCauchy('eps', 5)
# Linear regression
radon_est = a[county_index] + b[county_index] * data.floor.values
# Data likelihood
radon_like = pm.Normal('radon_like', mu=radon_est, sd=eps, observed=data.log_radon)
In [6]:
with hierarchical_model_centered:
hierarchical_centered_trace = pm.sample(draws=5000, tune=1000, njobs=4)[1000:]
In [7]:
pm.traceplot(hierarchical_centered_trace);
In [9]:
fig, axs = plt.subplots(nrows=2, figsize=(12, 8))
axs[0].plot(hierarchical_centered_trace.get_values('sigma_b', chains=1), alpha=.5);
axs[0].set(ylabel='sigma_b');
axs[1].plot(hierarchical_centered_trace.get_values('b', chains=1), alpha=.5);
axs[1].set(ylabel='b');
In [14]:
x = pd.Series(hierarchical_centered_trace['b'][:, 75], name='slope b_75')
y = pd.Series(hierarchical_centered_trace['sigma_b'], name='slope group variance sigma_b')
sns.jointplot(x, y, ylim=(0, .7), size=10);
In [15]:
with pm.Model() as hierarchical_model_non_centered:
# Hyperpriors for group nodes
mu_a = pm.Normal('mu_a', mu=0., sd=100**2)
sigma_a = pm.HalfCauchy('sigma_a', 5)
mu_b = pm.Normal('mu_b', mu=0., sd=100**2)
sigma_b = pm.HalfCauchy('sigma_b', 5)
# Before:
# a = pm.Normal('a', mu=mu_a, sd=sigma_a, shape=n_counties)
# Transformed:
a_offset = pm.Normal('a_offset', mu=0, sd=1, shape=n_counties)
a = pm.Deterministic("a", mu_a + a_offset * sigma_a)
# Before:
# b = pm.Normal('b', mu=mu_b, sd=sigma_b, shape=n_counties)
# Now:
b_offset = pm.Normal('b_offset', mu=0, sd=1, shape=n_counties)
b = pm.Deterministic("b", mu_b + b_offset * sigma_b)
# Model error
eps = pm.HalfCauchy('eps', 5)
radon_est = a[county_idx] + b[county_idx] * data.floor.values
# Data likelihood
radon_like = pm.Normal('radon_like', mu=radon_est, sd=eps, observed=data.log_radon)
In [16]:
with hierarchical_model_non_centered:
hierarchical_non_centered_trace = pm.sample(draws=5000, tune=1000, njobs=4)[1000:]
In [17]:
pm.traceplot(hierarchical_non_centered_trace);
In [19]:
x = pd.Series(hierarchical_non_centered_trace['b_offset'][:, 75], name='slope b_offset_75')
y = pd.Series(hierarchical_non_centered_trace['sigma_b'], name='slope group variance sigma_b')
sns.jointplot(x, y, ylim=(0, .7), size=10);
In [22]:
%load_ext watermark
%watermark -dmvgp numpy,scipy,pandas,matplotlib,seaborn,patsy,pymc3,theano,joblib