In [1]:
import numpy as np
import torch
from scipy import stats
from torch.autograd import Variable
import sys, inspect
sys.path.insert(0, '..')
%matplotlib inline
import pymc
import matplotlib.pyplot as plt
from learn_smc_proposals import cde
from learn_smc_proposals.examples import multilevel_poisson
import seaborn as sns
sns.set_context("notebook", font_scale=1.5, rc={"lines.markersize": 12})
sns.set_style('ticks')
We're going to use the learned proposals for inference in the pumps model:
http://www.openbugs.net/Examples/Pumps.html
This hierarchical Gamma-Poisson model has local parameters $\theta_i$ (one failure rate per pump) and global parameters $\alpha, \beta$.
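As a reference for what follows, here is a minimal forward-sampling sketch of the generative process (hypothetical code, for orientation only; the actual model definition lives in multilevel_poisson.gamma_poisson). The densities match those used in the importance samplers below.
In [ ]:
# Hypothetical sketch of the pumps generative model; not part of the pipeline
def sample_pumps_prior(t):
    alpha = stats.expon(scale=1.0).rvs()                      # alpha ~ Exponential(1)
    beta = stats.gamma(a=0.1, scale=1.0).rvs()                # beta ~ Gamma(0.1, 1)
    theta = stats.gamma(a=alpha, scale=1.0/beta).rvs(len(t))  # theta_i ~ Gamma(alpha, rate=beta)
    x = stats.poisson(theta * t).rvs()                        # x_i ~ Poisson(theta_i * t_i)
    return alpha, beta, theta, x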
In [2]:
theta_est, params_est = multilevel_poisson.get_estimators()
theta_est.load_state_dict(torch.load('../saved/trained_poisson_theta.rar'))
params_est.load_state_dict(torch.load('../saved/trained_poisson_params.rar'))
In [3]:
true_t = np.array([94.3, 15.7, 62.9, 126, 5.24, 31.4, 1.05, 1.05, 2.1, 10.5])
true_x = np.array([5, 1, 5, 14, 3, 19, 1, 1, 4, 22])
Values on the true data, as reported in George et al. (1993), are:
theta = [0.060 0.102 0.089 0.116 0.602 0.609 0.891 0.894 1.588 1.994 ]
alpha = 0.7
beta = 0.9
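For convenience, here are those reported values as arrays (hypothetical reference variables; the benchmark below uses our own MCMC run instead):
In [ ]:
# Reference values from George et al. (1993), for comparison only
ref_theta = np.array([0.060, 0.102, 0.089, 0.116, 0.602, 0.609, 0.891, 0.894, 1.588, 1.994])
ref_alpha, ref_beta = 0.7, 0.9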
We'll run MCMC here to get benchmark estimates of the posterior distributions.
In [4]:
real_data = Variable(torch.FloatTensor(np.vstack((true_x, true_t)).T))
In [5]:
M_train = pymc.Model(multilevel_poisson.gamma_poisson(None,None))
M_test = pymc.Model(multilevel_poisson.gamma_poisson(true_x, true_t))
In [6]:
def estimate_MCMC(data_x, data_t, ns, iters=10000, burn=0.5):
    """ MCMC estimate of the posterior over theta, alpha, beta """
    mcmc_est = pymc.MCMC(multilevel_poisson.gamma_poisson(data_x, data_t))
    # pymc expects integer burn-in and thinning intervals; thin so that
    # roughly `ns` post-burn-in samples are retained
    mcmc_est.sample(iters, burn=int(burn*iters), thin=int(np.ceil((1-burn)*iters/ns)))
    trace_theta = mcmc_est.trace('theta').gettrace()[:ns]
    trace_alpha = mcmc_est.trace('alpha').gettrace()[:ns]
    trace_beta = mcmc_est.trace('beta').gettrace()[:ns]
    return trace_theta, trace_alpha, trace_beta
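With the settings used below (iters=500000, burn=0.5, ns=1000), this discards the first 250,000 samples and thins the remaining 250,000 by a factor of 250, retaining exactly 1000 draws.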
In [7]:
mcmc_theta, mcmc_alpha, mcmc_beta = estimate_MCMC(true_x, true_t, ns=1000, iters=500000)
# print "MCMC MSE", np.mean((mcmc_theta.mean(0) - true_theta)**2)
true_alpha = mcmc_alpha.mean()
true_beta = mcmc_beta.mean()
true_theta = mcmc_theta.mean(0)
print "\n\nMCMC Estimated theta:", true_theta.round(3)
print "\nMCMC Estimated (alpha, beta):", true_alpha.round(3), true_beta.round(3)
In [8]:
def draw_inverse(ns=100):
    """ Draw theta and (alpha, beta) from the learned proposals, conditioned on the data """
    tmp = theta_est.sample(real_data, ns=ns).squeeze(2)  # local failure rates theta
    samples = params_est.sample(tmp, ns=1)               # globals (alpha, beta) given theta
    return tmp.cpu().data.numpy(), samples.cpu().data.numpy()
In [9]:
nn_raw_theta, nn_raw_params = draw_inverse(1000)
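A quick sanity check on the shapes, assuming theta_est returns one draw of the ten local rates per sample and params_est one $(\alpha, \beta)$ pair per draw:
In [ ]:
print(nn_raw_theta.shape)   # expected: (1000, 10)
print(nn_raw_params.shape)  # expected: (1000, 2)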
In [10]:
plt.figure(figsize=(20,6))
for i in xrange(10):
plt.subplot(2,5,i+1)
plt.hist(mcmc_theta[:,i], normed=True);
plt.hist(nn_raw_theta[:,i], alpha=0.7, normed=True, bins=20);
plt.title("theta_"+str(i))
plt.legend(['MCMC', 'NN'])
plt.tight_layout()
In [11]:
bins = 20
a = .7
plt.figure(figsize=(9,3.75))
plt.subplot(121)
plt.hist(mcmc_alpha, normed=True, bins=bins, color=sns.color_palette()[0], histtype='stepfilled', linewidth=2.0, alpha=a);
plt.hist(nn_raw_params[:,0], color=sns.color_palette()[2], normed=True, bins=bins, histtype='stepfilled', linewidth=2.0, alpha=a);
# plt.hist(nn_raw_samples[:,0], normed=True, bins=bins, histtype='step', linewidth=2.0);
plt.hist(stats.expon(scale=1.0).rvs(10000), color=sns.color_palette()[5], normed=True, bins=2*bins, histtype='stepfilled', linewidth=2.0, alpha=a);
plt.xlim((0,5))
# plt.xlim((0,plt.xlim()[1]))
plt.xlabel("$\\alpha$")
plt.subplot(122)
plt.hist(mcmc_beta, normed=True, bins=bins, color=sns.color_palette()[0], histtype='stepfilled', linewidth=2.0, alpha=a);
# plt.hist(nn_raw_samples[:,1], normed=True, bins=bins, histtype='step', linewidth=2.0);
plt.hist(nn_raw_params[:,1], normed=True, color=sns.color_palette()[2], bins=bins, histtype='stepfilled', linewidth=2.0, alpha=a);
plt.hist(stats.gamma(a=0.1, scale=1.0).rvs(10000), color=sns.color_palette()[5], normed=True, bins=2*bins, histtype='stepfilled', linewidth=2.0, alpha=a);
plt.xlim((0,3.5))
# plt.xlim((0,plt.xlim()[1]))
plt.ylim(0,4)
plt.legend(['MCMC', 'NN', 'Prior'])
plt.xlabel("$\\beta$")
plt.tight_layout()
# plt.savefig("poisson-params-alt.pdf")
In [12]:
def bootstrap_propose(model):
    """ Propose (alpha, beta, theta) from the prior; return the log likelihood as weight """
    success = False
    while not success:
        try:
            model.alpha.rand()
            model.beta.rand()
            model.theta.rand()
            success = True
        except pymc.ZeroProbability:
            pass
    try:
        weight = model.x.logp
    except pymc.ZeroProbability:
        weight = -np.Inf
    return weight
def bootstrap_IS(model, ns=100):
    """ Importance sampling with the prior as proposal (likelihood-weighted sampling) """
    theta = np.empty((ns,10))
    params = np.empty((ns,2))
    log_w = np.empty((ns,))
    for n in xrange(ns):
        log_w[n] = bootstrap_propose(model)
        theta[n] = model.theta.value
        params[n] = [model.alpha.value, model.beta.value]
    return theta, params, log_w
def normalize_log_weights(log_w):
    """ Exponentiate and normalize log weights via the log-sum-exp trick; NaNs get zero weight """
    safe_log_w = np.array(log_w)
    safe_log_w[np.isnan(log_w)] = np.NINF
    A = safe_log_w.max()
    log_denom = np.log(np.sum(np.exp(safe_log_w - A))) + A
    return np.exp(safe_log_w - log_denom)
def systematic_resample(weights):
    """ Systematic resampling: one uniform offset shared across N evenly-spaced strata """
    ns = len(weights)
    cdf = np.cumsum(weights)
    cutoff = (np.random.rand() + np.arange(ns))/ns
    return np.digitize(cutoff, cdf)
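These helpers can be sanity-checked on a toy example: normalize_log_weights uses the log-sum-exp trick, so extremely negative log weights normalize without underflow, and systematic_resample passes $N$ evenly-spaced strata through the weight CDF, so a particle with normalized weight $w$ is selected either $\lfloor Nw \rfloor$ or $\lceil Nw \rceil$ times.
In [ ]:
demo_w = normalize_log_weights(np.array([-1000.0, -1001.0, np.nan]))
print(demo_w)                       # sums to 1; the NaN entry gets zero weight
print(systematic_resample(demo_w))  # typically [0 0 1]; the zero-weight particle never appears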
In [13]:
def nn_IS(model, ns=100):
    """ Importance sampling with the learned neural proposals """
    # propose theta from the first network
    theta, log_q = theta_est.propose(real_data, ns=ns)
    theta = theta.data.cpu().numpy().squeeze(2)
    log_w = -log_q.data.cpu().numpy().sum(1)
    # propose alpha, beta from the second network, conditioned on theta
    params, log_q = params_est.propose(Variable(torch.Tensor(theta)))
    alpha = params.data.cpu().numpy()[:,0]
    beta = params.data.cpu().numpy()[:,1]
    log_w -= log_q.data.cpu().numpy().squeeze(1)
    # add the likelihood and prior terms of the joint density
    log_w += stats.poisson(model.t.value * theta).logpmf(model.x.value).sum(1)
    log_w += stats.gamma(a=alpha[:,None], scale=1.0/beta[:,None]).logpdf(theta).sum(1)
    log_w += stats.expon(scale=1.0).logpdf(alpha)
    log_w += stats.gamma(a=0.1, scale=1.0).logpdf(beta)
    log_w[np.isnan(log_w)] = np.NINF
    return theta, params.data.cpu().numpy(), log_w
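The importance weight assembled here is the joint density of the model divided by the proposal density: $\log w = \log p(x \mid \theta, t) + \log p(\theta \mid \alpha, \beta) + \log p(\alpha) + \log p(\beta) - \log q(\theta \mid x, t) - \log q(\alpha, \beta \mid \theta)$, with the two $\log q$ terms contributed by the two learned networks.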
In [14]:
nn_theta,nn_params,nn_log_w = nn_IS(M_test, ns=10000)
In [15]:
bootstrap_theta, bootstrap_params, bootstrap_log_w = bootstrap_IS(M_test, ns=10000)
In [16]:
print "Effective sample size (benchmark):", 1.0/np.sum(normalize_log_weights(bootstrap_log_w)**2)
print "Effective sample size (NN):", 1.0/np.sum(normalize_log_weights(nn_log_w)**2)
In [17]:
def run_DCSMC(model, ns=100, return_extras=False):
    # stage one: propose each local theta_i from the network and weight by its likelihood
    num_points = 10
    theta, log_q = theta_est.propose(real_data, ns=ns)
    theta = theta.data.cpu().numpy().squeeze(2)
    log_w = -log_q.data.cpu().numpy()
    log_w += stats.poisson(model.t.value * theta).logpmf(model.x.value)
    log_w[np.isnan(log_w)] = np.NINF
    Z_est = np.log(np.exp(log_w).mean(0))
    # resample each site independently; this leaves unweighted samples for each
    # theta[:,i], plus a per-site normalizing constant estimate Z_est[i]
    for i in xrange(num_points):
        indices = systematic_resample(normalize_log_weights(log_w[:,i]))
        np.random.shuffle(indices)
        theta[:,i] = theta[indices,i]
    # stage two: sample values of alpha, beta conditioned on the resampled theta
    params, log_q = params_est.propose(Variable(torch.Tensor(theta)))
    alpha = params.data.cpu().numpy()[:,0]
    beta = params.data.cpu().numpy()[:,1]
    log_w = -log_q.data.cpu().numpy().squeeze(1)
    # merge: weight by the prior terms linking theta to (alpha, beta)
    tmp = stats.gamma(a=alpha[:,None], scale=1.0/beta[:,None])
    log_w += tmp.logpdf(theta).sum(1)
    log_w += stats.expon(scale=1.0).logpdf(alpha)
    log_w += stats.gamma(a=0.1, scale=1.0).logpdf(beta)
    assert log_w.shape == (ns,)
    log_w[np.isnan(log_w)] = np.NINF
    indices = systematic_resample(normalize_log_weights(log_w))
    Z_est = Z_est.sum() + np.log(np.exp(log_w).mean())
    if return_extras:
        alpha_orig = np.array(alpha)
        beta_orig = np.array(beta)
    alpha = alpha[indices]
    beta = beta[indices]
    theta = theta[indices]
    if return_extras:
        return theta, alpha, beta, Z_est, alpha_orig, beta_orig
    else:
        return theta, alpha, beta, Z_est
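The returned evidence estimate combines the per-pump stage-one normalizers with the merge weights: $\log \hat{Z} = \sum_{i=1}^{10} \log \left( \frac{1}{N} \sum_n w_n^{(i)} \right) + \log \left( \frac{1}{N} \sum_n \tilde{w}_n \right)$, where $w_n^{(i)}$ are the stage-one weights for pump $i$ and $\tilde{w}_n$ are the stage-two merge weights.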
In [18]:
sizes = [5, 10, 50, 100, 500, 1000, 5000, 10000]
replications = 10
In [19]:
dcsmc_results_Z = np.empty((replications, len(sizes)))
dcsmc_results_L2 = np.empty((replications, len(sizes)))
for c, size in enumerate(sizes):
for rep in xrange(replications):
tmp_theta, tmp_alpha, tmp_beta, Z_est = run_DCSMC(M_test, size)
dcsmc_results_L2[rep,c] = np.sqrt(np.mean((tmp_theta.mean(0) - true_theta)**2))
dcsmc_results_Z[rep,c] = Z_est
In [20]:
lwis_results_Z = np.empty((replications, len(sizes)))
lwis_results_L2 = np.empty((replications, len(sizes)))
for c, size in enumerate(sizes):
for rep in xrange(replications):
bootstrap_theta, _, bootstrap_log_w = bootstrap_IS(M_test, ns=size)
Z_est = np.log(np.exp(bootstrap_log_w).mean())
mean_est = np.dot(bootstrap_theta.T, normalize_log_weights(bootstrap_log_w))
lwis_results_L2[rep,c] = np.sqrt(np.mean((mean_est - true_theta)**2))
lwis_results_Z[rep,c] = Z_est
In [21]:
nnis_results_Z = np.empty((replications, len(sizes)))
nnis_results_L2 = np.empty((replications, len(sizes)))
for c, size in enumerate(sizes):
for rep in xrange(replications):
nnis_theta, _, nnis_log_w = nn_IS(M_test, ns=size)
Z_est = np.log(np.exp(nnis_log_w).mean())
mean_est = np.dot(nnis_theta.T, normalize_log_weights(nnis_log_w))
nnis_results_L2[rep,c] = np.sqrt(np.mean((mean_est - true_theta)**2))
nnis_results_Z[rep,c] = Z_est
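Both importance-sampling loops estimate the evidence as $\hat{Z} = \frac{1}{N} \sum_n w_n$, which is unbiased for $Z$; since $\log$ is concave, Jensen's inequality gives $\mathbb{E}[\log \hat{Z}] \le \log Z$, so weaker proposals show up as systematically low $\log \hat{Z}$ at small sample sizes.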
In [22]:
plt.figure(figsize=(8,3.6))
def get_res(res):
filtered = np.array(res)
return np.nanmean(filtered, 0), np.nanstd(filtered, 0)
tmp_m, tmp_s = get_res(dcsmc_results_Z)
plt.errorbar(sizes, tmp_m, 2*tmp_s,marker='.', capsize=4, markeredgewidth=2, color=sns.color_palette()[1])
tmp_m, tmp_s = get_res(nnis_results_Z)
plt.errorbar(sizes, tmp_m, 2*tmp_s,marker='.', capsize=4, markeredgewidth=2, color=sns.color_palette()[2])
tmp_m, tmp_s = get_res(lwis_results_Z)
plt.errorbar(sizes, tmp_m, 2*tmp_s,marker='.', capsize=4, markeredgewidth=2, zorder=0,color=sns.color_palette()[5])
plt.legend(['NN-SMC', 'NN-IS', 'IS (Prior)'], loc='lower right')
plt.semilogx();
plt.xlim(sizes[0]-1, sizes[-1])
plt.ylim([-250, 0])
plt.ylabel("$\log \hat Z$")
plt.xlabel("Number of samples")
plt.tight_layout()
In [23]:
plt.figure(figsize=(8,3.5))
tmp_m, tmp_s = dcsmc_results_L2.mean(0), dcsmc_results_L2.std(0)
plt.errorbar(sizes, tmp_m, 2*tmp_s,marker='.', capsize=4, markeredgewidth=2, color=sns.color_palette()[1])
tmp_m, tmp_s = lwis_results_L2.mean(0), lwis_results_L2.std(0)
plt.errorbar(sizes, tmp_m, 2*tmp_s,marker='.', capsize=4, markeredgewidth=2,color=sns.color_palette()[5])
tmp_m, tmp_s = nnis_results_L2.mean(0), nnis_results_L2.std(0)
plt.errorbar(sizes, tmp_m, 2*tmp_s,marker='.', capsize=4, markeredgewidth=2, color=sns.color_palette()[2])
plt.legend(['NN-SMC', 'IS (Prior)', 'NN-IS'], loc='upper right')
plt.loglog();
plt.xlim(sizes[0]-1, sizes[-1])
plt.ylabel("L2 error in theta")
plt.xlabel("Number of samples")
plt.tight_layout();
Note that the L2 error above is measured relative to the MCMC posterior-mean estimate, which is itself noisy and not authoritative.