In [ ]:
import os
import time
import traceback
import numpy as np
import gpdemo.kernels as krn
import gpdemo.utils as utils
import gpdemo.latent_posterior_approximations as lpa
import gpdemo.estimators as est
import auxpm.samplers as smp
import matplotlib.pyplot as plt
%matplotlib inline
Construct data and experiment directories from environment variables
In [ ]:
data_dir = os.path.join(os.environ['DATA_DIR'], 'uci')
exp_dir = os.path.join(os.environ['EXP_DIR'], 'apm_mcmc')
Specify main run parameters
In [ ]:
data_set = 'pima'
method = 'apm(mi+mh)'
n_chain = 10
chain_offset = 0
seeds = np.random.randint(1, 10001, size=n_chain)  # np.random.random_integers is deprecated
n_imp_sample = 1
adapt_run = dict(
    low_acc_thr = 0.15,
    upp_acc_thr = 0.30,
    batch_size = 100,
    n_batch = 20
)
init_log_sigma_prop_scale = 0.5
init_log_tau_prop_scale = 0.5
n_sample_main = 10000
epsilon = 1e-8
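The adaptive run settings above drive a batch-wise tuning scheme: the chain is run in batches of batch_size iterations and, after each batch, proposal scales whose batch acceptance rate falls outside [low_acc_thr, upp_acc_thr] are rescaled. The cell below is a minimal illustrative sketch of the usual form of such a rule; the actual factor is supplied by utils.adapt_factor_func, so adapt_factor here is an assumption.
In [ ]:
# Illustrative only: batch-wise proposal scale rescaling towards the target
# acceptance band; adapt_factor is a stand-in for utils.adapt_factor_func.
def rescale_prop_scale(prop_scale, batch_accept_rate, adapt_factor=1.5):
    if batch_accept_rate < adapt_run['low_acc_thr']:
        return prop_scale / adapt_factor  # accepting too rarely: make smaller moves
    elif batch_accept_rate > adapt_run['upp_acc_thr']:
        return prop_scale * adapt_factor  # accepting too often: make larger moves
    return prop_scale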
Load data and normalise inputs
In [ ]:
X = np.genfromtxt(os.path.join(data_dir, data_set + '_X.txt'))
y = np.genfromtxt(os.path.join(data_dir, data_set + '_y.txt'))
X, X_mn, X_sd = utils.normalise_inputs(X)
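utils.normalise_inputs is treated as a black box here; the sketch below shows the assumed behaviour (standardising each feature column to zero mean and unit standard deviation) purely for reference.
In [ ]:
# Illustrative only: assumed behaviour of utils.normalise_inputs
def normalise_inputs_sketch(X):
    X_mn = X.mean(axis=0)
    X_sd = X.std(axis=0)
    return (X - X_mn) / X_sd, X_mn, X_sd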
Specify prior parameters (data dependent, so set after loading the data)
In [ ]:
prior = dict(
    a_tau = 1.,
    b_tau = 1. / X.shape[1]**0.5,
    a_sigma = 1.1,
    b_sigma = 0.1
)
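The prior is Gamma on the kernel hyperparameters sigma and tau, while the sampler works with theta = (log sigma, log tau) and evaluates the prior through utils.log_gamma_log_pdf. Below is a minimal sketch of that log density, assuming log_gamma_log_pdf(x, a, b) is the density of log Z for Z ~ Gamma(shape a, rate b), obtained by a change of variables; the rate parameterisation is an assumption, consistent with the gamma draws later using scale 1/b.
In [ ]:
from scipy.special import gammaln

# Illustrative only: assumed form of utils.log_gamma_log_pdf.
# If Z ~ Gamma(shape=a, rate=b) and x = log Z, then
# log p(x) = a * log(b) - lgamma(a) + a * x - b * exp(x)
def log_gamma_log_pdf_sketch(x, a, b):
    return a * np.log(b) - gammaln(a) + a * x - b * np.exp(x)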
Assemble run parameters into a dictionary for recording alongside the results
In [ ]:
run_params = dict(
    data_set = data_set,
    n_data = X.shape[0],
    n_feature = X.shape[1],
    method = method,
    n_imp_sample = n_imp_sample,
    epsilon = epsilon,
    prior = prior,
    adapt_run = adapt_run,
    init_log_sigma_prop_scale = init_log_sigma_prop_scale,
    init_log_tau_prop_scale = init_log_tau_prop_scale,
    n_sample_main = n_sample_main
)
Create necessary run objects
In [ ]:
prng = np.random.RandomState()
kernel_func = lambda K, X, theta: (
    krn.isotropic_squared_exponential_kernel(K, X, theta, epsilon)
)
ml_estimator = est.LogMarginalLikelihoodApproxPosteriorISEstimator(
    X, y, kernel_func, lpa.laplace_approximation)

def log_f_estimator(u, theta=None, cached_res=None):
    # importance sampling estimate of the log marginal likelihood plus the
    # log prior on the log hyperparameters theta = (log sigma, log tau)
    log_marg_lik_est, new_cached_res = ml_estimator(u, theta, cached_res)
    log_prior = (
        utils.log_gamma_log_pdf(theta[0], prior['a_sigma'], prior['b_sigma']) +
        utils.log_gamma_log_pdf(theta[1], prior['a_tau'], prior['b_tau'])
    )
    return log_marg_lik_est + log_prior, new_cached_res

# Gaussian random-walk proposal on the log hyperparameters
prop_sampler = lambda theta, prop_scales: np.r_[
    theta[0] + prop_scales[0] * prng.normal(),
    theta[1] + prop_scales[1] * prng.normal()
]
# corresponding (unnormalised) log proposal density, symmetric in its arguments
log_prop_density = lambda theta_prop, theta_curr, prop_scales: (
    -0.5 * (
        ((theta_prop[0] - theta_curr[0]) / prop_scales[0])**2 +
        ((theta_prop[1] - theta_curr[1]) / prop_scales[1])**2
    )
)
init_prop_scales = np.array([
    init_log_sigma_prop_scale,
    init_log_tau_prop_scale
])
sampler = smp.APMMetIndPlusMHSampler(
    log_f_estimator, log_prop_density, prop_sampler, init_prop_scales,
    lambda: prng.normal(size=(y.shape[0], n_imp_sample)), prng)
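For intuition, the sketch below shows a stripped-down pseudo-marginal Metropolis-Hastings update of theta with the auxiliary variables u held fixed, using the noisy log target from log_f_estimator in place of the exact one. The actual APMMetIndPlusMHSampler additionally refreshes u with a Metropolis independence step and passes cached results between evaluations, so this is illustrative rather than a re-implementation.
In [ ]:
# Illustrative only: one Metropolis-Hastings update of theta for fixed u.
def mh_theta_step(theta, u, log_f_curr, prop_scales):
    theta_prop = prop_sampler(theta, prop_scales)
    log_f_prop, _ = log_f_estimator(u, theta_prop)
    # the Gaussian random-walk proposal is symmetric, so its densities cancel
    if np.log(prng.uniform()) < log_f_prop - log_f_curr:
        return theta_prop, log_f_prop, True
    return theta, log_f_curr, False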
Run chains, starting each from a random sample from the prior, and save results to the experiments directory
In [ ]:
for c in range(n_chain):
    try:
        print('Starting chain {0}...'.format(c + 1))
        prng.seed(seeds[c])
        # initialise the log hyperparameters with a random draw from the prior
        theta_init = np.array([
            np.log(prng.gamma(prior['a_sigma'], 1. / prior['b_sigma'])),
            np.log(prng.gamma(prior['a_tau'], 1. / prior['b_tau'])),
        ])
        sampler.prop_scales = init_prop_scales
        print('Starting initial adaptive run...')
        adapt_thetas, adapt_prop_scales, adapt_accept_rates = (
            sampler.adaptive_run(
                theta_init, adapt_run['batch_size'],
                adapt_run['n_batch'], adapt_run['low_acc_thr'],
                adapt_run['upp_acc_thr'], utils.adapt_factor_func, True
            )
        )
        print('Final proposal scales: {0}'.format(adapt_prop_scales[-1]))
        print('Starting main run...')
        ml_estimator.reset_cubic_op_count()
        start_time = time.time()
        thetas, n_reject = sampler.get_samples(adapt_thetas[-1], n_sample_main)
        comp_time = time.time() - start_time
        n_cubic_ops = ml_estimator.n_cubic_ops
        tag = '{0}_{1}_chain_{2}'.format(data_set, method, c + 1 + chain_offset)
        print('Main run completed: accept rate (u|theta) {0:.1f}%, '
              'accept rate (theta|u) {1:.1f}%, time {2}s, # cubic ops {3}'
              .format((1. - n_reject[0] * 1. / n_sample_main) * 100.,
                      (1. - n_reject[1] * 1. / n_sample_main) * 100.,
                      comp_time, n_cubic_ops))
        utils.save_adaptive_run(exp_dir, tag, adapt_thetas, adapt_prop_scales,
                                adapt_accept_rates, thetas, n_reject,
                                n_cubic_ops, comp_time, run_params)
        utils.plot_trace(thetas)
        plt.show()
    except Exception as e:
        print('Exception encountered')
        print(e)
        print(traceback.format_exc())
        print('Skipping to next chain')
        continue
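As a quick check after the loop, the main-run samples from the last completed chain can be summarised directly; this assumes thetas has one row per sample with columns ordered as (log sigma, log tau), matching theta_init.
In [ ]:
# Quick posterior summary for the last completed chain (illustrative)
for name, samples in zip(['log_sigma', 'log_tau'], thetas.T):
    print('{0}: mean {1:.3f}, std {2:.3f}, 95% interval ({3:.3f}, {4:.3f})'.format(
        name, samples.mean(), samples.std(),
        np.percentile(samples, 2.5), np.percentile(samples, 97.5)))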