In [ ]:
import os
import time
import traceback
import numpy as np
import gpdemo.kernels as krn
import gpdemo.utils as utils
import gpdemo.latent_posterior_approximations as lpa
import gpdemo.estimators as est
import auxpm.samplers as smp
import matplotlib.pyplot as plt
%matplotlib inline
Construct data and experiments directory paths from environment variables
In [ ]:
data_dir = os.path.join(os.environ['DATA_DIR'], 'uci')
exp_dir = os.path.join(os.environ['EXP_DIR'], 'apm_mcmc')
Specify main run parameters
In [ ]:
data_set = 'pima'
method = 'apm(mi+rdss)'
n_chain = 10
chain_offset = 0
# np.random.random_integers is deprecated; randint with an exclusive upper
# bound draws equivalent seeds in [1, 10000]
seeds = np.random.randint(1, 10001, size=n_chain)
n_imp_sample = 1  # importance samples per marginal likelihood estimate
n_sample = 10000 + 500  # 500 'warm-up' updates
epsilon = 1e-8  # jitter added to kernel diagonal for numerical stability
w = 1  # slice sampler bracket width
max_steps_out = 0  # maximum stepping-out iterations in the slice sampler
Load data and normalise inputs
In [ ]:
X = np.genfromtxt(os.path.join(data_dir, data_set + '_X.txt'))
y = np.genfromtxt(os.path.join(data_dir, data_set + '_y.txt'))
X, X_mn, X_sd = utils.normalise_inputs(X)
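For reference, `utils.normalise_inputs` is assumed here to standardise each input column to zero mean and unit standard deviation, returning the per-column statistics so held-out data could be transformed consistently; a minimal sketch under that assumption:
In [ ]:
def normalise_inputs_sketch(X):
    # Standardise each feature column to zero mean, unit standard deviation.
    X_mn = X.mean(axis=0)
    X_sd = X.std(axis=0)
    return (X - X_mn) / X_sd, X_mn, X_sd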
Specify prior parameters (data dependent, so set after loading the data)
In [ ]:
prior = dict(
    a_tau = 1.,
    b_tau = 1. / X.shape[1]**0.5,
    a_sigma = 1.1,
    b_sigma = 0.1
)
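These are interpreted here as shape and rate parameters of Gamma priors on the kernel hyperparameters, which are sampled on the log scale (the chain initialisation below draws `np.log(prng.gamma(a, 1. / b))`). Under that assumption, `utils.log_gamma_log_pdf` would evaluate the log-density of x = log g for g ~ Gamma(a, b); a minimal sketch:
In [ ]:
from scipy.special import gammaln

def log_gamma_log_pdf_sketch(x, a, b):
    # Log-density of x = log(g) where g ~ Gamma(shape=a, rate=b):
    # p(x) = b**a / Gamma(a) * exp(a * x - b * exp(x))
    return a * np.log(b) - gammaln(a) + a * x - b * np.exp(x)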
Assemble run parameters into a dictionary for recording alongside the results
In [ ]:
run_params = dict(
    data_set = data_set,
    n_data = X.shape[0],
    n_feature = X.shape[1],
    method = method,
    n_imp_sample = n_imp_sample,
    epsilon = epsilon,
    prior = prior,
    w = w,
    max_steps_out = max_steps_out,
    n_sample = n_sample
)
Create necessary run objects
In [ ]:
prng = np.random.RandomState()

def dir_and_w_sampler(w):
    # Draw a direction uniformly on the unit circle (normalised isotropic
    # Gaussian draw) and pair it with the fixed slice bracket width w.
    d = prng.normal(size=2)
    d /= d.dot(d)**0.5
    return d, w

kernel_func = lambda K, X, theta: (
    krn.isotropic_squared_exponential_kernel(K, X, theta, epsilon)
)

ml_estimator = est.LogMarginalLikelihoodApproxPosteriorISEstimator(
    X, y, kernel_func, lpa.laplace_approximation)

def log_f_estimator(u, theta=None, cached_res=None):
    # Log of an unbiased importance sampling estimate of the marginal
    # likelihood plus the log prior on the log-scale hyperparameters,
    # i.e. an estimate of the unnormalised log posterior density.
    log_marg_lik_est, new_cached_res = ml_estimator(u, theta, cached_res)
    log_prior = (
        utils.log_gamma_log_pdf(theta[0], prior['a_sigma'], prior['b_sigma']) +
        utils.log_gamma_log_pdf(theta[1], prior['a_tau'], prior['b_tau'])
    )
    return log_marg_lik_est + log_prior, new_cached_res

sampler = smp.APMMetIndPlusRandDirSliceSampler(
    log_f_estimator, lambda: prng.normal(size=(y.shape[0], n_imp_sample)), prng,
    lambda: dir_and_w_sampler(w), max_steps_out)
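For reference, a plausible form for the isotropic squared exponential kernel wrapped by kernel_func, written here with assumed log-scale hyperparameters (the definitive parameterisation lives in `gpdemo.kernels`):
In [ ]:
def isotropic_sq_exp_kernel_sketch(K, X, theta, epsilon):
    # Assumed parameterisation: theta = (log variance, log inverse squared
    # length-scale), both made positive by exponentiation.
    sigma, tau = np.exp(theta)
    # Squared Euclidean distances between all pairs of inputs.
    sq_dists = ((X[:, None, :] - X[None, :, :])**2).sum(-1)
    K[:] = sigma * np.exp(-0.5 * tau * sq_dists)
    K[np.diag_indices_from(K)] += epsilon  # jitter for a stable Cholesky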
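The marginal likelihood estimator constructed above presumably importance samples the latent function from the Laplace approximation to its posterior, reparameterised through the standard normal auxiliary variables u. An illustrative sketch of such an estimator (names and signature here are hypothetical, not the `gpdemo` API):
In [ ]:
from scipy.special import logsumexp

def log_marg_lik_is_sketch(u, m, L, log_joint, log_q):
    # u: (n_latent, n_imp_sample) standard normal draws; m, L: mean and
    # Cholesky factor of a Gaussian proposal q(f), e.g. a Laplace
    # approximation to p(f | y, theta).
    # log_joint(f) = log p(y | f) + log p(f | theta).
    log_w = np.empty(u.shape[1])
    for i in range(u.shape[1]):
        f = m + L.dot(u[:, i])  # reparameterised proposal draw
        log_w[i] = log_joint(f) - log_q(f)  # log importance weight
    # Log of the unbiased IS estimate of p(y | theta): log mean weight.
    return logsumexp(log_w) - np.log(u.shape[1])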
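The sampler alternates Metropolis independence updates of the auxiliary variables u with random direction slice sampling updates of theta (the 'mi+rdss' in the method name). For orientation, a minimal sketch of a single slice move along a sampled direction, assuming a deterministic log density log_f (in the pseudo-marginal setting the estimator with u held fixed plays this role); the `auxpm` implementation may differ in detail:
In [ ]:
def rand_dir_slice_move_sketch(theta, log_f, d, w, max_steps_out, prng):
    # One slice sampling update of theta along direction d with initial
    # bracket width w (Neal, 2003). With max_steps_out = 0, as in this
    # run, the initial bracket is never expanded.
    log_y = log_f(theta) + np.log(prng.uniform())  # log slice height
    x_l = -w * prng.uniform()  # bracket positioned randomly around 0
    x_r = x_l + w
    # Stepping out: expand the bracket while its ends lie inside the slice.
    steps = 0
    while steps < max_steps_out and log_f(theta + x_l * d) > log_y:
        x_l -= w
        steps += 1
    steps = 0
    while steps < max_steps_out and log_f(theta + x_r * d) > log_y:
        x_r += w
        steps += 1
    # Shrinkage: propose within the bracket, shrinking it on rejections.
    while True:
        x = x_l + (x_r - x_l) * prng.uniform()
        theta_prop = theta + x * d
        if log_f(theta_prop) > log_y:
            return theta_prop
        if x < 0.:
            x_l = x
        else:
            x_r = x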
Run chains, starting each from a random sample from the prior, and save results to the experiments directory
In [ ]:
for c in range(n_chain):
    try:
        print('Starting chain {0}...'.format(c + 1))
        prng.seed(seeds[c])
        # Initialise hyperparameters with a log-scale draw from the prior.
        theta_init = np.array([
            np.log(prng.gamma(prior['a_sigma'], 1. / prior['b_sigma'])),
            np.log(prng.gamma(prior['a_tau'], 1. / prior['b_tau'])),
        ])
        ml_estimator.reset_cubic_op_count()
        start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
        thetas, n_reject = sampler.get_samples(theta_init, n_sample)
        comp_time = time.perf_counter() - start_time
        n_cubic_ops = ml_estimator.n_cubic_ops
        tag = '{0}_{1}_chain_{2}'.format(data_set, method, c + 1 + chain_offset)
        print('Completed: u update accept rate {0:.1f}%, time {1:.1f}s, # cubic ops {2}'
              .format((1. - n_reject / n_sample) * 100., comp_time, n_cubic_ops))
        utils.save_run(exp_dir, tag, thetas, n_reject, n_cubic_ops, comp_time, run_params)
        utils.plot_trace(thetas)
        plt.show()
    except Exception:
        print('Exception encountered')
        traceback.print_exc()  # Exception.message no longer exists in Python 3
        print('Skipping to next chain')
        continue
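As a possible follow-up (not part of the original run script), the warm-up samples can be discarded and a rough effective sample size computed for each hyperparameter trace; a minimal self-contained sketch:
In [ ]:
def effective_sample_size_sketch(x):
    # Crude ESS estimate for a 1D chain: n / (1 + 2 * sum of lag
    # autocorrelations, truncated at the first negative value).
    x = x - x.mean()
    n = x.shape[0]
    acf = np.correlate(x, x, mode='full')[n - 1:] / (x.var() * n)
    tau = 1.
    for r in acf[1:]:
        if r < 0:
            break
        tau += 2. * r
    return n / tau

# Example usage, dropping the 500 warm-up updates:
# ess = [effective_sample_size_sketch(thetas[500:, j]) for j in range(2)]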