In [ ]:
import os
import time
import traceback
import numpy as np
import gpdemo.kernels as krn
import gpdemo.utils as utils
import gpdemo.latent_posterior_approximations as lpa
import gpdemo.estimators as est
import auxpm.samplers as smp
import matplotlib.pyplot as plt
%matplotlib inline

Construct data and experiments directories from environment variables


In [ ]:
data_dir = os.path.join(os.environ['DATA_DIR'], 'uci')
exp_dir = os.path.join(os.environ['EXP_DIR'], 'apm_mcmc')

Specify main run parameters


In [ ]:
data_set = 'pima'
method = 'apm(ess+rdss)'
n_chain = 10
chain_offset = 0
seeds = np.random.randint(1, 10001, size=n_chain)  # per-chain seeds in [1, 10000]
n_imp_sample = 1
n_sample = 10000 + 500  # includes 500 'warm-up' updates
epsilon = 1e-8  # jitter passed to the kernel function for numerical stability
w = 1.  # slice bracket width passed to dir_and_w_sampler
max_steps_out = 0  # no stepping-out in the slice sampler

Load data and normalise inputs


In [ ]:
X = np.genfromtxt(os.path.join(data_dir, data_set + '_X.txt'))
y = np.genfromtxt(os.path.join(data_dir, data_set + '_y.txt'))
X, X_mn, X_sd = utils.normalise_inputs(X)
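
As a point of reference, here is a minimal sketch of what utils.normalise_inputs is assumed to do, inferred from its return values rather than the library source: rescale each input feature to zero mean and unit standard deviation, returning the per-feature means and standard deviations alongside the rescaled inputs


In [ ]:
# Hypothetical NumPy equivalent of utils.normalise_inputs (an assumption,
# not the library implementation): centre each feature column and divide
# by its standard deviation
def normalise_inputs_sketch(X):
    X_mn = X.mean(0)  # per-feature means
    X_sd = X.std(0)   # per-feature standard deviations
    return (X - X_mn) / X_sd, X_mn, X_sd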

Specify prior parameters (data-dependent, so set after loading the data)


In [ ]:
prior = dict(
    a_tau = 1.,
    b_tau = 1. / X.shape[1]**0.5,
    a_sigma = 1.1,
    b_sigma = 0.1
)
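
As an optional check, the prior densities on the log-hyperparameters can be plotted with the same utils.log_gamma_log_pdf helper used in the estimator below (assuming, as the chain initialisation code later suggests, that it takes a log-parameter value followed by Gamma shape and rate arguments)


In [ ]:
# Sketch: visualise the assumed log-Gamma prior densities over a grid of
# log-parameter values (not part of the original experiment)
xs = np.linspace(-6., 4., 200)
for label, a, b in [('log sigma', prior['a_sigma'], prior['b_sigma']),
                    ('log tau', prior['a_tau'], prior['b_tau'])]:
    log_pdfs = np.array([utils.log_gamma_log_pdf(x, a, b) for x in xs])
    plt.plot(xs, np.exp(log_pdfs), label=label)
plt.xlabel('log-parameter value')
plt.ylabel('prior density')
plt.legend()
plt.show()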

Assemble run parameters into a dictionary for recording with the results


In [ ]:
run_params = dict(
    data_set = data_set,
    n_data = X.shape[0],
    n_feature = X.shape[1],
    method = method,
    n_imp_sample = n_imp_sample,
    epsilon = epsilon,
    prior = prior,
    w = w,
    max_steps_out = max_steps_out,
    n_sample = n_sample
)

Create necessary run objects


In [ ]:
prng = np.random.RandomState()

def dir_and_w_sampler(w):
    # Sample a direction uniformly on the unit circle and return it
    # together with the fixed slice bracket width
    d = prng.normal(size=2)
    d /= d.dot(d)**0.5
    return d, w

kernel_func = lambda K, X, theta: (
    krn.isotropic_squared_exponential_kernel(K, X, theta, epsilon=epsilon)
)
ml_estimator = est.LogMarginalLikelihoodApproxPosteriorISEstimator(
    X, y, kernel_func, lpa.laplace_approximation)

def log_f_estimator(u, theta=None, cached_res=None):
    # Estimated log marginal likelihood plus log prior density of the
    # log-hyperparameters theta = (log sigma, log tau)
    log_marg_lik_est, new_cached_res = ml_estimator(u, theta, cached_res)
    log_prior = (
        utils.log_gamma_log_pdf(theta[0], prior['a_sigma'], prior['b_sigma']) +
        utils.log_gamma_log_pdf(theta[1], prior['a_tau'], prior['b_tau'])
    )
    return log_marg_lik_est + log_prior, new_cached_res

sampler = smp.APMEllSSPlusRandDirSliceSampler(
    log_f_estimator, lambda: prng.normal(size=(y.shape[0], n_imp_sample)), prng,
    lambda: dir_and_w_sampler(w), max_steps_out)
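
Before launching full chains it can be worth evaluating the estimator once by hand; the sketch below draws auxiliary variables and a point from the prior and checks that a finite log-density estimate comes back (assuming the estimator accepts cached_res=None on a first call, as the keyword default suggests)


In [ ]:
# Sketch: single sanity-check evaluation of the estimated log density
u_test = prng.normal(size=(y.shape[0], n_imp_sample))
theta_test = np.array([
    np.log(prng.gamma(prior['a_sigma'], 1. / prior['b_sigma'])),
    np.log(prng.gamma(prior['a_tau'], 1. / prior['b_tau'])),
])
log_f_val, _ = log_f_estimator(u_test, theta_test)
print('Log density estimate at prior draw: {0}'.format(log_f_val))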

Run the chains, starting each from a random sample from the prior, and save the results to the experiments directory


In [ ]:
for c in range(n_chain):
    try:
        print('Starting chain {0}...'.format(c + 1))
        prng.seed(seeds[c])
        # Initialise log-hyperparameters with a draw from the prior
        theta_init = np.array([
            np.log(prng.gamma(prior['a_sigma'], 1. / prior['b_sigma'])),
            np.log(prng.gamma(prior['a_tau'], 1. / prior['b_tau'])),
        ])
        ml_estimator.reset_cubic_op_count()
        start_time = time.time()
        thetas = sampler.get_samples(theta_init, n_sample)
        comp_time = time.time() - start_time
        n_cubic_ops = ml_estimator.n_cubic_ops
        tag = '{0}_{1}_chain_{2}'.format(data_set, method, c + 1 + chain_offset)
        print('Completed: time {0}s, # cubic ops {1}'
              .format(comp_time, n_cubic_ops))
        utils.save_run(exp_dir, tag, thetas, 0, n_cubic_ops, comp_time, run_params)
        utils.plot_trace(thetas)
        plt.show()
    except Exception as e:
        print('Exception encountered')
        print(e)
        print(traceback.format_exc())
        print('Skipping to next chain')
        continue
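
n_sample above includes the 500 warm-up updates, so summaries of a chain should discard them first; a minimal sketch, assuming thetas from the last completed chain is still in scope


In [ ]:
# Sketch: drop the 500 'warm-up' updates before summarising
thetas_post = thetas[500:]
print('Posterior means of (log sigma, log tau): {0}'
      .format(thetas_post.mean(0)))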