Nuisance Parameters

In this notebook, Bayesian Optimisation is applied to a fundamentally 1D problem that is made to look 2D by adding a nuisance parameter: an extra input which has no effect on the objective.

By using an 'automatic relevance determination' (ARD) kernel, the problems caused by nuisance parameters can be mitigated, because the surrogate can use 'less precision' along those dimensions. Below, the predictions from the ARD and non-ARD surrogates are plotted. See how the ARD surrogate matches the true objective function much better.

An ARD kernel is formed when a different length scale is used for each dimension.


In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [ ]:
import numpy as np
import sklearn.gaussian_process as gp
import GPy
import matplotlib.pyplot as plt
import seaborn as sns; sns.set() # prettify matplotlib

In [ ]:
# local modules
import turbo as tb
import turbo.modules as tm
import turbo.plotting as tp
import turbo.gui.jupyter as tg

In [ ]:
# Make deterministic: seed NumPy's global RNG, which the noisy target function
# below draws from via np.random.normal.
np.random.seed(100)

In [ ]:
# Selects which surrogate back end the set-up cell builds; any other value
# makes that cell raise ValueError.
surrogate = 'GPy' # can be 'GPy' or 'scikit'

Target Function


In [ ]:
# Search ranges for the real parameter (x) and for the nuisance parameter.
xmin, xmax = 0, 12
nuisance_min, nuisance_max = 0, 10
xs = np.linspace(xmin, xmax, 200)
nuisance = np.linspace(nuisance_min, nuisance_max, 100)


def f(x):
    """Noisy 1D objective: 100*sin(x^2/5)*cos(1.5*x) + 100 plus N(0, 2) noise.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Point(s) at which to evaluate the objective. Python ints are accepted
        as well as floats and arrays.

    Returns
    -------
    A scalar for scalar input, otherwise an array with the same shape as `x`.
    Every call draws fresh noise from NumPy's global RNG, so repeated calls
    with the same `x` give different values unless the RNG is re-seeded.
    """
    # size=None asks NumPy for a single draw (scalar input); otherwise draw one
    # independent noise value per element of x.
    noise_shape = None if np.isscalar(x) else np.shape(x)
    return 100 * np.sin(x**2/5) * np.cos(x*1.5) + 100 + \
        np.random.normal(0, 2, size=noise_shape)


def f2D(x, nuisance):
    """2D wrapper around `f`: the `nuisance` argument is accepted but ignored."""
    return f(x)


ys = f(xs)
# NOTE: best_y is the maximum of one *noisy* evaluation on a 200-point grid, so
# it is an estimate of the best value rather than the noise-free optimum.
best_y = np.max(ys)

# Evaluate the 2D wrapper on the full (x, nuisance) grid for the surface plot.
X,Y = np.meshgrid(xs, nuisance)
Z = f2D(X, Y)

In [ ]:
# Plot one noisy evaluation of the target function over the search range of x.
fig, ax = plt.subplots(figsize=(16, 6))
ax.plot(xs, ys)
# Label the figure so it can be read on its own when skimming the notebook.
ax.set(title='Target function (noisy samples)', xlabel='x', ylabel='f(x)')
plt.show()

In [ ]:
# Show the 2D version of the target as a surface over the (x, nuisance) grid.
# Z was computed by f2D, which ignores the nuisance argument.
tp.surface_3D(X, Y, Z)

In [ ]:
# Parameter bounds as (name, min, max) tuples; the 2D problem adds the nuisance axis.
bounds1D = [('x', xmin, xmax)]
bounds2D = [('x', xmin, xmax), ('nuisance', nuisance_min, nuisance_max)]

pre_phase = 8    # passed to the Optimiser as pre_phase_trials
iterations = 10  # passed to every surrogate as training_iterations

# Build three surrogates: sur1 for the 1D problem, sur2 for the 2D problem with
# a plain Matern 5/2 kernel, and sur3 for the 2D problem with an ARD kernel
# (one length scale per input dimension).
if surrogate == 'GPy':
    sur1 = tm.GPySurrogate(model_params=dict(kernel=GPy.kern.Matern52(input_dim=1), normalizer=True), training_iterations=iterations)
    sur2 = tm.GPySurrogate(model_params=dict(kernel=GPy.kern.Matern52(input_dim=2), normalizer=True), training_iterations=iterations)
    sur3 = tm.GPySurrogate(model_params=dict(kernel=GPy.kern.Matern52(input_dim=2, ARD=True), normalizer=True), training_iterations=iterations)

elif surrogate == 'scikit':
    model_params = dict(
        alpha = 1e-5, # larger => more noise. Default = 1e-10
        kernel = 1.0 * gp.kernels.Matern(nu=2.5) + gp.kernels.WhiteKernel(),
        normalize_y = True
    )
    sur1 = tm.SciKitGPSurrogate(model_params=model_params, training_iterations=iterations)
    sur2 = tm.SciKitGPSurrogate(model_params=model_params, training_iterations=iterations)
    # The only change is the use of multiple length scales. Deriving the ARD
    # parameters from the base ones keeps alpha and normalize_y identical, so
    # the ARD / non-ARD comparison differs in the kernel alone.
    model_params_ARD = dict(
        model_params,
        kernel = 1.0 * gp.kernels.Matern(length_scale=(1.0, 1.0), nu=2.5) + gp.kernels.WhiteKernel(),
    )
    sur3 = tm.SciKitGPSurrogate(model_params=model_params_ARD, training_iterations=iterations)
else:
    raise ValueError("unknown surrogate {!r}: expected 'GPy' or 'scikit'".format(surrogate))


def make_optimiser(objective, bounds, surrogate_model):
    """Create an optimiser for `objective` and a recorder attached to it.

    The optimiser is constructed in 'max' mode with the 'default' settings
    preset and `pre_phase` pre-phase trials, then given a UCB(beta=2)
    acquisition function and `surrogate_model` as its surrogate.

    Returns
    -------
    (optimiser, recorder) : the configured tb.Optimiser and its tb.Recorder.
    """
    optimiser = tb.Optimiser(objective, 'max', bounds, pre_phase_trials=pre_phase, settings_preset='default')
    optimiser.acquisition = tm.UCB(beta=2)
    optimiser.surrogate = surrogate_model
    # The recorder is created after the optimiser is fully configured.
    return optimiser, tb.Recorder(optimiser)


op1D, rec1D = make_optimiser(f, bounds1D, sur1)
op2D, rec2D = make_optimiser(f2D, bounds2D, sur2)
op2DARD, rec2DARD = make_optimiser(f2D, bounds2D, sur3)

In [ ]:
def run_seeded(optimiser):
    """Attach a progress bar to `optimiser`, re-seed NumPy, and run 25 trials.

    NumPy's global RNG is reset to seed 0 immediately before the run, so each
    optimiser starts from the same random state. Returns whatever
    `optimiser.run` returns.
    """
    tg.OptimiserProgressBar(optimiser)
    np.random.seed(0)
    return optimiser.run(max_trials=25)


run_seeded(op1D)
run_seeded(op2D)
run_seeded(op2DARD)

In [ ]:
# Compare the three recorded runs (labelled '1D', '2D' and '2DARD') against
# best_y, the maximum of the noisy target samples computed earlier.
# NOTE(review): the exact error metric is defined in turbo.plotting — confirm there.
tp.compare_error([rec1D, rec2D, rec2DARD], ['1D', '2D', '2DARD'], true_best=best_y);

In [ ]:
# One error plot per recorded run, each measured against the same best_y.
for recorder in (rec1D, rec2D, rec2DARD):
    tp.plot_error(recorder, true_best=best_y)

In [ ]:
# Plot a trial of the 1D run along parameter 'x', passing the target f as the
# true objective for comparison.
# NOTE(review): trial_num=-1 presumably selects the last trial — confirm in turbo.plotting.
tp.plot_trial_1D(rec1D, param='x', trial_num=-1, true_objective=f);

In [ ]:
# Interactive 2D view of the non-ARD 2D run, with f2D passed as the true objective.
# NOTE(review): trial_num=-1 presumably selects the last trial — confirm in turbo.plotting.
tp.interactive_plot_trial_2D(rec2D, trial_num=-1, true_objective=f2D);

In [ ]:
# Interactive 1D view of the non-ARD 2D run (no true objective is passed here).
# NOTE(review): trial_num=-1 presumably selects the last trial — confirm in turbo.plotting.
tp.interactive_plot_trial_1D(rec2D, trial_num=-1);

In [ ]:
# Interactive 2D view of the ARD 2D run, with f2D passed as the true objective.
# NOTE(review): trial_num=-1 presumably selects the last trial — confirm in turbo.plotting.
tp.interactive_plot_trial_2D(rec2DARD, trial_num=-1, true_objective=f2D);

In [ ]:
# Interactive 1D view of the ARD 2D run (no true objective is passed here).
# NOTE(review): trial_num=-1 presumably selects the last trial — confirm in turbo.plotting.
tp.interactive_plot_trial_1D(rec2DARD, trial_num=-1);

In [ ]: