In this notebook, Bayesian Optimisation is used on a fundamentally 1D problem that is made to look 2D by adding a nuisance parameter (a second input which has no effect on the objective).
By using an 'automatic relevance determination' (ARD) kernel, the problems caused by nuisance parameters can be mitigated, because the surrogate can use 'less precision' along those dimensions. Below, the predictions from ARD and non-ARD surrogates are plotted. See how the ARD surrogate matches the true objective function much better.
An ARD kernel is formed when a different length scale is used for each dimension.
In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [ ]:
import numpy as np
import sklearn.gaussian_process as gp
import GPy
import matplotlib.pyplot as plt
import seaborn as sns; sns.set() # prettify matplotlib
In [ ]:
# local modules
import turbo as tb
import turbo.modules as tm
import turbo.plotting as tp
import turbo.gui.jupyter as tg
In [ ]:
# Make deterministic: fix NumPy's global random seed so that the noisy
# objective samples drawn with np.random further down are reproducible.
np.random.seed(100)
In [ ]:
# Selects which Gaussian-process backend the surrogate models are built with.
surrogate = 'GPy' # can be 'GPy' or 'scikit'
In [ ]:
# Search ranges for the real parameter `x` and for the nuisance parameter.
xmin, xmax = 0, 12
nuisance_min, nuisance_max = 0, 10

# Dense grids, used only for plotting the objective and estimating its maximum.
xs = np.linspace(xmin, xmax, 200)
nuisance = np.linspace(nuisance_min, nuisance_max, 100)

# Alternative objective, kept for experimentation:
#f = lambda x: np.exp(-(x - 2)**2) + np.exp(-(x - 6)**2/10) + 1/ (x**2 + 1) + \
#np.random.normal(0, 0.02, size=None if isinstance(x, float) else x.shape)


def f(x):
    """Noisy 1D objective: 100*sin(x**2/5)*cos(1.5*x) + 100 plus Gaussian noise.

    The noise has mean 0 and standard deviation 2 and is drawn from NumPy's
    global random state, so results depend on the current seed.

    Args:
        x: a scalar (Python or NumPy number) or a NumPy array.

    Returns:
        A scalar when `x` is a scalar, otherwise an array shaped like `x`.
    """
    # np.isscalar also accepts ints, unlike an isinstance(x, float) check
    # (which fell through to `x.shape` and raised AttributeError for ints).
    size = None if np.isscalar(x) else np.shape(x)
    noise = np.random.normal(0, 2, size=size)
    return 100 * np.sin(x**2/5) * np.cos(x*1.5) + 100 + noise


def f2D(x, nuisance):
    """2D wrapper around `f`: the `nuisance` argument is accepted but ignored."""
    return f(x)


ys = f(xs)
# NOTE: this is the maximum of the *noisy* samples on the grid, so it is only
# an estimate of the true optimum of the underlying function.
best_y = np.max(ys)

# Evaluate the 2D objective on the full (x, nuisance) grid for the surface plot.
X,Y = np.meshgrid(xs, nuisance)
Z = f2D(X, Y)
In [ ]:
# Line plot of the noisy 1D objective samples over the x grid.
fig, ax = plt.subplots(figsize=(16, 6))
ax.plot(xs, ys)
plt.show()
In [ ]:
# Plot the 2D objective values Z over the (x, nuisance) meshgrid X, Y using
# turbo's 3D surface helper.
tp.surface_3D(X, Y, Z)
In [ ]:
# Search spaces as (name, min, max) tuples: the plain 1D problem, and the same
# problem with the nuisance dimension added.
bounds1D = [('x', xmin, xmax)]
bounds2D = [('x', xmin, xmax), ('nuisance', nuisance_min, nuisance_max)]

pre_phase = 8    # passed as `pre_phase_trials` to each Optimiser
iterations = 10  # passed as `training_iterations` to each surrogate

# Three surrogates are built:
#   sur1 - for the 1D problem
#   sur2 - for the 2D problem, single shared length scale (non-ARD)
#   sur3 - for the 2D problem, one length scale per dimension (ARD)
if surrogate == 'GPy':
    sur1 = tm.GPySurrogate(
        model_params=dict(kernel=GPy.kern.Matern52(input_dim=1), normalizer=True),
        training_iterations=iterations)
    sur2 = tm.GPySurrogate(
        model_params=dict(kernel=GPy.kern.Matern52(input_dim=2), normalizer=True),
        training_iterations=iterations)
    # the only change is ARD=True
    sur3 = tm.GPySurrogate(
        model_params=dict(kernel=GPy.kern.Matern52(input_dim=2, ARD=True), normalizer=True),
        training_iterations=iterations)
elif surrogate == 'scikit':
    model_params = dict(
        alpha = 1e-5, # larger => more noise. Default = 1e-10
        kernel = 1.0 * gp.kernels.Matern(nu=2.5) + gp.kernels.WhiteKernel(),
        normalize_y = True
    )
    sur1 = tm.SciKitGPSurrogate(model_params=model_params, training_iterations=iterations)
    sur2 = tm.SciKitGPSurrogate(model_params=model_params, training_iterations=iterations)
    # the only change is the use of multiple length scales
    # (normalize_y is kept so that this really is the only difference)
    model_params_ARD = dict(
        alpha = 1e-5, # larger => more noise. Default = 1e-10
        kernel = 1.0 * gp.kernels.Matern(length_scale=(1.0, 1.0), nu=2.5) + gp.kernels.WhiteKernel(),
        normalize_y = True
    )
    sur3 = tm.SciKitGPSurrogate(model_params=model_params_ARD, training_iterations=iterations)
else:
    raise ValueError(
        "unknown surrogate {!r}: expected 'GPy' or 'scikit'".format(surrogate))
def _build_optimiser(objective, bounds, surrogate_model):
    """Create a maximising optimiser with a UCB(beta=2) acquisition function.

    Args:
        objective: the function to optimise.
        bounds: list of (name, min, max) tuples describing the search space.
        surrogate_model: the surrogate to attach to the optimiser.

    Returns:
        The (optimiser, recorder) pair, where the recorder is a tb.Recorder
        constructed on the optimiser.
    """
    optimiser = tb.Optimiser(objective, 'max', bounds,
                             pre_phase_trials=pre_phase,
                             settings_preset='default')
    optimiser.acquisition = tm.UCB(beta=2)
    optimiser.surrogate = surrogate_model
    return optimiser, tb.Recorder(optimiser)


# 1D problem, 2D problem with the non-ARD surrogate, 2D problem with the ARD surrogate.
op1D, rec1D = _build_optimiser(f, bounds1D, sur1)
op2D, rec2D = _build_optimiser(f2D, bounds2D, sur2)
op2DARD, rec2DARD = _build_optimiser(f2D, bounds2D, sur3)
In [ ]:
# Run each optimiser for the same number of trials. The global NumPy seed is
# reset before every run so that all three start from the same random state.
for optimiser in (op1D, op2D, op2DARD):
    tg.OptimiserProgressBar(optimiser)
    np.random.seed(0)
    optimiser.run(max_trials=25)
In [ ]:
# Compare the three recorded runs (labelled '1D', '2D' and '2DARD') against best_y.
# NOTE: best_y is the maximum of noisy grid samples (np.max(ys)), so it is only an
# estimate of the true optimum.
tp.compare_error([rec1D, rec2D, rec2DARD], ['1D', '2D', '2DARD'], true_best=best_y);
In [ ]:
# One error plot per recorded run, each measured against the same best_y.
for recorder in (rec1D, rec2D, rec2DARD):
    tp.plot_error(recorder, true_best=best_y)
In [ ]:
# Plot a trial of the 1D run along 'x', with the true objective f passed for comparison.
# trial_num=-1 presumably selects the last trial — confirm against turbo.plotting.
tp.plot_trial_1D(rec1D, param='x', trial_num=-1, true_objective=f);
In [ ]:
# Interactive 2D plot for the non-ARD 2D run, with f2D passed as the true objective.
# trial_num=-1 presumably selects the last trial — confirm against turbo.plotting.
tp.interactive_plot_trial_2D(rec2D, trial_num=-1, true_objective=f2D);
In [ ]:
# Interactive 1D plot for the non-ARD 2D run (no true objective is passed here).
# trial_num=-1 presumably selects the last trial — confirm against turbo.plotting.
tp.interactive_plot_trial_1D(rec2D, trial_num=-1);
In [ ]:
# Interactive 2D plot for the ARD 2D run, with f2D passed as the true objective.
# trial_num=-1 presumably selects the last trial — confirm against turbo.plotting.
tp.interactive_plot_trial_2D(rec2DARD, trial_num=-1, true_objective=f2D);
In [ ]:
# Interactive 1D plot for the ARD 2D run (no true objective is passed here).
# trial_num=-1 presumably selects the last trial — confirm against turbo.plotting.
tp.interactive_plot_trial_1D(rec2DARD, trial_num=-1);
In [ ]: