Demonstrate a Gaussian process which, when taking a log, goes to zero where there isn't data
In [11]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel
def f_target(x):
return 0.01 * x * np.sin(x)
def _reshapex(x):
''' Reshape an x value to include a second dimension '''
assert len(x.shape) == 1
return x.reshape((x.shape[0], 1))
# The points and values which we have sampled
x = np.linspace(1, 8, 6)
y = f_target(x)
# Our view of the prior mean
prior_mean = -500
# Fit a gaussian process
# The tuples in the arguments to the kernels indicate the valid range of the parameters
# when calling 'fit'
kernel = ConstantKernel(1, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
print('Prior kernel:', kernel)
# n_restarts_optimizer is trying to avoid getting stuck in a local minimum when looking for
# hyperparameter values. If the hyperparameters were set *exactly*, then this could be left
# at the default value, which is 0.
regressor = GaussianProcessRegressor(kernel, n_restarts_optimizer=20)
regressor.fit(_reshapex(x), y - prior_mean)
print('MLE kernel :', regressor.kernel_)
# Evaluate our GP at many more points - include the uncertainty at every point
x_eval = np.linspace(0, 100, 500)
y_eval, sigma_eval = regressor.predict(_reshapex(x_eval), return_std=True)
y_eval += prior_mean
plt.figure(figsize=(8, 5))
plt.plot(x_eval, f_target(x_eval), label=r'$f(x) = x\,\sin(x)$')
plt.plot(x, y, 'r.', label='Observations')
plt.plot(x_eval, y_eval, 'b-', label='Prediction')
plt.fill(np.concatenate([x_eval, x_eval[::-1]]),
np.concatenate([(y_eval - 1.96 * sigma_eval),
(y_eval + 1.96 * sigma_eval)[::-1]]),
alpha=.5, fc='b', ec='None', label='95% CI')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.legend();
Similar plot in log space...
In [12]:
plt.figure(figsize=(8, 5))
plt.plot(x, np.exp(y), 'r.', label='Observations')
plt.plot(x_eval, np.exp(y_eval), 'b-', label='Prediction')
plt.fill(np.concatenate([x_eval, x_eval[::-1]]),
np.concatenate([np.exp(y_eval - 1.96 * sigma_eval),
np.exp(y_eval + 1.96 * sigma_eval)[::-1]]),
alpha=.5, fc='b', ec='None', label='95% CI')
plt.xlabel('$x$')
plt.ylabel('$\exp\,f(x)$')
plt.legend();
plt.show()
# Enhance... there's some weird numerical artifacts visible here. Not just due to exponential
x_eval2 = np.linspace(0, 8.5, 500)
y_eval2, sigma_eval2 = regressor.predict(_reshapex(x_eval2), return_std=True)
y_eval2 += prior_mean
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(x, y, 'r.', label='Observations')
plt.plot(x_eval2, y_eval2, 'b-', label='Prediction')
plt.fill(np.concatenate([x_eval2, x_eval2[::-1]]),
np.concatenate([(y_eval2 - 1.96 * sigma_eval2),
(y_eval2 + 1.96 * sigma_eval2)[::-1]]),
alpha=.5, fc='b', ec='None', label='95% CI')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.legend();
plt.subplot(1, 2, 2)
plt.plot(x, np.exp(y), 'r.', label='Observations')
plt.plot(x_eval2, np.exp(y_eval2), 'b-', label='Prediction')
plt.fill(np.concatenate([x_eval2, x_eval2[::-1]]),
np.concatenate([np.exp(y_eval2 - 1.96 * sigma_eval2),
np.exp(y_eval2 + 1.96 * sigma_eval2)[::-1]]),
alpha=.5, fc='b', ec='None', label='95% CI')
plt.xlabel('$x$')
plt.ylabel('$\exp\,f(x)$')
plt.legend();