This example shows how to use grid interpolation based variational classification with an AbstractVariationalGP using a GridInterpolationVariationalStrategy module. This classification model is designed for when the inputs of the function you're modeling are one-dimensional.
The use of inducing points allows us to scale to larger training sets by reducing the computational complexity from cubic to linear in the number of training points.
In this example, we're modeling the periodic label function sign(cos(2πx)) on the unit interval (a square wave with period 1).
This notebook doesn't use CUDA. In general we recommend using a GPU when possible, and most of our other notebooks do use CUDA.
Kernel interpolation for scalable structured Gaussian processes (KISS-GP) was introduced in this paper: http://proceedings.mlr.press/v37/wilson15.pdf
KISS-GP with SVI for classification was introduced in this paper: https://papers.nips.cc/paper/6426-stochastic-variational-deep-kernel-learning.pdf
In [1]:
import math
import torch
import gpytorch
from matplotlib import pyplot as plt
from math import exp
%matplotlib inline
%load_ext autoreload
%autoreload 2
In [2]:
train_x = torch.linspace(0, 1, 26)
train_y = torch.sign(torch.cos(train_x * (2 * math.pi)))
In [3]:
from gpytorch.models import AbstractVariationalGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import GridInterpolationVariationalStrategy
class GPClassificationModel(AbstractVariationalGP):
    def __init__(self, grid_size=128, grid_bounds=[(0, 1)]):
        variational_distribution = CholeskyVariationalDistribution(grid_size)
        variational_strategy = GridInterpolationVariationalStrategy(self, grid_size, grid_bounds, variational_distribution)
        super(GPClassificationModel, self).__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        latent_pred = gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
        return latent_pred

model = GPClassificationModel()
likelihood = gpytorch.likelihoods.BernoulliLikelihood()
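GPyTorch's BernoulliLikelihood maps latent function values to class probabilities through a probit link, P(y = 1 | f(x)) = Φ(f(x)), where Φ is the standard normal CDF. As a quick illustration of the link using plain torch (a sketch added here, not part of the original notebook):
In [ ]:
import torch
from torch.distributions import Normal

f = torch.tensor([-2.0, 0.0, 2.0])   # example latent function values
probs = Normal(0.0, 1.0).cdf(f)      # probit link: P(y = 1 | f)
print(probs)                         # roughly [0.023, 0.500, 0.977]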
In [4]:
from gpytorch.mlls.variational_elbo import VariationalELBO
# Find optimal model hyperparameters
model.train()
likelihood.train()
# Use the adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# "Loss" for GPs - the marginal log likelihood
# num_data refers to the number of training data points
mll = VariationalELBO(likelihood, model, num_data=train_y.numel())
def train():
    num_iter = 400
    for i in range(num_iter):
        optimizer.zero_grad()
        output = model(train_x)
        # Calc loss and backprop gradients
        loss = -mll(output, train_y)
        loss.backward()
        print('Iter %d/%d - Loss: %.3f' % (i + 1, num_iter, loss.item()))
        optimizer.step()
# Get clock time
%time train()
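Because the variational ELBO decomposes as a sum over training points, the objective above also supports minibatch ("stochastic") training, which is the SVI part of KISS-GP with SVI. Below is a minimal sketch, not part of the original notebook, that reuses the model, likelihood, optimizer, and mll objects defined above; the batch size and number of epochs are arbitrary choices:
In [ ]:
from torch.utils.data import TensorDataset, DataLoader

train_loader = DataLoader(TensorDataset(train_x, train_y), batch_size=8, shuffle=True)
for epoch in range(50):
    for x_batch, y_batch in train_loader:
        optimizer.zero_grad()
        output = model(x_batch)
        # VariationalELBO uses num_data to scale the likelihood term, so the
        # minibatch loss is a stochastic estimate of the full ELBO
        loss = -mll(output, y_batch)
        loss.backward()
        optimizer.step()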
In [5]:
# Set model and likelihood into eval mode
model.eval()
likelihood.eval()
# Initialize axes
f, ax = plt.subplots(1, 1, figsize=(4, 3))
with torch.no_grad():
    test_x = torch.linspace(0, 1, 101)
    predictions = likelihood(model(test_x))
ax.plot(train_x.numpy(), train_y.numpy(), 'k*')
# Threshold the predicted probabilities at 0.5 and map to the {-1, 1} labels
pred_labels = predictions.mean.ge(0.5).float().mul(2).sub(1)
ax.plot(test_x.numpy(), pred_labels.numpy(), 'b')
ax.set_ylim([-3, 3])
ax.legend(['Observed Data', 'Predicted Labels'])
Out[5]:
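As a quick sanity check (not part of the original notebook), we can also compute the accuracy of the thresholded predictions on the training inputs, reusing the model and likelihood above:
In [ ]:
with torch.no_grad():
    # Probability of class +1, thresholded at 0.5 and mapped to the {-1, 1} labels
    train_preds = likelihood(model(train_x)).mean.ge(0.5).float().mul(2).sub(1)
    accuracy = (train_preds == train_y).float().mean().item()
    print('Train accuracy: %.3f' % accuracy)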