In [1]:
import math
import torch
import gpytorch
import pyro
from matplotlib import pyplot as plt
# Make plots inline
%matplotlib inline
In [2]:
import urllib.request
import os.path
from scipy.io import loadmat
from math import floor
if not os.path.isfile('3droad.mat'):
    print('Downloading \'3droad\' UCI dataset...')
    urllib.request.urlretrieve('https://www.dropbox.com/s/f6ow1i59oqx05pl/3droad.mat?dl=1', '3droad.mat')
data = torch.Tensor(loadmat('3droad.mat')['data'])
X = data[:, :-1]
X = X - X.min(0)[0]
X = 2 * (X / X.max(0)[0]) - 1
y = data[:, -1]
# Use the first 80% of the data for training, and the last 20% for testing.
train_n = int(floor(0.8*len(X)))
train_x = X[:train_n, :].contiguous().cuda()
train_y = y[:train_n].contiguous().cuda()
test_x = X[train_n:, :].contiguous().cuda()
test_y = y[train_n:].contiguous().cuda()
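Each input dimension is min-max scaled into [-1, 1], which matches the grid bounds the GP layer will use later. As a quick sanity check (illustrative, not part of the original example), every dimension of X should span exactly that interval:
print(X.min(0)[0], X.max(0)[0])  # expected: all -1s and all 1s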
In [3]:
from torch.utils.data import TensorDataset, DataLoader
train_dataset = TensorDataset(train_x, train_y)
train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)
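Pulling a single minibatch out of the loader is a convenient way to confirm the shapes being fed to the model (a quick check, not in the original notebook):
x_batch, y_batch = next(iter(train_loader))
print(x_batch.shape, y_batch.shape)  # batch_size x input dims, and batch_size targets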
In [4]:
data_dim = train_x.size(-1)
class LargeFeatureExtractor(torch.nn.Sequential):
    def __init__(self):
        super(LargeFeatureExtractor, self).__init__()
        self.add_module('linear1', torch.nn.Linear(data_dim, 1000))
        self.add_module('bn1', torch.nn.BatchNorm1d(1000))
        self.add_module('relu1', torch.nn.ReLU())
        self.add_module('linear2', torch.nn.Linear(1000, 500))
        self.add_module('bn2', torch.nn.BatchNorm1d(500))
        self.add_module('relu2', torch.nn.ReLU())
        self.add_module('linear3', torch.nn.Linear(500, 50))
        self.add_module('bn3', torch.nn.BatchNorm1d(50))
        self.add_module('relu3', torch.nn.ReLU())
        self.add_module('linear4', torch.nn.Linear(50, 2))
feature_extractor = LargeFeatureExtractor().cuda()
# num_features is the number of final features extracted by the neural network, in this case 2.
num_features = 2
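A quick forward pass on a few training rows confirms that the extractor maps data_dim-dimensional inputs down to num_features = 2 outputs (an illustrative check, not part of the original example):
with torch.no_grad():
    print(feature_extractor(train_x[:4]).shape)  # expected: torch.Size([4, 2])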
In [5]:
from gpytorch.models import PyroVariationalGP
from gpytorch.variational import CholeskyVariationalDistribution, GridInterpolationVariationalStrategy
class PyroSVDKLGridInterpModel(PyroVariationalGP):
    def __init__(self, likelihood, grid_size=32, grid_bounds=[(-1, 1), (-1, 1)], name_prefix="svdkl_grid_example"):
        variational_distribution = CholeskyVariationalDistribution(num_inducing_points=(grid_size ** num_features))
        variational_strategy = GridInterpolationVariationalStrategy(self,
                                                                    grid_size=grid_size,
                                                                    grid_bounds=grid_bounds,
                                                                    variational_distribution=variational_distribution)
        super(PyroSVDKLGridInterpModel, self).__init__(variational_strategy,
                                                       likelihood,
                                                       num_data=train_y.numel(),
                                                       name_prefix=name_prefix)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(
            lengthscale_prior=gpytorch.priors.SmoothedBoxPrior(0.001, 1., sigma=0.1, transform=torch.exp)
        ))

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
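GridInterpolationVariationalStrategy places the inducing points on a fixed grid and interpolates onto it (KISS-GP style), so the number of inducing points is grid_size ** num_features rather than a free parameter. With the defaults above, that works out to (an illustrative calculation):
grid_size, num_grid_dims = 32, 2
print(grid_size ** num_grid_dims)  # 1024 inducing points on the 32 x 32 grid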
In [6]:
class DKLModel(gpytorch.Module):
    def __init__(self, likelihood, feature_extractor, num_features, grid_bounds=(-1., 1.)):
        super(DKLModel, self).__init__()
        self.feature_extractor = feature_extractor
        self.gp_layer = PyroSVDKLGridInterpModel(likelihood)
        self.grid_bounds = grid_bounds
        self.num_features = num_features

    def features(self, x):
        features = self.feature_extractor(x)
        # Scale the extracted features so they lie inside the GP layer's grid bounds
        features = gpytorch.utils.grid.scale_to_bounds(features, self.grid_bounds[0], self.grid_bounds[1])
        return features

    def forward(self, x):
        res = self.gp_layer(self.features(x))
        return res

    def guide(self, x, y):
        self.gp_layer.guide(self.features(x), y)

    def model(self, x, y):
        # Register the feature extractor's parameters with Pyro so SVI optimizes them too
        pyro.module(self.gp_layer.name_prefix + ".feature_extractor", self.feature_extractor)
        self.gp_layer.model(self.features(x), y)

likelihood = gpytorch.likelihoods.GaussianLikelihood().cuda()
model = DKLModel(likelihood, feature_extractor, num_features=num_features).cuda()
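It can be instructive to see where the trainable parameters live: the 1000 x 500 linear layer dominates the feature extractor, while the GP layer's Cholesky variational covariance over the 1024 inducing values is itself a 1024 x 1024 matrix. A quick count (illustrative, not part of the original example):
nn_params = sum(p.numel() for p in model.feature_extractor.parameters())
gp_params = sum(p.numel() for p in model.gp_layer.parameters())
print(nn_params, gp_params)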
In [7]:
from pyro import optim
from pyro import infer
optimizer = optim.Adam({"lr": 0.1})
elbo = infer.Trace_ELBO(num_particles=256, vectorize_particles=True)
svi = infer.SVI(model.model, model.guide, optimizer, elbo)
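Trace_ELBO with num_particles=256 and vectorize_particles=True estimates the ELBO gradient from 256 Monte Carlo samples drawn in a single batched pass, which lowers gradient variance at the cost of GPU memory. If memory is tight, a smaller particle count is a reasonable fallback; a sketch (the names cheap_elbo and svi_cheap are illustrative, not part of the original):
cheap_elbo = infer.Trace_ELBO(num_particles=16, vectorize_particles=True)
svi_cheap = infer.SVI(model.model, model.guide, optimizer, cheap_elbo)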
In [8]:
num_epochs = 3
# Not enough for this model to converge, but enough for a fast example
for i in range(num_epochs):
    # Within each iteration, we will go over each minibatch of data
    for minibatch_i, (x_batch, y_batch) in enumerate(train_loader):
        loss = svi.step(x_batch, y_batch)
    print('Epoch {} Loss {}'.format(i + 1, loss))
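svi.step returns the loss (an estimate of the negative ELBO) for that minibatch, so the loop above only reports the most recent minibatch at the end of each epoch. Averaging over the whole epoch gives a steadier signal (an illustrative variant, not in the original):
for i in range(num_epochs):
    epoch_loss = 0.0
    for x_batch, y_batch in train_loader:
        epoch_loss += svi.step(x_batch, y_batch)
    print('Epoch {} Avg Loss {}'.format(i + 1, epoch_loss / len(train_loader)))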
In [9]:
model.eval()
likelihood.eval()
with torch.no_grad():
    # The model returns a MultivariateNormal predictive distribution
    preds = model(test_x)
In [10]:
print('Test MAE: {}'.format(torch.mean(torch.abs(preds.mean - test_y))))
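Since preds is a full predictive distribution, other error metrics come straight from its mean; RMSE, for example (an additional check, not in the original notebook):
print('Test RMSE: {}'.format(torch.sqrt(torch.mean((preds.mean - test_y) ** 2))))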