Example of how to use revrand for regression

In this notebook we demonstrate revrand's standard linear model (SLM) and generalised linear model (GLM) by fitting a random draw from a Gaussian process (GP). We also compare the performance of these algorithms to a full GP.
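
Both revrand models replace the GP's kernel with a finite set of random basis functions. As background (this is the standard random Fourier feature construction, not spelled out in this notebook), a shift-invariant kernel such as the squared exponential admits the approximation

$$k(\mathbf{x}, \mathbf{x}') \approx \phi(\mathbf{x})^\top \phi(\mathbf{x}'), \qquad \phi(\mathbf{x}) = \sqrt{\tfrac{2}{D}} \left[ \cos(\mathbf{w}_1^\top \mathbf{x} + b_1), \ldots, \cos(\mathbf{w}_D^\top \mathbf{x} + b_D) \right]^\top,$$

where the frequencies $\mathbf{w}_i$ are sampled from the kernel's spectral density and $b_i \sim \mathrm{Uniform}[0, 2\pi)$. revrand's RandomRBF basis implements this idea, though its exact parameterisation may differ.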


In [15]:
%matplotlib inline

import matplotlib.pyplot as pl
pl.style.use('ggplot')
import numpy as np
from scipy.stats import gamma

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import WhiteKernel, RBF

from revrand import StandardLinearModel, GeneralizedLinearModel, likelihoods, Parameter, Positive
from revrand.metrics import msll, smse
from revrand.utils.datasets import gen_gausprocess_se
from revrand import basis_functions as bs
from revrand.optimize import AdaDelta, Adam

Dataset settings and creation


In [16]:
N = 150  # Number of training points
Ns = 250  # Number of test points
lenscale_true = 1.2
noise_true = 0.1

Xtrain, ytrain, Xtest, ftest = \
            gen_gausprocess_se(N, Ns, lenscale=lenscale_true, noise=noise_true)
ytest = ftest + np.random.randn(Ns) * noise_true
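
For reference, gen_gausprocess_se draws the latent function from a GP with a squared-exponential kernel and adds iid Gaussian noise. A minimal numpy sketch of that idea (draw_se_gp is a hypothetical re-implementation, not revrand's actual code):

def draw_se_gp(X, lenscale, noise):
    # Hypothetical sketch, not revrand's implementation.
    # Squared-exponential covariance over 1D inputs, with jitter for stability
    sqdist = (X[:, None] - X[None, :]) ** 2
    K = np.exp(-0.5 * sqdist / lenscale ** 2) + 1e-8 * np.eye(len(X))
    f = np.random.multivariate_normal(np.zeros(len(X)), K)  # latent GP draw
    y = f + noise * np.random.randn(len(X))  # noisy targets
    return f, y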

Algorithm Settings


In [17]:
# Common settings
nbases = 20  # Number of unique random bases to use for approximating the kernel
lenscale = gamma(a=1, scale=1)  # Initial distribution for the lengthscale hyperparameter
var = gamma(a=0.1, scale=2)  # Initial distribution for the observation-noise variance
reg = gamma(a=1, scale=1)  # Initial distribution for the weight-prior (regularizer) variance

# GLM specific settings
maxiter = 10000
batch_size = 10
updater = Adam()

# Setup random basis functions
base = bs.RandomRBF(Xdim=1,
                    nbases=nbases,
                    lenscale=Parameter(lenscale, Positive()),
                    regularizer=Parameter(reg, Positive())
                    )
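
To make the random-basis idea concrete, here is a minimal numpy sketch of the cosine feature map above (random_rbf_features is a hypothetical helper following the standard construction; revrand's RandomRBF may use an equivalent but different parameterisation, e.g. paired cos/sin features):

def random_rbf_features(X, nbases, lenscale, seed=None):
    # Hypothetical sketch, not revrand's RandomRBF internals.
    # The SE kernel's spectral density is Gaussian, so sample frequencies
    # from N(0, 1/lenscale^2) and phases uniformly on [0, 2*pi).
    rng = np.random.default_rng(seed)
    W = rng.normal(scale=1.0 / lenscale, size=(X.shape[1], nbases))
    b = rng.uniform(0, 2 * np.pi, size=nbases)
    return np.sqrt(2.0 / nbases) * np.cos(X @ W + b)

Phi = random_rbf_features(Xtrain, nbases, lenscale.mean())  # (N, nbases) design matrix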

Parameter learning


In [18]:
# SLM
slm = StandardLinearModel(base, var=Parameter(var, Positive()))
slm.fit(Xtrain, ytrain)

# GLM
llhood = likelihoods.Gaussian(var=Parameter(var, Positive()))
glm = GeneralizedLinearModel(llhood,
                             base,
                             batch_size=batch_size,
                             maxiter=maxiter,
                             updater=updater
                             )
glm.fit(Xtrain, ytrain)

# GP (sklearn optimises the kernel hyperparameters by maximising the log
# marginal likelihood, so the noise level and lengthscale below are just
# initial values)
kern = WhiteKernel(noise_level=np.sqrt(var.mean())) + 1**2 * RBF(length_scale=lenscale.mean())
gp = GaussianProcessRegressor(kernel=kern)
gp.fit(Xtrain, ytrain)


Out[18]:
GaussianProcessRegressor(alpha=1e-10, copy_X_train=True,
             kernel=WhiteKernel(noise_level=0.447) + 1**2 * RBF(length_scale=1),
             n_restarts_optimizer=0, normalize_y=False,
             optimizer='fmin_l_bfgs_b', random_state=None)

Model Querying


In [19]:
# SLM
Ey_e, Vy_e = slm.predict_moments(Xtest)
Sy_e = np.sqrt(Vy_e)

# GLM (add the learned observation-noise variance to the latent-function variance)
Ey_g, Vf_g = glm.predict_moments(Xtest)
Vy_g = Vf_g + glm.like_hypers_
Sy_g = np.sqrt(Vy_g)

# GP
Ey_gp, Sy_gp = gp.predict(Xtest, return_std=True) 
Vy_gp = Sy_gp**2
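
All three models return Gaussian predictive moments here, so other summaries follow directly under that Gaussian assumption; for example, a 95% central credible interval for the SLM:

from scipy.stats import norm

# 95% credible interval from the Gaussian predictive marginals (assumes the
# predictive distribution is well summarised by its first two moments)
lo, hi = norm.interval(0.95, loc=Ey_e, scale=Sy_e)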

Score the models


In [20]:
LL_s = msll(ytest, Ey_e, Vy_e, ytrain)
LL_gp = msll(ytest, Ey_gp, Vy_gp, ytrain)
LL_g = msll(ytest, Ey_g, Vy_g, ytrain)

smse_s = smse(ytest, Ey_e)
smse_gp = smse(ytest, Ey_gp)
smse_glm = smse(ytest, Ey_g)

print("SLM: msll = {}, smse = {}, noise: {}, hypers: {}"
     .format(LL_s, smse_s, np.sqrt(slm.var_), slm.hypers_))
print("GLM: msll = {}, smse = {}, noise: {}, hypers: {}"
     .format(LL_g, smse_glm, np.sqrt(glm.like_hypers_),
             glm.basis_hypers_))
print("GP: msll = {}, smse = {}, noise: {}, hypers: {}"
     .format(LL_gp, smse_gp, gp.kernel_.k1.noise_level,
             gp.kernel_.k2.k2.length_scale))


SLM: msll = -2.1106245552475986, smse = 0.014123827148832011, noise: 0.11926076694719408, hypers: 0.8634172518359098
GLM: msll = -1.9655718490410439, smse = 0.016770228477925437, noise: 0.14341314260325827, hypers: 0.8490324977691501
GP: msll = -2.205896071199454, smse = 0.01185713899304955, noise: 0.01035913594187815, hypers: 1.1605691492118075
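
For interpretation: SMSE is the mean squared error normalised by the test-target variance (a value of 1.0 means no better than predicting the training mean), and MSLL is the mean negative log predictive density baselined against a trivial Gaussian fitted to the training targets (more negative is better). The sketches below (smse_ref, msll_ref) follow Rasmussen and Williams' definitions and are illustrative only; revrand.metrics may differ in detail:

def smse_ref(y_true, Ey):
    # Standardised mean squared error: MSE over the test-target variance
    return np.mean((y_true - Ey) ** 2) / np.var(y_true)

def msll_ref(y_true, Ey, Vy, y_train):
    # Gaussian negative log predictive density under the model...
    nll_model = 0.5 * (np.log(2 * np.pi * Vy) + (y_true - Ey) ** 2 / Vy)
    # ...minus the same loss for a Gaussian fitted to the training targets
    mu, s2 = y_train.mean(), y_train.var()
    nll_trivial = 0.5 * (np.log(2 * np.pi * s2) + (y_true - mu) ** 2 / s2)
    return np.mean(nll_model - nll_trivial)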

Plot predictions


In [21]:
Xpl_t = Xtrain.flatten()
Xpl_s = Xtest.flatten()

# Training/Truth
pl.figure(figsize=(15, 10))
pl.plot(Xpl_t, ytrain, 'k.', label='Training')
pl.plot(Xpl_s, ftest, 'k-', label='Truth')

# SLM (Bayesian linear regression)
pl.plot(Xpl_s, Ey_e, 'g-', label='Bayesian linear regression')
pl.fill_between(Xpl_s, Ey_e - 2 * Sy_e, Ey_e + 2 * Sy_e, facecolor='none',
                edgecolor='g', linestyle='--', label=None)

# GP
pl.plot(Xpl_s, Ey_gp, 'b-', label='GP')
pl.fill_between(Xpl_s, Ey_gp - 2 * Sy_gp, Ey_gp + 2 * Sy_gp,
                facecolor='none', edgecolor='b', linestyle='--',
                label=None)

# GLM Regressor
pl.plot(Xpl_s, Ey_g, 'm-', label='GLM')
pl.fill_between(Xpl_s, Ey_g - 2 * Sy_g, Ey_g + 2 * Sy_g, facecolor='none',
                edgecolor='m', linestyle='--', label=None)

pl.legend()

pl.grid(True)
pl.ylabel('y')
pl.xlabel('x')

pl.show()


