PyDeepGP tutorial


In [1]:
import numpy as np
import GPy
from pylab import *
%matplotlib inline
import os
from sys import path
np.random.seed(42)

In [2]:
# Add parent directory to path if deepgp is not there already
pwd = os.getcwd()
base_path = pwd[0:pwd.rfind(os.sep)]
path.insert(0,base_path)

In [3]:
import deepgp

In [4]:
# Utility to load sample data. It can be installed with pip. Otherwise just load some other data.
import pods

Unsupervised learning (oil data)


In [5]:
# Load data
data = pods.datasets.oil_100()
Y = data['X']
labels =  data['Y'].argmax(axis=1)

# Number of latent dimensions per layer
Q1, Q2 = 5, 4

# Type of kernel per layer
kern1 = GPy.kern.RBF(Q1,ARD=True) + GPy.kern.Bias(Q1)
kern2 = GPy.kern.RBF(Q2,ARD=True) + GPy.kern.Bias(Q2)

# Number of inducing points per layer (can be set to different if given as list).
num_inducing = 40

# Whether to use back-constraint for variational posterior
back_constraint = False

# Dimensions of the MLP back-constraint if set to true
encoder_dims=[[300],[150]]

In [7]:
m = deepgp.DeepGP([Y.shape[1],Q1,Q2],
                  Y,kernels=[kern1,kern2], 
                  num_inducing=num_inducing, 
                  back_constraint=back_constraint, 
                  encoder_dims = encoder_dims)

In [8]:
# Make sure initial noise variance gives a reasonable signal to noise ratio
for i in range(len(m.layers)):
    output_var = m.layers[i].Y.var() if i==0 else m.layers[i].Y.mean.var()
    m.layers[i].Gaussian_noise.variance = output_var*0.01

In [9]:
m.optimize(max_iters=5000, messages=True)


Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    00s12  0001   7.755559e+04   8.005322e+09 
    01s12  0037   1.108158e+02   7.873395e+02 
    03s13  0121  -5.507231e+01   1.949809e+02 
    11s21  0447  -1.728118e+02   2.852418e+01 
    35s51  1340  -2.465651e+02   1.361611e+02 
    49s69  1848  -2.566525e+02   1.575437e+01 
 02m26s44  5002  -2.615853e+02   1.870644e+00 
Runtime:  02m26s44
Optimization status: Maximum number of f evaluations reached


In [10]:
# Plot ARD scales per layer
m.obslayer.kern.plot_ARD()
m.layer_1.kern.plot_ARD()


 /Users/neil/anaconda3/lib/python3.6/site-packages/matplotlib/__init__.py:1710: MatplotlibDeprecationWarning:The *left* kwarg to `bar` is deprecated use *x* instead. Support for *left* will be removed in Matplotlib 3.0
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x11c74b8d0>
 /Users/neil/anaconda3/lib/python3.6/site-packages/matplotlib/figure.py:1999: UserWarning:This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.

In [11]:
# From the plots above, we see which ones are the dominant dimensions for each layer. 
# So we use these dimensions in the visualization of the latent space below.
plt.figure(figsize=(5,5))
deepgp.util.visualize_DGP(m, labels, layer=0, dims=[1,2]); plt.title('Layer 0')
plt.figure(figsize=(5,5))
deepgp.util.visualize_DGP(m, labels, layer=1, dims=[0,1]); plt.title('Layer 1')


Out[11]:
Text(0.5,1,'Layer 1')

Supervised learning


In [12]:
# Load some mocap data.
data = pods.datasets.cmu_mocap_35_walk_jog()

Ntr = 100
Nts = 500

# All data represented in Y_all, which is the angles of the movement of the subject
Y_all = data['Y']
perm = np.random.permutation(Ntr+Nts)
index_training = np.sort(perm[0:Ntr])
index_test     = np.sort(perm[Ntr:Ntr+Nts])

Y_all_tr = Y_all[index_training,:]
Y_all_ts = Y_all[index_test,    :]


# Some of the features (body joints) to be used as inputs, and some as outputs
X_tr = Y_all_tr[:,0:55].copy()
Y_tr = Y_all_tr[:, 55:].copy()

X_ts = Y_all_ts[:,0:55].copy()
Y_ts = Y_all_ts[:, 55:].copy()

# TODO: You might need to normalize the input and/or output data.

In [13]:
# Number of latent dimensions (single hidden layer, since the top layer is observed)
Q = 5

# Define what kernels to use per layer
kern1 = GPy.kern.RBF(Q,ARD=True) + GPy.kern.Bias(Q)
kern2 = GPy.kern.RBF(Q,ARD=False) + GPy.kern.Bias(X_tr.shape[1])

# Number of inducing points to use
num_inducing = 40

# Whether to use back-constraint for variational posterior
back_constraint = False

# Dimensions of the MLP back-constraint if set to true
encoder_dims=[[300],[150]]

In [14]:
m = deepgp.DeepGP([Y_tr.shape[1],Q,X_tr.shape[1]],
                  Y_tr, X_tr=X_tr,kernels=[kern1, kern2], 
                  num_inducing=num_inducing, back_constraint=back_constraint)

In [15]:
# Make sure initial noise variance gives a reasonable signal to noise ratio.
# Fix to that value for a few iterations to avoid early local minima
for i in range(len(m.layers)):
    output_var = m.layers[i].Y.var() if i==0 else m.layers[i].Y.mean.var()
    m.layers[i].Gaussian_noise.variance = output_var*0.01
    m.layers[i].Gaussian_noise.variance.fix()

In [16]:
display(m)


Model: deepgp
Objective: 27729.285582716577
Number of Parameters: 14492
Number of Optimization Parameters: 14490
Updates: True

deepgp. valueconstraintspriors
obslayer.inducing inputs (40, 5)
obslayer.sum.rbf.variance 1.0 +ve
obslayer.sum.rbf.lengthscale (5,) +ve
obslayer.sum.bias.variance 1.0 +ve
obslayer.Gaussian_noise.variance 2.13179847772 +ve fixed
obslayer.Kuu_var (40,) +ve
obslayer.latent space.mean (100, 5)
obslayer.latent space.variance (100, 5) +ve
layer_1.inducing inputs (40, 55)
layer_1.sum.rbf.variance 1.0 +ve
layer_1.sum.rbf.lengthscale 1.0 +ve
layer_1.sum.bias.variance 1.0 +ve
layer_1.Gaussian_noise.variance 0.0100039017522 +ve fixed
layer_1.Kuu_var (40,) +ve
layer_1.latent space.mean (100, 55)
layer_1.latent space.variance (100, 55) +ve

In [17]:
m.optimize(max_iters=800, messages=True)


Running L-BFGS-B (Scipy implementation) Code:
  runtime   i     f              |g|        
    00s18  001   2.772929e+04   2.660103e+08 
    01s20  040   5.172149e+03   2.273552e+05 
    05s22  196   1.541046e+03   1.879665e+02 
    06s24  236   1.530905e+03   7.159914e+02 
    19s41  743   1.429718e+03   1.042016e+02 
    20s97  802   1.425838e+03   7.004558e+01 
Runtime:     20s97
Optimization status: Maximum number of f evaluations reached


In [18]:
# Unfix noise variance now that we have initialized the model
for i in range(len(m.layers)):
    m.layers[i].Gaussian_noise.variance.unfix()

In [19]:
m.optimize(max_iters=1500, messages=True)


Running L-BFGS-B (Scipy implementation) Code:
  runtime   i      f              |g|        
    00s14  0001   1.425838e+03   3.782786e+03 
    03s18  0101   1.307856e+03   8.537983e+02 
    10s24  0339   1.293169e+03   2.541807e+01 
    32s56  1027   1.292130e+03   1.878501e+01 
    48s12  1502   1.291413e+03   1.317024e+01 
Runtime:     48s12
Optimization status: Maximum number of f evaluations reached

Compare with GP


In [20]:
m_GP = GPy.models.SparseGPRegression(X=X_tr, Y=Y_tr, kernel=GPy.kern.RBF(X_tr.shape[1])+GPy.kern.Bias(X_tr.shape[1]), num_inducing=num_inducing)

In [21]:
m_GP.Gaussian_noise.variance = m_GP.Y.var()*0.01
m_GP.Gaussian_noise.variance.fix()
m_GP.optimize(max_iters=100, messages=True)
m_GP.Gaussian_noise.variance.unfix()
m_GP.optimize(max_iters=400, messages=True)


Running L-BFGS-B (Scipy implementation) Code:
  runtime   i     f              |g|        
    00s19  006   2.463751e+03   4.986610e+02 
    00s33  027   2.339956e+03   5.774437e-11 
Runtime:     00s33
Optimization status: Converged

Running L-BFGS-B (Scipy implementation) Code:
  runtime   i     f              |g|        
    00s07  004   1.855557e+03   2.678079e+03 
    00s18  011   1.778417e+03   5.465010e-02 
    00s32  029   1.778190e+03   1.885596e-09 
Runtime:     00s32
Optimization status: Converged

Out[21]:
<paramz.optimization.optimization.opt_lbfgsb at 0x11c78de10>

In [22]:
def rmse(predictions, targets):
    return np.sqrt(((predictions.flatten() - targets.flatten()) ** 2).mean())

In [23]:
Y_pred = m.predict(X_ts)[0]
Y_pred_s = m.predict_withSamples(X_ts, nSamples=500)[0]
Y_pred_GP = m_GP.predict(X_ts)[0]

print('# RMSE DGP               : ' + str(rmse(Y_pred, Y_ts)))
print('# RMSE DGP (with samples): ' + str(rmse(Y_pred_s, Y_ts)))
print('# RMSE GP                : ' + str(rmse(Y_pred_GP, Y_ts)))


# RMSE DGP               : 3.03000726496
# RMSE DGP (with samples): 3.03436330576
# RMSE GP                : 3.02347121967

In [ ]: