Objective: Emulate the robot arm function with a Gaussian process regression (kriging) model, using a Latin hypercube design and the DiceKriging R package.
Related material:
Readings:
- Rasmussen, Carl Edward. "Gaussian processes in machine learning." In Advanced lectures on machine learning, pp. 63-71. Springer Berlin Heidelberg, 2004.
- see http://www.GaussianProcess.org/gpml
- Chapters: 2, 4, 5.1, & 5.4.2
- Slides provided
Software:
- R-cran (https://cran.r-project.org/)
- R packages
- DiceKriging (https://cran.r-project.org/web/packages/DiceKriging/index.html)
- lhs (https://cran.r-project.org/web/packages/lhs/index.html)
- Roustant, Olivier, David Ginsbourger, and Yves Deville. "DiceKriging, DiceOptim: Two R packages for the analysis of computer experiments by kriging-based metamodeling and optimization." (2012).
- Plus dependencies ...
The robot arm function, commonly used in neural network papers, models the position of a robot arm with four segments. The shoulder is fixed at the origin; each segment has length $L_i$ and is positioned at an angle $\theta_i$ (with respect to the horizontal axis), for $i = 1, \ldots, 4$.
The response is the distance from the end of the robot arm to the origin in the $(u, v)$-plane:
$$ C(x) = \sqrt{u^2 + v^2} $$
$$ u = \sum_{i=1}^{4} L_i \cos\left(\sum_{j=1}^{i} \theta_j\right) $$
$$ v = \sum_{i=1}^{4} L_i \sin\left(\sum_{j=1}^{i} \theta_j\right) $$
The input variables and their usual input ranges are:
| Input | Range | Description |
|-------|-------|-------------|
| θ1 | [0, 2π] | angle of the first arm segment |
| θ2 | [0, 2π] | angle of the second arm segment |
| θ3 | [0, 2π] | angle of the third arm segment |
| θ4 | [0, 2π] | angle of the fourth arm segment |
| L1 | [0, 1] | length of the first arm segment |
| L2 | [0, 1] | length of the second arm segment |
| L3 | [0, 1] | length of the third arm segment |
| L4 | [0, 1] | length of the fourth arm segment |
As a result, we wish to emulate the function $f(\theta_1,\theta_2,\theta_3,\theta_4,L_1,L_2,L_3,L_4) := C(\theta_1,\theta_2,\theta_3,\theta_4,L_1,L_2,L_3,L_4)$.
Reference: http://www.sfu.ca/~ssurjano/robot.html
An, J., & Owen, A. (2001). Quasi-regression. Journal of Complexity, 17(4), 588-607.
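As a quick sanity check of these formulas: with all angles equal to 0 and all lengths equal to 1, the arm is fully extended along the $u$-axis, so $C$ should equal exactly 4. A minimal R check (cumsum gives the partial angle sums $\sum_{j \le i} \theta_j$):
In [ ]:
# Direct evaluation of the formulas at a fully extended configuration
theta <- c(0, 0, 0, 0)
L <- c(1, 1, 1, 1)
u <- sum(L * cos(cumsum(theta)))
v <- sum(L * sin(cumsum(theta)))
sqrt(u^2 + v^2)  # should be 4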
In [1]:
# INSTALL THE REQUIRED R PACKAGES
install.packages('DiceKriging', repos = "http://cran.us.r-project.org")
install.packages('lhs', repos = "http://cran.us.r-project.org")
# install.packages('tcltk', repos = "http://cran.us.r-project.org")
# install.packages('aplpack', repos = "http://cran.us.r-project.org")
In [1]:
# LOAD THE R PACKAGES REQUIRED
library('lhs')
library('DiceKriging')
# library('tcltk')
# library('aplpack')
In [3]:
# THIS IS THE SIMULATOR AND THE MIN AND MAX OF THE INPUTS
robot <- function(xx) {
##########################################################################
#
# ROBOT ARM FUNCTION
#
# Authors: Sonja Surjanovic, Simon Fraser University
# Derek Bingham, Simon Fraser University
# Questions/Comments: Please email Derek Bingham at dbingham@stat.sfu.ca.
#
# Copyright 2013. Derek Bingham, Simon Fraser University.
#
# THERE IS NO WARRANTY, EXPRESS OR IMPLIED. WE DO NOT ASSUME ANY LIABILITY
# FOR THE USE OF THIS SOFTWARE. If software is modified to produce
# derivative works, such modified software should be clearly marked.
# Additionally, this program is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; version 2.0 of the License.
# Accordingly, this program is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# For function details and reference information, see:
# http://www.sfu.ca/~ssurjano/
#
##########################################################################
#
# OUTPUT AND INPUTS:
#
# y = distance from the end of the arm to the origin
# xx = c(theta1, theta2, theta3, theta4, L1, L2, L3, L4)
#
#########################################################################
  theta <- xx[1:4]
  L <- xx[5:8]
  # Row i of the lower-triangular matrix holds theta_1, ..., theta_i, so its
  # row sums give the cumulative angles sum_{j<=i} theta_j
  thetamat <- matrix(rep(theta, times=4), 4, 4, byrow=TRUE)
  thetamatlow <- thetamat
  thetamatlow[upper.tri(thetamatlow)] <- 0
  sumtheta <- rowSums(thetamatlow)
  u <- sum(L*cos(sumtheta))
  v <- sum(L*sin(sumtheta))
  y <- sqrt(u^2 + v^2)
  return(y)
}
input_min <- c(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 )
input_max <- c(2*pi, 2*pi, 2*pi, 2*pi, 1.0, 1.0, 1.0, 1.0)
input_d <- length(input_min)
myfun <- robot
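# CONSISTENCY CHECK (illustrative, helper names are ours): robot() should
# agree with a direct evaluation of the formulas via cumsum() at any input
xx_check <- c(runif(4, 0, 2*pi), runif(4, 0, 1))
u_check <- sum(xx_check[5:8] * cos(cumsum(xx_check[1:4])))
v_check <- sum(xx_check[5:8] * sin(cumsum(xx_check[1:4])))
stopifnot(abs(robot(xx_check) - sqrt(u_check^2 + v_check^2)) < 1e-10)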
# PLOT THE REAL FUNCTION TO SEE WHAT IT LOOKS LIKE
par(mfrow = c(3, 3))
for (i in 1:input_d) {
  for (j in 1:input_d) {
    if (i > j) {
      n.grid <- 100
      x1.grid <- seq(input_min[i], input_max[i], length.out = n.grid)
      x2.grid <- seq(input_min[j], input_max[j], length.out = n.grid)
      X.grid <- expand.grid(x1 = x1.grid, x2 = x2.grid)
      # Vary inputs i and j on the grid, holding all other inputs at the domain centre
      myfun2d <- function(xx) {
        zz <- 0.5*(input_min + input_max)
        zz[i] <- xx[1]; zz[j] <- xx[2]
        return(myfun(zz))
      }
      y.grid <- apply(X.grid, 1, myfun2d)
      contour(x1.grid, x2.grid, matrix(y.grid, n.grid, n.grid), 5,
              main = "Real function",
              xlab = paste("x", as.character(i)),
              ylab = paste("x", as.character(j)),
              xlim = c(input_min[i], input_max[i]),
              ylim = c(input_min[j], input_max[j]))
    }
  }
}
In [6]:
# GENERATE THE TRAINING DATA SET
n_data <- 20
n_dim <- length(input_min)
# Scale an optimum Latin hypercube design from [0,1]^n_dim onto the input ranges
X_data <- t(input_min + (input_max - input_min)*t(optimumLHS(n = n_data, k = n_dim)))
Y_data <- apply(X_data, 1, myfun)
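# QUICK CHECK (illustrative): every design point should lie inside the input
# box; comparing the d x n matrix t(X_data) against the length-d bound
# vectors recycles the bounds down each column, i.e. dimension-wise
stopifnot(all(t(X_data) >= input_min & t(X_data) <= input_max))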
# PLOT THE INPUT POINTS TO SEE THE SPREAD
par(mfrow = c(3, 3))
for (i in 1:n_dim) {
  for (j in 1:n_dim) {
    if (i > j) {
      plot(X_data[, i], X_data[, j],
           main = "Design points",
           xlab = paste("x", as.character(i)),
           ylab = paste("x", as.character(j)),
           xlim = c(input_min[i], input_max[i]),
           ylim = c(input_min[j], input_max[j]))
    }
  }
}
In [7]:
# Diagnostics (Feel free to use others that you know)
R2 <- function(Y, Ypred) {
  Ypred <- as.numeric(Ypred)
  Y <- as.numeric(Y)
  return(1 - mean((Y - Ypred)^2)/mean((Y - mean(Y))^2))
}
RMSE <- function(Y, Ypred) {
  Ypred <- as.numeric(Ypred)
  Y <- as.numeric(Y)
  return(sqrt(mean((Y - Ypred)^2)))
}
MAE <- function(Y, Ypred) {
  Ypred <- as.numeric(Ypred)
  Y <- as.numeric(Y)
  return(mean(abs(Y - Ypred)))
}
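As a small worked example of these criteria, easy to verify by hand: for $Y = (1, 2, 3)$ and predictions $(1.1, 1.9, 3.2)$, the squared errors are 0.01, 0.01 and 0.04, so RMSE $= \sqrt{0.02} \approx 0.141$, MAE $= 0.4/3 \approx 0.133$, and $R^2 = 1 - 0.02/0.667 \approx 0.97$ (the toy vectors below are ours, for illustration only):
In [ ]:
# Worked example of the diagnostics on a toy vector
Y_toy <- c(1, 2, 3)
Yhat_toy <- c(1.1, 1.9, 3.2)
c(R2 = R2(Y_toy, Yhat_toy), RMSE = RMSE(Y_toy, Yhat_toy), MAE = MAE(Y_toy, Yhat_toy))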
In [16]:
# Train different GP regressions differing in the covariance function
myfun_km_matern5_2 <- km( formula = ~ 1 , # linear trend formula; E.G.: "formula = ~ 1 +x1 +exp(x2) +I(x1*x2) +I(x1^2)"
design = data.frame(x1=X_data[,1],
x2=X_data[,2],
x3=X_data[,3],
x4=X_data[,4],
x5=X_data[,5],
x6=X_data[,6],
x7=X_data[,7],
x8=X_data[,8]), # a data frame representing the design of experiments.
response=Y_data, # the values of the 1-dimensional output
covtype="matern5_2", # covariance structure
coef.trend = NULL, # values for the trend parameters
coef.cov = NULL, # values for the covariance parameters
coef.var = NULL, # values for the variance parameters
nugget= 1e-7, # the homogeneous nugget effect
noise.var = NULL, # containing the noise variance at each observation
optim.method = "BFGS", # optimization method used for likelihood maximization
lower = NULL, # lower bounds of the correlation parameters for optimization
upper = NULL, # upper bounds of the correlation parameters for optimization
control =list(trace=FALSE), # =list(trace=FALSE) to suppress optimization trace information
kernel=NULL) # a user's new covariance structure
myfun_km_matern3_2 <- km( formula = ~ 1 , # linear trend formula; E.G.: "formula = ~ 1 +x1 +exp(x2) +I(x1*x2) +I(x1^2)"
design = data.frame(x1=X_data[,1],
x2=X_data[,2],
x3=X_data[,3],
x4=X_data[,4],
x5=X_data[,5],
x6=X_data[,6],
x7=X_data[,7],
x8=X_data[,8]), # a data frame representing the design of experiments.
response=Y_data, # the values of the 1-dimensional output
covtype="matern3_2", # covariance structure
coef.trend = NULL, # values for the trend parameters
coef.cov = NULL, # values for the covariance parameters
coef.var = NULL, # values for the variance parameters
nugget= 1e-7, # the homogeneous nugget effect
noise.var = NULL, # containing the noise variance at each observation
optim.method = "BFGS", # optimization method used for likelihood maximization
lower = NULL, # lower bounds of the correlation parameters for optimization
upper = NULL, # upper bounds of the correlation parameters for optimization
control =list(trace=FALSE), # =list(trace=FALSE) to suppress optimization trace information
kernel=NULL) # a user's new covariance structure
myfun_km_gauss <- km( formula = ~ 1 , # linear trend formula; E.G.: "formula = ~ 1 +x1 +exp(x2) +I(x1*x2) +I(x1^2)"
design = data.frame(x1=X_data[,1],
x2=X_data[,2],
x3=X_data[,3],
x4=X_data[,4],
x5=X_data[,5],
x6=X_data[,6],
x7=X_data[,7],
x8=X_data[,8]), # a data frame representing the design of experiments.
response=Y_data, # the values of the 1-dimensional output
covtype="gauss", # covariance structure
coef.trend = NULL, # values for the trend parameters
coef.cov = NULL, # values for the covariance parameters
coef.var = NULL, # values for the variance parameters
nugget= 1e-7, # the homogeneous nugget effect
noise.var = NULL, # containing the noise variance at each observation
optim.method = "BFGS", # optimization method used for likelihood maximization
lower = NULL, # lower bounds of the correlation parameters for optimization
upper = NULL, # upper bounds of the correlation parameters for optimization
control =list(trace=FALSE), # =list(trace=FALSE) to suppress optimization trace information
kernel=NULL) # a user's new covariance structure
myfun_km_exp <- km( formula = ~ 1 , # linear trend formula; E.G.: "formula = ~ 1 +x1 +exp(x2) +I(x1*x2) +I(x1^2)"
design = data.frame(x1=X_data[,1],
x2=X_data[,2],
x3=X_data[,3],
x4=X_data[,4],
x5=X_data[,5],
x6=X_data[,6],
x7=X_data[,7],
x8=X_data[,8]), # a data frame representing the design of experiments.
response=Y_data, # the values of the 1-dimensional output
covtype="exp", # covariance structure
coef.trend = NULL, # values for the trend parameters
coef.cov = NULL, # values for the covariance parameters
coef.var = NULL, # values for the variance parameters
nugget= 1e-7, # the homogeneous nugget effect
noise.var = NULL, # containing the noise variance at each observation
optim.method = "BFGS", # optimization method used for likelihood maximization
lower = NULL, # lower bounds of the correlation parameters for optimization
upper = NULL, # upper bounds of the correlation parameters for optimization
control =list(trace=FALSE), # =list(trace=FALSE) to suppress optimization trace information
kernel=NULL) # a user's new covariance structure
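# NOTE: the four fits above differ only in 'covtype', so an equivalent, more
# compact construction is possible (an illustrative sketch with the same
# settings; design_df, covtypes and km_models are names of our choosing):
design_df <- data.frame(X_data)
colnames(design_df) <- paste0("x", 1:input_d)
covtypes <- c("matern5_2", "matern3_2", "gauss", "exp")
km_models <- lapply(covtypes, function(ct)
  km(~1, design = design_df, response = Y_data, covtype = ct,
     nugget = 1e-7, control = list(trace = FALSE)))
names(km_models) <- covtypes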
# CROSS VALIDATION CRITERIA
Y_pred <- leaveOneOut.km(myfun_km_matern5_2, "UK")$mean
CV_matern5_2 <-data.frame( R2 = R2(Y_data, Y_pred),
RMSE = RMSE(Y_data, Y_pred),
MAE = MAE(Y_data, Y_pred), row.names="matern5_2")
Y_pred <- leaveOneOut.km(myfun_km_matern3_2, "UK")$mean
CV_matern3_2 <-data.frame( R2 = R2(Y_data, Y_pred),
RMSE = RMSE(Y_data, Y_pred),
MAE = MAE(Y_data, Y_pred), row.names="matern3_2")
Y_pred <- leaveOneOut.km(myfun_km_gauss, "UK")$mean
CV_gauss <-data.frame( R2 = R2(Y_data, Y_pred),
RMSE = RMSE(Y_data, Y_pred),
MAE = MAE(Y_data, Y_pred), row.names="gauss")
Y_pred <- leaveOneOut.km(myfun_km_exp, "UK")$mean
CV_exp <-data.frame( R2 = R2(Y_data, Y_pred),
RMSE = RMSE(Y_data, Y_pred),
MAE = MAE(Y_data, Y_pred), row.names="exp")
# PRINT THE RESULTS TO COMPARE
CV_matern5_2
CV_matern3_2
CV_gauss
CV_exp
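Leave-one-out is convenient here because the design is small, but since the simulator is cheap we can also validate on a large independent test set. The sketch below is illustrative (the test-set size and helper names are our choices); it uses randomLHS from the lhs package and the same predict() call as later cells:
In [ ]:
# EXTERNAL VALIDATION (illustrative sketch): compare the four kernels on an
# independent random Latin hypercube test set
n_test <- 500
X_test <- t(input_min + (input_max - input_min)*t(randomLHS(n = n_test, k = n_dim)))
Y_test <- apply(X_test, 1, myfun)
X_test_df <- data.frame(X_test)
colnames(X_test_df) <- paste0("x", 1:n_dim)
models_cv <- list(matern5_2 = myfun_km_matern5_2, matern3_2 = myfun_km_matern3_2,
                  gauss = myfun_km_gauss, exp = myfun_km_exp)
for (ct in names(models_cv)) {
  Y_test_pred <- predict(models_cv[[ct]], X_test_df, "UK", checkNames = FALSE)$mean
  print(data.frame(R2 = R2(Y_test, Y_test_pred),
                   RMSE = RMSE(Y_test, Y_test_pred),
                   MAE = MAE(Y_test, Y_test_pred), row.names = ct))
}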
In [17]:
# SELECT THE GP MODEL WITH THE BEST CROSS-VALIDATION CRITERIA
myfun_km <- myfun_km_gauss
In [22]:
# CROSS VALIDATION CRITERIA
Y_pred <- leaveOneOut.km(myfun_km, "UK")$mean
data.frame(
R2 = R2(Y_data, Y_pred),
RMSE = RMSE(Y_data, Y_pred),
MAE = MAE(Y_data, Y_pred))
# PLOT DIAGNOSTICS
plot(myfun_km)
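# plot.km displays leave-one-out diagnostics: fitted values vs. the response,
# standardized residuals, and a normal QQ-plot of the standardized residuals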
In [18]:
# PRINT THE PARAMETERS
show(myfun_km)
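# show() prints the fitted trend coefficients and the maximum-likelihood
# estimates of the covariance parameters (one range per input, plus variance)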
In [20]:
# PLOT THE GPR MEAN AND VARIANCE, AS WELL AS THE REAL FUNCTION
d_input <- length(input_min)
# Contour plots
par(mfrow = c(3, 3))
for (i in 1:d_input) {
  for (j in 1:d_input) {
    if (i > j) {
      # GET THE GRID FOR INPUTS
      n.grid <- 100
      x1.grid <- seq(input_min[i], input_max[i], length.out = n.grid)
      x2.grid <- seq(input_min[j], input_max[j], length.out = n.grid)
      X.grid <- expand.grid(x1 = x1.grid, x2 = x2.grid)
      # COMPUTE THE REAL FUNCTION
      myfun_2d <- function(xx) {
        zz <- 0.5*(input_min + input_max)
        zz[i] <- xx[1]; zz[j] <- xx[2]
        return(myfun(zz))
      }
      y.grid <- apply(X.grid, 1, myfun_2d)
      contour(x1.grid, x2.grid, matrix(y.grid, n.grid, n.grid), 11, main = "Real function")
      points(X_data[, i], X_data[, j], pch = 19, cex = 1.5, col = "red")
      # COMPUTE THE GPR MEAN
      myfun_km_pred_2d_mean <- function(xx) {
        zz <- 0.5*(input_min + input_max)
        zz[i] <- xx[1]; zz[j] <- xx[2]
        return(predict(myfun_km, t(matrix(zz)), "UK", checkNames = FALSE)$mean)
      }
      y.pred.grid_mean <- apply(X.grid, 1, myfun_km_pred_2d_mean)
      contour(x1.grid, x2.grid, matrix(y.pred.grid_mean, n.grid, n.grid), 11, main = "Posterior GP regression mean")
      points(X_data[, i], X_data[, j], pch = 19, cex = 1.5, col = "red")
      # COMPUTE THE GPR VARIANCE
      myfun_km_pred_2d_sd <- function(xx) {
        zz <- 0.5*(input_min + input_max)
        zz[i] <- xx[1]; zz[j] <- xx[2]
        return(predict(myfun_km, t(matrix(zz)), "UK", checkNames = FALSE)$sd)
      }
      y.pred.grid_sd <- apply(X.grid, 1, myfun_km_pred_2d_sd)
      contour(x1.grid, x2.grid, matrix(y.pred.grid_sd^2, n.grid, n.grid), 15, main = "Posterior GP regression variance")
      points(X_data[, i], X_data[, j], pch = 19, cex = 1.5, col = "red")
    }
  }
}
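Beyond the mean and variance surfaces, the posterior GP can also be visualized through conditional simulations. The sketch below is illustrative (the slice direction, number of draws, and small simulation nugget are arbitrary choices of ours); it uses DiceKriging's simulate method along a 1-D slice in θ1 with the other inputs fixed at the centre of the domain:
In [ ]:
# CONDITIONAL SIMULATIONS (illustrative sketch): posterior GP draws along a
# 1-D slice in theta1, the remaining inputs fixed at the domain centre
n.grid <- 100
z0 <- 0.5*(input_min + input_max)
X_slice <- matrix(rep(z0, each = n.grid), n.grid, input_d)
X_slice[, 1] <- seq(input_min[1], input_max[1], length.out = n.grid)
X_slice_df <- data.frame(X_slice)
colnames(X_slice_df) <- paste0("x", 1:input_d)
sims <- simulate(myfun_km, nsim = 5, newdata = X_slice_df, cond = TRUE,
                 nugget.sim = 1e-8, checkNames = FALSE)
matplot(X_slice[, 1], t(sims), type = "l", lty = 1,
        xlab = "theta1", ylab = "f",
        main = "Conditional simulations of the posterior GP")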