Objective :
Related material:
Readings :
- Rasmussen, Carl Edward. "Gaussian processes in machine learning." In Advanced lectures on machine learning, pp. 63-71. Springer Berlin Heidelberg, 2004.
- see http://www.GaussianProcess.org/gpml
- Chapters: 2, 4, 5.1, & 5.4.2
- Slides provided
Software :
- R-cran (https://cran.r-project.org/)
- R packages
- DiceKriging (https://cran.r-project.org/web/packages/DiceKriging/index.html)
- lhs (https://cran.r-project.org/web/packages/lhs/index.html)
- Roustant, Olivier, David Ginsbourger, and Yves Deville. "DiceKriging, DiceOptim: Two R packages for the analysis of computer experiments by kriging-based metamodeling and optimization." (2012).
- Plus dependencies ...
In [2]:
# DOWNLOAD THE REQUIRED R PACKAGES FROM CRAN
for (pkg in c("DiceKriging", "DiceOptim", "lhs")) {
  install.packages(pkg, repos = "http://cran.us.r-project.org")
}
# install.packages("tcltk", repos = "http://cran.us.r-project.org")
# install.packages("aplpack", repos = "http://cran.us.r-project.org")
In [3]:
# ATTACH THE REQUIRED R PACKAGES
for (pkg in c("lhs", "DiceKriging", "DiceOptim")) {
  library(pkg, character.only = TRUE)
}
# library("tcltk")
# library("aplpack")
We consider the Franke's 2D function, $f(x)$.
We pretend that it is a "black-box" function; namely:
- Its mathematical formula is not available
- It is too expensive to compute its output $f(x)$ for a given input $x$
However, just to satisfy our curiosity, here is what it looks like:
$$ f(x) = 0.75 \exp\!\left(-\frac{(9 x_1-2)^2}{4} - \frac{(9 x_2-2)^2}{4}\right) + 0.75 \exp\!\left(-\frac{(9 x_1+1)^2}{49} - \frac{9 x_2+1}{10}\right) \\ + 0.5 \exp\!\left(-\frac{(9 x_1-7)^2}{4} - \frac{(9 x_2-3)^2}{4}\right) - 0.2 \exp\!\left(-(9x_1-4)^2 - (9x_2-7)^2\right) $$
In [56]:
# THIS IS THE "BLACK-BOX" FUNCTION WE ARE INTERESTED IN
# Franke's 2D test function evaluated at a single point.
#   xx : numeric vector of length 2 giving (x1, x2).
# Returns the scalar f(x1, x2): three Gaussian bumps minus one Gaussian dip.
franke2d <- function(xx) {
  u <- 9 * xx[1]
  v <- 9 * xx[2]
  bump1 <- 0.75 * exp(-((u - 2)^2 + (v - 2)^2) / 4)
  bump2 <- 0.75 * exp(-(u + 1)^2 / 49 - (v + 1) / 10)
  bump3 <- 0.50 * exp(-((u - 7)^2 + (v - 3)^2) / 4)
  dip   <- 0.20 * exp(-(u - 4)^2 - (v - 7)^2)
  bump1 + bump2 + bump3 - dip
}
# Treat franke2d as the generic black-box, fix its input box, and
# visualise the true surface on a regular 100 x 100 grid.
myfun <- franke2d
input_min <- c(0.0, 0.0)
input_max <- c(1.0, 1.0)
n.grid <- 100
x1.grid <- seq(input_min[1], input_max[1], length.out = n.grid)
x2.grid <- seq(input_min[2], input_max[2], length.out = n.grid)
X.grid <- expand.grid(x1 = x1.grid, x2 = x2.grid)
y.grid <- apply(X.grid, 1, myfun)
# Contour view of the true function
contour(x1.grid, x2.grid, matrix(y.grid, n.grid, n.grid), 10,
        main = "Real function",
        xlab = "x1", ylab = "x2",
        xlim = c(0, 1), ylim = c(0, 1))
# 3-D perspective view of the same surface
persp(x1.grid, x2.grid, matrix(y.grid, n.grid, n.grid),
      xlab = "x1", ylab = "x2", zlab = "f(x)",
      theta = 30, phi = 30,
      xlim = c(0, 1), ylim = c(0, 1),
      col = "blue",
      main = "Real function")
In [57]:
# Generate a space-filling training data-set: an optimum Latin hypercube
# design in [0, 1]^2, rescaled to the input box, then evaluated on myfun.
n_data <- 20
n_dim <- 2
unit_design <- optimumLHS(n = n_data, k = n_dim)
X_data <- t(input_min + (input_max - input_min) * t(unit_design))
Y_data <- apply(X_data, 1, myfun)
plot(X_data, xlab = "x1", ylab = "x2", main = "Design points")
In [58]:
# We use the function:
# km {DiceKriging}
# Open the help page for km() (Gaussian-process / kriging model fitting).
?km
Out[58]:
In [59]:
# Diagnostics (Feel free to use others that you know)
# Coefficient of determination: 1 - MSE(Ypred) / variance of Y (biased form).
# Equals 1 for a perfect prediction, 0 for predicting the mean of Y.
R2 <- function(Y, Ypred) {
  obs <- as.numeric(Y)
  fit <- as.numeric(Ypred)
  1 - mean((obs - fit)^2) / mean((obs - mean(obs))^2)
}
# Root mean squared error between observations Y and predictions Ypred.
RMSE <- function(Y, Ypred) {
  err <- as.numeric(Y) - as.numeric(Ypred)
  sqrt(mean(err^2))
}
# Mean absolute error between observations Y and predictions Ypred.
MAE <- function(Y, Ypred) {
  err <- as.numeric(Y) - as.numeric(Ypred)
  mean(abs(err))
}
In [60]:
# Train GP regressions that differ only in their covariance function.
# The four original km() calls were identical except for covtype, so the
# shared settings are factored into one helper to avoid copy-paste drift.
# Helper: fit a constant-trend kriging model on (X_data, Y_data) with the
# given covariance structure; all other km() settings are shared.
fit_km <- function(covtype) {
  km(formula = ~1,                  # linear trend formula; e.g. "~ 1 +x1 +exp(x2) +I(x1*x2) +I(x1^2)"
     design = data.frame(x1 = X_data[, 1],
                         x2 = X_data[, 2]),  # design of experiments
     response = Y_data,             # the values of the 1-dimensional output
     covtype = covtype,             # covariance structure under comparison
     coef.trend = NULL,             # NULL => trend parameters are estimated
     coef.cov = NULL,               # NULL => covariance parameters are estimated
     coef.var = NULL,               # NULL => variance parameter is estimated
     nugget = 1e-7,                 # small homogeneous nugget effect (numerical stability)
     noise.var = NULL,              # no per-observation noise variance
     optim.method = "BFGS",         # optimization method for likelihood maximization
     lower = NULL,                  # lower bounds of the correlation parameters
     upper = NULL,                  # upper bounds of the correlation parameters
     control = list(trace = FALSE), # suppress optimization trace information
     kernel = NULL)                 # no user-defined covariance structure
}
myfun_km_matern5_2 <- fit_km("matern5_2")
myfun_km_matern3_2 <- fit_km("matern3_2")
myfun_km_gauss     <- fit_km("gauss")
myfun_km_exp       <- fit_km("exp")
# CROSS VALIDATION CRITERIA
# The four original leave-one-out blocks were identical except for the model
# and row label, so the computation is factored into one helper.
# Helper: leave-one-out CV diagnostics (R2 / RMSE / MAE) for a fitted km
# model, returned as a one-row data frame labelled by `label`.
loo_cv <- function(model, label) {
  Y_pred <- leaveOneOut.km(model, "UK")$mean
  data.frame(R2   = R2(Y_data, Y_pred),
             RMSE = RMSE(Y_data, Y_pred),
             MAE  = MAE(Y_data, Y_pred),
             row.names = label)
}
# Row labels (including the tilde padding) kept exactly as before.
CV_matern5_2 <- loo_cv(myfun_km_matern5_2, "matern5_2")
CV_matern3_2 <- loo_cv(myfun_km_matern3_2, "matern3_2")
CV_gauss     <- loo_cv(myfun_km_gauss, "gauss~~~~")
CV_exp       <- loo_cv(myfun_km_exp, "exp~~~~~~")
CV_matern5_2
CV_matern3_2
CV_gauss
CV_exp
Out[60]:
Out[60]:
Out[60]:
Out[60]:
In [61]:
# Train the GP regression
# Keep the Matern 5/2 fit as the working GP model for the rest of the
# analysis (presumably chosen from the CV tables above — confirm against output).
myfun_km <- myfun_km_matern5_2
In [62]:
# Leave-one-out cross-validation criteria for the selected model
Y_pred <- leaveOneOut.km(myfun_km, "UK")$mean
data.frame(R2   = R2(Y_data, Y_pred),
           RMSE = RMSE(Y_data, Y_pred),
           MAE  = MAE(Y_data, Y_pred))
# Built-in diagnostic plots for the fitted km object
plot(myfun_km)
Out[62]:
In [63]:
# PRINT THE PARAMETERS
# show() prints a summary of the fitted km object (trend, variance and
# range estimates).
show(myfun_km)
In [64]:
# Plot the true function next to the GP posterior mean and variance on a grid.
n.grid <- 100
x1.grid <- seq(input_min[1], input_max[1], length.out = n.grid)
x2.grid <- seq(input_min[2], input_max[2], length.out = n.grid)
X.grid <- expand.grid(x1 = x1.grid, x2 = x2.grid)
y.grid <- apply(X.grid, 1, myfun)
y.pred.grid <- predict(myfun_km, X.grid, "UK")
# Only three panels are drawn, so use a 1x3 layout (the original c(3, 3)
# reserved a 3x3 grid and left six cells empty, shrinking the plots).
par(mfrow = c(1, 3))
contour(x1.grid, x2.grid, matrix(y.grid, n.grid, n.grid), 11,
        main = "Real function")
points(X_data[, 1], X_data[, 2], pch = 19, cex = 1.5, col = "red")
contour(x1.grid, x2.grid, matrix(y.pred.grid$mean, n.grid, n.grid), 11,
        main = "Posterior GP regression mean")
points(X_data[, 1], X_data[, 2], pch = 19, cex = 1.5, col = "red")
contour(x1.grid, x2.grid, matrix(y.pred.grid$sd^2, n.grid, n.grid), 15,
        main = "Posterior GP regression variance")
points(X_data[, 1], X_data[, 2], pch = 19, cex = 1.5, col = "red")
In [ ]:
# Franke's 2D function on [0, 1]^2 (same definition as above):
# three Gaussian bumps minus one Gaussian dip.
franke2d <- function(xx) {
  p <- 9 * xx[1]
  q <- 9 * xx[2]
  0.75 * exp(-((p - 2)^2 + (q - 2)^2) / 4) +
    0.75 * exp(-(p + 1)^2 / 49 - (q + 1) / 10) +
    0.5 * exp(-((p - 7)^2 + (q - 3)^2) / 4) -
    0.2 * exp(-(p - 4)^2 - (q - 7)^2)
}
input_min <- c(0.0, 0.0)
input_max <- c(1.0, 1.0)
# Branin's (rescaled) 2D function on [0, 1]^2.
braninsc <- function(xx) {
  # Map the unit square onto Branin's native domain [-5, 10] x [0, 15].
  u <- 15 * xx[1] - 5
  v <- 15 * xx[2]
  quad <- v - 5.1 * u^2 / (4 * pi^2) + 5 * u / pi - 6
  osc <- (10 - 10 / (8 * pi)) * cos(u)
  # Center and scale the raw Branin value.
  (quad^2 + osc - 44.81) / 51.95
}
input_min <- c(0.0, 0.0)
input_max <- c(1.0, 1.0)
# Currin et al. 2D exponential function on [0, 1]^2.
curretal2d <- function(xx) {
  a <- xx[1]
  b <- xx[2]
  damp <- 1 - exp(-1 / (2 * b))
  num <- 2300 * a^3 + 1900 * a^2 + 2092 * a + 60
  den <- 100 * a^3 + 500 * a^2 + 4 * a + 20
  damp * num / den
}
input_min <- c(0.0, 0.0)
input_max <- c(1.0, 1.0)