This notebook includes a pybind11 implementation of Stochastic Gradient Hamiltonian Monte Carlo (SGHMC). Its performance is compared against a pure-Python implementation on the Pima Indians diabetes dataset.
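For reference, each sampling trajectory implemented below follows the discretized SGHMC dynamics of Chen et al. (2014), with position $\theta$, momentum $r$, step size $\epsilon$, mass matrix $M$, friction matrix $C$, noise estimate $\hat B$, and minibatch gradient $\nabla \tilde U(\theta)$:

$$\theta \leftarrow \theta + \epsilon M^{-1} r$$

$$r \leftarrow r - \epsilon \nabla \tilde U(\theta) - \epsilon C M^{-1} r + \mathcal{N}\!\left(0,\, 2(C - \hat B)\epsilon\right)$$

In the code below, the noise covariance $2(C - \hat B)\epsilon$ is precomputed once as `D` in `sghmc_opt_run` and passed to `sghmc_opt`.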
In [3]:
import os
if not os.path.exists('./eigen'):
! git clone https://github.com/RLovelett/eigen.git
In [4]:
import cppimport
import numpy as np
import matplotlib.pyplot as plt
import sghmc
In [1]:
%%file wrap.cpp
<%
cfg['compiler_args'] = ['-std=c++11']
cfg['include_dirs'] = ['./eigen']
setup_pybind11(cfg)
%>
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <pybind11/eigen.h>
#include <pybind11/stl.h>
#include <Eigen/Cholesky>
#include <random>
#include <algorithm>
#include <iterator>
#include <iostream>
#include <unordered_set>
#include <vector>
namespace py = pybind11;
Eigen::VectorXd logistic(Eigen::VectorXd x) {
    // Elementwise logistic (sigmoid) transform
    return (1.0/(1.0 + (-x.array()).exp())).matrix();
}
Eigen::VectorXd gd(Eigen::MatrixXd X, Eigen::VectorXd y, Eigen::VectorXd beta, double alpha, int niter) {
    // Batch gradient ascent on the logistic log-likelihood,
    // with step size alpha, run for niter iterations
    int n = X.rows();
    Eigen::VectorXd y_pred;
    Eigen::VectorXd resid;
    Eigen::VectorXd grad;
    Eigen::MatrixXd Xt = X.transpose();
    for (int i=0; i<niter; i++) {
        y_pred = logistic(X * beta);
        resid = y - y_pred;
        grad = Xt * resid / n;
        beta = beta + alpha * grad;
    }
    return beta;
}
Eigen::MatrixXd mvnorm(Eigen::VectorXd mu, Eigen::MatrixXd Sigma, int n) {
    /*
    Draws n samples from a multivariate normal N(mu, Sigma),
    one per row, via the Cholesky factor of Sigma.
    */
    std::default_random_engine gen(std::random_device{}());
    std::normal_distribution<double> distribution(0, 1);
    Eigen::MatrixXd A(Sigma.llt().matrixL());
    int p = mu.size();
    Eigen::MatrixXd Z(n, p);
    for(int i=0; i<n; i++) {
        Eigen::VectorXd v(p);
        for(int j=0; j<p; j++){
            v[j] = distribution(gen);
        }
        Z.row(i) = (mu + A*v).transpose();
    }
    return Z;
}
std::unordered_set<int> pickSet(int N, int k, std::mt19937& gen)
{
    // Floyd's algorithm: k distinct indices drawn from {0, ..., N-1}.
    // Adapted from http://stackoverflow.com/questions/28287138/c-randomly-sample-k-numbers-from-range-0n-1-n-k-without-replacement/28287837
    std::unordered_set<int> sample;
    for(int d = N - k; d < N; d++) {
        int t = std::uniform_int_distribution<>(0, d)(gen);
        if (sample.find(t) == sample.end()) {
            sample.insert(t);
        } else {
            sample.insert(d);
        }
    }
    return sample;
}
std::vector<int> pick(int N, int k) {
    // Randomly samples k distinct integers from {0, ..., N-1}
    // Adapted from http://stackoverflow.com/questions/28287138/c-randomly-sample-k-numbers-from-range-0n-1-n-k-without-replacement/28287837
    std::random_device rd;
    std::mt19937 gen(rd());
    std::unordered_set<int> elems = pickSet(N, k, gen);
    std::vector<int> result(elems.begin(), elems.end());
    std::shuffle(result.begin(), result.end(), gen);
    return result;
}
Eigen::VectorXd stogradU_logistic(Eigen::VectorXd theta, Eigen::VectorXd Y, Eigen::MatrixXd X, int nbatch, double phi) {
    // Stochastic gradient of the potential U (negative log posterior)
    // for logistic regression with a Gaussian prior of precision phi,
    // estimated from a random minibatch of nbatch rows
    int n = X.rows();
    int p = X.cols();
    // Allocate
    Eigen::MatrixXd Xsamp = Eigen::MatrixXd::Zero( nbatch, p );
    Eigen::VectorXd Ysamp = Eigen::VectorXd::Zero( nbatch );
    Eigen::VectorXd Y_pred;
    Eigen::VectorXd epsilon;
    Eigen::VectorXd grad;
    // pick returns 0-based row indices
    std::vector<int> r = pick(n, nbatch);
    for(int i=0; i<nbatch; i++) {
        Xsamp.row(i) = X.row(r[i]);
        Ysamp.row(i) = Y.row(r[i]);
    }
    Eigen::MatrixXd Xsampt = Xsamp.transpose();
    Y_pred = logistic(Xsamp * theta);
    epsilon = Ysamp - Y_pred;
    // Scale the minibatch gradient up to the full data set (note the
    // floating-point division) and add the prior contribution
    grad = (static_cast<double>(n)/nbatch) * Xsampt * epsilon - phi * theta;
    return -grad;
}
Eigen::VectorXd sghmc_opt(Eigen::VectorXd Y, Eigen::MatrixXd X, Eigen::MatrixXd M, Eigen::MatrixXd Minv, double eps, int m, Eigen::VectorXd theta, Eigen::MatrixXd C, Eigen::MatrixXd B, Eigen::MatrixXd D, double phi, int nbatch) {
    // One SGHMC trajectory of m leapfrog-style steps.
    // B is unused here; the caller precomputes D = 2*(C - B)*eps.
    int p = X.cols();
    Eigen::VectorXd sgrad;
    Eigen::VectorXd noise;
    // Resample the momentum at the start of the trajectory
    Eigen::VectorXd mu = Eigen::VectorXd::Zero( p );
    Eigen::VectorXd r = mvnorm(mu, M, 1).row(0);
    for(int i=0; i<m; i++) {
        theta = theta + eps * Minv * r;
        sgrad = stogradU_logistic(theta, Y, X, nbatch, phi);
        noise = mvnorm(mu, D, 1).row(0);
        // Friction term eps*C*Minv*r plus injected noise N(0, D)
        r = r - eps*sgrad - eps*C*Minv*r + noise;
    }
    return theta;
}
Eigen::MatrixXd sghmc_opt_run(Eigen::VectorXd Y, Eigen::MatrixXd X, Eigen::MatrixXd M, double eps, int m, Eigen::VectorXd theta, Eigen::MatrixXd C, Eigen::MatrixXd V, double phi, int nsample, int nbatch) {
    // sghmc wrapper: precomputes the constant matrices once and
    // collects one draw per trajectory
    int p = X.cols();
    // Precompute
    Eigen::MatrixXd Minv = M.inverse();
    Eigen::MatrixXd B = 0.5 * V * eps;
    Eigen::MatrixXd D = 2*(C - B)*eps;
    // Allocate
    Eigen::MatrixXd samples(nsample, p);
    for(int i=0; i<nsample; i++) {
        theta = sghmc_opt(Y, X, M, Minv, eps, m, theta, C, B, D, phi, nbatch);
        samples.row(i) = theta.transpose();
    }
    return samples;
}
PYBIND11_PLUGIN(wrap) {
    py::module m("wrap", "pybind11 SGHMC plugin");
    m.def("gd", &gd, "The gradient descent function.");
    m.def("logistic", &logistic, "The logistic function.");
    m.def("mvnorm", &mvnorm, "Random multivariate normal samples");
    m.def("sghmc_opt", &sghmc_opt, "SGHMC");
    m.def("stogradU_logistic", &stogradU_logistic, "Logistic stochastic gradient");
    m.def("sghmc_opt_run", &sghmc_opt_run, "Wrapper for sghmc");
    // pickSet is not exposed: its std::mt19937& argument has no Python binding
    m.def("pick", &pick, "Random sampling without replacement");
    return m.ptr();
}
In [5]:
cppimport.force_rebuild()
funcs = cppimport.imp("wrap")
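As a quick smoke test of the compiled bindings (not part of the original benchmark; the grid and sample sizes here are arbitrary choices), the bound functions can be checked against NumPy equivalents:
In [ ]:
# Sanity checks for the compiled module
x = np.linspace(-3, 3, 7)
assert np.allclose(np.ravel(funcs.logistic(x)), 1/(1 + np.exp(-x)))

# mvnorm draws should have roughly the requested covariance
Sigma = np.array([[2.0, 0.5], [0.5, 1.0]])
draws = funcs.mvnorm(np.zeros(2), Sigma, 50000)
print(np.cov(draws, rowvar=False))  # ~Sigma, up to Monte Carlo error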
In [40]:
### Load data and set parameters
pima = np.genfromtxt('pima-indians-diabetes.data', delimiter=',')
# Design matrix with intercept; the last column is the binary outcome
X = np.concatenate((np.ones((pima.shape[0],1)), pima[:,0:8]), axis=1)
Y = pima[:,8]
# Standardize the predictors; centering zeroes out the intercept column,
# which is then dropped
Xs = (X - np.mean(X, axis=0))/np.concatenate((np.ones(1), np.std(X[:,1:], axis=0)))
Xs = Xs[:,1:]
n, q = Xs.shape
# SGHMC - Scaled (no intercept)
nsample = 1000
m = 20
eps = .002
theta = np.zeros(q)
phi = 5
nbatch = 100
C = 1 * np.identity(q)
V = 0 * np.identity(q)
M = np.identity(q)
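Before profiling, a quick illustrative run (not in the original notebook) confirms the sampler returns an `nsample` by `q` matrix of draws; the trace plot of the first coefficient is just a visual check:
In [ ]:
samples = funcs.sghmc_opt_run(Y, Xs, M, eps, m, theta, C, V, phi, nsample, nbatch)
print(samples.shape)         # (nsample, q)
print(samples.mean(axis=0))  # crude posterior mean estimates
plt.plot(samples[:, 0])
plt.xlabel('iteration')
plt.ylabel(r'$\theta_0$')
plt.show()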
prun (pybind11):
In [41]:
%prun -q -D work_pybind11.prof funcs.sghmc_opt_run(Y, Xs, M, eps, m, np.zeros(q), C, V, phi, nsample, nbatch)
In [42]:
import pstats
p = pstats.Stats('work_pybind11.prof')
p.sort_stats('time', 'cumulative').print_stats()
pass
timeit (pybind11):
In [58]:
%timeit -n10 -r3 funcs.sghmc_opt_run(Y, Xs, M, eps, m, np.zeros(q), C, V, phi, nsample, nbatch)
In [56]:
%timeit -n1000 -r10 funcs.stogradU_logistic(theta, Y, Xs, nbatch, phi)
prun (pure Python):
In [46]:
%prun -q -D work_python.prof sghmc.run_sghmc(Y, Xs, sghmc.U_logistic, sghmc.stogradU_logistic, M, eps, m, np.zeros(q), C, V, phi, nsample, nbatch)
In [47]:
import pstats
p = pstats.Stats('work_python.prof')
p.sort_stats('time', 'cumulative').print_stats()
pass
timeit (pure Python):
In [54]:
%timeit -n1000 -r10 sghmc.stogradU_logistic(theta, Y, Xs, nbatch, phi)
In [55]:
%timeit -n5 -r3 sghmc.run_sghmc(Y, Xs, sghmc.U_logistic, sghmc.stogradU_logistic, M, eps, m, np.zeros(q), C, V, phi, nsample, nbatch)
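To put the two implementations side by side, the `-o` flag of `%timeit` can capture the timing results for a programmatic comparison (a convenience sketch; the repetition counts are arbitrary):
In [ ]:
t_cpp = %timeit -o -n10 -r3 funcs.sghmc_opt_run(Y, Xs, M, eps, m, np.zeros(q), C, V, phi, nsample, nbatch)
t_py = %timeit -o -n5 -r3 sghmc.run_sghmc(Y, Xs, sghmc.U_logistic, sghmc.stogradU_logistic, M, eps, m, np.zeros(q), C, V, phi, nsample, nbatch)
print('pybind11 speedup: {:.1f}x'.format(t_py.best / t_cpp.best))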