In [ ]:
"""
Bayesian neural network using mean-field variational inference.
(see, e.g., Blundell et al. (2015); Kucukelbir et al. (2016))
Inspired by autograd's Bayesian neural network example.

Probability model:
    Bayesian neural network
    Prior: Normal
    Likelihood: Normal with mean parameterized by fully connected NN
Variational model:
    Likelihood: Mean-field Normal
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import edward as ed
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from edward.models import Variational, Normal
from edward.stats import norm
# plt.style.use('ggplot')


class BayesianNN:
    """
    Bayesian neural network for regressing outputs y on inputs x.

    p((x, y), z) = Normal(y | NN(x; z), lik_variance) *
                   Normal(z | 0, 1),

    where z are the neural network's weights and lik_variance is known.

    Parameters
    ----------
    layer_sizes : list
        The size of each layer, ordered from input to output.
    nonlinearity : function, optional
        Non-linearity after each linear transformation in the neural
        network; aka activation function.
    lik_variance : float, optional
        Variance of the normal likelihood; aka noise parameter,
        homoscedastic variance, scale parameter.
    """
    def __init__(self, layer_sizes, nonlinearity=tf.nn.tanh,
                 lik_variance=0.01):
        self.layer_sizes = layer_sizes
        self.nonlinearity = nonlinearity
        self.lik_variance = lik_variance
        self.n_layers = len(layer_sizes)
        self.weight_dims = list(zip(layer_sizes[:-1], layer_sizes[1:]))
        self.n_vars = sum((m + 1) * n for m, n in self.weight_dims)
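        # For example, with layer_sizes=[1, 2, 2, 1] (used below),
        # weight_dims is [(1, 2), (2, 2), (2, 1)], so
        # n_vars = (1 + 1) * 2 + (2 + 1) * 2 + (2 + 1) * 1 = 13.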

    def unpack_weights(self, z):
        """Unpack weight matrices and biases from a flattened vector."""
        for m, n in self.weight_dims:
            yield tf.reshape(z[:m * n], [m, n]), \
                tf.reshape(z[m * n:(m * n + n)], [1, n])
            z = z[(m + 1) * n:]
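
    # Illustrative shapes: with weight_dims = [(1, 2), (2, 2), (2, 1)], this
    # generator yields (W, b) pairs of shapes (1, 2)/(1, 2), (2, 2)/(1, 2),
    # and (2, 1)/(1, 1), consuming the flattened vector `z` front to back.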

    def neural_network(self, x, zs):
        """
        Return an `n_samples` x `n_minibatch` matrix. Each row is
        the output of a neural network on the input data `x`,
        given a set of weights `z` from `zs`.
        """
        matrix = []
        for z in tf.unpack(zs):
            # Calculate neural network with weights given by `z`.
            h = x
            for W, b in self.unpack_weights(z):
                # broadcasting to do (h * W) + b (e.g. 40x10 + 1x10)
                h = self.nonlinearity(tf.matmul(h, W) + b)

            matrix += [tf.squeeze(h)]  # n_minibatch x 1 to n_minibatch

        return tf.pack(matrix)
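
    # Note on shapes: `zs` is n_samples x n_vars (one flattened weight vector
    # per row) and `x` is n_minibatch x 1, so the stacked result has one
    # length-n_minibatch row of predicted means per weight sample.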

    def log_lik(self, xs, zs):
        """Return a vector [log p(xs | zs[1, :]), ..., log p(xs | zs[S, :])]."""
        x, y = xs['x'], xs['y']
        mus = self.neural_network(x, zs)
        # Sum pointwise log-densities over the data for each sample of z.
        log_lik = tf.reduce_sum(
            norm.logpdf(y, loc=mus, scale=self.lik_variance), 1)
        return log_lik


def build_toy_dataset(N=50, noise_std=0.1):
    x = np.linspace(-3, 3, num=N)
    y = np.cos(x) + norm.rvs(0, noise_std, size=N)
    x = x.reshape((N, 1))
    return {'x': x, 'y': y}

ed.set_seed(42)
model = BayesianNN(layer_sizes=[1, 2, 2, 1])
variational = Variational()
variational.add(Normal(model.n_vars))
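# `variational` is a single fully factorized Normal over all model.n_vars
# weights and biases (the mean-field approximation named in the docstring).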
data = build_toy_dataset()
inference = ed.MFVI(model, variational, data)
inference.initialize()
sess = ed.get_session()
# FIRST VISUALIZATION (prior)
print("first visualization -- prior")
# Sample functions from variational model
mean, std = sess.run([variational.layers[0].loc,
                      variational.layers[0].scale])
rs = np.random.RandomState(0)
zs = rs.randn(10, variational.n_vars) * std + mean
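# Each row of `zs` is a draw from the current (just-initialized) variational
# Normal, via the shift-and-scale reparameterization z = mean + std * eps.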
zs = tf.constant(zs, dtype=tf.float32)
inputs = np.linspace(-5, 5, num=400, dtype=np.float32)
x = tf.expand_dims(tf.constant(inputs), 1)
mus = model.neural_network(x, zs)
outputs = mus.eval()
x, y = data['x'], data['y']