In [ ]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import Henbun as hb
In [ ]:
# random state
rng = np.random.RandomState(0)
In [ ]:
X = np.linspace(0,6,40).reshape(-1,1)
Y = np.sin(X) + rng.randn(40,1)*0.3
In [ ]:
plt.figure(figsize=(5,3))
plt.plot(X, Y, 'o', label='data')
plt.plot(X, np.sin(X), '--k', label='true')
plt.legend(loc='best')
In [ ]:
# Any model should inherit hb.model.Model
class GPR(hb.model.Model):
    def setUp(self):
        """
        Set up parameters and data for this model.
        Model.setUp is called immediately after Model.__init__().
        """
        # Data should be stored in the hb.param.Data class.
        self.X = hb.param.Data(X)
        self.Y = hb.param.Data(Y)
        # Variational parameters.
        # By default, the shape of the variational covariance is 'diagonal'.
        # In small problems, the fullrank covariance can be inferred.
        self.q = hb.variationals.Gaussian(shape=X.shape, q_shape='fullrank')
        # Kernel object for GPR.
        self.kern = hb.gp.kernels.UnitRBF()
        # Since our kernel does not contain the variance term,
        # we multiply by a positive variance parameter manually.
        # To constrain k_var to the positive domain, set the transform option.
        self.k_var = hb.param.Variable(shape=[1], transform=hb.transforms.positive)
        # Likelihood variance, which is also a positive parameter.
        self.var = hb.param.Variable(shape=[1], transform=hb.transforms.positive)
    @hb.model.AutoOptimize()
    def ELBO_gaussian(self):
        """
        Any method decorated by @AutoOptimize can be optimized.
        In the decorated method, [hb.param.Variable, hb.variationals.Variational,
        hb.param.Data] objects are treated as tf.Tensor, so TensorFlow operations
        can be used directly.
        Here, we calculate the ELBO that should be maximized.
        """
        # Whitened parameterization: see the plain-numpy illustration after this class.
        y_fit = tf.matmul(self.kern.Cholesky(self.X), self.q) * tf.sqrt(self.k_var)
        # The Kullback-Leibler divergence can be accessed by the self.KL() method.
        return tf.reduce_sum(hb.densities.gaussian(self.Y, y_fit, self.var))\
                - self.KL()
    @hb.model.AutoOptimize()
    def ELBO_student(self):
        """
        It is often convenient to prepare several variants of the target method.
        In this method, we assume a Student's t distribution as the likelihood.
        """
        y_fit = tf.matmul(self.kern.Cholesky(self.X), self.q) * tf.sqrt(self.k_var)
        # The Kullback-Leibler divergence can be accessed by the self.KL() method.
        return tf.reduce_sum(hb.densities.student_t(self.Y, y_fit, self.var, 3.0))\
                - self.KL()
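The construction y_fit = L q * sqrt(k_var) used above is the standard whitened parameterization: if q ~ N(0, I) and L is the Cholesky factor of the unit-variance RBF Gram matrix K, then L q * sqrt(k_var) is a draw from N(0, k_var * K). The short numpy check below is independent of Henbun and uses a unit lengthscale purely for illustration (an assumption, not UnitRBF's actual setting).
In [ ]:
# Plain-numpy illustration of the whitened parameterization (sketch only).
K = np.exp(-0.5 * (X - X.T)**2)                     # unit-variance RBF Gram matrix, lengthscale 1
L = np.linalg.cholesky(K + 1e-6 * np.eye(len(X)))   # jitter for numerical stability
q_white = rng.randn(len(X), 1)                      # whitened coefficients, q ~ N(0, I)
f_prior = L @ q_white                               # a GP prior draw, f ~ N(0, K)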
In [ ]:
model_gpr = GPR()
In [ ]:
# First, we need to compile the model.
model_gpr.ELBO_gaussian().compile()
In [ ]:
# To evaluate this method with the current parameters, the run() method can be used.
model_gpr.ELBO_gaussian().run()
In [ ]:
# To maximize this objective, the optimize() method can be used.
model_gpr.ELBO_gaussian().optimize(maxiter=20000)
In [ ]:
# See the resultant ELBO value
model_gpr.ELBO_gaussian().run()
In [ ]:
# There are several options for obtaining the result.
# The most basic way is to build an op and then evaluate it with the model.run() method
# (with an appropriate feed_dict, if any).
with model_gpr.tf_mode():
    op = tf.matmul(model_gpr.kern.Cholesky(model_gpr.X), model_gpr.q) * tf.sqrt(model_gpr.k_var)
# Each run draws a different sample from the variational posterior.
f_samples = [model_gpr.run(op) for _ in range(30)]
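Since each call to run() draws a fresh sample from the variational posterior, the samples can be summarized with plain numpy (this is ordinary array arithmetic, not a Henbun-specific API), for example:
In [ ]:
# Summarize the posterior samples with plain numpy.
f_stack = np.stack(f_samples, axis=0)    # shape (30, 40, 1): 30 posterior draws
f_mean = f_stack.mean(axis=0)            # pointwise posterior mean of the fit
f_std = f_stack.std(axis=0)              # pointwise posterior standard deviation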
In [ ]:
plt.figure(figsize=(5,3))
plt.plot(X, Y, 'o', label='data')
for s in f_samples: plt.plot(X, s,'k', alpha=0.2)
plt.plot(X, np.sin(X), '--k', label='true')
plt.legend(loc='best')
In [ ]:
# To get the current parameter values, the .value property can be used.
In [ ]:
print('kernel lengthscale', model_gpr.kern.lengthscales.value)
print('kernel variance', model_gpr.k_var.value)
print('likelihood variance', model_gpr.var.value)
In [ ]:
# We now add some outliers to the toy data above.
Y[rng.randint(0, X.shape[0], 5), 0] = 2*rng.randn(5)  # add non-Gaussian noise
In [ ]:
plt.figure(figsize=(5,3))
plt.plot(X, Y, 'o', label='data')
plt.plot(X, np.sin(X), '--k', label='true')
plt.legend(loc='best')
In [ ]:
# Just assign new values
model_gpr.X = X
model_gpr.Y = Y
In [ ]:
model_gpr.ELBO_gaussian().compile()
model_gpr.ELBO_gaussian().optimize(maxiter=10000)
In [ ]:
f_samples = [model_gpr.run(op) for _ in range(30)]
In [ ]:
# Now compile and optimize the Student's-t variant of the ELBO.
model_gpr.ELBO_student().compile()
model_gpr.ELBO_student().optimize(maxiter=10000)
In [ ]:
f_samples_student = [model_gpr.run(op) for _ in range(30)]
In [ ]:
plt.figure(figsize=(5,3))
plt.plot(X, Y, 'o', label='data')
for s in f_samples: plt.plot(X, s,'k', alpha=0.2)
for s in f_samples_student: plt.plot(X, s,'r', alpha=0.2)
plt.plot(X, np.sin(X), '--k', label='true')
plt.legend(loc='best')
The above is a simple example of Henbun usage.
Note, however, that Henbun is not very efficient for such a simple problem;
a dedicated single-purpose library such as GPflow is much faster here.