In [2]:
## Import necessary stuff
%matplotlib inline
import sys
sys.path.append('/apps')
import django
django.setup()
import matplotlib.pyplot as plt
import pandas as pd
import itertools as it
import numpy as np
import pymc3 as pm
In [11]:
## Simulation of the data
# set the seed
#np.random.seed(1)
n = 100 # The number of data points
X = np.linspace(0, 10, n)[:, None] # The inputs to the GP, they must be arranged as a column vector
# Define the true covariance function and its parameters
l_true = 1.0
n_true = 3.0
cov_func = n_true**2 * pm.gp.cov.Matern52(1, l_true)
# A mean function that is zero everywhere
mean_func = pm.gp.mean.Zero()
# The latent function values are one sample from a multivariate normal
# Note that we have to call `eval()` because PyMC3 built on top of Theano
f_true = np.random.multivariate_normal(mean_func(X).eval(),
cov_func(X).eval() + 1e-8*np.eye(n), 1).flatten()
# The observed data is the latent function plus a small amount of T distributed noise
# The standard deviation of the noise is `sigma`, and the degrees of freedom is `nu`
s_true = 2.0
v_true = 3.0
y = f_true + s_true * np.random.standard_t(v_true, size=n)
## Plot the data and the unobserved latent function
fig = plt.figure(figsize=(12,5)); ax = fig.gca()
ax.plot(X, f_true, "dodgerblue", lw=3, label="True f");
ax.plot(X, y, 'ok', ms=3, label="Data");
ax.set_xlabel("X"); ax.set_ylabel("y"); plt.legend();
In [12]:
with pm.Model() as model:
l = pm.Gamma("l",alpha=2,beta=1)
n = pm.HalfCauchy("n",beta=5)
cov = n ** 2 * pm.gp.cov.Matern52(1,l)
gp = pm.gp.Latent(cov_func=cov)
f = gp.prior("f",X=X)
sigma = pm.HalfCauchy("sigma",beta=5)
v = pm.Gamma("v",alpha=2,beta=0.1)
y_ = pm.StudentT("y",mu=f,lam=1.0/sigma,nu=v,observed=y)
In [13]:
with model:
trace = pm.sample(100)
In [14]:
# plot the results
fig = plt.figure(figsize=(12,5)); ax = fig.gca()
# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
plot_gp_dist(ax, trace["f"], X);
# plot the data and the true latent function
plt.plot(X, f_true, "dodgerblue", lw=3, label="True f");
plt.plot(X, y, 'ok', ms=3, alpha=0.5, label="Observed data");
# axis labels and title
plt.xlabel("X"); plt.ylabel("True f(x)");
plt.title("Posterior distribution over $f(x)$ at the observed values"); plt.legend();
In [54]:
## With ADVI method
with model:
%time mean_field = pm.fit(method='advi',n=1500)
%time tracef = mean_field.sample(draws=5000)
In [35]:
import matplotlib.lines as mlines
#'import matplotlib.pyplot as plt
In [55]:
# plot the results
fig = plt.figure(figsize=(12,5)); ax = fig.gca()
# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist
mcmc = plot_gp_dist(ax, trace["f"], X,palette='Reds');
advi = plot_gp_dist(ax,tracef['f'],X,palette='Blues');
# plot the data and the true latent function
truth, = plt.plot(X, f_true, "orange", lw=3, label="True f");
obs, = plt.plot(X, y, 'ok', ms=3, alpha=0.5, label="Observed data");
# axis labels and title
plt.xlabel("X");
plt.ylabel("True f(x)");
plt.title("Posterior distribution over $f(x)$ at the observed values");
blue_advi = mlines.Line2D([], [], color='blue',label='Blue stars')
red_mcmc = mlines.Line2D([], [], color='red', label='Blue stars')
plt.legend([truth,obs,blue_advi,red_mcmc],("True","dd","ADVI","MCMC"));
In [57]:
pm.traceplot(trace)
Out[57]:
In [58]:
pm.traceplot(tracef)
Out[58]:
In [ ]: