First, let's set up some environmental dependencies. These just make the numerics easier and adjust some of the plotting defaults to make things more legible.
In [1]:
# Python 3 compatibility
from __future__ import division, print_function
from six.moves import range
# system functions that are always useful to have
import time, sys, os
# basic numeric setup
import numpy as np
import math
# inline plotting
%matplotlib inline
# plotting
import matplotlib
from matplotlib import pyplot as plt
# seed the random number generator
np.random.seed(715)
In [2]:
# re-defining plotting defaults
from matplotlib import rcParams
rcParams.update({'xtick.major.pad': '7.0'})
rcParams.update({'xtick.major.size': '7.5'})
rcParams.update({'xtick.major.width': '1.5'})
rcParams.update({'xtick.minor.pad': '7.0'})
rcParams.update({'xtick.minor.size': '3.5'})
rcParams.update({'xtick.minor.width': '1.0'})
rcParams.update({'ytick.major.pad': '7.0'})
rcParams.update({'ytick.major.size': '7.5'})
rcParams.update({'ytick.major.width': '1.5'})
rcParams.update({'ytick.minor.pad': '7.0'})
rcParams.update({'ytick.minor.size': '3.5'})
rcParams.update({'ytick.minor.width': '1.0'})
rcParams.update({'font.size': 30})
In [3]:
import dynesty
To explore more of the functionality afforded by our different sampling/bounding options, we will demonstrate how these various features work using a set of 2-D Gaussian shells with a uniform prior over $[-6, 6]$.
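Each shell is a Gaussian profile of width $w$ around a ring of radius $r$ centered at $\mathbf{c}$, so the log-likelihood of a single shell is

$$\ln \mathcal{L}_{\rm shell}(\boldsymbol{\theta}) = \ln \frac{1}{\sqrt{2 \pi w^2}} - \frac{\left(|\boldsymbol{\theta} - \mathbf{c}| - r\right)^2}{2 w^2} \;,$$

and the two shells are combined via logaddexp below.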
In [4]:
# defining constants
r = 2. # radius
w = 0.1 # width
c1 = np.array([-3.5, 0.]) # center of shell 1
c2 = np.array([3.5, 0.]) # center of shell 2
const = math.log(1. / math.sqrt(2. * math.pi * w**2)) # normalization constant
# log-likelihood of a single shell
def logcirc(theta, c):
    d = np.sqrt(np.sum((theta - c)**2, axis=-1))  # |theta - c|
    return const - (d - r)**2 / (2. * w**2)
# log-likelihood of two shells
def loglike(theta):
    return np.logaddexp(logcirc(theta, c1), logcirc(theta, c2))
# our prior transform
def prior_transform(x):
    return 12. * x - 6.
In [5]:
# compute likelihood surface over a 2-D grid
xx, yy = np.meshgrid(np.linspace(-6., 6., 200), np.linspace(-6., 6., 200))
L = np.exp(loglike(np.dstack((xx, yy))))
In [6]:
# plot result
fig = plt.figure(figsize=(6,5))
plt.scatter(xx, yy, c=L, s=0.5)
plt.xlabel(r'$x$')
plt.ylabel(r'$y$')
plt.colorbar(label=r'$\mathcal{L}$');
Let's first run with just the default set of dynesty options.
In [7]:
# run with all defaults
sampler = dynesty.DynamicNestedSampler(loglike, prior_transform, ndim=2)
sampler.run_nested()
res = sampler.results
In [8]:
from dynesty import plotting as dyplot
dyplot.cornerplot(sampler.results, span=([-6, 6], [-6, 6]), fig=plt.subplots(2, 2, figsize=(10, 10)));
Let's test out the bounding options available in dynesty (with uniform sampling) on these 2-D shells. To illustrate their baseline effectiveness, we will also disable the initial delay before our first update.
In [9]:
# bounding methods
bounds = ['none', 'single', 'multi', 'balls', 'cubes']
# run over each method and collect our results
bounds_res = []
for b in bounds:
    sampler = dynesty.NestedSampler(loglike, prior_transform, ndim=2,
                                    bound=b, sample='unif', nlive=500,
                                    first_update={'min_ncall': 0.,
                                                  'min_eff': 100.})
    sys.stderr.flush()
    sys.stderr.write('{}:\n'.format(b))
    sys.stderr.flush()
    t0 = time.time()
    sampler.run_nested(dlogz=0.05)
    t1 = time.time()
    res = sampler.results
    res['time'] = t1 - t0
    sys.stderr.flush()
    sys.stderr.write('\ntime: {0}s\n\n'.format(res['time']))
    bounds_res.append(sampler.results)
We can see that the amount of overhead associated with 'balls' and 'cubes' is non-trivial in this case. This mainly comes from sampling from our bounding distributions, since accepting or rejecting a point requires counting all neighbors within some radius $r$, leading to frequent nearest-neighbor searches. dynesty utilizes KDTrees to help with these, adding some overhead but improving overall scaling.
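To make the overhead concrete, here is a minimal standalone sketch (not dynesty's internal implementation) of the standard accept/reject scheme for sampling uniformly from a union of equal-radius balls: propose from a randomly chosen ball, count the number of balls $q$ containing the proposal, and accept with probability $1/q$. The neighbor count $q$ is the quantity the KDTree queries accelerate.

import numpy as np

def sample_union_of_balls(centers, radius, rng):
    """Hypothetical sketch: draw one point uniformly from a union of balls."""
    ndim = centers.shape[1]
    while True:
        # pick a ball at random and draw a point uniformly inside it
        c = centers[rng.integers(len(centers))]
        u = rng.normal(size=ndim)
        u *= radius * rng.random()**(1. / ndim) / np.linalg.norm(u)
        cand = c + u
        # count how many balls contain the candidate; this is the
        # nearest-neighbor search a KDTree accelerates
        q = np.sum(np.linalg.norm(centers - cand, axis=1) <= radius)
        if rng.random() < 1. / q:  # accept with probability 1/q
            return cand

# e.g., two overlapping 2-D balls centered on a pair of points
rng = np.random.default_rng(715)
point = sample_union_of_balls(np.array([[0., 0.], [0.5, 0.]]), 1., rng)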
Runtime aside, we see that each method runs for a similar number of iterations and gives similar logz values (with comparable errors). They thus appear to be unbiased both with respect to each other and with respect to the analytic solution ($\ln \mathcal{Z} = -1.75$).
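As a quick check, we can summarize these runs directly from the stored Results objects (a sketch using the niter, logz, logzerr, and eff fields that dynesty saves):

# compare bounding methods: iterations, efficiency, and evidence estimates
for b, res in zip(bounds, bounds_res):
    print('{:6s} niter: {:5d}  eff: {:5.2f}%  logz: {:6.3f} +/- {:6.3f}'
          .format(b, res.niter, res.eff, res.logz[-1], res.logzerr[-1]))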
To get a sense of what each of our bounds looks like, we can use some of dynesty's built-in plotting functionality. First, let's take a look at the case where we had no bounds ('none').
In [10]:
from dynesty import plotting as dyplot
# initialize figure
fig, axes = plt.subplots(1, 1, figsize=(6, 6))
# plot proposals in corner format for 'none'
fg, ax = dyplot.cornerbound(bounds_res[0], it=1500, prior_transform=prior_transform,
                            show_live=True, fig=(fig, axes))
ax[0, 0].set_title('No Bound', fontsize=26)
ax[0, 0].set_xlim([-6.5, 6.5])
ax[0, 0].set_ylim([-6.5, 6.5]);
Now let's examine the single and multi-ellipsoidal cases.
In [11]:
# initialize figure
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
axes = axes.reshape((1, 3))
[a.set_frame_on(False) for a in axes[:, 1]]
[a.set_xticks([]) for a in axes[:, 1]]
[a.set_yticks([]) for a in axes[:, 1]]
# plot proposals in corner format for 'single'
fg, ax = dyplot.cornerbound(bounds_res[1], it=1500, prior_transform=prior_transform,
                            show_live=True, fig=(fig, axes[:, 0]))
ax[0, 0].set_title('Single', fontsize=26)
ax[0, 0].set_xlim([-6.5, 6.5])
ax[0, 0].set_ylim([-6.5, 6.5])
# plot proposals in corner format for 'multi'
fg, ax = dyplot.cornerbound(bounds_res[2], it=1500, prior_transform=prior_transform,
                            show_live=True, fig=(fig, axes[:, 2]))
ax[0, 0].set_title('Multi', fontsize=26)
ax[0, 0].set_xlim([-6.5, 6.5])
ax[0, 0].set_ylim([-6.5, 6.5]);
Finally, let's take a look at our overlapping set of balls and cubes.
In [12]:
# initialize figure
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
axes = axes.reshape((1, 3))
[a.set_frame_on(False) for a in axes[:, 1]]
[a.set_xticks([]) for a in axes[:, 1]]
[a.set_yticks([]) for a in axes[:, 1]]
# plot proposals in corner format for 'balls'
fg, ax = dyplot.cornerbound(bounds_res[3], it=1500, prior_transform=prior_transform,
                            show_live=True, fig=(fig, axes[:, 0]))
ax[0, 0].set_title('Balls', fontsize=26)
ax[0, 0].set_xlim([-6.5, 6.5])
ax[0, 0].set_ylim([-6.5, 6.5])
# plot proposals in corner format for 'cubes'
fg, ax = dyplot.cornerbound(bounds_res[4], it=1500, prior_transform=prior_transform,
                            show_live=True, fig=(fig, axes[:, 2]))
ax[0, 0].set_title('Cubes', fontsize=26)
ax[0, 0].set_xlim([-6.5, 6.5])
ax[0, 0].set_ylim([-6.5, 6.5]);
By default, the nested samplers in dynesty save all bounding distributions used throughout the course of a run, which can be accessed within the results dictionary. More information on these distributions can be found in bounding.py.
In [13]:
# the proposals associated with our 'multi' bounds
bounds_res[2].bound
Each bounding object has a host of additional functionality that the user can experiment with. For instance, the volume contained by the union of ellipsoids within a MultiEllipsoid object can be estimated using Monte Carlo integration (these estimates are not otherwise computed by default). These volume estimates, combined with the fraction of our samples that overlap with the unit cube (since our bounding distributions can exceed our prior bounds), can give us an idea of how effectively our multi-ellipsoid bounds are shrinking over time compared with the single-ellipsoid case.
In [14]:
# compute effective 'single' volumes
single_vols = [1.]  # unit cube
for bound in bounds_res[1].bound[1:]:  # skip unit cube
    vol = bound.vol  # volume
    funit = bound.unitcube_overlap()  # fractional overlap with unit cube
    single_vols.append(vol * funit)
single_vols = np.array(single_vols)
# compute effective 'multi' volumes
multi_vols = [1.]  # unit cube
for bound in bounds_res[2].bound[1:]:  # skip unit cube
    # numerical estimate via Monte Carlo methods
    vol, funit = bound.monte_carlo_vol(return_overlap=True)
    multi_vols.append(vol * funit)
multi_vols = np.array(multi_vols)
In [15]:
# plot results as a function of ln(volume)
plt.figure(figsize=(12,6))
plt.xlabel(r'$-\ln X_i$')
plt.ylabel(r'$\ln V_i$')
# 'single'
res = bounds_res[1]
x = -res.logvol # ln(prior volume)
it = res.bound_iter # proposal idx at given iteration
y = np.log(single_vols[it]) # corresponding ln(bounding volume)
plt.plot(x, y, lw=3, label='single')
# 'multi'
res = bounds_res[2]
x, it = -res.logvol, res.bound_iter
y = np.log(multi_vols[it])
plt.plot(x, y, lw=3, label='multi')
plt.legend(loc='best', fontsize=24);
We see that in the beginning, only a single ellipsoid is used. After some bounding updates have been made, there is enough of an incentive to split the proposal into several ellipsoids. Although the initial ellipsoid decompositions can be somewhat unstable (i.e. bootstrapping can give relatively large volume expansion factors), over time this process leads to a significant decrease in the effective overall volume. The process by which an ellipsoid is split into multiple ellipsoids can be tuned via the 'vol_dec' and 'vol_check' keyword arguments.
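For instance, a run that demands a larger volume reduction before accepting a split might look like the sketch below (the values shown are illustrative; both keywords apply to the 'multi' bound in the dynesty version used here):

# require splits to decrease the volume by at least 50% ('vol_dec') and
# only attempt splits once an ellipsoid's volume exceeds 2x the expected
# prior volume ('vol_check')
sampler = dynesty.NestedSampler(loglike, prior_transform, ndim=2,
                                bound='multi', sample='unif', nlive=500,
                                vol_dec=0.5, vol_check=2.0)
sampler.run_nested(dlogz=0.05)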
Let's test out the sampling options available in dynesty (with 'multi' bounding) on our 2-D shells defined above.
In [16]:
# sampling methods
sampling = ['unif', 'rwalk', 'rstagger', 'slice', 'rslice', 'hslice']
# run over each method and collect our results
sampling_res = []
for s in sampling:
    sampler = dynesty.NestedSampler(loglike, prior_transform, ndim=2,
                                    bound='multi', sample=s, nlive=1000)
    sys.stderr.flush()
    sys.stderr.write('{}:\n'.format(s))
    sys.stderr.flush()
    t0 = time.time()
    sampler.run_nested(dlogz=0.05)
    t1 = time.time()
    res = sampler.results
    res['time'] = t1 - t0
    sys.stderr.flush()
    sys.stderr.write('\ntime: {0}s\n\n'.format(res['time']))
    sampling_res.append(sampler.results)
As expected, uniform sampling in 2-D is substantially more efficient than other more complex alternatives (especially 'hslice', which computes numerical gradients!). Regardless of runtime, however, we see that each method runs for a similar number of iterations and gives similar logz values (with comparable errors). They thus appear to be unbiased both with respect to each other and with respect to the analytic solution ($\ln\mathcal{Z} = -1.75$).
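Again, a quick summary pulled from the stored results (a sketch using the same Results fields as before):

# compare sampling methods: iterations, efficiency, and evidence estimates
for s, res in zip(sampling, sampling_res):
    print('{:8s} niter: {:5d}  eff: {:5.2f}%  logz: {:6.3f} +/- {:6.3f}'
          .format(s, res.niter, res.eff, res.logz[-1], res.logzerr[-1]))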
One of the largest overheads associated with nested sampling is the time needed to propose new bounding distributions. To avoid bounding distributions that fail to properly encompass the remaining likelihood, dynesty automatically expands the volume of all bounding distributions by an enlargement factor (enlarge). By default, this factor is set to a constant value of 1.25. However, it can also be determined in real time using bootstrapping (over the set of live points) following the scheme outlined in Buchner (2014).
Bootstrapping these expansion factors can help ensure accurate evidence estimation when the proposals rely heavily on the size of an object rather than its overall shape, such as when proposing new points uniformly within their boundaries. In theory, it also helps to prevent mode "death": if a secondary mode occasionally disappears from a bootstrap realization, the existing bounds would be expanded enough to (theoretically) still encompass it. In practice, however, most modes are widely separated, leading to enormous expansion factors whenever any possible instance of mode death may occur.
Bootstrapping thus imposes a de facto floor on the number of live points needed to avoid mode death for a given problem, which can often be quite large. While such numbers are often justified, they can drastically reduce the raw sampling efficiency until that target threshold of live points is reached.
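Both behaviors are set when initializing the sampler; a brief sketch (enlarge and bootstrap are the keyword arguments described above):

# default behavior: expand every bound by a constant 25% in volume
sampler = dynesty.NestedSampler(loglike, prior_transform, ndim=2,
                                bound='multi', enlarge=1.25)
# alternative: derive expansion factors in real time from 20 bootstrap
# realizations of the live points
sampler = dynesty.NestedSampler(loglike, prior_transform, ndim=2,
                                bound='multi', bootstrap=20)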
We showcase this behavior below by illustrating the performance of our NestedSampler on several N-D Gaussian shells with and without bootstrapping.
In [17]:
# setup for running tests over gaussian shells in arbitrary dimensions
def run(ndim, bootstrap, bound, method, nlive):
    """Convenience function for running in any dimension."""
    c1 = np.zeros(ndim)
    c1[0] = -3.5
    c2 = np.zeros(ndim)
    c2[0] = 3.5
    f = lambda theta: np.logaddexp(logcirc(theta, c1), logcirc(theta, c2))
    sampler = dynesty.NestedSampler(f, prior_transform, ndim,
                                    bound=bound, sample=method, nlive=nlive,
                                    bootstrap=bootstrap,
                                    first_update={'min_ncall': 0.,
                                                  'min_eff': 100.})
    sampler.run_nested(dlogz=0.5)
    return sampler.results
# analytic ln(evidence) values
ndims = [2, 5, 10]
analytic_logz = {2: -1.75,
                 5: -5.67,
                 10: -14.59}
In [18]:
# results with bootstrapping
results = []
for ndim in ndims:
    t0 = time.time()
    sys.stderr.flush()
    sys.stderr.write('{} dimensions:\n'.format(ndim))
    sys.stderr.flush()
    res = run(ndim, 20, 'multi', 'unif', 2000)
    sys.stderr.flush()
    res.time = time.time() - t0
    sys.stderr.write('\ntime: {0}s\n\n'.format(res['time']))
    results.append(res)
In [19]:
# results without bootstrapping
results2 = []
for ndim in ndims:
    t0 = time.time()
    sys.stderr.flush()
    sys.stderr.write('{} dimensions:\n'.format(ndim))
    sys.stderr.flush()
    res = run(ndim, 0, 'multi', 'unif', 2000)
    sys.stderr.flush()
    res.time = time.time() - t0
    sys.stderr.write('\ntime: {0}s\n\n'.format(res['time']))
    results2.append(res)
In [20]:
print('With bootstrapping:')
print("D analytic logz logzerr nlike eff(%) time")
for ndim, res in zip(ndims, results):
    print("{:2d} {:6.2f} {:6.2f} {:4.2f} {:6d} {:5.2f} {:6.2f}"
          .format(ndim, analytic_logz[ndim], res.logz[-1], res.logzerr[-1],
                  sum(res.ncall), res.eff, res.time))
print('\n')
print('Without bootstrapping:')
print("D analytic logz logzerr nlike eff(%) time")
for ndim, res in zip(ndims, results2):
    print("{:2d} {:6.2f} {:6.2f} {:4.2f} {:6d} {:5.2f} {:6.2f}"
          .format(ndim, analytic_logz[ndim], res.logz[-1], res.logzerr[-1],
                  sum(res.ncall), res.eff, res.time))
While our results are comparable between both cases, in higher dimensions multi-ellipsoid bounding distributions can sometimes be over-constrained, leading to biased results. Other sampling methods mitigate this problem by sampling conditioned on the ellipsoid axes, and so depend only on the ellipsoids' shapes, not their sizes. 'rslice' sampling is demonstrated below.
In [21]:
# adding on slice sampling
results3 = []
for ndim in ndims:
    t0 = time.time()
    sys.stderr.flush()
    sys.stderr.write('{} dimensions:\n'.format(ndim))
    sys.stderr.flush()
    res = run(ndim, 0, 'multi', 'rslice', 2000)
    sys.stderr.flush()
    res.time = time.time() - t0
    sys.stderr.write('\ntime: {0}s\n\n'.format(res['time']))
    results3.append(res)
In [22]:
print('Random Slice sampling:')
print("D analytic logz logzerr nlike eff(%) time")
for ndim, res in zip(ndims, results3):
    print("{:2d} {:6.2f} {:6.2f} {:4.2f} {:8d} {:5.2f} {:6.2f}"
          .format(ndim, analytic_logz[ndim], res.logz[-1], res.logzerr[-1],
                  sum(res.ncall), res.eff, res.time))