In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
from edward.models import Uniform
from edward.models import Bernoulli
import tensorflow as tf

import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap, Normalize
from matplotlib.animation import FuncAnimation
from tempfile import NamedTemporaryFile
from IPython.display import HTML

import seaborn as sns
import numpy as np
import six
import matplotlib
matplotlib.interactive(True)

plt.style.use('seaborn-talk')
sns.set_context("talk", font_scale=1.4)
sess = ed.get_session()

sns.palplot(sns.color_palette())



In [2]:
# Set the random seed for Edward; this must be done once, before any other Edward calls.
ed.set_seed(42)

In [3]:
import base64

VIDEO_TAG = """<video controls>
 <source src="data:video/x-m4v;base64,{0}" type="video/mp4">
 Your browser does not support the video tag.
</video>"""

def anim_to_html(anim):
    # Encode the animation as an H.264 mp4 and cache the base64 string on the object.
    if not hasattr(anim, '_encoded_video'):
        with NamedTemporaryFile(suffix='.mp4') as f:
            anim.save(f.name, fps=20, extra_args=['-vcodec', 'libx264'])
            video = open(f.name, "rb").read()
        anim._encoded_video = base64.b64encode(video).decode('ascii')

    return VIDEO_TAG.format(anim._encoded_video)
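
A minimal usage sketch (the figure, update function, and animation names below are illustrative, not part of the original notebook; rendering requires ffmpeg with libx264 on the PATH):

# Illustrative only: animate a point moving along a sine curve and embed it inline.
fig_demo, ax_demo = plt.subplots()
point, = ax_demo.plot([], [], 'o')
ax_demo.set_xlim(0, 2 * np.pi)
ax_demo.set_ylim(-1.1, 1.1)

def _update(i):
    point.set_data([i * 0.1], [np.sin(i * 0.1)])
    return point,

demo_anim = FuncAnimation(fig_demo, _update, frames=60)
HTML(anim_to_html(demo_anim))  # embeds the encoded mp4 in the notebook output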

Coin


In [4]:
from edward.models import Bernoulli, Beta, Empirical, Uniform

In [5]:
N = 100

In [6]:
def build_fair_dataset(N):
    pheads = tf.constant(0.5)
    c = Bernoulli(probs=pheads, sample_shape = N)
    return sess.run([pheads, c])

In [7]:
def build_unfair_dataset(N):
    pheads = tf.constant(0.05)
    c = Bernoulli(probs=pheads, sample_shape = N)
    return sess.run([pheads, c])

In [8]:
def build_dataset(N):
    pheads = Uniform(low=0.0,high=1.0)
    c = Bernoulli(probs=pheads, sample_shape = N)
    return sess.run([pheads, c])

In [9]:
x = tf.range(-0.2, 1.2, 0.001)

plt.plot(*sess.run([x, Uniform(low=0.0,high=1.0).prob(x)]));
plt.ylim((-0.2,1.2))
plt.title('Uniform distribution')


Out[9]:
<matplotlib.text.Text at 0x24a8b68f240>

In [10]:
pheads_true,c_train = build_fair_dataset(N)

In [11]:
pheads_true


Out[11]:
0.5

In [12]:
c_train


Out[12]:
array([1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1,
       1, 1, 1, 0, 0, 1, 0, 1])

In [13]:
sum(c_train == 0)


Out[13]:
50

In [14]:
sum(c_train == 1)


Out[14]:
50

inference


In [15]:
# declaring priors

pheads_fair = Beta(concentration1=1000.0, concentration0=1000.0) # blue
pheads_unfair = Beta(concentration1=0.1, concentration0=0.1) # green

pheads_unknown = Beta(concentration1 = 1.0, concentration0=1.0)

In [16]:
x = tf.range(0.0,1.0,0.001)
plt.plot(*sess.run([x, pheads_fair.prob(x)]));
plt.plot(*sess.run([x, pheads_unfair.prob(x)]));
plt.plot(*sess.run([x, pheads_unknown.prob(x)]));
plt.axvline(x=pheads_true);



In [17]:
# Forward model
pheads = pheads_unknown
c = Bernoulli(probs=pheads, sample_shape=N)

exact solution


In [19]:
# inference
pheads_cond = ed.complete_conditional(pheads)
pheads_post = ed.copy(pheads_cond, {c: c_train})

In [20]:
sess.run({key:val for key, val in six.iteritems(pheads_post.parameters) if isinstance(val,tf.Tensor)})


Out[20]:
{'concentration0': 51.0, 'concentration1': 51.0}
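
As a sanity check, the conjugate Beta-Bernoulli update can be done by hand: starting from the Beta(1.0, 1.0) prior, the posterior concentrations are 1 + #heads and 1 + #tails. A small check on the sampled c_train (numpy only):

heads = int(np.sum(c_train == 1))  # 50 in this run
tails = int(np.sum(c_train == 0))  # 50 in this run
print(1 + heads, 1 + tails)        # 51 51, matching the parameters above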

In [21]:
# criticism
mean, stddev = sess.run([pheads_post.mean(), pheads_post.stddev()])
print("exact posterior mean: " + str(mean))
print("exact posterior std: " + str(stddev))


exact posterior mean: 0.5
exact posterior std: 0.0492665

In [22]:
x = tf.range(0.0,1.0,0.001)
plt.plot(*sess.run([x,pheads.prob(x)]));
plt.plot(*sess.run([x,pheads_post.prob(x)]));
plt.axvline(x=pheads_true);


RECURSIVE INFERENCE

The posterior from one batch of flips becomes the prior for the next batch (sequential Bayesian updating).

In [23]:
pheads_true,c_train = build_fair_dataset(100)

fig = plt.figure()
ax = plt.axes(xlim=(-0.05,1.05), ylim=(-1.0,11.0))

def go(pheads_prior, sample_shape, c_train, recursion=1):
    # model 
    c = Bernoulli(probs=pheads_prior,
                 sample_shape=sample_shape)
    
    # INFERENCE
    pheads_cond = ed.complete_conditional(pheads_prior)
    pheads_post = ed.copy(pheads_cond,{c:c_train[:sample_shape]})
    
    # CRITICISM
    ax.plot(*sess.run([x,pheads_post.prob(x)]));
    
    print("finished recursion "+str(recursion))
    recursion += 1
    # RECURSION
    if len(c_train[sample_shape:]) >= sample_shape:
        go(pheads_post, sample_shape, c_train[sample_shape:],recursion)
        
pheads_prior = Beta(concentration1=0.1, concentration0=0.1)
ax.plot(*sess.run([x, pheads_prior.prob(x)])); 
plt.axvline(x=pheads_true);
go(pheads_prior,33,c_train)


finished recursion 1
finished recursion 2
finished recursion 3

Approximate inference: MCMC: Metropolis-Hastings


In [24]:
# BACKWARD MODEL
T = 10000  # number of empirical samples
q_pheads = Empirical(params=tf.Variable(tf.ones([T]) * 0.5))

In [25]:
# INFERENCE
proposal_pheads = Beta(concentration1=1.0,
                       concentration0=1.0)
inference = ed.MetropolisHastings(latent_vars = {pheads: q_pheads},
                                 proposal_vars={pheads: proposal_pheads},
                                 data={c:c_train})
inference.run()


10000/10000 [100%] ██████████████████████████████ Elapsed: 17s | Acceptance Rate: 0.161

In [26]:
# criticism
mean, stddev = sess.run([q_pheads.mean(),q_pheads.stddev()])
print("inferred posterior mean:")
print(mean)
print("inferred posterior std:")
print(stddev)


inferred posterior mean:
0.488378
inferred posterior std:
0.049427

In [27]:
plt.plot(q_pheads.params.eval());
plt.axhline(y=pheads_true)


Out[27]:
<matplotlib.lines.Line2D at 0x24a975372e8>

In [28]:
def lags(x):
    # Empirical autocorrelation of a 1-D chain, normalised so that lag 0 equals 1.
    mean = tf.reduce_mean(x)
    var = tf.cast(tf.size(x) - 1, tf.float32) * tf.reduce_mean(tf.square(x - mean))
    ret = tf.map_fn(lambda k: tf.cond(tf.equal(k, 0),
                                      lambda: var,
                                      lambda: tf.reduce_sum((x[:-k] - mean) * (x[k:] - mean))),
                    tf.range(0, tf.size(x)),
                    dtype=tf.float32)
    return ret / var

In [29]:
plt.plot(lags(q_pheads.params).eval());
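
If the autocorrelation decays slowly, the chain can be thinned before summarising; a minimal sketch (keeping every 10th draw, an arbitrary choice):

samples = q_pheads.params.eval()
thinned = samples[::10]  # keep every 10th draw to reduce autocorrelation
print(thinned.mean(), thinned.std())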



In [30]:
x = tf.range(0.0,1.0,0.001)
plt.plot(*sess.run([x,pheads.prob(x)]));
plt.plot(*sess.run([x,pheads_cond.prob(x)],
                  {c:c_train}));
plt.hist(q_pheads.params.eval(),
        bins=100,range=(0.0,1.0),
        normed = True);
plt.axvline(x=pheads_true);


MCMC: Gibbs


In [31]:
# BACKWARD MODEL
T = 10000
q_pheads = Empirical(params=tf.Variable(tf.ones([T])*0.5))

In [32]:
# INFERENCE 
inference = ed.Gibbs(latent_vars={pheads: q_pheads},
                    data={c: c_train})
inference.run()


10000/10000 [100%] ██████████████████████████████ Elapsed: 39s | Acceptance Rate: 1.000

In [41]:
# CRITICISM
mean, stddev = sess.run([q_pheads.mean(),q_pheads.stddev()])
print('Inferred posterior mean:')
print(mean)
print('Inferred posterior std:')
print(stddev)


Inferred posterior mean:
0.490163
Inferred posterior std:
0.0493696

In [34]:
plt.plot(q_pheads.params.eval());
plt.axhline(y=pheads_true)


Out[34]:
<matplotlib.lines.Line2D at 0x24a9d089dd8>

In [35]:
x = tf.range(0.0,1.0,0.001)
plt.plot(*sess.run([x,pheads.prob(x)]));
plt.plot(*sess.run([x,pheads_cond.prob(x)],
                  {c:c_train}));
plt.hist(q_pheads.params.eval(),
        bins=100,range=(0.0,1.0),
        normed = True);
plt.axvline(x=pheads_true);



In [36]:
plt.plot(lags(q_pheads.params).eval());


MCMC: Hamiltonian Monte Carlo (HMC)

Metropolis-Hastings and Gibbs tend to fail in high-dimensional parameter spaces, whereas HMC does not.

HMC uses the geometry of the important regions of the posterior (where the probability mass is concentrated) to guide the jump proposals.

Gibbs requires the complete conditionals to be derived algebraically, which is often not possible, so Gibbs cannot always be used. Hamiltonian Monte Carlo does not have this restriction. However, it only works for continuous variables, because it relies on derivatives (i.e. it needs smooth densities).


In [37]:
# BACKWARD MODEL
T = 10000 # number of empirical samples
q_pheads = Empirical(params=tf.Variable(tf.ones([T])*.5))

In [41]:
# INFERENCE
inference = ed.HMC(latent_vars={pheads: q_pheads},
                  data = {c: c_train})
inference.run(step_size=1.0 / N, n_steps=20)


10000/10000 [100%] ██████████████████████████████ Elapsed: 153s | Acceptance Rate: 0.997

In [58]:
# criticism
mean, stddev = sess.run([q_pheads.mean(), q_pheads.stddev()])
print("Inferred posterior mean:")
print(mean)
print("Inferred posterior stddev")
print(stddev)


Inferred posterior mean:
0.539722
Inferred posterior stddev:
0.0492454

In [ ]:


In [60]:
plt.plot(q_pheads.params.eval())
plt.axhline(y=pheads_true)


Out[60]:
<matplotlib.lines.Line2D at 0x120748278>

In [61]:
plt.plot(lags(q_pheads.params).eval())


Out[61]:
[<matplotlib.lines.Line2D at 0x125178da0>]

In [62]:
x = tf.range(0.0, 1.0, 0.001)
plt.plot(*sess.run([x, pheads.prob(x)]))
plt.plot(*sess.run([x, pheads_cond.prob(x)],
                  {c:c_train}))
plt.hist(q_pheads.params.eval(),
        bins=100, range=(0.0, 1.0),
        normed=True)
plt.axvline(x=pheads_true)


Out[62]:
<matplotlib.lines.Line2D at 0x12541f908>

variational inference (VI)

A good alternative to Markov chain Monte Carlo methods: a class of algorithms that casts posterior inference as optimisation.

Approximate the posterior over the model parameters, p(params | data = obs):

a. Build a variational backward model, q(params; lambda), over the parameters, with variational parameters lambda.

b. Match q(params; lambda) to p(params | obs) by optimising over lambda:

lambda* = argmin_lambda Closeness(p(params | obs), q(params; lambda))

c. Use q(params; lambda*) instead of p(params | obs).

d. Criticise the variational model and revise it (--> a).

In Edward, the closeness measure is the KL divergence from q to p, minimised by maximising the ELBO; this is what ed.KLqp below implements.


In [70]:
# BACKWARD MODEL
# Variational parameters; softplus keeps the Beta concentrations positive.
# Initialised around 51, close to the exact posterior concentrations found above.
q_pheads_concentration1 = tf.nn.softplus(tf.Variable(51 + tf.random_normal([])))
q_pheads_concentration0 = tf.nn.softplus(tf.Variable(51 + tf.random_normal([])))
q_pheads = Beta(concentration1=q_pheads_concentration1,
                concentration0=q_pheads_concentration0)

In [71]:
x = tf.range(-5.0, 5.0, 0.001)
plt.plot(*sess.run([x,tf.nn.softplus(x)]))


Out[71]:
[<matplotlib.lines.Line2D at 0x11eb5a940>]

In [72]:
# inference
inference = ed.KLqp(latent_vars={pheads: q_pheads},
                   data={c:c_train})
inference.run(n_samples=20, n_iter=1000)


1000/1000 [100%] ██████████████████████████████ Elapsed: 28s | Loss: 71.154

In [ ]:


In [ ]:


In [73]:
sess.run({key: val for key, val in six.iteritems(q_pheads.parameters) if isinstance(val,tf.Tensor)})


Out[73]:
{'concentration0': 46.225704, 'concentration1': 57.310051}

In [74]:
plt.plot(*sess.run([x,pheads.prob(x)]))
plt.plot(*sess.run([x, pheads_cond.prob(x)],
                  {c:c_train}))
plt.plot(*sess.run([x, q_pheads.prob(x)]))
plt.axvline(x=pheads_true)


Out[74]:
<matplotlib.lines.Line2D at 0x134e89c18>

In [75]:
# criticism
mean, stddev = sess.run([q_pheads.mean(),q_pheads.stddev()])
print("Inferred posterior mean:")
print(mean)
print("Inferred posterior stddev")
print(stddev)


Inferred posterior mean:
0.553529
Inferred posterior stddev:
0.0486222

In [76]:
## A/B/... Testing

In [ ]:

Bayesian linear regression


In [ ]: