In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import edward as ed
from edward.models import Uniform
from edward.models import Bernoulli
import tensorflow as tf

import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap, Normalize
from matplotlib.animation import FuncAnimation
from tempfile import NamedTemporaryFile
from IPython.display import HTML

import seaborn as sns
import numpy as np
import six
import matplotlib
matplotlib.interactive(True)

plt.style.use('seaborn-talk')
sns.set_context("talk", font_scale=1.4)
sess = ed.get_session()

sns.palplot(sns.color_palette())



In [2]:
# Set the random seed for Edward; this must be done once, before any other Edward calls.
ed.set_seed(42)

In [3]:
import base64

VIDEO_TAG = """<video controls>
 <source src="data:video/x-m4v;base64,{0}" type="video/mp4">
 Your browser does not support the video tag.
</video>"""

def anim_to_html(anim):
    # Encode the animation as an H.264 mp4 and cache the base64 string on the object.
    if not hasattr(anim, '_encoded_video'):
        with NamedTemporaryFile(suffix='.mp4') as f:
            anim.save(f.name, fps=20, extra_args=['-vcodec', 'libx264'])
            video = open(f.name, "rb").read()
        anim._encoded_video = base64.b64encode(video).decode('ascii')

    return VIDEO_TAG.format(anim._encoded_video)
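
A minimal usage sketch (the figure, update function, and animation names below are illustrative, not part of the original notebook; rendering requires ffmpeg with libx264 on the PATH):

# Illustrative only: animate a point moving along a sine curve and embed it inline.
fig_demo, ax_demo = plt.subplots()
point, = ax_demo.plot([], [], 'o')
ax_demo.set_xlim(0, 2 * np.pi)
ax_demo.set_ylim(-1.1, 1.1)

def _update(i):
    point.set_data([i * 0.1], [np.sin(i * 0.1)])
    return point,

demo_anim = FuncAnimation(fig_demo, _update, frames=60)
HTML(anim_to_html(demo_anim))  # embeds the encoded mp4 in the notebook output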

Coin


In [4]:
from edward.models import Bernoulli, Beta, Empirical, Uniform

In [5]:
N = 100

In [6]:
def build_fair_dataset(N):
    pheads = tf.constant(0.5)
    c = Bernoulli(probs=pheads, sample_shape = N)
    return sess.run([pheads, c])

In [7]:
def build_unfair_dataset(N):
    pheads = tf.constant(0.05)
    c = Bernoulli(probs=pheads, sample_shape = N)
    return sess.run([pheads, c])

In [8]:
def build_dataset(N):
    pheads = Uniform(low=0.0,high=1.0)
    c = Bernoulli(probs=pheads, sample_shape = N)
    return sess.run([pheads, c])

In [9]:
x = tf.range(-0.2, 1.2, 0.001)

plt.plot(*sess.run([x, Uniform(low=0.0,high=1.0).prob(x)]));
plt.ylim((-0.2,1.2))
plt.title('Uniform distribution')


Out[9]:
<matplotlib.text.Text at 0x24a8b68f240>

In [10]:
pheads_true,c_train = build_fair_dataset(N)

In [11]:
pheads_true


Out[11]:
0.5

In [12]:
c_train


Out[12]:
array([1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1,
       1, 1, 1, 0, 0, 1, 0, 1])

In [13]:
sum(c_train == 0)


Out[13]:
50

In [14]:
sum(c_train == 1)


Out[14]:
50

inference


In [15]:
# declaring priors

pheads_fair = Beta(concentration1=1000.0, concentration0=1000.0) # blue
pheads_unfair = Beta(concentration1=0.1, concentration0=0.1) # green

pheads_unknown = Beta(concentration1 = 1.0, concentration0=1.0)

In [16]:
x = tf.range(0.0,1.0,0.001)
plt.plot(*sess.run([x, pheads_fair.prob(x)]));
plt.plot(*sess.run([x, pheads_unfair.prob(x)]));
plt.plot(*sess.run([x, pheads_unknown.prob(x)]));
plt.axvline(x=pheads_true);



In [17]:
# Forward model
pheads = pheads_unknown
c = Bernoulli(probs=pheads, sample_shape=N)

exact solution


In [19]:
# inference
pheads_cond = ed.complete_conditional(pheads)
pheads_post = ed.copy(pheads_cond, {c: c_train})

In [20]:
sess.run({key:val for key, val in six.iteritems(pheads_post.parameters) if isinstance(val,tf.Tensor)})


Out[20]:
{'concentration0': 51.0, 'concentration1': 51.0}
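
As a sanity check, the conjugate Beta-Bernoulli update can be done by hand: starting from the Beta(1.0, 1.0) prior, the posterior concentrations are 1 + #heads and 1 + #tails. A small check on the sampled c_train (numpy only):

heads = int(np.sum(c_train == 1))  # 50 in this run
tails = int(np.sum(c_train == 0))  # 50 in this run
print(1 + heads, 1 + tails)        # 51 51, matching the parameters above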

In [21]:
# criticism
mean, stddev = sess.run([pheads_post.mean(), pheads_post.stddev()])
print("exact posterior mean: " + str(mean))
print("exact posterior std: " + str(stddev))


exact posterior mean: 0.5
exact posterior std: 0.0492665

In [22]:
x = tf.range(0.0,1.0,0.001)
plt.plot(*sess.run([x,pheads.prob(x)]));
plt.plot(*sess.run([x,pheads_post.prob(x)]));
plt.axvline(x=pheads_true);


RECURSIVE INFERENCE

The posterior from one batch of flips becomes the prior for the next batch (sequential Bayesian updating).

In [23]:
pheads_true,c_train = build_fair_dataset(100)

fig = plt.figure()
ax = plt.axes(xlim=(-0.05,1.05), ylim=(-1.0,11.0))

def go(pheads_prior, sample_shape, c_train, recursion=1):
    # model 
    c = Bernoulli(probs=pheads_prior,
                 sample_shape=sample_shape)
    
    # INFERENCE
    pheads_cond = ed.complete_conditional(pheads_prior)
    pheads_post = ed.copy(pheads_cond,{c:c_train[:sample_shape]})
    
    # CRITICISM
    ax.plot(*sess.run([x,pheads_post.prob(x)]));
    
    print("finished recursion "+str(recursion))
    recursion += 1
    # RECURSION
    if len(c_train[sample_shape:]) >= sample_shape:
        go(pheads_post, sample_shape, c_train[sample_shape:],recursion)
        
pheads_prior = Beta(concentration1=0.1, concentration0=0.1)
ax.plot(*sess.run([x, pheads_prior.prob(x)])); 
plt.axvline(x=pheads_true);
go(pheads_prior,33,c_train)


finished recursion 1
finished recursion 2
finished recursion 3

Approximate inference: MCMC: Metropolis-Hastings


In [24]:
# BACKWARD MODEL
T = 10000  # number of empirical samples
q_pheads = Empirical(params=tf.Variable(tf.ones([T]) * 0.5))

In [25]:
# INFERENCE
proposal_pheads = Beta(concentration1=1.0,
                       concentration0=1.0)
inference = ed.MetropolisHastings(latent_vars = {pheads: q_pheads},
                                 proposal_vars={pheads: proposal_pheads},
                                 data={c:c_train})
inference.run()


10000/10000 [100%] ██████████████████████████████ Elapsed: 17s | Acceptance Rate: 0.161

In [26]:
# criticism
mean, stddev = sess.run([q_pheads.mean(),q_pheads.stddev()])
print("inferred posterior mean:")
print(mean)
print("inferred posterior std:")
print(stddev)


inferred posterior mean:
0.488378
inferred posterior std:
0.049427

In [27]:
plt.plot(q_pheads.params.eval());
plt.axhline(y=pheads_true)


Out[27]:
<matplotlib.lines.Line2D at 0x24a975372e8>

In [28]:
def lags(x):
    # Empirical autocorrelation of a 1-D chain, normalised so that lag 0 equals 1.
    mean = tf.reduce_mean(x)
    var = tf.cast(tf.size(x) - 1, tf.float32) * tf.reduce_mean(tf.square(x - mean))
    ret = tf.map_fn(lambda k: tf.cond(tf.equal(k, 0),
                                      lambda: var,
                                      lambda: tf.reduce_sum((x[:-k] - mean) * (x[k:] - mean))),
                    tf.range(0, tf.size(x)),
                    dtype=tf.float32)
    return ret / var

In [29]:
plt.plot(lags(q_pheads.params).eval());
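
If the autocorrelation decays slowly, the chain can be thinned before summarising; a minimal sketch (keeping every 10th draw, an arbitrary choice):

samples = q_pheads.params.eval()
thinned = samples[::10]  # keep every 10th draw to reduce autocorrelation
print(thinned.mean(), thinned.std())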



In [30]:
x = tf.range(0.0,1.0,0.001)
plt.plot(*sess.run([x,pheads.prob(x)]));
plt.plot(*sess.run([x,pheads_cond.prob(x)],
                  {c:c_train}));
plt.hist(q_pheads.params.eval(),
        bins=100,range=(0.0,1.0),
        normed = True);
plt.axvline(x=pheads_true);


MCMC: Gibbs


In [31]:
# BACKWARD MODEL
T = 10000
q_pheads = Empirical(params=tf.Variable(tf.ones([T])*0.5))

In [32]:
# INFERENCE 
inference = ed.Gibbs(latent_vars={pheads: q_pheads},
                    data={c: c_train})
inference.run()


10000/10000 [100%] ██████████████████████████████ Elapsed: 39s | Acceptance Rate: 1.000

In [41]:
# CRITICISM
mean, stddev = sess.run([q_pheads.mean(),q_pheads.stddev()])
print('Inferred posterior mean:')
print(mean)
print('Inferred posterior std:')
print(stddev)


Inferred posterior mean:
0.490163
Inferred posterior std:
0.0493696

In [34]:
plt.plot(q_pheads.params.eval());
plt.axhline(y=pheads_true)


Out[34]:
<matplotlib.lines.Line2D at 0x24a9d089dd8>

In [35]:
x = tf.range(0.0,1.0,0.001)
plt.plot(*sess.run([x,pheads.prob(x)]));
plt.plot(*sess.run([x,pheads_cond.prob(x)],
                  {c:c_train}));
plt.hist(q_pheads.params.eval(),
        bins=100,range=(0.0,1.0),
        normed = True);
plt.axvline(x=pheads_true);



In [36]:
plt.plot(lags(q_pheads.params).eval());


MCMC: Hamiltonian Monte Carlo (HMC)

Metropolis-Hastings and Gibbs tend to fail in high-dimensional parameter spaces, whereas HMC does not.

HMC uses the geometry of the important regions of the posterior (where the probability mass is concentrated) to guide the jump proposals.

Gibbs requires the complete conditionals to be derived algebraically, which is often not possible, so Gibbs cannot always be used. Hamiltonian Monte Carlo does not have this restriction. However, it only works for continuous variables, because it relies on derivatives (i.e. it needs smooth densities).


In [37]:
# BACKWARD MODEL
T = 10000 # number of empirical samples
q_pheads = Empirical(params=tf.Variable(tf.ones([T])*.5))

In [41]:
# INFERENCE
inference = ed.HMC(latent_vars={pheads: q_pheads},
                  data = {c: c_train})
inference.run(step_size=1.0 / N, n_steps=20)


10000/10000 [100%] ██████████████████████████████ Elapsed: 153s | Acceptance Rate: 0.997

In [58]:
# criticism
mean, stddev = sess.run([q_pheads.mean(), q_pheads.stddev()])
print("Inferred posterior mean:")
print(mean)
print("Inferred posterior stddev")
print(stddev)


Inferred posterior mean:
0.539722
Inferred posterior stddev:
0.0492454

In [ ]:


In [60]:
plt.plot(q_pheads.params.eval())
plt.axhline(y=pheads_true)


Out[60]:
<matplotlib.lines.Line2D at 0x120748278>

In [61]:
plt.plot(lags(q_pheads.params).eval())


Out[61]:
[<matplotlib.lines.Line2D at 0x125178da0>]

In [62]:
x = tf.range(0.0, 1.0, 0.001)
plt.plot(*sess.run([x, pheads.prob(x)]))
plt.plot(*sess.run([x, pheads_cond.prob(x)],
                  {c:c_train}))
plt.hist(q_pheads.params.eval(),
        bins=100, range=(0.0, 1.0),
        normed=True)
plt.axvline(x=pheads_true)


Out[62]:
<matplotlib.lines.Line2D at 0x12541f908>

variational inference (VI)

A good alternative to Markov chain Monte Carlo methods: a class of algorithms that casts posterior inference as optimisation.

Approximate the posterior over the model parameters, p(params | data = obs):

a. Build a variational backward model, q(params; lambda), over the parameters, with variational parameters lambda.

b. Match q(params; lambda) to p(params | obs) by optimising over lambda:

lambda* = argmin_lambda Closeness(p(params | obs), q(params; lambda))

c. Use q(params; lambda*) instead of p(params | obs).

d. Criticise the variational model and revise it (--> a).

In Edward, the closeness measure is the KL divergence from q to p, minimised by maximising the ELBO; this is what ed.KLqp below implements.


In [70]:
# BACKWARD MODEL
# Variational parameters; softplus keeps the Beta concentrations positive.
# Initialised around 51, close to the exact posterior concentrations found above.
q_pheads_concentration1 = tf.nn.softplus(tf.Variable(51 + tf.random_normal([])))
q_pheads_concentration0 = tf.nn.softplus(tf.Variable(51 + tf.random_normal([])))
q_pheads = Beta(concentration1=q_pheads_concentration1,
                concentration0=q_pheads_concentration0)

In [71]:
x = tf.range(-5.0, 5.0, 0.001)
plt.plot(*sess.run([x,tf.nn.softplus(x)]))


Out[71]:
[<matplotlib.lines.Line2D at 0x11eb5a940>]

In [72]:
# inference
inference = ed.KLqp(latent_vars={pheads: q_pheads},
                   data={c:c_train})
inference.run(n_samples=20, n_iter=1000)


1000/1000 [100%] ██████████████████████████████ Elapsed: 28s | Loss: 71.154

In [ ]:


In [ ]:


In [73]:
sess.run({key: val for key, val in six.iteritems(q_pheads.parameters) if isinstance(val,tf.Tensor)})


Out[73]:
{'concentration0': 46.225704, 'concentration1': 57.310051}

In [74]:
plt.plot(*sess.run([x,pheads.prob(x)]))
plt.plot(*sess.run([x, pheads_cond.prob(x)],
                  {c:c_train}))
plt.plot(*sess.run([x, q_pheads.prob(x)]))
plt.axvline(x=pheads_true)


Out[74]:
<matplotlib.lines.Line2D at 0x134e89c18>

In [75]:
# criticism
mean, stddev = sess.run([q_pheads.mean(),q_pheads.stddev()])
print("Inferred posterior mean:")
print(mean)
print("Inferred posterior stddev")
print(stddev)


Inferred posterior mean:
0.553529
Inferred posterior stddev:
0.0486222

In [76]:
## A/B/... Testing

In [ ]:

Bayesian linear regression


In [ ]: