In [1]:
import time
import numpy
from galpy.util import bovy_plot
%pylab inline
numpy.random.seed(1)


Populating the interactive namespace from numpy and matplotlib

Examples of ABC inference

Coin flip with two flips

We've flipped a coin twice and gotten heads twice. What is the probability for getting heads?


In [2]:
data= ['H','H']
outcomes= ['T','H']

In [3]:
def coin_ABC():
    while True:
        h= numpy.random.uniform()
        flips= numpy.random.binomial(1,h,size=2)
        if outcomes[flips[0]] == data[0] \
            and outcomes[flips[1]] == data[1]:
                yield h

In [4]:
hsamples= []
start= time.time()
for h in coin_ABC():
    hsamples.append(h)
    if time.time() > start+2.: break
print "Obtained %i samples" % len(hsamples)


Obtained 169433 samples

In [5]:
dum= hist(hsamples,bins=31,lw=2.,normed=True,zorder=0)
plot(numpy.linspace(0.,1.,1001),numpy.linspace(0.,1.,1001)**2.*3.,lw=3.)
xlabel(r'$h$')
ylabel(r'$p(h|D)$')


Out[5]:
<matplotlib.text.Text at 0x1021aad10>

Coin flip with 10 flips

Same with 10 flips, still matching the entire sequence:


In [6]:
data= ['T', 'H', 'H', 'T', 'T', 'H', 'H', 'T', 'H', 'H']

In [7]:
def coin_ABC_10flips():
    while True:
        h= numpy.random.uniform()
        flips= numpy.random.binomial(1,h,size=len(data))
        if outcomes[flips[0]] == data[0] \
            and outcomes[flips[1]] == data[1] \
            and outcomes[flips[2]] == data[2] \
            and outcomes[flips[3]] == data[3] \
            and outcomes[flips[4]] == data[4] \
            and outcomes[flips[5]] == data[5] \
            and outcomes[flips[6]] == data[6] \
            and outcomes[flips[7]] == data[7] \
            and outcomes[flips[8]] == data[8] \
            and outcomes[flips[9]] == data[9]:
                yield h

In [8]:
hsamples= []
start= time.time()
for h in coin_ABC_10flips():
    hsamples.append(h)
    if time.time() > start+2.: break
print "Obtained %i samples" % len(hsamples)


Obtained 184 samples

In [9]:
dum= hist(hsamples,bins=31,lw=2.,normed=True,zorder=0)
xs= numpy.linspace(0.,1.,1001)
ys= xs**numpy.sum([d == 'H' for d in data])*(1.-xs)**numpy.sum([d == 'T' for d in data])
ys/= numpy.sum(ys)*(xs[1]-xs[0])
plot(xs,ys,lw=3.)
xlabel(r'$h$')
ylabel(r'$p(h|D)$')


Out[9]:
<matplotlib.text.Text at 0x11558e350>

Using a sufficient statistic instead:


In [10]:
sufficient_data= numpy.sum([d == 'H' for d in data])
def coin_ABC_10flips_suff():
    while True:
        h= numpy.random.uniform()
        flips= numpy.random.binomial(1,h,size=len(data))
        if numpy.sum(flips) == sufficient_data:
            yield h

In [11]:
hsamples= []
start= time.time()
for h in coin_ABC_10flips_suff():
    hsamples.append(h)
    if time.time() > start+2.: break
print "Obtained %i samples" % len(hsamples)


Obtained 20897 samples

In [12]:
dum= hist(hsamples,bins=31,lw=2.,normed=True,zorder=0)
xs= numpy.linspace(0.,1.,1001)
ys= xs**numpy.sum([d == 'H' for d in data])*(1.-xs)**numpy.sum([d == 'T' for d in data])
ys/= numpy.sum(ys)*(xs[1]-xs[0])
plot(xs,ys,lw=3.)
xlabel(r'$h$')
ylabel(r'$p(h|D)$')


Out[12]:
<matplotlib.text.Text at 0x1159dba90>

Variance of a Gaussian with zero mean

Now we infer the variance of a Gaussian with zero mean using ABC:


In [13]:
data= numpy.random.normal(size=100)

In [14]:
def Var_ABC(threshold=0.05):
    while True:
        v= numpy.random.uniform()*4
        sim= numpy.random.normal(size=len(data))*numpy.sqrt(v)
        d= numpy.fabs(numpy.var(sim)-numpy.var(data))
        if d < threshold:
            yield v

In [15]:
vsamples= []
start= time.time()
for v in Var_ABC(threshold=0.05):
    vsamples.append(v)
    if time.time() > start+2.: break
print "Obtained %i samples" % len(vsamples)


Obtained 827 samples

In [16]:
h= hist(vsamples,range=[0.,2.],bins=51,normed=True)
xs= numpy.linspace(0.001,2.,1001)
ys= xs**(-len(data)/2.)*numpy.exp(-1./xs/2.*len(data)*(numpy.var(data)+numpy.mean(data)**2.))
ys/= numpy.sum(ys)*(xs[1]-xs[0])
plot(xs,ys,lw=2.)


Out[16]:
[<matplotlib.lines.Line2D at 0x115b94c50>]

If we raise the threshold too much, we sample simply from the prior:


In [17]:
vsamples= []
start= time.time()
for v in Var_ABC(threshold=1.5):
    vsamples.append(v)
    if time.time() > start+2.: break
print "Obtained %i samples" % len(vsamples)


Obtained 22032 samples

In [18]:
h= hist(vsamples,range=[0.,2.],bins=51,normed=True)
xs= numpy.linspace(0.001,2.,1001)
ys= xs**(-len(data)/2.)*numpy.exp(-1./xs/2.*len(data)*(numpy.var(data)+numpy.mean(data)**2.))
ys/= numpy.sum(ys)*(xs[1]-xs[0])
plot(xs,ys,lw=2.)


Out[18]:
[<matplotlib.lines.Line2D at 0x115fd3f50>]

And if we make the threshold too small, we don't get many samples:


In [19]:
vsamples= []
start= time.time()
for v in Var_ABC(threshold=0.001):
    vsamples.append(v)
    if time.time() > start+2.: break
print "Obtained %i samples" % len(vsamples)


Obtained 23 samples

In [20]:
h= hist(vsamples,range=[0.,2.],bins=51,normed=True)
xs= numpy.linspace(0.001,2.,1001)
ys= xs**(-len(data)/2.)*numpy.exp(-1./xs/2.*len(data)*(numpy.var(data)+numpy.mean(data)**2.))
ys/= numpy.sum(ys)*(xs[1]-xs[0])
plot(xs,ys,lw=2.)


Out[20]:
[<matplotlib.lines.Line2D at 0x1161d04d0>]

In [ ]: