In [1]:
# Configure Jupyter so figures appear in the notebook
%matplotlib inline
# Configure Jupyter to display the assigned value after an assignment
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'
import numpy as np
import pandas as pd
# import classes from thinkbayes2
from thinkbayes2 import Pmf, Cdf, Suite, Joint
from thinkbayes2 import MakePoissonPmf, EvalBinomialPmf, MakeMixture
import thinkplot
I got the idea for the following problem from Tom Campbell-Ricketts, author of the Maximum Entropy blog. And he got the idea from E. T. Jaynes, author of the classic Probability Theory: The Logic of Science:
Suppose that a radioactive source emits particles toward a Geiger counter at an average rate of r particles per second, but the counter only registers a fraction, f, of the particles that hit it. If f is 10% and the counter registers 15 particles in a one-second interval, what is the posterior distribution of n, the actual number of particles that hit the counter, and r, the average rate at which particles are emitted?
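In this model the number of particles that hit the counter is Poisson-distributed with rate r, and given n hits the counter registers a binomial number of them with probability f. Marginalizing over n, the count k given r follows a thinned Poisson distribution, which is a handy check on the numbers below:

P(k | r) = sum over n of Poisson(n; r) * Binomial(k; n, f) = Poisson(k; f r)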
In [2]:
class Logistic(Suite, Joint):

    def Likelihood(self, data, hypo):
        """
        data: k, number of particles detected
        hypo: r, emission rate in particles per second
        """
        return 1  # placeholder likelihood; filled in below
In [3]:
r = 160
k = 15
f = 0.1
pmf = MakePoissonPmf(r, high=500)
thinkplot.Hist(pmf)
In [4]:
total = 0
for n, p in pmf.Items():
    total += p * EvalBinomialPmf(k, n, f)

total
Out[4]:
In [5]:
def compute_likelihood(k, r, f):
    pmf = MakePoissonPmf(r, high=500)
    total = 0
    for n, p in pmf.Items():
        total += p * EvalBinomialPmf(k, n, f)
    return total
In [6]:
compute_likelihood(k, r, f)
Out[6]:
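As a sanity check (a quick sketch, assuming scipy is available), the marginal likelihood should equal the Poisson pmf with the thinned rate f*r:

from scipy.stats import poisson

# Thinning a Poisson(r) count, keeping each particle with probability f,
# yields a Poisson(f*r) count, so this should match compute_likelihood(k, r, f)
poisson.pmf(k, f * r)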
In [7]:
likes = pd.Series(dtype=float)
for kk in range(0, 40):
    likes[kk] = compute_likelihood(kk, r, f)
In [8]:
likes.plot()
thinkplot.decorate(xlabel='Counted particles (k)',
                   ylabel='PMF')
In [9]:
# Solution
class Logistic(Suite, Joint):

    f = 0.1

    def Likelihood(self, data, hypo):
        """
        data: k, number of particles detected
        hypo: r, emission rate in particles per second
        """
        k = data
        r = hypo
        return compute_likelihood(k, r, self.f)
In [10]:
rs = np.linspace(0, 300, 51);
In [11]:
suite = Logistic(rs);
In [12]:
suite.Update(15)
Out[12]:
In [13]:
thinkplot.Pdf(suite)
thinkplot.decorate(xlabel='Emission rate (particles/second)',
ylabel='PMF',
title='Posterior marginal distribution')
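For numeric summaries, the usual Pmf methods should work on this posterior (a sketch, assuming Mean and CredibleInterval are available as in other thinkbayes2 examples):

suite.Mean()                  # posterior mean of the emission rate r
suite.CredibleInterval(90)    # 90% credible interval for r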
Implement this model using MCMC. As a starting place, you can use this example from the PyMC3 docs.
As a challenge, try writing the model more explicitly, rather than using the GLM module.
In [14]:
import pymc3 as pm
In [15]:
# Solution
f = 0.1
model = pm.Model()
with model:
    r = pm.Uniform('r', 0, 500)
    n = pm.Poisson('n', r)
    k = pm.Binomial('k', n, f, observed=15)
    trace = pm.sample_prior_predictive(1000)
In [16]:
thinkplot.Cdf(Cdf(trace['r']));
In [17]:
thinkplot.Cdf(Cdf(trace['n']));
In [18]:
thinkplot.Cdf(Cdf(trace['k']));
In [19]:
with model:
    trace = pm.sample(1000, tune=3000)
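For a tabular summary of the posterior samples, PyMC3 provides a summary function (a sketch; the exact columns depend on the PyMC3 version):

pm.summary(trace)   # posterior means, standard deviations, and intervals for r and n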
In [20]:
pm.traceplot(trace);
In [21]:
n_sample = trace['n']
thinkplot.Cdf(Cdf(n_sample))
Out[21]:
In [22]:
r_sample = trace['r']
thinkplot.Cdf(Cdf(r_sample))
Out[22]:
In [23]:
thinkplot.Cdf(suite.MakeCdf())
thinkplot.Cdf(Cdf(r_sample))
Out[23]:
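The agreement can also be checked numerically by comparing posterior means (a quick sketch):

suite.Mean(), r_sample.mean()   # grid-based vs. MCMC posterior mean of r; they should be close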
In [24]:
# Solution
class Logistic(Suite, Joint):

    f = 0.1

    def Likelihood(self, data, hypo):
        """
        data: k, number of particles detected
        hypo: (r, n) pair
        """
        k = data
        r, n = hypo
        return EvalBinomialPmf(k, n, self.f)
In [25]:
rs = np.linspace(0, 300, 51);
In [26]:
suite = Logistic()

for r in rs:
    pmf = MakePoissonPmf(r, high=500)
    for n, p in pmf.Items():
        suite[r, n] += p

suite.Normalize()
Out[26]:
In [27]:
suite.Update(15)
Out[27]:
In [28]:
pmf_r = suite.Marginal(0)
thinkplot.Pdf(pmf_r)
thinkplot.decorate(xlabel='Emission rate (particles/second)',
ylabel='PMF',
title='Posterior marginal distribution')
In [29]:
pmf_n = suite.Marginal(1)
thinkplot.Pdf(pmf_n)
thinkplot.decorate(xlabel='Number of particles (n)',
ylabel='PMF',
title='Posterior marginal distribution')
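The joint posterior also supports conditioning. For example, here is the distribution of n for one particular rate on the grid (a sketch, assuming the Joint mixin's Conditional(i, j, val) method; r=150 is just an illustrative grid value):

pmf_n_given_r = suite.Conditional(1, 0, 150)   # n (index 1) conditioned on r (index 0) = 150
thinkplot.Pdf(pmf_n_given_r)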
In [30]:
class Detector(Suite):
    """Represents hypotheses about n."""

    def __init__(self, r, f, high=500):
        """Initializes the suite.

        r: known emission rate, r
        f: fraction of particles registered
        high: maximum number of particles, n
        """
        pmf = MakePoissonPmf(r, high)
        super().__init__(pmf)
        self.r = r
        self.f = f

    def Likelihood(self, data, hypo):
        """Likelihood of the data given the hypothesis.

        data: number of particles counted
        hypo: number of particles hitting the counter, n
        """
        k = data
        n = hypo
        return EvalBinomialPmf(k, n, self.f)
In [31]:
r = 160
k = 15
f = 0.1
suite = Detector(r, f);
In [32]:
suite.Update(15)
Out[32]:
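The updated suite is the posterior distribution of n for this fixed rate; plotting it uses the same calls as the earlier cells (a quick sketch):

thinkplot.Pdf(suite)
thinkplot.decorate(xlabel='Number of particles (n)',
                   ylabel='PMF',
                   title='Posterior distribution of n given r=160')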
In [33]:
class Emitter(Suite):
    """Represents hypotheses about r."""

    def Likelihood(self, data, hypo):
        """Likelihood of the data given the hypothesis.

        data: number of particles counted per unit time
        hypo: Detector object
        """
        # Updating the detector returns the normalizing constant,
        # which is the probability of the data given this detector's rate.
        return hypo.Update(data)
In [34]:
rs = np.linspace(0, 300, 51);
In [35]:
detectors = [Detector(r, f=0.1) for r in rs[1:]]
suite = Emitter(detectors);
In [36]:
suite.Update(15)
Out[36]:
In [37]:
pmf_r = Pmf()
for detector, p in suite.Items():
    pmf_r[detector.r] = p

thinkplot.Pdf(pmf_r)
In [38]:
mix = MakeMixture(suite);
In [39]:
thinkplot.Pdf(mix)
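The mixture is the posterior distribution of n averaged over the posterior of r, so it should closely match pmf_n from the joint-grid solution above (a quick sketch overlaying the two):

thinkplot.Pdf(pmf_n)   # marginal of n from the joint grid, computed earlier
thinkplot.Pdf(mix)     # mixture of the updated Detector suites
thinkplot.decorate(xlabel='Number of particles (n)', ylabel='PMF')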