In [1]:
# Configure Jupyter so figures appear in the notebook
%matplotlib inline
# Configure Jupyter to display the assigned value after an assignment
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'
import numpy as np
import pandas as pd
# import classes from thinkbayes2
from thinkbayes2 import Pmf, Cdf, Suite, Joint
from thinkbayes2 import MakePoissonPmf, EvalBinomialPmf, MakeMixture
import thinkplot
Suppose we visit a wild animal preserve where we know that the only animals are lions and tigers and bears, but we don't know how many of each there are.
During the tour, we see 3 lions, 2 tigers, and 1 bear. Assuming that every animal had an equal chance to appear in our sample, estimate the prevalence of each species.
What is the probability that the next animal we see is a bear?
In [2]:
class LionsTigersBears(Suite, Joint):
    """Joint distribution over the prevalences of lions, tigers and bears.

    Each hypothesis is a triple (p1, p2, p3): the prevalence of lions,
    tigers and bears, respectively.
    """

    def Likelihood(self, data, hypo):
        """Probability of the observed animal under a hypothesis.

        data: string 'L' , 'T', 'B'
        hypo: p1, p2, p3

        returns: the prevalence in `hypo` of the species named by `data`
        raises: KeyError if `data` is not one of 'L', 'T', 'B'
        """
        p_lion, p_tiger, p_bear = hypo
        # Every animal is equally likely to be seen, so the chance that a
        # sighting is of a given species is that species' prevalence.
        prevalence = {'L': p_lion, 'T': p_tiger, 'B': p_bear}
        return prevalence[data]
In [3]:
# Solution goes here
In [4]:
# Grid of candidate prevalences: 101 evenly spaced values from 0 to 1 inclusive.
# The trailing ';' suppresses the echo of the assigned value that the
# 'last_expr_or_assign' setting would otherwise display.
ps = np.linspace(0, 1, 101);
In [5]:
from itertools import product
from math import isclose


def enumerate_triples(ps):
    """Generate every triple of prevalences that sums to 1.

    ps: sequence of candidate prevalences

    yields: tuples (p1, p2, p3), each element drawn from `ps`, whose sum
            equals 1 up to floating-point rounding error
    """
    for p1, p2, p3 in product(ps, ps, ps):
        # An exact `== 1` test silently drops valid triples whose sum is off
        # by a rounding error, so compare with a (relative) tolerance instead.
        if isclose(p1 + p2 + p3, 1):
            yield p1, p2, p3
Write a better version of `enumerate_triples`
that doesn't run into problems with floating-point rounding error (an exact equality test on a sum of floats can miss valid triples).
In [6]:
# Solution goes here
In [7]:
suite = LionsTigersBears(enumerate_triples(ps));
In [8]:
def plot_marginal_pmfs(joint):
    """Plot the marginal PMF of each species' prevalence on one set of axes.

    joint: object that provides Marginal(i) for i = 0, 1, 2
    """
    species = ['lions', 'tigers', 'bears']
    # Extract all three marginals before anything is drawn.
    marginals = [joint.Marginal(i) for i in range(len(species))]
    for marginal, name in zip(marginals, species):
        thinkplot.Pdf(marginal, label=name)
    thinkplot.decorate(xlabel='Prevalence', ylabel='PMF')
In [9]:
plot_marginal_pmfs(suite)
In [10]:
for data in 'LLLTTB':
suite.Update(data)
In [11]:
plot_marginal_pmfs(suite)
In [12]:
def plot_marginal_cdfs(joint):
    """Plot the marginal CDF of each species' prevalence on one set of axes.

    joint: object that provides Marginal(i) for i = 0, 1, 2; each marginal
           must provide MakeCdf()
    """
    species = ['lions', 'tigers', 'bears']
    # Extract all three marginals before anything is drawn.
    marginals = [joint.Marginal(i) for i in range(len(species))]
    for marginal, name in zip(marginals, species):
        # Each marginal is converted to a CDF right before it is plotted.
        thinkplot.Cdf(marginal.MakeCdf(), label=name)
    thinkplot.decorate(xlabel='Prevalence', ylabel='CDF')
In [13]:
plot_marginal_cdfs(suite)
In [14]:
from thinkbayes2 import Dirichlet
def DirichletMarginal(dirichlet, i):
    """Return the marginal distribution of component `i` as a Pmf.

    dirichlet: object that provides MarginalBeta(i)
    i: index of the component

    returns: the result of MakePmf() on the marginal Beta of component `i`
    """
    marginal_beta = dirichlet.MarginalBeta(i)
    return marginal_beta.MakePmf()
Dirichlet.Marginal = DirichletMarginal
In [15]:
dirichlet = Dirichlet(3)
plot_marginal_pmfs(dirichlet)
In [16]:
dirichlet.Update((3, 2, 1))
In [17]:
plot_marginal_pmfs(dirichlet)
In [18]:
plot_marginal_cdfs(dirichlet)
In [19]:
thinkplot.PrePlot(6)
plot_marginal_cdfs(dirichlet)
plot_marginal_cdfs(suite)
Implement this model using MCMC. You might want to start with this example.
In [20]:
import pymc3 as pm
In [21]:
# Observations encoded as integer category codes, one entry per animal seen
# (presumably 0 = lion, 1 = tiger, 2 = bear, matching the 3 lions, 2 tigers
# and 1 bear of the problem statement -- confirm).
observed = [0,0,0,1,1,2]
# Number of categories, taken as the length of a Pmf built from the
# observations (presumably the count of distinct values -- verify against
# thinkbayes2.Pmf).
# NOTE(review): a species that was never observed would not be counted here.
k = len(Pmf(observed))
# Array of k ones; presumably the Dirichlet prior parameters for the model
# built in the following cells -- confirm.
a = np.ones(k)
In [30]:
model = pm.Model()
with model:
    # Prior: a Dirichlet distribution over the species prevalences with
    # parameters `a`. It is registered as 'ps', the name that
    # plot_trace_cdfs reads from the trace. The Python variable has a
    # different name so that it does not overwrite the `ps` grid defined
    # earlier in the notebook.
    prevalences = pm.Dirichlet('ps', a=a, shape=a.shape)
    # Likelihood: each sighting in `observed` is a draw from a categorical
    # distribution over the species with those prevalences.
    sightings = pm.Categorical('xs', p=prevalences, observed=observed)
In [22]:
# Solution goes here
In [23]:
def plot_trace_cdfs(trace):
    """Plot the CDF of the sampled prevalence of each species.

    trace: mapping whose 'ps' entry provides transpose(); after transposing,
           rows 0, 1 and 2 are used for lions, tigers and bears
    """
    species = ['lions', 'tigers', 'bears']
    samples = trace['ps'].transpose()
    # Build all three CDFs before anything is drawn.
    cdfs = [Cdf(samples[i]) for i in range(len(species))]
    for cdf, name in zip(cdfs, species):
        thinkplot.Cdf(cdf, label=name)
    thinkplot.decorate(xlabel='Prevalence', ylabel='CDF')
In [24]:
#plot_trace_cdfs(trace)
In [25]:
#pmf = Pmf(trace['xs'][0])
#thinkplot.Hist(pmf)
In [26]:
with model:
    # Start the sampler from the maximum a posteriori estimate.
    start = pm.find_MAP()
    step = pm.Metropolis()
    # Draw 1000 samples after 1000 tuning steps. The fixed seed makes the
    # trace, and every plot derived from it, reproducible when the notebook
    # is restarted and run from the top.
    trace = pm.sample(1000, start=start, step=step, tune=1000, random_seed=42)
In [27]:
pm.traceplot(trace);
In [28]:
plot_trace_cdfs(trace)
In [29]:
thinkplot.PrePlot(6)
plot_marginal_cdfs(dirichlet)
plot_trace_cdfs(trace)
In [ ]: