dirichlet


Think Bayes

Copyright 2018 Allen B. Downey

MIT License: https://opensource.org/licenses/MIT


In [1]:
# Configure Jupyter so figures appear in the notebook
%matplotlib inline

# Configure Jupyter to display the assigned value after an assignment
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'

import numpy as np
import pandas as pd

# import classes from thinkbayes2
from thinkbayes2 import Pmf, Cdf, Suite, Joint

from thinkbayes2 import MakePoissonPmf, EvalBinomialPmf, MakeMixture

import thinkplot

Lions and Tigers and Bears

Suppose we visit a wild animal preserve where we know that the only animals are lions and tigers and bears, but we don't know how many of each there are.

During the tour, we see 3 lions, 2 tigers, and 1 bear. Assuming that every animal had an equal chance to appear in our sample, estimate the prevalence of each species.

What is the probability that the next animal we see is a bear?

Grid algorithm


In [2]:
class LionsTigersBears(Suite, Joint):
    """Joint distribution over the prevalences of lions, tigers and bears.

    Each hypothesis is a tuple (p1, p2, p3) holding the hypothesized
    prevalence of lions, tigers and bears, in that order.
    """

    def Likelihood(self, data, hypo):
        """Likelihood of one sighting under a hypothesized set of prevalences.

        data: string 'L', 'T' or 'B' naming the species that was seen
        hypo: tuple (p1, p2, p3) of prevalences for lions, tigers and bears

        returns: the hypothesized prevalence of the observed species

        raises: ValueError if data is not one of 'L', 'T', 'B'
        """
        p_lion, p_tiger, p_bear = hypo
        # The chance of seeing a given species is simply its prevalence.
        prevalence_of = {'L': p_lion, 'T': p_tiger, 'B': p_bear}
        try:
            return prevalence_of[data]
        except KeyError:
            # Fail loudly instead of returning None for an unknown observation.
            raise ValueError(
                "data must be 'L', 'T' or 'B', got %r" % (data,)) from None

In [3]:
# Solution goes here

In [4]:
# Grid of 101 evenly spaced candidate prevalences covering 0 through 1.
ps = np.linspace(0.0, 1.0, num=101);

In [5]:
from itertools import product

def enumerate_triples(ps, tol=1e-9):
    """Generate every (p1, p2, p3) drawn from ps whose sum is 1.

    The sum is compared with 1 using an absolute tolerance instead of
    exact equality: the grid values are floats, so a sum that is
    mathematically 1 can differ from 1.0 by rounding error, and an
    exact `==` test would silently drop such triples.

    ps: sequence of candidate probabilities
    tol: largest allowed absolute deviation of p1+p2+p3 from 1; it
         should be much smaller than the spacing between values in ps

    yields: tuples (p1, p2, p3), in the order produced by
            itertools.product(ps, ps, ps)
    """
    for p1, p2, p3 in product(ps, ps, ps):
        if abs(p1 + p2 + p3 - 1) <= tol:
            yield p1, p2, p3

Write a better version of `enumerate_triples` that doesn't run into floating-point rounding problems.


In [6]:
# Solution goes here

In [7]:
# Build the prior suite from the (p1, p2, p3) triples generated from the grid ps.
# The trailing semicolon suppresses the cell's displayed value.
suite = LionsTigersBears(enumerate_triples(ps));

In [8]:
def plot_marginal_pmfs(joint):
    """Plot the marginal PMF of each species' prevalence.

    joint: object providing Marginal(i); index 0 is plotted as lions,
           1 as tigers and 2 as bears
    """
    labels = ('lions', 'tigers', 'bears')

    # Extract all three marginals before drawing anything.
    marginals = [joint.Marginal(index) for index in range(len(labels))]

    for marginal, label in zip(marginals, labels):
        thinkplot.Pdf(marginal, label=label)

    thinkplot.decorate(xlabel='Prevalence', ylabel='PMF')

In [9]:
# Marginal PMFs of the grid suite before any sightings have been applied.
plot_marginal_pmfs(suite)

In [10]:
# Apply one update per sighting: three lions, two tigers, one bear.
for data in 'LLLTTB':
    suite.Update(data)

In [11]:
# Marginal PMFs of the grid suite after the six sightings have been applied.
plot_marginal_pmfs(suite)

In [12]:
def plot_marginal_cdfs(joint):
    """Plot the marginal CDF of each species' prevalence.

    joint: object providing Marginal(i), whose results provide MakeCdf();
           index 0 is plotted as lions, 1 as tigers and 2 as bears
    """
    labels = ('lions', 'tigers', 'bears')

    # Extract all three marginals first, then convert and draw one at a time.
    marginals = [joint.Marginal(index) for index in range(len(labels))]

    for marginal, label in zip(marginals, labels):
        thinkplot.Cdf(marginal.MakeCdf(), label=label)

    thinkplot.decorate(xlabel='Prevalence', ylabel='CDF')

In [13]:
# Marginal CDFs of the grid suite after the sightings have been applied.
plot_marginal_cdfs(suite)

Using the Dirichlet object


In [14]:
from thinkbayes2 import Dirichlet

def DirichletMarginal(dirichlet, i):
    """Return the marginal distribution of component i as a Pmf.

    dirichlet: object providing MarginalBeta(i)
    i: index of the component

    returns: result of calling MakePmf() on the marginal beta
    """
    marginal_beta = dirichlet.MarginalBeta(i)
    return marginal_beta.MakePmf()

# Attach the helper as a method so Dirichlet objects provide Marginal(i),
# which plot_marginal_pmfs and plot_marginal_cdfs call on their argument.
Dirichlet.Marginal = DirichletMarginal

In [15]:
# NOTE(review): Dirichlet(3) is presumably a three-category Dirichlet with
# thinkbayes2's default parameters — confirm against the library.
dirichlet = Dirichlet(3)
# Marginal PMFs before the observed counts are applied.
plot_marginal_pmfs(dirichlet)

In [16]:
# Update with the observed counts, 3, 2 and 1: the numbers of lions, tigers
# and bears seen on the tour.
dirichlet.Update((3, 2, 1))

In [17]:
# Marginal PMFs of the Dirichlet after the counts have been applied.
plot_marginal_pmfs(dirichlet)

In [18]:
# Marginal CDFs of the Dirichlet after the counts have been applied.
plot_marginal_cdfs(dirichlet)

In [19]:
# NOTE(review): PrePlot(6) presumably prepares thinkplot for the six curves
# drawn below — confirm against thinkplot.
thinkplot.PrePlot(6)
# Draw the Dirichlet marginals and the grid-suite marginals in the same cell
# so the two methods can be compared.
plot_marginal_cdfs(dirichlet)
plot_marginal_cdfs(suite)

MCMC

Implement this model using MCMC. You might want to start with this example.


In [20]:
import pymc3 as pm

In [21]:
# Sightings encoded as integer codes: 0 three times, 1 twice, 2 once,
# matching the 3 lions, 2 tigers and 1 bear seen on the tour.
observed = [0,0,0,1,1,2]
# Number of categories, taken as the length of a Pmf built from the codes.
k = len(Pmf(observed))
# Vector of k ones.
# NOTE(review): presumably intended as the Dirichlet parameters of the
# model defined in the next cell — confirm.
a = np.ones(k)

In [30]:
model = pm.Model()

with model:
    # Dirichlet prior over the k species prevalences, with parameters a.
    # The variable is named 'ps' because plot_trace_cdfs reads trace['ps'].
    # It is bound to `prevalences`, not `ps`, so the grid array `ps` used
    # by the grid algorithm cells is not overwritten.
    prevalences = pm.Dirichlet('ps', a=a, shape=k)

    # Each observed sighting is a categorical draw with those prevalences.
    pm.Categorical('xs', p=prevalences, observed=observed)

In [22]:
# Solution goes here

In [23]:
def plot_trace_cdfs(trace):
    """Plot the CDF of the sampled prevalence of each species.

    trace: mapping whose 'ps' entry provides transpose(); after
           transposing, row 0 is plotted as lions, row 1 as tigers and
           row 2 as bears
    """
    labels = ('lions', 'tigers', 'bears')
    samples_by_species = trace['ps'].transpose()

    # Build all three CDFs before drawing anything.
    cdfs = [Cdf(samples_by_species[index]) for index in range(len(labels))]

    for cdf, label in zip(cdfs, labels):
        thinkplot.Cdf(cdf, label=label)

    thinkplot.decorate(xlabel='Prevalence', ylabel='CDF')

In [24]:
#plot_trace_cdfs(trace)

In [25]:
#pmf = Pmf(trace['xs'][0])
#thinkplot.Hist(pmf)

In [26]:
# Sample from the model defined above.
with model:
    # Use the MAP estimate as the sampler's starting point.
    start = pm.find_MAP()
    # Sample with a Metropolis step method.
    step = pm.Metropolis()
    # Request 1000 draws with tune=1000.
    # NOTE(review): no random seed is passed, so results presumably vary
    # from run to run — confirm whether that is intended.
    trace = pm.sample(1000, start=start, step=step, tune=1000)

In [27]:
# Plot the sampled trace; the trailing semicolon hides the returned value.
pm.traceplot(trace);

In [28]:
# CDFs of the sampled prevalences for each species.
plot_trace_cdfs(trace)

In [29]:
# NOTE(review): PrePlot(6) presumably prepares thinkplot for the six curves
# drawn below — confirm against thinkplot.
thinkplot.PrePlot(6)
# Draw the Dirichlet marginals and the MCMC sample CDFs in the same cell
# so the two methods can be compared.
plot_marginal_cdfs(dirichlet)
plot_trace_cdfs(trace)

In [ ]: