Bayesian interpretation of medical tests

This notebooks explores several problems related to interpreting the results of medical tests.

Copyright 2016 Allen Downey

MIT License: http://opensource.org/licenses/MIT


In [1]:
from __future__ import print_function, division

from thinkbayes2 import Pmf

In [2]:
from random import random

def flip(p):
    return random() < p

In [3]:
def run_single_simulation(func, iters=1000000):
    pmf_t = Pmf([0.2, 0.4])
    p = 0.1
    s = 0.9

    outcomes = Pmf()
    post_t = Pmf()
    for i in range(iters):
        test, sick, t = func(p, s, pmf_t)
        if test:
            outcomes[sick] += 1
            post_t[t] += 1

    outcomes.Normalize()
    post_t.Normalize()
    return outcomes, post_t

Scenario A: Choose t for each patient, yield all patients regardless of test.


In [4]:
def generate_patient_A(p, s, pmf_t):
    while True:
        t = pmf_t.Random()
        sick = flip(p)
        test = flip(s) if sick else flip(t)
        return test, sick, t
                
outcomes, post_t = run_single_simulation(generate_patient_A)
outcomes.Print()
post_t.Print()


False 0.75061282845
True 0.24938717155
0.2 0.375486862013
0.4 0.624513137987

Scenario B: Choose t before generating patients, yield all patients regardless of test.


In [5]:
def generate_patient_B(p, s, pmf_t):
    t = pmf_t.Random()
    while True:
        sick = flip(p)
        test = flip(s) if sick else flip(t)
        return test, sick, t
                
outcomes, post_t = run_single_simulation(generate_patient_B)
outcomes.Print()
post_t.Print()


False 0.751086814612
True 0.248913185388
0.2 0.375297571924
0.4 0.624702428076

Scenario C: Choose t for each patient, only yield patients who test positive.


In [7]:
def generate_patient_C(p, s, pmf_t):
    while True:
        t = pmf_t.Random()
        sick = flip(p)
        test = flip(s) if sick else flip(t)
        if test:
            return test, sick, t
                
outcomes, post_t = run_single_simulation(generate_patient_C)
outcomes.Print()
post_t.Print()


False 0.750118
True 0.249882
0.2 0.374743
0.4 0.625257

Scenario D: Choose t before generating patients, only yield patients who test positive.


In [8]:
def generate_patient_D(p, s, pmf_t):
    t = pmf_t.Random()
    while True:
        sick = flip(p)
        test = flip(s) if sick else flip(t)
        if test:
            return test, sick, t
                
outcomes, post_t = run_single_simulation(generate_patient_D)
outcomes.Print()
post_t.Print()


False 0.73333
True 0.26667
0.2 0.498557
0.4 0.501443

Here's a variation of the Scenario D where we only consider cases where patient 1 is the first to test positive.


In [9]:
from random import choice
import numpy as np 
N = 100
patients = range(N)

p = 0.1
s = 0.9
num_sick = 0

pmf_t = Pmf()
pmf_sick = Pmf()

for i in range(10000000):
    # decide what the value of t is
    t = choice([0.2, 0.4])
    np.random.shuffle(patients)
    
    # generate patients until we get a positive test
    for patient in patients:
        sick = flip(p)
        test = flip(s) if sick else flip(t)
        if test:
            if patient==1:
                #print(patient, sick, t)
                pmf_t[t] += 1
                pmf_sick[sick] += 1
            break
            
pmf_t.Normalize()
pmf_sick.Normalize()

print('Dist of t')
pmf_t.Print()
print('Dist of status')
pmf_sick.Print()


Dist of t
0.2 0.502046233824
0.4 0.497953766176
Dist of status
False 0.733476787564
True 0.266523212436

In [10]:
num_sick


Out[10]:
0

In [ ]: