In [44]:
import pandas as pd
import numpy as np
from __future__ import division # this is to avoid problems when we divide integers (remember 3/5=0 in Python 2, unless we do this step)

In [47]:
# Load the Chapter 6 meningitis-diagnosis table. The first line of the file is
# skipped and the columns are given explicit names instead.
column_names = ['ID', 'Headache', 'Fever', 'Vomiting', 'Meningitis']
df = pd.read_csv('./Table6-1.csv', skiprows=[0], names=column_names)

# preview the first rows
df.head()


Out[47]:
ID Headache Fever Vomiting Meningitis
0 1 true true false false
1 2 false true false false
2 3 true false true false
3 4 true false true false
4 5 false true false true

Applying Bayes' Theorem to Meningitis Diagnosis


In [51]:
# Boolean row masks, one per condition of interest. Each cell's text is stripped
# of surrounding whitespace before it is compared with 'true' / 'false'.
has_headache = df.Headache.str.strip() == 'true'
no_fever = df.Fever.str.strip() == 'false'
is_vomiting = df.Vomiting.str.strip() == 'true'
has_meningitis = df.Meningitis.str.strip() == 'true'

# rows showing the full symptom pattern: headache, no fever, vomiting
symptom_pattern = has_headache & (no_fever & is_vomiting)

# P(headache, no fever, vomiting): share of all rows showing the pattern,
# regardless of the meningitis diagnosis
Phnfv = len(df[symptom_pattern]) / len(df)

# P(meningitis): share of all rows with a positive diagnosis, regardless of symptoms
Pm = sum(has_meningitis) / len(df)

# P(headache, no fever, vomiting | meningitis): among the rows with a positive
# diagnosis, the share that also shows the pattern
Phnfv_g_m = len(df[has_meningitis & symptom_pattern]) / sum(has_meningitis)

In [52]:
# Bayes' Theorem for the symptom pattern S = (headache, no fever, vomiting):
#   P(meningitis | S) = P(S | meningitis) * P(meningitis) / P(S)
# Evaluating the right-hand side with the estimates taken from the data gives
# the probability that a patient showing S has meningitis:
(Phnfv_g_m * Pm) / Phnfv


Out[52]:
0.33333333333333331

Using Naive Bayes


In [59]:
def _matches(column, value):
    """Return a boolean row mask over `df`.

    A row is True where the text in `column`, stripped of surrounding
    whitespace, equals the string `value`.
    """
    return df[column].str.strip() == value


def _cond_prob(symptom, symptom_value, meningitis_value):
    """Estimate P(symptom == symptom_value | Meningitis == meningitis_value).

    The estimate is the number of rows of `df` that satisfy both conditions
    divided by the number of rows that satisfy the meningitis condition.

    symptom          -- name of the symptom column ('Headache', 'Fever', 'Vomiting')
    symptom_value    -- 'true' or 'false', the symptom state being counted
    meningitis_value -- 'true' or 'false', the diagnosis being conditioned on
    """
    in_class = _matches('Meningitis', meningitis_value)
    both = in_class & _matches(symptom, symptom_value)
    # true division is expected here (see `from __future__ import division`)
    return len(df[both]) / sum(in_class)


# --- conditioned on a positive meningitis diagnosis ---

# probability of headache given positive meningitis diagnosis
Ph_g_m = _cond_prob('Headache', 'true', 'true')

# probability of no fever given positive meningitis diagnosis
Pnf_g_m = _cond_prob('Fever', 'false', 'true')

# probability of vomiting given positive meningitis diagnosis
Pv_g_m = _cond_prob('Vomiting', 'true', 'true')

# --- conditioned on a negative meningitis diagnosis ---

# probability of headache given negative meningitis diagnosis
Ph_g_nm = _cond_prob('Headache', 'true', 'false')

# probability of no fever given negative meningitis diagnosis
Pnf_g_nm = _cond_prob('Fever', 'false', 'false')

# probability of vomiting given negative meningitis diagnosis
Pv_g_nm = _cond_prob('Vomiting', 'true', 'false')

In [60]:
# to sum it up, here are the probabilities of headache, no fever, and vomiting, given a positive meningitis diagnosis...
# (a single parenthesised string prints the same text under Python 2 and is also valid Python 3)
print('%s %s %s' % (Ph_g_m, Pnf_g_m, Pv_g_m))


0.666666666667 0.666666666667 0.666666666667

In [41]:
# ... and here are the probabilities of headache, no fever, and vomiting, given a negative meningitis diagnosis
# (a single parenthesised string prints the same text under Python 2 and is also valid Python 3)
print('%s %s %s' % (Ph_g_nm, Pnf_g_nm, Pv_g_nm))


0.714285714286 0.571428571429 0.571428571429

In [62]:
# Naive Bayes denominator: the overall probability of headache, no fever, and
# vomiting when the symptoms are assumed conditionally independent given the
# diagnosis. It adds the meningitis term and the no-meningitis term.
(Ph_g_m * Pnf_g_m * Pv_g_m * Pm
 + Ph_g_nm * Pnf_g_nm * Pv_g_nm * (1 - Pm))


Out[62]:
0.25215419501133784

In [63]:
# Naive Bayes numerator:
#   P(headache|meningitis) P(no fever|meningitis) P(vomiting|meningitis) P(meningitis)
# i.e. the probability of the symptom pattern together with meningitis, with the
# symptoms assumed conditionally independent given the diagnosis.
(Ph_g_m * Pnf_g_m * Pv_g_m) * Pm


Out[63]:
0.088888888888888878

In [64]:
# Naive Bayes estimate of P(meningitis | headache, no fever, vomiting), with all
# symptoms assumed conditionally independent given the diagnosis:
#
#   numerator   = P(headache|meningitis) P(no fever|meningitis) P(vomiting|meningitis) P(meningitis)
#   denominator = numerator
#                 + P(headache|no meningitis) P(no fever|no meningitis) P(vomiting|no meningitis) P(no meningitis)
#
# The approximate probability of meningitis for this symptom pattern is numerator / denominator:
(
    Ph_g_m * Pnf_g_m * Pv_g_m * Pm
    / (Ph_g_m * Pnf_g_m * Pv_g_m * Pm + Ph_g_nm * Pnf_g_nm * Pv_g_nm * (1 - Pm))
)


Out[64]:
0.35251798561151076