In [44]:

    
import pandas as pd
import numpy as np
from __future__ import division # this is to avoid problems when we divide integers (remember 3/5=0 in Python 2, unless we do this step)



In [47]:

    
# Load the Chapter 6 meningitis-diagnosis table.
# The first line of the file is skipped (presumably its own header row) and
# the five columns are named explicitly instead.
df = pd.read_csv(
    './Table6-1.csv',
    skiprows=[0],
    names=['ID', 'Headache', 'Fever', 'Vomiting', 'Meningitis']
)

# Show the first few rows as a quick check of the load.
df.head()









    Out[47]:






  
    
      
      ID
      Headache
      Fever
      Vomiting
      Meningitis
    
  
  
    
      0
      1
      true
      true
      false
      false
    
    
      1
      2
      false
      true
      false
      false
    
    
      2
      3
      true
      false
      true
      false
    
    
      3
      4
      true
      false
      true
      false
    
    
      4
      5
      false
      true
      false
      true

Applying Bayes' Theorem to Meningitis Diagnosis



In [51]:

    
# Row-wise boolean indicators, one per condition of interest.
# The columns hold text, so every value is whitespace-stripped and compared
# against the literal 'true'/'false' exactly once here, instead of repeating
# that normalisation inside each probability expression below.
has_headache = df.Headache.str.strip() == 'true'
no_fever = df.Fever.str.strip() == 'false'
has_vomiting = df.Vomiting.str.strip() == 'true'
has_meningitis = df.Meningitis.str.strip() == 'true'

# Rows showing the complete symptom pattern: headache, no fever, and vomiting.
has_symptom_pattern = has_headache & no_fever & has_vomiting

# NOTE: all ratios below rely on true division (`from __future__ import division`
# in the imports cell on Python 2; the default on Python 3).

# from the data, probability of having headache, no fever, and vomiting, regardless of meningitis diagnosis
Phnfv = len(df[has_symptom_pattern])/len(df)

# from the data, probability of having meningitis, regardless of symptoms
Pm = has_meningitis.sum()/len(df)

# from the data, probability of having headache, no fever, and vomiting, given a positive meningitis diagnosis
Phnfv_g_m = (has_meningitis & has_symptom_pattern).sum()/has_meningitis.sum()



In [52]:

    
# Bayes' Theorem, writing H = headache, NF = no fever, V = vomiting, M = meningitis:
#     P(M | H, NF, V) = P(H, NF, V | M) * P(M) / P(H, NF, V)
# Substituting the three estimates held in Phnfv_g_m, Pm and Phnfv gives the
# probability that a patient with headache, no fever and vomiting has meningitis:
(Phnfv_g_m * Pm) / Phnfv









    Out[52]:





0.33333333333333331

Using Naive Bayes



In [59]:

    
def conditional_probability(event, given):
    """Estimate P(event | given) as a relative frequency over the rows.

    Parameters
    ----------
    event : boolean pandas Series
        True on the rows where the event of interest occurs.
    given : boolean pandas Series
        True on the rows where the conditioning outcome occurs; must be
        aligned on the same index as `event`.

    Returns
    -------
    The number of rows where both `event` and `given` hold, divided by the
    number of rows where `given` holds.

    Notes
    -----
    Relies on true division (`from __future__ import division` on Python 2,
    the default on Python 3).
    """
    return (given & event).sum()/given.sum()


# Row-wise boolean indicators. The columns hold text, so each value is
# whitespace-stripped and compared against the literal 'true'/'false' once here
# rather than inside every probability expression.
has_headache = df.Headache.str.strip() == 'true'
no_fever = df.Fever.str.strip() == 'false'
has_vomiting = df.Vomiting.str.strip() == 'true'
has_meningitis = df.Meningitis.str.strip() == 'true'
# Negative diagnosis is matched explicitly against 'false' (not as the
# complement of has_meningitis), so rows with any other value count as neither.
no_meningitis = df.Meningitis.str.strip() == 'false'

# probability of headache given positive meningitis diagnosis
Ph_g_m = conditional_probability(has_headache, has_meningitis)

# probability of no fever given positive meningitis diagnosis
Pnf_g_m = conditional_probability(no_fever, has_meningitis)

# probability of vomiting given positive meningitis diagnosis
Pv_g_m = conditional_probability(has_vomiting, has_meningitis)

# probability of headache given negative meningitis diagnosis
Ph_g_nm = conditional_probability(has_headache, no_meningitis)

# probability of no fever given negative meningitis diagnosis
Pnf_g_nm = conditional_probability(no_fever, no_meningitis)

# probability of vomiting, given negative meningitis diagnosis
Pv_g_nm = conditional_probability(has_vomiting, no_meningitis)



In [60]:

    
# to sum it up, here are the probabilities of headache, no fever, and vomiting, given a positive meningitis diagnosis...
# A single pre-formatted string is passed to print so that the line is valid on
# both Python 2 (where it prints the same space-separated text as the old
# `print a, b, c` statement) and Python 3 (where that statement is a SyntaxError).
print('%s %s %s' % (Ph_g_m, Pnf_g_m, Pv_g_m))









    



0.666666666667 0.666666666667 0.666666666667



In [41]:

    
# ... and here are the probabilities of headache, no fever, and vomiting, given a negative meningitis diagnosis
# A single pre-formatted string is passed to print so that the line is valid on
# both Python 2 (where it prints the same space-separated text as the old
# `print a, b, c` statement) and Python 3 (where that statement is a SyntaxError).
print('%s %s %s' % (Ph_g_nm, Pnf_g_nm, Pv_g_nm))









    



0.714285714286 0.571428571429 0.571428571429



In [62]:

    
# Denominator of the naive Bayes estimator: the overall probability of
# headache, no fever, and vomiting, obtained by summing over both diagnoses
# (meningitis / no meningitis) with the symptoms treated as conditionally
# independent given the diagnosis.
(
    Ph_g_m * Pnf_g_m * Pv_g_m * Pm
    + Ph_g_nm * Pnf_g_nm * Pv_g_nm * (1 - Pm)
)









    Out[62]:





0.25215419501133784



In [63]:

    
# Numerator of the naive Bayes estimator: the joint probability of headache,
# no fever, vomiting AND meningitis, i.e. the per-symptom conditionals given
# meningitis (symptoms assumed conditionally independent) times the prior Pm.
(Ph_g_m * Pnf_g_m * Pv_g_m) * Pm









    Out[63]:





0.088888888888888878



In [64]:

    
# Naive Bayes: treating the symptoms as conditionally independent given the
# diagnosis, with H = headache, NF = no fever, V = vomiting, M = meningitis:
#
#   P(M | H, NF, V) =
#         P(H|M) P(NF|M) P(V|M) P(M)
#       / ( P(H|M) P(NF|M) P(V|M) P(M) + P(H|not M) P(NF|not M) P(V|not M) P(not M) )
#
# so the probability of meningitis given headache, no fever, and vomiting is approx.:
(Ph_g_m * Pnf_g_m * Pv_g_m * Pm) / (
    Ph_g_m * Pnf_g_m * Pv_g_m * Pm + Ph_g_nm * Pnf_g_nm * Pv_g_nm * (1 - Pm)
)









    Out[64]:





0.35251798561151076

	ID	Headache	Fever	Vomiting	Meningitis
0	1	true	true	false	false
1	2	false	true	false	false
2	3	true	false	true	false
3	4	true	false	true	false
4	5	false	true	false	true