In [44]:
import pandas as pd
import numpy as np
from __future__ import division # this is to avoid problems when we divide integers (remember 3/5=0 in Python 2, unless we do this step)
In [47]:
# Load the Chapter 6 meningitis-diagnosis table.
# The first line of the file (presumably its own header row) is skipped and
# the columns are labelled explicitly instead.
column_names = ['ID', 'Headache', 'Fever', 'Vomiting', 'Meningitis']
df = pd.read_csv('./Table6-1.csv', names=column_names, skiprows=[0])
# preview the first few rows
df.head()
Out[47]:
In [51]:
# Build every boolean row mask exactly once. Labels are whitespace-stripped
# before comparison so padded values such as ' true' still match.
has_headache = df.Headache.str.strip() == 'true'
no_fever = df.Fever.str.strip() == 'false'
is_vomiting = df.Vomiting.str.strip() == 'true'
has_meningitis = df.Meningitis.str.strip() == 'true'
# rows showing the full symptom pattern: headache, no fever, and vomiting
has_hnfv = has_headache & no_fever & is_vomiting

# NOTE: the ratios below rely on true division (under Python 2 this comes from
# the `from __future__ import division` import at the top of the notebook).

# from the data, probability of having headache, no fever, and vomiting, regardless of meningitis diagnosis
Phnfv = has_hnfv.sum()/len(df)
# from the data, probability of having meningitis, regardless of symptoms
Pm = has_meningitis.sum()/len(df)
# from the data, probability of having headache, no fever, and vomiting, given a positive meningitis diagnosis
Phnfv_g_m = (has_meningitis & has_hnfv).sum()/has_meningitis.sum()
In [52]:
# Bayes' Theorem applied directly to the empirical probabilities:
#   P(meningitis | headache, no fever, vomiting)
#     = P(headache, no fever, vomiting | meningitis) * P(meningitis)
#       / P(headache, no fever, vomiting)
# i.e. the probability that a patient with headache, no fever and vomiting has meningitis.
Pm_g_hnfv = Phnfv_g_m*Pm/Phnfv
Pm_g_hnfv
Out[52]:
In [59]:
# Build every boolean row mask exactly once. Labels are whitespace-stripped
# before comparison so padded values such as ' true' still match.
has_headache = df.Headache.str.strip() == 'true'
no_fever = df.Fever.str.strip() == 'false'
is_vomiting = df.Vomiting.str.strip() == 'true'
has_meningitis = df.Meningitis.str.strip() == 'true'
no_meningitis = df.Meningitis.str.strip() == 'false'


def _prob_given(event, given):
    """Return the empirical conditional probability P(event | given).

    Both arguments are boolean Series over the rows of ``df``. The result is
    the number of rows where both hold divided by the number of rows where
    ``given`` holds. Relies on true division (under Python 2, provided by the
    ``from __future__ import division`` import at the top of the notebook).
    """
    return (event & given).sum()/given.sum()


# probability of headache given positive meningitis diagnosis
Ph_g_m = _prob_given(has_headache, has_meningitis)
# probability of no fever given positive meningitis diagnosis
Pnf_g_m = _prob_given(no_fever, has_meningitis)
# probability of vomiting given positive meningitis diagnosis
Pv_g_m = _prob_given(is_vomiting, has_meningitis)
# probability of headache given negative meningitis diagnosis
Ph_g_nm = _prob_given(has_headache, no_meningitis)
# probability of no fever given negative meningitis diagnosis
Pnf_g_nm = _prob_given(no_fever, no_meningitis)
# probability of vomiting, given negative meningitis diagnosis
Pv_g_nm = _prob_given(is_vomiting, no_meningitis)
In [60]:
# to sum it up, here are the probabilities of headache, no fever, and vomiting, given a positive meningitis diagnosis...
# (joined into one string first so this line prints the same space-separated
# values as a Python 2 print statement and is also valid on Python 3)
print(' '.join(str(p) for p in (Ph_g_m, Pnf_g_m, Pv_g_m)))
In [41]:
# ... and here are the probabilities of headache, no fever, and vomiting, given a negative meningitis diagnosis
# (joined into one string first so this line prints the same space-separated
# values as a Python 2 print statement and is also valid on Python 3)
print(' '.join(str(p) for p in (Ph_g_nm, Pnf_g_nm, Pv_g_nm)))
In [62]:
# Denominator of the naive Bayes estimator: the overall probability of
# headache, no fever, and vomiting, obtained by summing over both diagnoses
# (symptoms assumed conditionally independent given the diagnosis).
joint_m = Ph_g_m*Pnf_g_m*Pv_g_m*Pm            # symptoms together with meningitis
joint_nm = Ph_g_nm*Pnf_g_nm*Pv_g_nm*(1-Pm)    # symptoms together with no meningitis
Phnfv_naive = joint_m + joint_nm
Phnfv_naive
Out[62]:
In [63]:
# Numerator of the naive Bayes estimator: the joint probability of meningitis
# together with headache, no fever, and vomiting,
#   P(headache|meningitis)P(no fever|meningitis)P(vomiting|meningitis)P(meningitis)
# (symptoms assumed conditionally independent given the diagnosis).
Phnfv_and_m_naive = Ph_g_m*Pnf_g_m*Pv_g_m*Pm
Phnfv_and_m_naive
Out[63]:
In [64]:
# Naive Bayes: with all symptoms assumed conditionally independent given the diagnosis,
#   P(meningitis | headache, no fever, vomiting)
#     = P(headache|meningitis)P(no fever|meningitis)P(vomiting|meningitis)P(meningitis)
#       / ( P(headache|meningitis)P(no fever|meningitis)P(vomiting|meningitis)P(meningitis)
#         + P(headache|no meningitis)P(no fever|no meningitis)P(vomiting|no meningitis)P(no meningitis) )
joint_m = Ph_g_m*Pnf_g_m*Pv_g_m*Pm            # numerator: symptoms together with meningitis
joint_nm = Ph_g_nm*Pnf_g_nm*Pv_g_nm*(1-Pm)    # symptoms together with no meningitis
# so the probability of meningitis given headache, no fever, and vomiting is approx.:
Pm_g_hnfv_naive = joint_m/(joint_m + joint_nm)
Pm_g_hnfv_naive
Out[64]: