This is code to accompany the Udacity course on the Naive Bayes algorithm.
This section looks at the probability of having cancer given a test for cancer.
In [2]:
# Prior: P(C) and its complement P(~C).
probability_of_cancer = 0.01
probability_no_cancer = 1 - probability_of_cancer

# The two measured properties of the test. They are independent quantities:
# sensitivity P(+|C) and specificity P(-|~C).
p_positive_given_cancer = .9
p_negative_given_no_cancer = .9

# Complements must be taken within the SAME conditioning event:
#   P(-|C)  = 1 - P(+|C)    (miss rate, from the sensitivity)
#   P(+|~C) = 1 - P(-|~C)   (false-positive rate, from the specificity)
p_negative_given_cancer = 1 - p_positive_given_cancer
p_positive_given_no_cancer = 1 - p_negative_given_no_cancer
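So there's only a 1% chance that a person has cancer, and a 90% chance that a person who has cancer tests positive (the test's sensitivity). Likewise, a person who does not have cancer tests negative 90% of the time (the test's specificity). Note that this is not the same as the chance that a person who tests positive actually has cancer; that is what is worked out below.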
In [4]:
# Numerators of Bayes' rule for a positive test: likelihood times prior.
# These are not yet normalized, so the two values do not add up to 1.
p_cancer_given_positive = p_positive_given_cancer * probability_of_cancer
p_no_cancer_given_positive = p_positive_given_no_cancer * probability_no_cancer

print("P(C|+) = {0:.4f}".format(p_cancer_given_positive))
print("P(~C|+) = {0:.4f}".format(p_no_cancer_given_positive))
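The two values above are unnormalized: each is a prior multiplied by a likelihood, so together they add up to the total probability of a positive test, P(+), rather than to 1. To make the posterior probabilities add up to 1 they have to be normalized by dividing each one by that sum.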
In [7]:
# Total probability of a positive test: the sum of both unnormalized terms.
normalizer = sum((p_cancer_given_positive, p_no_cancer_given_positive))
print(normalizer)
In [10]:
# Divide each unnormalized term by their sum to obtain the posteriors.
normalized_p_cancer_given_positive = p_cancer_given_positive / normalizer
normalized_p_no_cancer_given_positive = p_no_cancer_given_positive / normalizer

# Show both posteriors, then their total as a sanity check.
for posterior in (
    normalized_p_cancer_given_positive,
    normalized_p_no_cancer_given_positive,
    normalized_p_cancer_given_positive + normalized_p_no_cancer_given_positive,
):
    print(posterior)
In [39]:
class Mailer(object):
    """
    Naive Bayes model of a single e-mail author.

    Stores the author's per-word probabilities together with the prior
    probability that any given mail was sent by this author.
    """

    # Absolute slack allowed when checking that the word probabilities sum
    # to 1; binary floating point can leave a valid distribution a few
    # rounding errors away from exactly 1.0.
    SUM_TOLERANCE = 1e-9

    def __init__(self, probabilities, person_probability):
        """
        :param:
         - `probabilities`: dictionary of probabilities for words in mail
         - `person_probability`: fraction of mail sent by this person
        :raise: AssertionError if the word probabilities do not sum to 1
                (within `SUM_TOLERANCE`)
        """
        total = sum(probabilities.values())
        # Compare with a tolerance instead of `== 1` so that rounding error
        # in the inputs does not reject an otherwise valid distribution.
        assert abs(total - 1) <= self.SUM_TOLERANCE, probabilities
        self.probabilities = probabilities
        self.person_probability = person_probability

    def probability(self, words):
        """
        :param:
         - `words`: list of words in mail
        :return: unnormalized score for this person: `person_probability`
                 multiplied by the probability of every word in `words`.
                 Divide by the sum of the scores of all candidate authors
                 to obtain the probability that this person wrote the mail.
        :raise: KeyError if a word is not in `probabilities`
        """
        # Start from the prior so the result already includes it.
        probability = self.person_probability
        for word in words:
            probability *= self.probabilities[word]
        return probability
In [41]:
# Both senders have the same prior probability of being the author.
p_chris = p_sara = .5

# One model per sender: word probabilities plus that sender's prior.
chris = Mailer(
    probabilities={'love': .1, 'deal': .8, 'life': .1},
    person_probability=p_chris,
)
sara = Mailer(
    probabilities={'love': .5, 'deal': .2, 'life': .3},
    person_probability=p_sara,
)
In [42]:
# Score the message for each sender. Mailer.probability starts from the
# sender's prior, so these scores already include it.
text = 'life deal'.split()
p_text_given_chris, p_text_given_sara = (
    person.probability(text) for person in (chris, sara)
)
# Total score of the text across both senders.
p_text = sum((p_text_given_sara, p_text_given_chris))
In [47]:
# Mailer.probability already multiplies by the sender's prior, so
# p_text_given_chris / p_text_given_sara are prior * likelihood and p_text is
# their sum. Dividing by p_text therefore gives the posterior directly;
# multiplying by the prior again would count it twice.
p_chris_given_text = p_text_given_chris / p_text
p_sara_given_text = p_text_given_sara / p_text
print(p_chris_given_text)
print(p_sara_given_text)
In [44]:
# Rescale the two values by their sum so that the printed pair adds up to 1.
normalizer = sum((p_chris_given_text, p_sara_given_text))
for unnormalized in (p_chris_given_text, p_sara_given_text):
    print(unnormalized / normalizer)
In [45]:
# Score a second message for each sender. Mailer.probability starts from the
# sender's prior, so these scores already include it.
text = 'love deal'.split()
p_text_given_chris, p_text_given_sara = (
    person.probability(text) for person in (chris, sara)
)
# Total score of the text across both senders.
p_text = sum((p_text_given_sara, p_text_given_chris))
In [46]:
# Mailer.probability already multiplies by the sender's prior, so
# p_text_given_chris / p_text_given_sara are prior * likelihood and p_text is
# their sum. Dividing by p_text therefore gives the posterior directly;
# multiplying by the prior again would count it twice.
p_chris_given_text = p_text_given_chris / p_text
p_sara_given_text = p_text_given_sara / p_text
# The posteriors already add up to (approximately) 1; the division below is
# kept so the cell prints the same quantities as before.
normalizer = p_chris_given_text + p_sara_given_text
print(p_chris_given_text/normalizer)
print(p_sara_given_text/normalizer)
In [50]:
# Prior probability of the disease.
p_spleenitis = .008

# The two measured properties of the test: sensitivity P(+|S) and
# specificity P(-|~S). They are independent quantities.
p_positive_given_spleenitis = .98
p_negative_given_not_spleenitis = .97

# Complements must be taken within the SAME conditioning event:
#   P(-|S)  = 1 - P(+|S)    (miss rate, from the sensitivity)
#   P(+|~S) = 1 - P(-|~S)   (false-positive rate, from the specificity)
p_negative_given_spleenitis = 1 - p_positive_given_spleenitis
p_positive_given_not_spleenitis = 1 - p_negative_given_not_spleenitis
In [51]:
# Numerator of Bayes' rule only: prior times likelihood of a positive test.
# NOTE(review): this value is not divided by P(+), so it is not yet a
# normalized posterior -- confirm that a later cell performs the division.
p_spleenitis_given_positive = p_spleenitis * p_positive_given_spleenitis
print(p_spleenitis_given_positive)