In [2]:
import pandas as pd


/usr/local/lib/python3.4/dist-packages/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0.
  .format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))

In [41]:
# Toy "play golf" data set: one (forecast, decision) observation per day.
columns = ["forecast", "play_golf"]

# Each distinct observation is repeated once per day it occurred, grouped by
# forecast so the row order is Sunny, Overcast, Rainy.
data = (
    [("Sunny", "Y")] * 3
    + [("Sunny", "N")] * 2
    + [("Overcast", "Y")] * 4
    + [("Rainy", "N")] * 3
    + [("Rainy", "Y")] * 2
)

# One DataFrame row per observed day.
df = pd.DataFrame(data=data, columns=columns)

In [42]:
# Contingency table: one row per forecast, one column per golf decision,
# each cell counting the days on which that combination was observed.
ct = pd.crosstab(df["forecast"], df["play_golf"])

print("Tally of outcomes\n")
print(ct)


Tally of outcomes

play_golf  N  Y
forecast       
Overcast   0  4
Rainy      3  2
Sunny      2  3

In [43]:
# Joint probability table: every tally divided by the total number of
# observations.
pdt = ct.div(len(data))

print("Probability Distribution Table\n")
print(pdt)


Probability Distribution Table

play_golf         N         Y
forecast                     
Overcast   0.000000  0.285714
Rainy      0.214286  0.142857
Sunny      0.142857  0.214286

In [44]:
# Joint probability of a single cell, addressed as (row label, column label)
# in one .loc lookup rather than two chained selections.
print("P(Sunny, N) = %g" % pdt.loc["Sunny", "N"])

# Marginal P(N): sum the "N" column over every forecast.
print("P(N) = %g" % pdt["N"].sum())

# Marginal P(Sunny): sum the "Sunny" row over both decisions.
# .loc is used because the .ix indexer no longer exists in pandas >= 1.0.
print("P(Sunny) = %g" % pdt.loc["Sunny"].sum())


P(Sunny, N) = 0.142857
P(N) = 0.357143
P(Sunny) = 0.357143

Bayes' Theorem

$\displaystyle P(A \mid B) = \frac{P(B \mid A) \cdot P(A)}{P(B)}$

Find the probability of playing golf given that the forecast is sunny:

$\displaystyle P(\text{Yes} \mid \text{Sunny}) = \frac{P(\text{Sunny} \mid \text{Yes}) \cdot P(\text{Yes})}{P(\text{Sunny})}$


In [49]:
# Boolean masks for the two events; every count below is derived from them.
is_sunny = df['forecast'] == 'Sunny'
is_yes = df['play_golf'] == 'Y'

# Plain-int day counts, so the divisions below yield ordinary Python floats.
n_days = len(df)
n_yes = int(is_yes.sum())
n_sunny = int(is_sunny.sum())
n_sunny_and_yes = int((is_sunny & is_yes).sum())

p_sy = n_sunny_and_yes / n_yes  # likelihood P(Sunny | Yes)
p_y = n_yes / n_days            # prior P(Yes)
p_s = n_sunny / n_days          # evidence P(Sunny)

# Bayes' theorem: posterior = likelihood * prior / evidence.
p_ys = p_sy * p_y / p_s
print("P(Yes | Sunny) = %g" % p_ys)


P(Yes | Sunny) = 0.6

In [ ]: