In [2]:
import pandas as pd
In [41]:
# Toy dataset: one (forecast, play_golf) pair per observation.
columns = ["forecast", "play_golf"]

# Rows are grouped by outcome and expanded by repetition; the resulting
# list has the same 14 tuples, in the same order, as a row-by-row listing.
data = (
    [("Sunny", "Y")] * 3
    + [("Sunny", "N")] * 2
    + [("Overcast", "Y")] * 4
    + [("Rainy", "N")] * 3
    + [("Rainy", "Y")] * 2
)

df = pd.DataFrame(data, columns=columns)
In [42]:
# Contingency table of raw counts: one row per forecast value,
# one column per play_golf outcome.
print("Tally of outcomes\n")
ct = pd.crosstab(df["forecast"], df["play_golf"])
print(ct)
In [43]:
# Turn the count table into joint probabilities by dividing every cell
# by the total number of observations.
print("Probability Distribution Table\n")
pdt = ct.div(len(data))
print(pdt)
In [44]:
# Read one joint probability and two marginals off the table `pdt`
# (rows are indexed by forecast, columns by play_golf outcome).
# Label-based .loc is used for row access: the older .ix indexer was
# removed in pandas 1.0 and raises AttributeError on current versions.
# A single .loc[row, col] lookup also avoids chained indexing.
print("P(Sunny, N) = %g" % pdt.loc["Sunny", "N"])
# Marginal over the "N" column: sum across all forecasts.
print("P(N) = %g" % pdt["N"].sum())
# Marginal over the "Sunny" row: sum across both outcomes.
print("P(Sunny) = %g" % pdt.loc["Sunny"].sum())
In [49]:
# Bayes' rule: P(Yes | Sunny) = P(Sunny | Yes) * P(Yes) / P(Sunny).
played_rows = df[df['play_golf'] == 'Y']
sunny_rows = df[df['forecast'] == 'Sunny']
total_rows = len(df)

# Likelihood P(Sunny | Yes): share of the "Y" rows whose forecast is Sunny.
p_sy = len(played_rows[played_rows['forecast'] == 'Sunny']) / len(played_rows)
# Prior P(Yes) and evidence P(Sunny), both taken over the full dataset.
p_y = len(played_rows) / total_rows
p_s = len(sunny_rows) / total_rows

p_ys = p_sy * p_y / p_s
print("P(Yes | Sunny) = %g" % p_ys)
In [ ]: