In [1]:
import numpy as np
import pandas as pd
from itertools import product
from functools import reduce
In [2]:
# Discrete variables of the toy model, each mapped to the list of values it can
# take. Key order matters: it becomes the column order of the table built from
# the cross product of these value lists.
variables = dict(
    I=['h', 'vh'],
    G=['A', 'B'],
    GPA=['[0,3)', '[3,3.5)', '[3.5,4.0]'],
)
In [3]:
# One row per joint outcome: the cross product of every variable's values.
df = pd.DataFrame(
    list(product(*variables.values())),
    columns=list(variables.keys()),
)

# Draw the random weights from a seeded generator so the table -- and every
# probability computed from it further down -- is identical on each
# Restart & Run All.
rng = np.random.default_rng(42)
weights = rng.integers(1, 10, size=len(df))  # integers in [1, 9], like randint(1, 10)

# Normalise once so the 'prob' column sums to 1 (a valid joint distribution).
df['prob'] = weights / weights.sum()
df
Out[3]:
In [4]:
def joint_probability(df, y, xs, target='G'):
    """Joint probability P(target = y, xs) computed with the chain rule.

    Evaluates P(y) * P(x1 | y) * P(x2 | y, x1) * ... by narrowing the table
    one evidence variable at a time. Each conditional factor is the ratio of
    the probability mass left after the new filter to the mass before it.

    Parameters
    ----------
    df : pandas.DataFrame
        Probability table with one row per outcome, a ``'prob'`` column, the
        ``target`` column and one column per key of ``xs``.
    y : object
        Value of the ``target`` column to condition on.
    xs : dict
        Mapping ``column name -> observed value``. May be empty, in which
        case the marginal P(target = y) is returned.
    target : str, default 'G'
        Name of the class column.

    Returns
    -------
    float
        The joint probability; ``0.0`` as soon as the evidence seen so far
        has zero probability mass.
    """
    mask = df[target] == y
    p_prev = df.loc[mask, 'prob'].sum()  # P(y)
    joint = p_prev
    for col in xs:
        if p_prev == 0:
            # Nothing left to condition on; the joint is necessarily zero.
            return 0.0
        mask = mask & (df[col] == xs[col])
        p_curr = df.loc[mask, 'prob'].sum()  # P(y, x1, ..., x_i)
        # Conditional factor P(x_i | y, x1, ..., x_{i-1}).
        joint *= p_curr / p_prev
        p_prev = p_curr
    return joint
def joint_probability_with_cond_ind(df, y, xs, target='G'):
    """Joint probability P(target = y, xs) assuming conditional independence.

    Approximates the joint as P(y) * prod_i P(x_i | y), i.e. every evidence
    variable is treated as independent of the others once the class is known.

    Parameters
    ----------
    df : pandas.DataFrame
        Probability table with one row per outcome, a ``'prob'`` column, the
        ``target`` column and one column per key of ``xs``.
    y : object
        Value of the ``target`` column.
    xs : dict
        Mapping ``column name -> observed value``. May be empty, in which
        case the marginal P(target = y) is returned.
    target : str, default 'G'
        Name of the class column.

    Returns
    -------
    float
        The factorised joint probability; ``0.0`` when P(target = y) is zero.
    """
    df_y = df[df[target] == y]
    p_y = df_y['prob'].sum()
    if p_y == 0:
        # The conditionals P(x_i | y) are undefined, but the joint is zero.
        return 0.0
    joint = p_y
    for col in xs:
        # P(x_i | y) = P(x_i, y) / P(y); the raw mass alone is P(x_i, y).
        joint *= df_y.loc[df_y[col] == xs[col], 'prob'].sum() / p_y
    return joint
def naive_bayes(df, y, xs, target='G'):
    """Naive Bayes posterior P(target = y | xs).

    Each class gets the unnormalised score P(c) * prod_i P(x_i | c). The
    posterior for ``y`` is its score divided by the sum of the scores of all
    classes present in ``df[target]``, so the posteriors over the classes sum
    to 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Probability table with one row per outcome, a ``'prob'`` column, the
        ``target`` column and one column per key of ``xs``.
    y : object
        Class value whose posterior is requested.
    xs : dict
        Mapping ``column name -> observed value`` (the evidence).
    target : str, default 'G'
        Name of the class column.

    Returns
    -------
    float
        The posterior probability, or NaN when every class assigns zero
        probability to the evidence (the posterior is undefined).
    """
    def class_score(label):
        # Unnormalised posterior P(label) * prod_i P(x_i | label).
        rows = df[df[target] == label]
        prior = rows['prob'].sum()
        if prior == 0:
            return 0.0
        score = prior
        for col in xs:
            score *= rows.loc[rows[col] == xs[col], 'prob'].sum() / prior
        return score

    num = class_score(y)
    # Same scoring rule for every class, so numerator and denominator agree.
    den = sum(class_score(label) for label in df[target].unique())
    if den == 0:
        return float('nan')
    return num / den
In [5]:
# joint_probability for G == 'A' with GPA == '[3.5,4.0]' and I == 'vh'.
joint_probability(
    df,
    y='A',
    xs={'GPA': '[3.5,4.0]', 'I': 'vh'},
)
Out[5]:
In [6]:
# joint_probability for G == 'A' with GPA == '[3.5,4.0]' and I == 'h'.
joint_probability(
    df,
    y='A',
    xs={'GPA': '[3.5,4.0]', 'I': 'h'},
)
Out[6]:
In [7]:
# joint_probability for G == 'A' with GPA == '[3,3.5)' and I == 'h'.
joint_probability(
    df,
    y='A',
    xs={'GPA': '[3,3.5)', 'I': 'h'},
)
Out[7]:
In [8]:
# joint_probability for G == 'A' with GPA == '[3,3.5)' and I == 'vh'.
joint_probability(
    df,
    y='A',
    xs={'GPA': '[3,3.5)', 'I': 'vh'},
)
Out[8]:
In [9]:
# Conditional-independence variant for G == 'A', GPA == '[3.5,4.0]', I == 'vh'.
joint_probability_with_cond_ind(
    df,
    y='A',
    xs={'GPA': '[3.5,4.0]', 'I': 'vh'},
)
Out[9]:
In [10]:
# Conditional-independence variant for G == 'A', GPA == '[3.5,4.0]', I == 'h'.
joint_probability_with_cond_ind(
    df,
    y='A',
    xs={'GPA': '[3.5,4.0]', 'I': 'h'},
)
Out[10]:
In [11]:
# naive_bayes for class G == 'A' given GPA == '[3.5,4.0]' and I == 'vh'.
naive_bayes(
    df,
    y='A',
    xs={'GPA': '[3.5,4.0]', 'I': 'vh'},
)
Out[11]:
In [12]:
# naive_bayes for class G == 'B' given GPA == '[3.5,4.0]' and I == 'vh'.
naive_bayes(
    df,
    y='B',
    xs={'GPA': '[3.5,4.0]', 'I': 'vh'},
)
Out[12]:
In [13]:
# naive_bayes for class G == 'A' given GPA == '[3.5,4.0]' and I == 'h'.
naive_bayes(
    df,
    y='A',
    xs={'GPA': '[3.5,4.0]', 'I': 'h'},
)
Out[13]:
In [14]:
# naive_bayes for class G == 'B' given GPA == '[3.5,4.0]' and I == 'h'.
naive_bayes(
    df,
    y='B',
    xs={'GPA': '[3.5,4.0]', 'I': 'h'},
)
Out[14]:
In [15]:
# naive_bayes for class G == 'A' given GPA == '[0,3)' and I == 'h'.
naive_bayes(
    df,
    y='A',
    xs={'GPA': '[0,3)', 'I': 'h'},
)
Out[15]:
In [16]:
# naive_bayes for class G == 'B' given GPA == '[0,3)' and I == 'h'.
naive_bayes(
    df,
    y='B',
    xs={'GPA': '[0,3)', 'I': 'h'},
)
Out[16]:
In [ ]: