Bayesian classification methods


In [ ]:
%pylab inline
import pandas as pd
from matplotlib import pyplot as plt
import seaborn; seaborn.set()
from ipywidgets import interact
pylab.rcParams['figure.figsize'] = (10.0, 8.0)
autumn()  # set the default colormap
scatter_args = dict(s=100, edgecolor='black', linewidth=1.5)

In [ ]:
np.random.seed(sum(map(ord, "shad")))
n = 100
df = pd.DataFrame(
    np.vstack([
        np.random.normal(loc=0, scale=2, size=(n, 1)),
        np.random.normal(loc=6, scale=3, size=(n, 1))
    ]), columns=['x'])

df['target'] = np.hstack([np.ones(n), np.zeros(n)])  # class 1 ~ N(0, 2), class 0 ~ N(6, 3)
figsize(10.0, 5.0)
x = np.arange(-5, 15, 0.1)
def gaussian(x, mean, std):
    return np.exp(-(x - mean)**2 / (2 * std**2)) / (std * np.sqrt(2 * np.pi))
xlim((-5, 15))
# Densities are scaled by n so they stay visible next to the jittered scatter below.
fill_between(x, gaussian(x, 0, 2) * n, 0, edgecolor='black', linewidth=1.5, color='yellow', alpha=0.5)
fill_between(x, gaussian(x, 6, 3) * n, 0, edgecolor='black', linewidth=1.5, color='red', alpha=0.5)
plt.scatter(df.x, np.random.randn(2 * n), c=df.target, **scatter_args);
$$ p(x, y) = p(x) P(y|x) = P(y) p(x|y)$$

$$ a(x) = \arg \max_{y} \lambda_y P(y) p(x|y)$$
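
A minimal sketch of this rule on the synthetic data above, assuming equal priors $P(y) = 1/2$ and unit losses $\lambda_y = 1$ (both hold here by construction):

In [ ]:
# Bayes rule for the two known generating Gaussians: pick the class
# with the larger P(y) p(x|y); equal priors and losses cancel out.
def bayes_classify(values):
    p1 = gaussian(values, 0, 2)  # class 1 was drawn from N(0, 2)
    p0 = gaussian(values, 6, 3)  # class 0 was drawn from N(6, 3)
    return (p1 > p0).astype(int)

predicted = bayes_classify(df.x.values)
print("training accuracy:", np.mean(predicted == df.target.values))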

Naive Bayes classifier

$$ p(c| x^1, \dots, x^m) = \frac{p(c)p(x^1, \dots, x^m|c)}{p(x^1, \dots, x^m)} \propto p(c)p(x^1, \dots, x^m|c) \approx p(c)\prod_{i=1}^m p(x^i | c)$$

$$ p(c| x^1, \dots, x^m) \propto e^{\log{p(c)} + \sum_{i=1}^m \log{p(x^i | c)}} $$

$$ p(c| x^1, \dots, x^m) = \frac{e^{\log{p(c)} + \sum_{i=1}^m \log{p(x^i | c)}}}{\sum_{k=1}^K e^{\log{p(c_k)} + \sum_{i=1}^m \log{p(x^i | c_k)}}}$$
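
A sketch of this normalization computed stably in log space, using hypothetical log-prior and per-feature log-likelihood values (the numbers are made up):

In [ ]:
from scipy.special import logsumexp

log_prior = np.log([0.5, 0.5])            # log p(c_k) for K = 2 classes
log_lik = np.array([[-1.2, -0.7],         # log p(x^i | c_k), one row per
                    [-2.3, -0.4]])        # feature (m = 2), values made up
log_post = log_prior + log_lik.sum(axis=0)
posterior = np.exp(log_post - logsumexp(log_post))  # subtract max for stability
print(posterior, posterior.sum())         # sums to 1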

Maximum likelihood: $$ p(c) = \frac{N_c}{\sum_{k=1}^K N_{c_k}}$$

Add-one smoothing: $$ p(c) = \frac{N_c + 1}{K + \sum_{k=1}^K N_{c_k}}$$

Additive smoothing: $$ p(c) = \frac{N_c + \alpha_c}{\sum_{k=1}^K \left(N_{c_k} + \alpha_{c_k}\right)}$$
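
A worked example of the three prior estimates, assuming hypothetical class counts $N_c = (3, 0, 7)$ over $K = 3$ classes; note that smoothing keeps the empty class from getting zero probability:

In [ ]:
counts = np.array([3, 0, 7])              # hypothetical class counts
alpha = np.array([0.5, 0.5, 0.5])         # assumed pseudo-counts
print(counts / counts.sum())                        # maximum likelihood
print((counts + 1) / (len(counts) + counts.sum()))  # add-one smoothing
print((counts + alpha) / (counts + alpha).sum())    # additive smoothing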

Kernel density estimation (KDE)


In [ ]:
def plot_kde(kernel, width=0.2):
    # The kernel/bw arguments below require seaborn < 0.11;
    # newer versions only support the Gaussian kernel via bw_method.
    params = {"kernel": kernel, "legend": False, "bw": width}
    xlim((-5, 15))
    seaborn.kdeplot(df.x[df.target == 0], **params)
    seaborn.kdeplot(df.x[df.target == 1], **params)

interact(plot_kde, width=(0.05, 10, 0.05), kernel=['gau', 'cos', 'biw', 'epa', 'tri', 'triw']);
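
The same per-class densities can also be estimated with scikit-learn's KernelDensity, which still exposes several kernels; a sketch with an assumed bandwidth of 0.5:

In [ ]:
from sklearn.neighbors import KernelDensity

xlim((-5, 15))
for label in (0, 1):
    pts = df.x[df.target == label].values[:, np.newaxis]
    kde = KernelDensity(kernel='epanechnikov', bandwidth=0.5).fit(pts)
    plt.plot(x, np.exp(kde.score_samples(x[:, np.newaxis])))  # log-density -> density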

Maximum likelihood


In [ ]:
def plot_normal_distribution(label, color):
    # Fit a single Gaussian to the class by maximum likelihood.
    points = df.x[df.target == label]
    m = np.mean(points)
    s = np.std(points)
    fill_between(x, gaussian(x, m, s), 0, edgecolor='black', linewidth=1.5, color=color, alpha=0.5)

xlim((-5, 15))
plot_normal_distribution(0, 'red')
plot_normal_distribution(1, 'yellow')
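
For reference, scikit-learn's GaussianNB performs the same per-class Gaussian fit (with a single feature the naive independence assumption is vacuous); a quick sanity check:

In [ ]:
from sklearn.naive_bayes import GaussianNB

nb = GaussianNB().fit(df[['x']], df.target)
print(nb.theta_.ravel())  # fitted per-class means, compare to the true 6 and 0
print("training accuracy:", nb.score(df[['x']], df.target))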

Mixture


In [ ]:
from sklearn import mixture
components = [
    (2, 3),
    (8, 5),
    (4, 1),
]
# True component densities in blue.
for m, s in components:
    fill_between(x, gaussian(x, m, s), 0, edgecolor='black', linewidth=1.5, alpha=0.5, color='b')
n = 100
samples = np.hstack([
    np.random.normal(loc=m, scale=s, size=n) for m, s in components
])
# mixture.GMM was removed from scikit-learn; GaussianMixture is its replacement.
gmm = mixture.GaussianMixture(n_components=len(components), max_iter=100)
gmm.fit(samples[:, np.newaxis])
# Recovered component densities in yellow; covariances_ holds variances,
# so take the square root to get standard deviations.
for m, v in zip(gmm.means_.ravel(), gmm.covariances_.ravel()):
    fill_between(x, gaussian(x, m, np.sqrt(v)), 0, edgecolor='black', linewidth=1.5, alpha=0.5, color='y')
print(gmm.means_)
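
A short check of what the fit recovered, sorting components by mean since their order is arbitrary:

In [ ]:
order = np.argsort(gmm.means_.ravel())
print("means:  ", gmm.means_.ravel()[order])                 # true: 2, 4, 8
print("stds:   ", np.sqrt(gmm.covariances_.ravel())[order])  # true: 3, 1, 5
print("weights:", gmm.weights_[order])                       # true: 1/3 each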