In [ ]:
%matplotlib inline
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn; seaborn.set()
from ipywidgets import interact

plt.rcParams['figure.figsize'] = (10.0, 8.0)
plt.rcParams['image.cmap'] = 'autumn'   # default colormap for the scatter plots below
scatter_args = dict(s=100, edgecolor='black', linewidth=1.5)
In [ ]:
np.random.seed(sum(map(ord, "shad")))
n = 100
df = pd.DataFrame(
    np.vstack([
        np.random.normal(loc=0, scale=2, size=(n, 1)),
        np.random.normal(loc=6, scale=3, size=(n, 1))
    ]), columns=['x'])
df['target'] = np.hstack([np.ones(n), np.zeros(n)])
plt.rcParams['figure.figsize'] = (10.0, 5.0)
x = np.arange(-5, 15, 0.1)

def gaussian(x, mean, std):
    """Density of the normal distribution N(mean, std**2) evaluated at x."""
    return np.exp(-(x - mean) ** 2 / (2 * std ** 2)) / (std * np.sqrt(2 * np.pi))

plt.xlim((-5, 15))
# densities are scaled by 100 so they are visible next to the jittered sample points
plt.fill_between(x, gaussian(x, 0, 2) * 100, 0, edgecolor='black', linewidth=1.5, color='yellow', alpha=0.5)
plt.fill_between(x, gaussian(x, 6, 3) * 100, 0, edgecolor='black', linewidth=1.5, color='red', alpha=0.5)
plt.scatter(df.x, np.random.randn(2 * n), c=df.target, **scatter_args);
Estimating the class prior $p(c)$ from the class counts $N_{c_k}$, where the $K$ classes are $c_1, \dots, c_K$:

Maximum likelihood: $$ p(c) = \frac{N_c}{\sum_{k=1}^K N_{c_k}} $$
Add-one (Laplace) smoothing: $$ p(c) = \frac{N_c + 1}{K + \sum_{k=1}^K N_{c_k}} $$
Additive smoothing: $$ p(c) = \frac{N_c + \alpha_c}{\sum_{k=1}^K \left( N_{c_k} + \alpha_{c_k} \right)} $$
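A quick numeric check of these three estimates on the `target` column of the toy dataset above; the helper `class_priors` and the pseudo-count $\alpha_c = 0.5$ are only illustrative choices, not part of any library.
In [ ]:
def class_priors(labels, alpha=None):
    """Estimate p(c) from an array of labels; alpha maps each class to its pseudo-count."""
    classes, counts = np.unique(labels, return_counts=True)
    if alpha is None:  # maximum likelihood: no pseudo-counts
        pseudo = np.zeros_like(counts, dtype=float)
    else:
        pseudo = np.array([alpha[c] for c in classes], dtype=float)
    probs = (counts + pseudo) / (counts + pseudo).sum()
    return dict(zip(classes, probs))

print(class_priors(df.target))                           # maximum likelihood
print(class_priors(df.target, alpha={0: 1, 1: 1}))       # add-one smoothing
print(class_priors(df.target, alpha={0: 0.5, 1: 0.5}))   # additive smoothing with alpha_c = 0.5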
In [ ]:
def plot_kde(kernel, width=0.2):
    # `kernel` and `bw` follow the older seaborn.kdeplot API (seaborn < 0.11)
    params = {"kernel": kernel, "legend": False, "bw": width}
    plt.xlim((-5, 15))
    seaborn.kdeplot(df.x[df.target == 0], **params)
    seaborn.kdeplot(df.x[df.target == 1], **params)

interact(plot_kde, width=(0.05, 10, 0.05), kernel=['gau', 'cos', 'biw', 'epa', 'tri', 'triw']);
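The same per-class densities can be turned into a classifier by comparing $p(x \mid c)\,p(c)$ between the two classes. A minimal sketch with `scipy.stats.gaussian_kde` (Gaussian kernel only); the bandwidth 0.2, the evaluation grid, and the variable names are illustrative.
In [ ]:
from scipy.stats import gaussian_kde

kde0 = gaussian_kde(df.x[df.target == 0], bw_method=0.2)
kde1 = gaussian_kde(df.x[df.target == 1], bw_method=0.2)
prior0 = np.mean(df.target == 0)
prior1 = np.mean(df.target == 1)

grid = np.arange(-5, 15, 0.1)
# pick the class with the larger p(x | c) * p(c) at each grid point
predicted = (kde1(grid) * prior1 > kde0(grid) * prior0).astype(int)
plt.scatter(grid, np.zeros_like(grid), c=predicted, **scatter_args);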
In [ ]:
def plot_normal_distribution(label, color):
    points = df.x[df.target == label]
    m = np.mean(points)
    s = np.std(points)
    plt.fill_between(x, gaussian(x, m, s), 0, edgecolor='black', linewidth=1.5, color=color, alpha=0.5)

plt.xlim((-5, 15))
plot_normal_distribution(0, 'red')
plot_normal_distribution(1, 'yellow')
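Fitting a mean and a standard deviation per class is exactly the class-conditional model behind Gaussian naive Bayes; a minimal sketch with `sklearn.naive_bayes.GaussianNB` on the same single feature (the two query points are chosen only for illustration).
In [ ]:
from sklearn.naive_bayes import GaussianNB

clf = GaussianNB()
clf.fit(df[['x']].values, df.target)          # per-class mean and variance of the single feature
print(clf.theta_)                             # fitted class means
print(clf.predict_proba(np.array([[0.0], [6.0]])))  # posterior p(c | x) at two points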
In [ ]:
from sklearn import mixture

# true mixture components: (mean, std)
components = [
    (2, 3),
    (8, 5),
    (4, 1),
]
for m, s in components:
    plt.fill_between(x, gaussian(x, m, s), 0, edgecolor='black', linewidth=1.5, alpha=0.5, color='b')

n = 100
samples = np.hstack([
    np.random.normal(loc=m, scale=s, size=n) for m, s in components
])

# GMM was renamed to GaussianMixture (and n_iter to max_iter) in modern scikit-learn
gmm = mixture.GaussianMixture(n_components=len(components), max_iter=100)
gmm.fit(samples[:, np.newaxis])
for i, _ in enumerate(components):
    m = gmm.means_[i, 0]
    s = np.sqrt(gmm.covariances_[i, 0, 0])   # covariances_ stores variances, not standard deviations
    plt.fill_between(x, gaussian(x, m, s), 0, edgecolor='black', linewidth=1.5, alpha=0.5, color='y')
print(gmm.means_)
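Once fitted, the mixture is a full density model: `score_samples` returns log densities, `predict` assigns each point to its most likely component, and `weights_` holds the mixing proportions. The evaluation grid and query points below are just for illustration.
In [ ]:
grid = np.arange(-5, 15, 0.1)[:, np.newaxis]
density = np.exp(gmm.score_samples(grid))      # mixture density p(x) on the grid
plt.plot(grid.ravel(), density, color='black')
print(gmm.predict(np.array([[2.0], [8.0]])))   # most likely component for two points
print(gmm.weights_)                            # fitted mixing proportions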