In [1]:
# Import packages
In [50]:
%matplotlib inline
%config InlineBackend.figure_formats = {'png', 'retina'}
import scipy.stats
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
In [73]:
# Synthetic data set: two subject scores (features) for three students (classes).
# Task: given a pair of subject scores, predict which student (0, 1 or 2) they belong to.
SEED = 0
# One shared random stream for every draw, so a fresh "Restart & Run All"
# regenerates exactly the same samples (and therefore the same metrics/plots).
rng = np.random.RandomState(SEED)
# student 0: 100 samples, subject scores ~ N(80, 1) and N(70, 1)
X00 = scipy.stats.norm(80, 1).rvs(100, random_state=rng)
X01 = scipy.stats.norm(70, 1).rvs(100, random_state=rng)
X0 = np.column_stack([X00, X01])
# student 1: 200 samples, subject scores ~ N(70, 5) and N(90, 5)
X10 = scipy.stats.norm(70, 5).rvs(200, random_state=rng)
X11 = scipy.stats.norm(90, 5).rvs(200, random_state=rng)
X1 = np.column_stack([X10, X11])
# student 2: 300 samples, subject scores ~ N(70, 1) and N(80, 5)
X20 = scipy.stats.norm(70, 1).rvs(300, random_state=rng)
X21 = scipy.stats.norm(80, 5).rvs(300, random_state=rng)
X2 = np.column_stack([X20, X21])
# stack the three students' samples vertically -> feature matrix of shape (600, 2)
X = np.vstack([X0, X1, X2])
# class labels: 0, 1 and 2, one per sample, in the same order as the rows of X
y0 = np.zeros(100)
y1 = np.ones(200)
y2 = np.ones(300)*2
# column vector of shape (600, 1) so it can be appended to X as a third column
y = np.hstack([y0, y1, y2])[:, np.newaxis]
# features and label side by side, used to build the pandas DataFrame
m = np.hstack([X, y])
X.shape, y.shape, m.shape
Out[73]:
In [74]:
# Wrap the merged feature/label matrix in a labelled DataFrame:
# the two subject-score columns followed by the student (class) label.
column_names = ["subject_0", "subject_1", "student"]
df = pd.DataFrame(data=m, columns=column_names)
# Show the last rows of the frame as a quick sanity check.
df.tail()
Out[74]:
In [75]:
# Pair plot of df, coloured by the "student" column (one hue level per
# student), to compare the three classes visually.
sns.pairplot(df, hue="student")
Out[75]:
In [76]:
# Scatter subject_0 against subject_1, using the class label y to pick the
# colour of each point from a discrete "Set3" colormap.
palette = sns.color_palette("Set3")
cmap = mpl.colors.ListedColormap(palette)
plt.scatter(df["subject_0"], df["subject_1"], s=50, c=y, cmap=cmap)
Out[76]:
In [77]:
# Rebuild the labels as a flat (600,) vector instead of the (600, 1) column
# vector used above; fitting with the column vector triggers a
# DataConversionWarning.
y = np.concatenate((y0, y1, y2))
# Fit a Gaussian naive Bayes classifier on the two subject scores.
gnb = GaussianNB()
clf_norm = gnb.fit(X, y)
In [78]:
# Class labels the fitted classifier knows about.
clf_norm.classes_
Out[78]:
In [79]:
# Number of training samples observed for each class.
clf_norm.class_count_
Out[79]:
In [80]:
# Prior probability of each class; GaussianNB() was created without explicit
# priors, so these come from the class frequencies in the training data.
clf_norm.class_prior_
Out[80]:
In [81]:
# Estimated mean of each feature within each class (rows: classes, columns: features).
clf_norm.theta_
Out[81]:
In [87]:
# Estimated variance of each feature within each class (i.e. sigma squared,
# not the standard deviation).
# scikit-learn renamed `sigma_` to `var_` (deprecated in 1.0, removed in 1.2),
# so prefer `var_` and fall back to `sigma_` only on older releases.
clf_norm.var_ if hasattr(clf_norm, "var_") else clf_norm.sigma_
Out[87]:
In [88]:
# Predict on the same X the model was fitted on (in-sample predictions).
y_pred = clf_norm.predict(X)
In [89]:
# Confusion matrix of the true labels (y) against the predictions (y_pred).
confusion_matrix(y, y_pred)
Out[89]:
In [90]:
# Text report of the classification metrics for y vs. y_pred; print() is used
# so the report string is rendered with its line breaks.
print(classification_report(y, y_pred))
# Original author's note: this model's performance is good.
# NOTE(review): y_pred was computed on the same X the model was fitted on, so
# these are training-set scores, not held-out performance.
In [91]:
# Plot the classifier's decision regions over the score plane and draw the
# training samples on top.
xmin, xmax = 50, 90
ymin, ymax = 60, 110
GRID_STEPS = 1000  # number of grid points along each axis
# linspace(..., endpoint=False) yields exactly GRID_STEPS evenly spaced points
# starting at the lower bound; passing a float step to arange can produce an
# unpredictable number of points because of rounding.
XX, YY = np.meshgrid(
    np.linspace(xmin, xmax, GRID_STEPS, endpoint=False),
    np.linspace(ymin, ymax, GRID_STEPS, endpoint=False),
)
# Predict a class for every grid point, then fold the flat predictions back
# into the grid's 2-D shape for contourf.
grid_points = np.column_stack([XX.ravel(), YY.ravel()])
ZZ = clf_norm.predict(grid_points).reshape(XX.shape)
cmap = mpl.colors.ListedColormap(sns.color_palette("Set3"))
plt.contourf(XX, YY, ZZ, cmap=cmap, alpha=0.5)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap=cmap)
# X[:, 0] and X[:, 1] are the columns named subject_0 and subject_1 in df.
plt.xlabel("subject_0")
plt.ylabel("subject_1")
plt.xlim(xmin, xmax)
plt.ylim(ymin, ymax)
Out[91]:
In [ ]: