In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import sklearn as sk
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
sns.set()
sns.set_color_codes()
%matplotlib inline
%config InlineBackend.figure_format='png'
# Python 2 compatibility: make / perform true division, as in Python 3
from __future__ import division
In [2]:
from sklearn.datasets import load_digits
digits = load_digits()
print(digits.DESCR)
print(digits.keys())
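A quick sanity check on what load_digits returned; data is simply the 8x8 images flattened to 64 features per sample:
In [ ]:
# data is the flattened (n_samples, 64) matrix; images keeps the 8x8 layout
print(digits.data.shape, digits.images.shape, digits.target.shape)
assert (digits.data[0] == digits.images[0].ravel()).all()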
In [60]:
N = 2; M = 5
fig = plt.figure(figsize=(10, 5))
plt.subplots_adjust(top=1, bottom=0, hspace=0, wspace=0.05)
for i in range(N):
    for j in range(M):
        k = i * M + j
        ax = fig.add_subplot(N, M, k + 1)
        ax.imshow(digits.images[k], cmap=plt.cm.bone, interpolation="none")
        ax.grid(False)
        ax.xaxis.set_ticks([])
        ax.yaxis.set_ticks([])
        plt.title(digits.target_names[k])
plt.tight_layout()
plt.show()
In [61]:
from sklearn.tree import DecisionTreeClassifier
tree1 = DecisionTreeClassifier(criterion='entropy', max_depth=5)
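criterion='entropy' scores candidate splits by information gain, i.e. the reduction in Shannon entropy; a minimal sketch of that impurity (node_entropy is a hypothetical helper, not part of scikit-learn):
In [ ]:
# Shannon entropy of a class-proportion vector; 0 for a pure node, log2(k) at worst
def node_entropy(p):
    p = np.asarray(p, dtype=float)
    p = p[p > 0]                      # 0 * log(0) is taken as 0
    return -(p * np.log2(p)).sum()
print(node_entropy([1.0, 0.0]), node_entropy([0.5, 0.5]))  # 0.0 (pure) and 1.0 (impure)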
In [62]:
plt.imshow(digits.images[1], interpolation="none");
In [63]:
X = digits.data
y = digits.target
In [68]:
tree1_fit = tree1.fit(X, y)  # fit returns the fitted estimator itself
In [36]:
tree1_fit.predict(X)
In [65]:
from sklearn.metrics import classification_report
In [69]:
print(classification_report(y, tree1_fit.predict(X), digits=4))
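This report scores the tree on the very data it was fit to, so it is optimistic. A hedged sketch of a fairer estimate with 5-fold cross-validation (using the module path of this scikit-learn version; it became model_selection in 0.18):
In [ ]:
# mean 5-fold cross-validated accuracy on held-out folds
from sklearn.cross_validation import cross_val_score
print(cross_val_score(tree1, X, y, cv=5).mean())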
In [70]:
digits.target_names
Out[70]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [71]:
import StringIO
import pydot
from sklearn.tree import export_graphviz
from IPython.core.display import Image

def draw_decision_tree(classifier):
    # serialize the fitted tree to Graphviz DOT text, then render it as a PNG
    command_buf = StringIO.StringIO()
    export_graphviz(classifier, out_file=command_buf)
    graph = pydot.graph_from_dot_data(command_buf.getvalue())
    return Image(graph.create_png())
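Under Python 3 the same helper would use io.StringIO, and pydot >= 1.2 returns a list from graph_from_dot_data; a hedged sketch:
In [ ]:
import io

def draw_decision_tree_py3(classifier):
    buf = io.StringIO()
    export_graphviz(classifier, out_file=buf)
    (graph,) = pydot.graph_from_dot_data(buf.getvalue())  # pydot >= 1.2 returns a list
    return Image(graph.create_png())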
In [72]:
# note: sklearn.cross_validation was renamed sklearn.model_selection in scikit-learn 0.18
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=1)
In [73]:
tree_fit_2 = tree1.fit(X_train, y_train)  # refits tree1 in place; tree_fit_2 is the same object
In [74]:
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_test, tree_fit_2.predict(X_test), digits=4))
print(confusion_matrix(y_test, tree_fit_2.predict(X_test)))
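Comparing train and test accuracy makes the generalization gap explicit; a minimal sketch:
In [ ]:
from sklearn.metrics import accuracy_score
print(accuracy_score(y_train, tree_fit_2.predict(X_train)))  # optimistic, seen data
print(accuracy_score(y_test, tree_fit_2.predict(X_test)))    # honest, held-out data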
In [75]:
draw_decision_tree(tree_fit_2)
Out[75]:
[decision tree rendered as a PNG graph]
In [76]:
digits.images[0]
In [78]:
# instructor's code: show one digit with the tree's prediction as the title
K = 0
plt.figure(figsize=(10, 10))
plt.imshow(digits.images[K], interpolation="nearest")
plt.grid(False)
plt.title(tree_fit_2.predict(X[K:K + 1])[0])
plt.show()
In [83]:
# boolean mask of the test samples the tree misclassified
idx_miss = y_test != tree_fit_2.predict(X_test)
X_miss = X_test[idx_miss]
y_miss = y_test[idx_miss]
In [86]:
K = 8
plt.imshow(X_miss[K].reshape(8, 8), interpolation="none")  # rows of X_miss are flat 64-vectors
plt.title(y_miss[K])
In [6]:
from sklearn.datasets import fetch_lfw_people
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
print(lfw_people.DESCR)
print(lfw_people.keys())
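A quick look at what fetch_lfw_people returned; with resize=0.4 the images should be roughly 50x37 grayscale pixels, and target indexes into target_names:
In [ ]:
print(lfw_people.images.shape, lfw_people.data.shape)
print(lfw_people.target_names)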
In [33]:
lfw_people.images[0].max()
In [58]:
print(lfw_people.images[0][-1,:])
In [55]:
idx = 0
plt.figure(figsize=(10, 10))
plt.imshow(lfw_people.images[idx], interpolation="nearest")  # cmap=plt.cm.bone for grayscale
plt.grid(False)
plt.title(lfw_people.target_names[lfw_people.target[idx]]);
Frequentism and Bayesianism: is there no algorithm that relies on the frequentist view alone?
What is conditional probability meant to tell us?
The two views cannot be considered separately.
Example: an urn holds white and black balls. You draw one ball but cannot see its color. The Bayesian assigns a probability that it is white and a probability that it is black, a degree of belief about this single unknown draw. But how do you get that number?
You actually draw many times.
Say 606 white and 394 black out of 1000 draws: the conditional probabilities use these frequentist values,
and the relative frequencies converge to 6/10 and 4/10.
The Bayesian perspective looks at a single draw; the frequentist probability comes from drawing 1000 times.
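A minimal simulation of the urn example (assuming, hypothetically, a true white-ball proportion of 0.6) shows the running relative frequency converging toward 6/10:
In [ ]:
# hypothetical urn: each draw is white with probability 0.6
np.random.seed(0)
draws = np.random.rand(1000) < 0.6             # True = white, False = black
print(draws.sum(), 1000 - draws.sum())         # counts near 600 / 400
freq = draws.cumsum() / np.arange(1.0, 1001.0)
print(freq[[9, 99, 999]])                      # estimate after 10, 100, 1000 draws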
The %logstart and %logstop magics write the session log to ipython_log.py in the .ipython folder.
Conditional probability is ...