In [ ]:
# ライブラリのインポート
from itertools import product
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import cross_val_score
from IPython.display import Image
import pydotplus
In [ ]:
# Load the iris dataset and take a first look at it.
iris = datasets.load_iris()

print("Iris Data")
print("data = ", iris.feature_names)
print("target = ", iris.target_names)

# Tabular view: one row per sample, with the class label as a 'target' column.
df = pd.DataFrame(iris.data).assign(target=iris.target)
df
In [ ]:
# Scatter plots of four feature pairs, colored by class label.
plt.clf()
f, axarr = plt.subplots(2, 2, sharex='col', sharey='row', figsize=(11, 10))
feature_pairs = [(0, 1), (1, 2), (0, 3), (2, 3)]
for position, (i, j) in enumerate(feature_pairs):
    ax = axarr[position // 2, position % 2]
    ax.scatter(iris.data[:, i], iris.data[:, j], c=iris.target, alpha=0.6)
    ax.set_title(iris.feature_names[i] + " vs " + iris.feature_names[j])
plt.show()
In [ ]:
from sklearn.base import BaseEstimator, ClassifierMixin
class IrisClassifier(BaseEstimator, ClassifierMixin):
    """A hand-written, rule-based classifier for the iris dataset.

    Implements the scikit-learn estimator interface (fit / predict /
    predict_proba) so it can be used interchangeably with the library
    classifiers, with `score`, and with `cross_val_score`.
    """

    def __init__(self):
        pass

    def fit(self, x, y):
        # Rule-based: nothing to learn, but return self per sklearn convention.
        return self

    def predict_proba(self, x_list):
        # One probability vector [p0, p1, p2] per sample.
        return [self.predict_proba_sample(x) for x in x_list]

    def predict(self, x_list):
        proba = self.predict_proba(x_list)
        # The predicted class is the one with the highest probability.
        most_probable_category = np.argmax(proba, axis=1)
        return most_probable_category

    def predict_proba_sample(self, x):
        # e.g. x = [5.1 3.5 1.4 0.2] should be classified as 0 ([1, 0, 0])
        x = np.asarray(x, dtype=float)
        if x.shape[0] >= 3:
            # Petal length (3rd feature) separates the classes almost
            # perfectly: setosa < ~2.5 < versicolor < ~4.9 < virginica.
            if x[2] < 2.5:
                return [1.0, 0.0, 0.0]
            if x[2] < 4.9:
                return [0.0, 1.0, 0.0]
            return [0.0, 0.0, 1.0]
        # Only sepal features available: the evaluation cells use
        # iris.data[:, [0, 1]], i.e. sepal length and sepal width.
        sepal_length, sepal_width = x[0], x[1]
        if sepal_length < 5.5 and sepal_width > 2.7:
            return [1.0, 0.0, 0.0]
        if sepal_length < 6.2:
            return [0.0, 1.0, 0.0]
        return [0.0, 0.0, 1.0]
In [ ]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

## Evaluation
### Use only the first two features (2-D) to keep things simple.
X = iris.data[:, [0, 1]]
y = iris.target

# fit() returns self, so training and binding can be chained.
clf = IrisClassifier().fit(X, y)

# Predict on the training data itself.
predicted = clf.predict(X)

# Accuracy on the training set (ClassifierMixin.score).
print("Accuracy = ", clf.score(X, y))
In [ ]:
# Again restrict the data to the first two features for simplicity.
X = iris.data[:, [0, 1]]
y = iris.target

# Build and train an RBF-kernel SVM classifier.
svm = SVC(kernel='rbf', probability=True)
svm.fit(X, y)

# Accuracy on the training data.
print("Score = {0}".format(svm.score(X, y)))

# Predict the first ten samples one at a time.
for sample in X[:10]:
    print("Predict f(%s) = %s" % (sample, svm.predict([sample])))
In [ ]:
# Other classifiers — all of them share the same estimator interface.
classifiers = [
    SVC(kernel='rbf', probability=True),
    DecisionTreeClassifier(max_depth=2),
    KNeighborsClassifier(n_neighbors=1),
    IrisClassifier(),
]
# Titles in the same order as `classifiers` (typo 'IrisClassifer' fixed).
titles = ['Kernel SVM', 'Decision Tree (depth=2)', 'KNN (k=1)', 'IrisClassifier']

# Train each classifier on the same 2-D data.
for classifier in classifiers:
    classifier.fit(X, y)

# Plot the decision regions: classify every point of a dense grid that
# covers the data (with a 1-unit margin), then draw filled contours.
plt.clf()
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.05), np.arange(y_min, y_max, 0.05))
f, axarr = plt.subplots(2, 2, sharex='col', sharey='row', figsize=(11, 10))
for index, (classifier, title) in enumerate(zip(classifiers, titles)):
    # Predict on the flattened grid, then reshape back to the grid shape.
    predicted = classifier.predict(np.c_[xx.ravel(), yy.ravel()])
    predicted = predicted.reshape(xx.shape)
    ax = axarr[index // 2, index % 2]
    ax.contourf(xx, yy, predicted, alpha=0.3)
    ax.scatter(X[:, 0], X[:, 1], c=y, alpha=0.6)
    # Subplot title shows the training accuracy next to the name.
    ax.set_title("%s (%f)" % (title, classifier.score(X, y)))
plt.show()
In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import itertools
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        # Convert raw counts to per-row (true-label) fractions.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    ticks = np.arange(len(classes))
    plt.xticks(ticks, classes, rotation=45)
    plt.yticks(ticks, classes)

    # Annotate every cell with its value; use white text on dark cells.
    cell_format = '.2f' if normalize else 'd'
    cutoff = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            plt.text(col, row, format(cm[row, col], cell_format),
                     horizontalalignment="center",
                     color="white" if cm[row, col] > cutoff else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
from sklearn.metrics import confusion_matrix
# Confusion matrix of clf on the training data, labeled with the actual
# iris class names instead of the uninformative placeholders 'A'/'B'/'C'.
matrix = confusion_matrix(y, clf.predict(X))
plot_confusion_matrix(matrix, list(iris.target_names), normalize=True)
In [ ]:
## Cross-validation
# NOTE: the titles must follow the same order as `classifiers`
# (SVC, DecisionTree, KNN, IrisClassifier); they were previously
# mismatched, so each score was printed under the wrong name.
for classifier, title in zip(classifiers, ['Kernel SVM', 'Decision Tree (depth=2)', 'KNN (k=1)', 'Iris Classifier']):
    scores = cross_val_score(classifier, X, y, cv=6)
    print("Cross Validation Score of %s" % title)
    print("mean(%s) = %s" % (scores, scores.mean()))
# ref. http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html
In [ ]:
## Grid search over the decision-tree hyper-parameters
from sklearn.model_selection import GridSearchCV

# Candidate values for each hyper-parameter; GridSearchCV evaluates every
# combination with 6-fold cross-validation and keeps the best one.
search_params = [{
    'criterion': ['gini', 'entropy'],
    'max_depth': [1, 2, 3, 4, 5],
    'min_samples_split': [2, 3, 4],
}]
tuned_clf = GridSearchCV(DecisionTreeClassifier(random_state=1), search_params, cv=6)
tuned_clf.fit(iris.data, iris.target)
print("Best Score %s " % tuned_clf.best_score_)
print("Best Params %s " % tuned_clf.best_params_)