第7章 アンサンブル学習

7.1 アンサンブルによる学習

  • アンサンブル法(ensemble method)
  • 多数決(majority voting)
  • 相対多数決(plurality voting)

In [1]:
from distutils.version import LooseVersion as Version
from sklearn import __version__ as sklearn_version

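補足: 次のセルの ensemble_error が計算するのは、誤分類率 ε をもつ n 個の独立な分類器のうち過半数が誤分類する確率で、二項分布の裾の和として次式で表せる(n = 11, ε = 0.25 なら約 0.034)。

$$\varepsilon_{\mathrm{ensemble}} = P(y \geq k) = \sum_{k=\lceil n/2 \rceil}^{n} \binom{n}{k}\, \varepsilon^{k} (1-\varepsilon)^{\,n-k}$$
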
In [2]:
# 誤分類率を計算する
from scipy.special import comb  # 新しい SciPy では scipy.misc.comb が削除されているため scipy.special を使う
import math
def ensemble_error(n_classifier, error):
    k_start = int(math.ceil(n_classifier / 2.0))
    # print(k_start) # 6
    
    probs = []
    for k in range(k_start, n_classifier + 1): # 6 to 11
        # print(k, comb(n_classifier, k))
        probs.append(comb(n_classifier, k) * error ** k * (1 - error) ** (n_classifier - k))

    #print(probs)
    return sum(probs)

ensemble_error(n_classifier=11, error=0.25)


Out[2]:
0.034327507019042969

In [3]:
import numpy as np
error_range = np.arange(0.0, 1.0, 0.01)
ens_errors = [ensemble_error(n_classifier=11, error=error) for error in error_range]

import matplotlib.pyplot as plt
plt.plot(error_range, ens_errors, label='Ensemble error', linewidth=2)
plt.plot(error_range, error_range, label='Base error', linewidth=2, linestyle='--')
plt.xlabel('Base error')
plt.ylabel('Base/Ensemble error')
plt.legend(loc='upper left')
plt.grid()
plt.show()


7.2 単純な多数決分類器の実装

  • 特性関数(characteristic function): Χ(カイ)(式は下の補足を参照)
  • 重み付け(weighting)

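補足: 重み付き多数決では、m 個の分類器 C_j の予測に重み w_j を掛けて票を集計し、重み付き票が最大となるクラスラベル i を選ぶ。特性関数 χ_A(C_j(x) = i) は、分類器 C_j がクラス i を予測したとき 1、それ以外は 0 を返す。

$$\hat{y} = \arg\max_{i} \sum_{j=1}^{m} w_j\, \chi_A\bigl(C_j(\boldsymbol{x}) = i\bigr)$$
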
In [4]:
import numpy as np
# クラスラベルが 0, 0, 1
# 重み係数が 0.2, 0.2, 0.6
np.argmax(np.bincount([0, 0, 1], weights=[0.2, 0.2, 0.6]))


Out[4]:
1

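補足: クラスの所属確率を使う場合(ソフト投票)は、各分類器 j が出力するクラス i の確率 p_{ij} を重み付き平均してから argmax を取る。次のセルの np.average と np.argmax がこの計算に相当する。

$$\hat{y} = \arg\max_{i} \sum_{j=1}^{m} w_j\, p_{ij}$$
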
In [5]:
ex = np.array([[0.9, 0.1],
               [0.8, 0.2],
               [0.4, 0.6]])
# それぞれのクラスに所属する確率
p = np.average(ex, axis=0, weights=[0.2, 0.2, 0.6])
print(p)
# 多数決の結果
print(np.argmax(p))


[ 0.58  0.42]
0

In [6]:
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from sklearn.base import clone
from sklearn.pipeline import _name_estimators
import numpy as np
import operator

class MajorityVoteClassifier(BaseEstimator, ClassifierMixin):
    """
    多数決アンサンブル分類器
    
    パラメータ
    ----------
    classifiers : array-like, shape = [n_classifiers]
        アンサンブルの様々な分類器
        
    vote : str, {'classlabel', 'probability'} (default: 'classlabel')
        'classlabel' の場合、クラスラベルの予測はクラスラベルのargmaxに基づく
        'probability' の場合、クラスラベルの予測はクラスの所属確率のargmaxに基づく(分類器が調整済であることが推奨される)
        
    weights : array-like, shape = [n_classifiers] (optional, default=None)
        `int` または `float` 型の値のリストが提供された場合、分類器は重要度で重み付けされる
        `weights=None` の場合は均一な重みを使用
        
    """
    def __init__(self, classifiers, vote='classlabel', weights=None):
        self.classifiers = classifiers
        self.named_classifiers = {key: value for key, value in _name_estimators(classifiers)}
        self.vote = vote
        self.weights = weights
        
    def fit(self, X, y):
        """
        分類器を学習させる
        
        パラメータ
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            トレーニングサンプルからなる行列
            
        y : array-like, shape = [n_samples]
            クラスラベルのリスト
        
        戻り値
        ------
        self : object
        """
        self.lablenc_ = LabelEncoder()
        self.lablenc_.fit(y)
        self.classes_ = self.lablenc_.classes_
        self.classifiers_ = []
        for clf in self.classifiers:
            fitted_clf = clone(clf).fit(X, self.lablenc_.transform(y))
            self.classifiers_.append(fitted_clf)
        return self
    
    def predict(self, X):
        """
        Xのクラスラベルを予測する
        """
        if self.vote == 'probability':
            maj_vote = np.argmax(self.predict_proba(X), axis=1)
        else:
            predictions = np.asarray([clf.predict(X) for clf in self.classifiers_]).T
            maj_vote = np.apply_along_axis(
                lambda x:
                np.argmax(np.bincount(x, weights=self.weights)),
                axis=1,
                arr=predictions
            )
            
        maj_vote = self.lablenc_.inverse_transform(maj_vote)
        return maj_vote
    
    def predict_proba(self, X):
        """
        Xのクラス確率を予測する
        """
        probas = np.asarray([clf.predict_proba(X) for clf in self.classifiers_])
        avg_proba = np.average(probas, axis=0, weights=self.weights)
        return avg_proba
    
    def get_params(self, deep=True):
        """
        GridSearchの実行時に分類器のパラメータ名を取得
        """
        if not deep:
            return super(MajorityVoteClassifier, self).get_params(deep=False)
        else:
            out = self.named_classifiers.copy()
            for name, step in self.named_classifiers.items():
                for key, value in step.get_params(deep=True).items():
                    out['{}_{}'.format(name, key)] = value
            return out

7.2.1 多数決方式でさまざまな分類アルゴリズムを組み合わせる


In [23]:
predictions = np.asarray([1, 1, 0]).T
print(predictions)
maj_vote = np.apply_along_axis(
                lambda x:
                np.argmax(np.bincount(x, weights=[0.2, 0.2, 0.6])),
                axis=1,
                arr=predictions
            )
maj_vote


[1 1 0]
ValueError: axis must be less than arr.ndim; axis=1, rank=1.
(predictions が 1 次元配列のため、axis=1 を指定した np.apply_along_axis は失敗する。2 次元に直した例を下に示す)
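
補足(動作する最小限のスケッチ): np.apply_along_axis は 2 次元配列を前提とするため、各行が 1 サンプル、各列が 1 つの分類器の予測になるよう整形してから適用する。MajorityVoteClassifier.predict 内の predictions も同じ形で作られている。

import numpy as np

# 1 サンプル × 3 分類器の予測(行がサンプル、列が分類器)
predictions = np.asarray([[1, 1, 0]])
maj_vote = np.apply_along_axis(
    lambda x: np.argmax(np.bincount(x, weights=[0.2, 0.2, 0.6])),
    axis=1,
    arr=predictions
)
print(maj_vote)  # [0] : クラス 0 の重み付き票 0.6 > クラス 1 の 0.4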

In [ ]:

from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
if Version(sklearn_version) < '0.18':
    from sklearn.cross_validation import train_test_split
else:
    from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
# 2 つの特徴量(がく片の幅、花びらの長さ)と、クラス Versicolor / Virginica のみを使用
X = iris.data[50:, [1, 2]]
y = iris.target[50:]
le = LabelEncoder()
y = le.fit_transform(y)


In [8]:
# 50% のトレーニングデータと 50% のテストデータに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=1)

In [9]:
# ロジスティック回帰、決定木、k近傍をそれぞれつかって分類
# 10分割交差検証を使う
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.pipeline import Pipeline
if Version(sklearn_version) < '0.18':
    from sklearn.cross_validation import cross_val_score
else:
    from sklearn.model_selection import cross_val_score

clf1 = LogisticRegression(penalty='l2', C=0.001, random_state=0)
clf2 = DecisionTreeClassifier(max_depth=1, criterion='entropy', random_state=0)
clf3 = KNeighborsClassifier(n_neighbors=1, p=2, metric='minkowski')

pipe1 = Pipeline([['sc', StandardScaler()],
                  ['clf', clf1]])
pipe3 = Pipeline([['sc', StandardScaler()],
                  ['clf', clf3]])
clfs = [pipe1, clf2, pipe3]
clf_labels = ['Logistic Regression', 'Decision Tree', 'KNN']

for clf, label in zip(clfs, clf_labels):
    scores = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=10, scoring='roc_auc')
    print('ROC AUC: {:.2f} (+/- {:.2f}) [{}]'.format(scores.mean(), scores.std(), label))


ROC AUC: 0.92 (+/- 0.20) [Logistic Regression]
ROC AUC: 0.92 (+/- 0.15) [Decision Tree]
ROC AUC: 0.93 (+/- 0.10) [KNN]

In [10]:
mv_clf = MajorityVoteClassifier(classifiers=[pipe1, clf2, pipe3])
clfs.append(mv_clf)
clf_labels.append('Majority Voting')

for clf, label in zip(clfs, clf_labels):
    scores = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=10, scoring='roc_auc')
    print('ROC AUC: {:.2f} (+/- {:.2f}) [{}]'.format(scores.mean(), scores.std(), label))


ROC AUC: 0.92 (+/- 0.20) [Logistic Regression]
ROC AUC: 0.92 (+/- 0.15) [Decision Tree]
ROC AUC: 0.93 (+/- 0.10) [KNN]
ROC AUC: 0.97 (+/- 0.10) [Majority Voting]

7.3 アンサンブル分類器の評価とチューニング


In [11]:
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
colors = ['black', 'orange', 'blue', 'green']
linestyles = [':', '--', '-.', '-']

for clf, label, clr, ls in zip(clfs, clf_labels, colors, linestyles):
    # 陽性クラスのラベルは1であることが前提
    y_pred = clf.fit(X_train, y_train).predict_proba(X_test)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=y_pred)
    roc_auc = auc(x=fpr, y=tpr)
    plt.plot(fpr, tpr, color=clr, linestyle=ls, label='{} (auc={:.2f})'.format(label, roc_auc))

plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], linestyle='--', color='gray', linewidth=2)
plt.xlim([-0.1, 1.1])
plt.ylim([-0.1, 1.1])
plt.grid()
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()



In [12]:
from itertools import product
import numpy as np

sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
# 決定領域を描画する最小値、最大値を生成
x_min = X_train_std[:, 0].min() - 1
x_max = X_train_std[:, 0].max() + 1
y_min = X_train_std[:, 1].min() - 1
y_max = X_train_std[:, 1].max() + 1
print(x_min, x_max, y_min, y_max)
# グリッドポイントを生成
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
# 描画領域を2行2列に分割
f, axarr = plt.subplots(nrows=2, ncols=2, sharex='col', sharey='row', figsize=(7, 5))

# 決定領域のプロット、青や赤の散布図の作成などを実行
# 変数 idx は各分類器を描画する行と列の位置を表すタプル
for idx, clf, tt in zip(product([0, 1], [0, 1]), clfs, clf_labels):
    clf.fit(X_train_std, y_train)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # 描画対象のグラフ
    ax = axarr[idx[0], idx[1]]
    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X_train_std[y_train==0, 0], X_train_std[y_train==0, 1], c='blue', marker='^', s=50)
    ax.scatter(X_train_std[y_train==1, 0], X_train_std[y_train==1, 1], c='red', marker='o', s=50)
    ax.set_title(tt)

plt.text(-3.5, -4.5, s='Sepal width [standardized]', ha='center', va='center', fontsize=12)
plt.text(-11.5, 4.5, s='Petal length [standardized]', ha='center', va='center', fontsize=12, rotation=90)
plt.show()


-3.07478920904 2.55939571126 -2.79369815479 3.32448638427

In [13]:
# パラメータの一覧
mv_clf.get_params()


Out[13]:
{'decisiontreeclassifier': DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=1,
             max_features=None, max_leaf_nodes=None,
             min_impurity_split=1e-07, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             presort=False, random_state=0, splitter='best'),
 'decisiontreeclassifier_class_weight': None,
 'decisiontreeclassifier_criterion': 'entropy',
 'decisiontreeclassifier_max_depth': 1,
 'decisiontreeclassifier_max_features': None,
 'decisiontreeclassifier_max_leaf_nodes': None,
 'decisiontreeclassifier_min_impurity_split': 1e-07,
 'decisiontreeclassifier_min_samples_leaf': 1,
 'decisiontreeclassifier_min_samples_split': 2,
 'decisiontreeclassifier_min_weight_fraction_leaf': 0.0,
 'decisiontreeclassifier_presort': False,
 'decisiontreeclassifier_random_state': 0,
 'decisiontreeclassifier_splitter': 'best',
 'pipeline-1': Pipeline(steps=[['sc', StandardScaler(copy=True, with_mean=True, with_std=True)], ['clf', LogisticRegression(C=0.001, class_weight=None, dual=False, fit_intercept=True,
           intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
           penalty='l2', random_state=0, solver='liblinear', tol=0.0001,
           verbose=0, warm_start=False)]]),
 'pipeline-1_clf': LogisticRegression(C=0.001, class_weight=None, dual=False, fit_intercept=True,
           intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
           penalty='l2', random_state=0, solver='liblinear', tol=0.0001,
           verbose=0, warm_start=False),
 'pipeline-1_clf__C': 0.001,
 'pipeline-1_clf__class_weight': None,
 'pipeline-1_clf__dual': False,
 'pipeline-1_clf__fit_intercept': True,
 'pipeline-1_clf__intercept_scaling': 1,
 'pipeline-1_clf__max_iter': 100,
 'pipeline-1_clf__multi_class': 'ovr',
 'pipeline-1_clf__n_jobs': 1,
 'pipeline-1_clf__penalty': 'l2',
 'pipeline-1_clf__random_state': 0,
 'pipeline-1_clf__solver': 'liblinear',
 'pipeline-1_clf__tol': 0.0001,
 'pipeline-1_clf__verbose': 0,
 'pipeline-1_clf__warm_start': False,
 'pipeline-1_sc': StandardScaler(copy=True, with_mean=True, with_std=True),
 'pipeline-1_sc__copy': True,
 'pipeline-1_sc__with_mean': True,
 'pipeline-1_sc__with_std': True,
 'pipeline-1_steps': [['sc',
   StandardScaler(copy=True, with_mean=True, with_std=True)],
  ['clf',
   LogisticRegression(C=0.001, class_weight=None, dual=False, fit_intercept=True,
             intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
             penalty='l2', random_state=0, solver='liblinear', tol=0.0001,
             verbose=0, warm_start=False)]],
 'pipeline-2': Pipeline(steps=[['sc', StandardScaler(copy=True, with_mean=True, with_std=True)], ['clf', KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
            metric_params=None, n_jobs=1, n_neighbors=1, p=2,
            weights='uniform')]]),
 'pipeline-2_clf': KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
            metric_params=None, n_jobs=1, n_neighbors=1, p=2,
            weights='uniform'),
 'pipeline-2_clf__algorithm': 'auto',
 'pipeline-2_clf__leaf_size': 30,
 'pipeline-2_clf__metric': 'minkowski',
 'pipeline-2_clf__metric_params': None,
 'pipeline-2_clf__n_jobs': 1,
 'pipeline-2_clf__n_neighbors': 1,
 'pipeline-2_clf__p': 2,
 'pipeline-2_clf__weights': 'uniform',
 'pipeline-2_sc': StandardScaler(copy=True, with_mean=True, with_std=True),
 'pipeline-2_sc__copy': True,
 'pipeline-2_sc__with_mean': True,
 'pipeline-2_sc__with_std': True,
 'pipeline-2_steps': [['sc',
   StandardScaler(copy=True, with_mean=True, with_std=True)],
  ['clf',
   KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
              metric_params=None, n_jobs=1, n_neighbors=1, p=2,
              weights='uniform')]]}

In [14]:
# ロジスティック回帰分類器の逆正則化パラメータCと決定木の深さをチューニング
if Version(sklearn_version) < '0.18':
    from sklearn.grid_search import GridSearchCV
else:
    from sklearn.model_selection import GridSearchCV

params = {'decisiontreeclassifier_max_depth': [1, 2],
          'pipeline-1_clf__C': [0.001, 0.1, 100.0]}
grid = GridSearchCV(estimator=mv_clf, param_grid=params, cv=10, scoring='roc_auc')
grid.fit(X_train, y_train)


Out[14]:
GridSearchCV(cv=10, error_score='raise',
       estimator=MajorityVoteClassifier(classifiers=[Pipeline(steps=[['sc', StandardScaler(copy=True, with_mean=True, with_std=True)], ['clf', LogisticRegression(C=0.001, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=0, solv...ski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           weights='uniform')]])],
            vote='classlabel', weights=None),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'decisiontreeclassifier_max_depth': [1, 2], 'pipeline-1_clf__C': [0.001, 0.1, 100.0]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='roc_auc', verbose=0)

In [15]:
# ハイパーパラメータ値の組み合わせごとに平均 ROC AUC スコアを出力
# scikit-learn 0.18 以前
#for params, mean_score, scores in grid.grid_scores_:
#    print('{:.3f}+/-{:.2f} {}'.format(mean_score, scores.std() / 2 , params))

# scikit-learn 0.18 以上の場合
cv_keys = ('mean_test_score', 'std_test_score','params')

for r, _ in enumerate(grid.cv_results_['mean_test_score']):
    print('{:.3f}+/-{:.2f} {}'.format(grid.cv_results_[cv_keys[0]][r], 
                                       grid.cv_results_[cv_keys[1]][r] / 2.0, 
                                       grid.cv_results_[cv_keys[2]][r]))

# 最も良いスコアを出したパラメーターとスコアを出力
print('Best parameters: {}'.format(grid.best_params_))
print('ROC AUC: {:.2f}'.format(grid.best_score_))


0.967+/-0.05 {'decisiontreeclassifier_max_depth': 1, 'pipeline-1_clf__C': 0.001}
0.967+/-0.05 {'decisiontreeclassifier_max_depth': 1, 'pipeline-1_clf__C': 0.1}
1.000+/-0.00 {'decisiontreeclassifier_max_depth': 1, 'pipeline-1_clf__C': 100.0}
0.967+/-0.05 {'decisiontreeclassifier_max_depth': 2, 'pipeline-1_clf__C': 0.001}
0.967+/-0.05 {'decisiontreeclassifier_max_depth': 2, 'pipeline-1_clf__C': 0.1}
1.000+/-0.00 {'decisiontreeclassifier_max_depth': 2, 'pipeline-1_clf__C': 100.0}
Best parameters: {'decisiontreeclassifier_max_depth': 1, 'pipeline-1_clf__C': 100.0}
ROC AUC: 1.00
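
補足(仮のスケッチ): refit=True(既定値)で学習した GridSearchCV からは、最良パラメータで再学習済みのアンサンブルを best_estimator_ として取り出せる。以下はそれをテストデータの評価に使う場合の一例。

from sklearn.metrics import roc_auc_score

best_mv_clf = grid.best_estimator_  # 最良パラメータで再学習済みの MajorityVoteClassifier
y_score = best_mv_clf.predict_proba(X_test)[:, 1]
print('Test ROC AUC: {:.2f}'.format(roc_auc_score(y_true=y_test, y_score=y_score)))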

7.4 バギング: ブートストラップ標本を使った分類器アンサンブルの構築

  • バギング(Bagging)
  • ブートストラップ集約(bootstrap aggregating)(下の補足スケッチを参照)
  • BAGGING: Bootstrap AGGregatING
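
補足(最小限のスケッチ): ブートストラップ標本とは、元のトレーニングデータセットから同じサイズで復元抽出したサンプルのこと。以下は numpy だけで復元抽出を確認する仮の例(変数名はこの説明用)。

import numpy as np

rng = np.random.RandomState(1)
n_samples = 10
# 復元抽出なので同じインデックスが複数回選ばれ、一度も選ばれないサンプルも出る
bootstrap_idx = rng.choice(np.arange(n_samples), size=n_samples, replace=True)
print(bootstrap_idx)
print('重複を除いた標本数:', len(np.unique(bootstrap_idx)))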

In [16]:
import pandas as pd
# ワインのデータを読み込む
df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None)
df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',
                   'Alcalinity of ash', 'Magnesium', 'Total phenols',
                   'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',
                   'Color intensity', 'Hue', 'OD280/OD315 of diluted wines',
                   'Proline']
# クラス2とクラス3のみ
df_wine = df_wine[df_wine['Class label'] != 1]
y = df_wine['Class label'].values
# 2つの特徴量(Alcohol, Hue)を選択
X = df_wine[['Alcohol', 'Hue']].values
df_wine.head()


Out[16]:
Class label Alcohol Malic acid Ash Alcalinity of ash Magnesium Total phenols Flavanoids Nonflavanoid phenols Proanthocyanins Color intensity Hue OD280/OD315 of diluted wines Proline
59 2 12.37 0.94 1.36 10.6 88 1.98 0.57 0.28 0.42 1.95 1.05 1.82 520
60 2 12.33 1.10 2.28 16.0 101 2.05 1.09 0.63 0.41 3.27 1.25 1.67 680
61 2 12.64 1.36 2.02 16.8 100 2.02 1.41 0.53 0.62 5.75 0.98 1.59 450
62 2 13.67 1.25 1.92 18.0 94 2.10 1.79 0.32 0.73 3.80 1.23 2.46 630
63 2 12.37 1.13 2.16 19.0 87 3.50 3.10 0.19 1.87 4.45 1.22 2.87 420

In [17]:
from sklearn.preprocessing import LabelEncoder
if Version(sklearn_version) < '0.18':
    from sklearn.cross_validation import train_test_split
else:
    from sklearn.model_selection import train_test_split
    
le = LabelEncoder()
y = le.fit_transform(y)
# 60%のトレーニングデータセットと40%のテストデータセットに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40, random_state=1)
print(len(X_train), len(X_test))


71 48

In [18]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier(criterion='entropy', max_depth=None, random_state=1)
# 500個の決定木からなるアンサンブルを作成し、トレーニングデータセットの異なるブートストラップ標本で学習する
bag = BaggingClassifier(base_estimator=tree, n_estimators=500, max_samples=1.0, max_features=1.0,
                        bootstrap=True, bootstrap_features=False, n_jobs=1, random_state=1)
bag


Out[18]:
BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            presort=False, random_state=1, splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=500, n_jobs=1, oob_score=False,
         random_state=1, verbose=0, warm_start=False)

In [19]:
# 普通の決定木での性能
from sklearn.metrics import accuracy_score
tree = tree.fit(X_train, y_train)
y_train_pred = tree.predict(X_train)
y_test_pred = tree.predict(X_test)
tree_train = accuracy_score(y_train, y_train_pred)
tree_test = accuracy_score(y_test, y_test_pred)
print('Decision tree train/test accuracies {:.3f}/{:.3f}'.format(tree_train, tree_test))


Decision tree train/test accuracies 1.000/0.833

In [20]:
# バギングでの性能
bag = bag.fit(X_train, y_train)
y_train_pred = bag.predict(X_train)
y_test_pred = bag.predict(X_test)
bag_train = accuracy_score(y_train, y_train_pred)
bag_test = accuracy_score(y_test, y_test_pred)
print('Bagging train/test accuracies {:.3f}/{:.3f}'.format(bag_train, bag_test))


Bagging train/test accuracies 1.000/0.896

In [21]:
# 決定木とバギング分類器の決定領域を比較
import numpy as np
import matplotlib.pyplot as plt

# 決定領域を描画する最小値、最大値を生成
x_min = X_train[:, 0].min() - 1
x_max = X_train[:, 0].max() + 1
y_min = X_train[:, 1].min() - 1
y_max = X_train[:, 1].max() + 1
print(x_min, x_max, y_min, y_max)
# グリッドポイントを生成
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
# 描画領域を2列に分割
f, axarr = plt.subplots(nrows=1, ncols=2, sharex='col', sharey='row', figsize=(8, 3))
for idx, clf, tt in zip([0, 1], [tree, bag], ['Decision Tree', 'Bagging']):
    clf.fit(X_train, y_train)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # 描画対象のグラフ
    ax = axarr[idx]
    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X_train[y_train==0, 0], X_train[y_train==0, 1], c='blue', marker='^')
    ax.scatter(X_train[y_train==1, 0], X_train[y_train==1, 1], c='red', marker='o')
    ax.set_title(tt)

axarr[0].set_ylabel('Alcohol', fontsize=12)
plt.text(10.2, -1.2, s='Hue', ha='center', va='center', fontsize=12)
plt.show()


10.45 14.88 -0.52 2.45

7.5 アダブーストによる弱学習器の活用

  • ブースティング(Boosting)
  • アダブースト(Adaptive Boosting: AdaBoost)(重み更新の式は下の補足を参照)
  • 弱学習器(weak learner)
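
補足: ラベルを ±1 とする二値分類の一般的な AdaBoost では、各ラウンド j で弱学習器 h_j の重み付き誤分類率 ε_j から係数 α_j を求め、誤分類したサンプルの重みを大きくしてから正規化する(scikit-learn の AdaBoostClassifier が実装しているのは SAMME / SAMME.R と呼ばれる変種である点に注意)。

$$\alpha_j = \frac{1}{2} \ln \frac{1 - \varepsilon_j}{\varepsilon_j}, \qquad w_i \leftarrow w_i \exp\bigl(-\alpha_j\, y_i\, h_j(\boldsymbol{x}_i)\bigr), \qquad w_i \leftarrow \frac{w_i}{\sum_k w_k}$$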

In [24]:
from sklearn.ensemble import AdaBoostClassifier
tree = DecisionTreeClassifier(criterion='entropy', max_depth=1, random_state=0)

ada = AdaBoostClassifier(base_estimator=tree, n_estimators=500, learning_rate=0.1, random_state=0)

tree = tree.fit(X_train, y_train)
y_train_pred = tree.predict(X_train)
y_test_pred = tree.predict(X_test)
tree_train = accuracy_score(y_train, y_train_pred)
tree_test = accuracy_score(y_test, y_test_pred)
print('Decision tree train/test accuracies {:.3f}/{:.3f}'.format(tree_train, tree_test))


Decision tree train/test accuracies 0.845/0.854

In [25]:
ada = ada.fit(X_train, y_train)
y_train_pred = ada.predict(X_train)
y_test_pred = ada.predict(X_test)
ada_train = accuracy_score(y_train, y_train_pred) 
ada_test = accuracy_score(y_test, y_test_pred) 
print('AdaBoost train/test accuracies {:.3f}/{:.3f}'.format(ada_train, ada_test))


AdaBoost train/test accuracies 1.000/0.875

In [26]:
# 決定木とアダブースト分類器の決定領域を比較
import numpy as np
import matplotlib.pyplot as plt

# 決定領域を描画する最小値、最大値を生成
x_min = X_train[:, 0].min() - 1
x_max = X_train[:, 0].max() + 1
y_min = X_train[:, 1].min() - 1
y_max = X_train[:, 1].max() + 1
print(x_min, x_max, y_min, y_max)
# グリッドポイントを生成
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
# 描画領域を2列に分割
f, axarr = plt.subplots(nrows=1, ncols=2, sharex='col', sharey='row', figsize=(8, 3))
for idx, clf, tt in zip([0, 1], [tree, ada], ['Decision Tree', 'AdaBoost']):
    clf.fit(X_train, y_train)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # 描画対象のグラフ
    ax = axarr[idx]
    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X_train[y_train==0, 0], X_train[y_train==0, 1], c='blue', marker='^')
    ax.scatter(X_train[y_train==1, 0], X_train[y_train==1, 1], c='red', marker='o')
    ax.set_title(tt)

axarr[0].set_ylabel('Alcohol', fontsize=12)
plt.text(10.2, -1.2, s='Hue', ha='center', va='center', fontsize=12)
plt.show()


10.45 14.88 -0.52 2.45