In [1]:
from distutils.version import LooseVersion as Version
from sklearn import __version__ as sklearn_version
In [2]:
# Compute the ensemble misclassification rate
from scipy.special import comb  # scipy.misc.comb was removed in SciPy 1.0
import math
def ensemble_error(n_classifier, error):
    # Smallest number of classifiers that constitutes a majority
    k_start = int(math.ceil(n_classifier / 2.0))
    # Binomial probability that exactly k of the n classifiers err,
    # summed over every k that forms a majority
    probs = []
    for k in range(k_start, n_classifier + 1):
        probs.append(comb(n_classifier, k) * error ** k * (1 - error) ** (n_classifier - k))
    return sum(probs)
ensemble_error(n_classifier=11, error=0.25)
Out[2]:
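The cell above evaluates the binomial formula for the error of a majority vote over $n$ independent base classifiers that each err with probability $\varepsilon$:

$$\varepsilon_{\mathrm{ens}} = \sum_{k=\lceil n/2 \rceil}^{n} \binom{n}{k}\, \varepsilon^{k} (1 - \varepsilon)^{n-k}$$

For $n = 11$ and $\varepsilon = 0.25$ this gives roughly 0.034, and as the plot in the next cell shows, the ensemble error stays below the base error only while $\varepsilon < 0.5$.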
In [3]:
import numpy as np
error_range = np.arange(0.0, 1.0, 0.01)
ens_errors = [ensemble_error(n_classifier=11, error=error) for error in error_range]
import matplotlib.pyplot as plt
plt.plot(error_range, ens_errors, label='Ensemble error', linewidth=2)
plt.plot(error_range, error_range, label='Base error', linewidth=2, linestyle='--')
plt.xlabel('Base error')
plt.ylabel('Base/Ensemble error')
plt.legend(loc='upper left')
plt.grid()
plt.show()
In [4]:
import numpy as np
# Class labels: 0, 0, 1
# Weights: 0.2, 0.2, 0.6
np.argmax(np.bincount([0, 0, 1], weights=[0.2, 0.2, 0.6]))
Out[4]:
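np.bincount sums the weight attached to each vote per class label, so the weighted counts here are [0.2 + 0.2, 0.6] = [0.4, 0.6] and argmax returns class 1. A minimal sketch of the equivalent explicit computation:

labels = np.array([0, 0, 1])
weights = np.array([0.2, 0.2, 0.6])
# Sum the weights of the votes cast for each class
weighted_counts = np.array([weights[labels == c].sum() for c in (0, 1)])
print(weighted_counts)             # [0.4 0.6]
print(np.argmax(weighted_counts))  # 1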
In [5]:
ex = np.array([[0.9, 0.1],
[0.8, 0.2],
[0.4, 0.6]])
# Class-membership probabilities, averaged with the classifier weights
p = np.average(ex, axis=0, weights=[0.2, 0.2, 0.6])
print(p)
# Result of the (soft) majority vote
print(np.argmax(p))
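Written out, the weighted average of the three probability vectors is

$$p = 0.2 \begin{bmatrix} 0.9 \\ 0.1 \end{bmatrix} + 0.2 \begin{bmatrix} 0.8 \\ 0.2 \end{bmatrix} + 0.6 \begin{bmatrix} 0.4 \\ 0.6 \end{bmatrix} = \begin{bmatrix} 0.58 \\ 0.42 \end{bmatrix}$$

so np.argmax(p) returns 0: the confident probabilities of the first two classifiers outweigh the heavily weighted third, whereas the weighted class-label vote in the previous cell picked class 1.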
In [6]:
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from sklearn.base import clone
from sklearn.pipeline import _name_estimators
import numpy as np
class MajorityVoteClassifier(BaseEstimator, ClassifierMixin):
    """A majority-vote ensemble classifier.

    Parameters
    ----------
    classifiers : array-like, shape = [n_classifiers]
        Different classifiers for the ensemble.
    vote : str, {'classlabel', 'probability'} (default: 'classlabel')
        If 'classlabel', prediction is based on the argmax of class labels.
        If 'probability', prediction is based on the argmax of the averaged
        class-membership probabilities (recommended for calibrated classifiers).
    weights : array-like, shape = [n_classifiers] (optional, default=None)
        If a list of `int` or `float` values is provided, the classifiers
        are weighted by importance; uniform weights are used if `weights=None`.
    """
    def __init__(self, classifiers, vote='classlabel', weights=None):
        self.classifiers = classifiers
        self.named_classifiers = {key: value for key, value in _name_estimators(classifiers)}
        self.vote = vote
        self.weights = weights
    def fit(self, X, y):
        """Fit the classifiers.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Matrix of training samples.
        y : array-like, shape = [n_samples]
            Vector of target class labels.

        Returns
        -------
        self : object
        """
        # Encode labels to start at 0, which np.argmax/np.bincount in predict rely on
        self.lablenc_ = LabelEncoder()
        self.lablenc_.fit(y)
        self.classes_ = self.lablenc_.classes_
        self.classifiers_ = []
        for clf in self.classifiers:
            fitted_clf = clone(clf).fit(X, self.lablenc_.transform(y))
            self.classifiers_.append(fitted_clf)
        return self
    def predict(self, X):
        """Predict class labels for X."""
        if self.vote == 'probability':
            maj_vote = np.argmax(self.predict_proba(X), axis=1)
        else:  # 'classlabel' vote
            # Collect the predictions: one row per sample, one column per classifier
            predictions = np.asarray([clf.predict(X) for clf in self.classifiers_]).T
            # Weighted majority vote for each sample
            maj_vote = np.apply_along_axis(
                lambda x: np.argmax(np.bincount(x, weights=self.weights)),
                axis=1,
                arr=predictions)
        # Map the encoded labels back to the original class labels
        maj_vote = self.lablenc_.inverse_transform(maj_vote)
        return maj_vote
    def predict_proba(self, X):
        """Predict class probabilities for X."""
        probas = np.asarray([clf.predict_proba(X) for clf in self.classifiers_])
        avg_proba = np.average(probas, axis=0, weights=self.weights)
        return avg_proba
    def get_params(self, deep=True):
        """Get classifier parameter names for GridSearch."""
        if not deep:
            return super(MajorityVoteClassifier, self).get_params(deep=False)
        else:
            out = self.named_classifiers.copy()
            for name, step in self.named_classifiers.items():
                for key, value in step.get_params(deep=True).items():
                    # Use scikit-learn's '<estimator>__<param>' naming convention
                    out['{}__{}'.format(name, key)] = value
            return out
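As a quick smoke test of the class (not part of the original notebook; the toy data is made up for illustration):

# Hypothetical check on a tiny, linearly separable toy set
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X_demo = np.array([[0.0], [0.2], [0.8], [1.0]])
y_demo = np.array(['a', 'a', 'b', 'b'])
mv = MajorityVoteClassifier(
    classifiers=[LogisticRegression(), DecisionTreeClassifier()],
    vote='classlabel')
print(mv.fit(X_demo, y_demo).predict(X_demo))  # expected: ['a' 'a' 'b' 'b']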
In [23]:
# A 2-D array: one row per sample, one column per classifier's prediction
# (a 1-D array would make apply_along_axis fail with axis=1)
predictions = np.asarray([[1, 1, 0]])
print(predictions)
maj_vote = np.apply_along_axis(
    lambda x: np.argmax(np.bincount(x, weights=[0.2, 0.2, 0.6])),
    axis=1,
    arr=predictions)
maj_vote  # array([0]): weighted counts are [0.6, 0.4]
In [ ]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
if Version(sklearn_version) < '0.18':
    from sklearn.cross_validation import train_test_split
else:
    from sklearn.model_selection import train_test_split
iris = datasets.load_iris()
# Use two features (sepal width, petal length) of classes 1 and 2
X = iris.data[50:, [1, 2]]
y = iris.target[50:]
le = LabelEncoder()
y = le.fit_transform(y)
In [8]:
# Split into 50% training data and 50% test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=1)
In [9]:
# Classify with logistic regression, a decision tree, and k-nearest neighbors,
# evaluated with 10-fold cross-validation
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
if Version(sklearn_version) < '0.18':
    from sklearn.cross_validation import cross_val_score
else:
    from sklearn.model_selection import cross_val_score
clf1 = LogisticRegression(penalty='l2', C=0.001, random_state=0)
clf2 = DecisionTreeClassifier(max_depth=1, criterion='entropy', random_state=0)
clf3 = KNeighborsClassifier(n_neighbors=1, p=2, metric='minkowski')
pipe1 = Pipeline([['sc', StandardScaler()],
['clf', clf1]])
pipe3 = Pipeline([['sc', StandardScaler()],
['clf', clf3]])
clfs = [pipe1, clf2, pipe3]
clf_labels = ['Logistic Regression', 'Decision Tree', 'KNN']
for clf, label in zip(clfs, clf_labels):
    scores = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=10, scoring='roc_auc')
    print('ROC AUC: {:.2f} (+/- {:.2f}) [{}]'.format(scores.mean(), scores.std(), label))
In [10]:
mv_clf = MajorityVoteClassifier(classifiers=[pipe1, clf2, pipe3])
clfs.append(mv_clf)
clf_labels.append('Majority Voting')
for clf, label in zip(clfs, clf_labels):
    scores = cross_val_score(estimator=clf, X=X_train, y=y_train, cv=10, scoring='roc_auc')
    print('ROC AUC: {:.2f} (+/- {:.2f}) [{}]'.format(scores.mean(), scores.std(), label))
In [11]:
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
colors = ['black', 'orange', 'blue', 'green']
linestyles = [':', '--', '-.', '-']
for clf, label, clr, ls in zip(clfs, clf_labels, colors, linestyles):
    # Assumes the positive class is labeled 1
    y_pred = clf.fit(X_train, y_train).predict_proba(X_test)[:, 1]
    fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=y_pred)
    roc_auc = auc(x=fpr, y=tpr)
    plt.plot(fpr, tpr, color=clr, linestyle=ls, label='{} (auc={:.2f})'.format(label, roc_auc))
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], linestyle='--', color='gray', linewidth=2)
plt.xlim([-0.1, 1.1])
plt.ylim([-0.1, 1.1])
plt.grid()
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()
In [12]:
from itertools import product
import numpy as np
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
# Compute the min and max values that bound the decision regions
x_min = X_train_std[:, 0].min() - 1
x_max = X_train_std[:, 0].max() + 1
y_min = X_train_std[:, 1].min() - 1
y_max = X_train_std[:, 1].max() + 1
print(x_min, x_max, y_min, y_max)
# Generate the grid points
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
# Split the plotting area into a 2x2 grid
f, axarr = plt.subplots(nrows=2, ncols=2, sharex='col', sharey='row', figsize=(7, 5))
# Plot the decision regions and scatter the two classes for each classifier;
# idx is a (row, column) tuple giving each classifier's subplot position
for idx, clf, tt in zip(product([0, 1], [0, 1]), clfs, clf_labels):
    clf.fit(X_train_std, y_train)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Subplot to draw on
    ax = axarr[idx[0], idx[1]]
    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X_train_std[y_train==0, 0], X_train_std[y_train==0, 1], c='blue', marker='^', s=50)
    ax.scatter(X_train_std[y_train==1, 0], X_train_std[y_train==1, 1], c='red', marker='o', s=50)
    ax.set_title(tt)
plt.text(-3.5, -4.5, s='Sepal width [standardized]', ha='center', va='center', fontsize=12)
plt.text(-11.5, 4.5, s='Petal length [standardized]', ha='center', va='center', fontsize=12, rotation=90)
plt.show()
In [13]:
# List the tunable parameters
mv_clf.get_params()
Out[13]:
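The keys returned by get_params follow scikit-learn's nested naming scheme: the estimator name assigned by _name_estimators, a double underscore, then the parameter name; for a classifier wrapped in a pipeline, the step name is inserted as well. That is why the grid search below addresses the logistic regression's regularization strength as 'pipeline-1__clf__C'. A minimal sketch, assuming the names shown in the output above:

# Hypothetical illustration of the '<estimator>__<parameter>' convention
keys = mv_clf.get_params().keys()
print([k for k in keys if k.endswith('__C')])   # e.g. ['pipeline-1__clf__C']
print([k for k in keys if 'max_depth' in k])    # e.g. ['decisiontreeclassifier__max_depth']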
In [14]:
# Tune the inverse regularization parameter C of the logistic regression
# classifier and the depth of the decision tree
if Version(sklearn_version) < '0.18':
    from sklearn.grid_search import GridSearchCV
else:
    from sklearn.model_selection import GridSearchCV
params = {'decisiontreeclassifier__max_depth': [1, 2],
          'pipeline-1__clf__C': [0.001, 0.1, 100.0]}
grid = GridSearchCV(estimator=mv_clf, param_grid=params, cv=10, scoring='roc_auc')
grid.fit(X_train, y_train)
Out[14]:
In [15]:
# Print each hyperparameter combination with its mean ROC AUC
# scikit-learn < 0.18:
#for params, mean_score, scores in grid.grid_scores_:
#    print('{:.3f}+/-{:.2f} {}'.format(mean_score, scores.std() / 2, params))
# scikit-learn >= 0.18:
cv_keys = ('mean_test_score', 'std_test_score', 'params')
for r, _ in enumerate(grid.cv_results_['mean_test_score']):
    print('{:.3f}+/-{:.2f} {}'.format(grid.cv_results_[cv_keys[0]][r],
                                      grid.cv_results_[cv_keys[1]][r] / 2.0,
                                      grid.cv_results_[cv_keys[2]][r]))
# Print the best-scoring parameter combination and its score
print('Best parameters: {}'.format(grid.best_params_))
print('ROC AUC: {:.2f}'.format(grid.best_score_))
In [16]:
import pandas as pd
# Load the Wine dataset
df_wine = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data', header=None)
df_wine.columns = ['Class label', 'Alcohol', 'Malic acid', 'Ash',
'Alcalinity of ash', 'Magnesium', 'Total phenols',
'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins',
'Color intensity', 'Hue', 'OD280/OD315 of diluted wines',
'Proline']
# Keep only classes 2 and 3
df_wine = df_wine[df_wine['Class label'] != 1]
y = df_wine['Class label'].values
# Select two features: Alcohol and Hue
X = df_wine[['Alcohol', 'Hue']].values
df_wine.head()
Out[16]:
In [17]:
from sklearn.preprocessing import LabelEncoder
if Version(sklearn_version) < '0.18':
    from sklearn.cross_validation import train_test_split
else:
    from sklearn.model_selection import train_test_split
le = LabelEncoder()
y = le.fit_transform(y)
# Split into a 60% training set and a 40% test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.40, random_state=1)
print(len(X_train), len(X_test))
In [18]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier(criterion='entropy', max_depth=None, random_state=1)
# Build an ensemble of 500 decision trees, each fit to a different bootstrap sample of the training set
bag = BaggingClassifier(base_estimator=tree, n_estimators=500, max_samples=1.0, max_features=1.0,
bootstrap=True, bootstrap_features=False, n_jobs=1, random_state=1)
bag
Out[18]:
In [19]:
# Performance of a single unpruned decision tree
from sklearn.metrics import accuracy_score
tree = tree.fit(X_train, y_train)
y_train_pred = tree.predict(X_train)
y_test_pred = tree.predict(X_test)
tree_train = accuracy_score(y_train, y_train_pred)
tree_test = accuracy_score(y_test, y_test_pred)
print('Decision tree train/test accuracies {:.3f}/{:.3f}'.format(tree_train, tree_test))
In [20]:
# Performance of the bagging ensemble
bag = bag.fit(X_train, y_train)
y_train_pred = bag.predict(X_train)
y_test_pred = bag.predict(X_test)
bag_train = accuracy_score(y_train, y_train_pred)
bag_test = accuracy_score(y_test, y_test_pred)
print('Bagging train/test accuracies {:.3f}/{:.3f}'.format(bag_train, bag_test))
In [21]:
# Compare the decision regions of the decision tree and the bagging classifier
import numpy as np
import matplotlib.pyplot as plt
# Compute the min and max values that bound the decision regions
x_min = X_train[:, 0].min() - 1
x_max = X_train[:, 0].max() + 1
y_min = X_train[:, 1].min() - 1
y_max = X_train[:, 1].max() + 1
print(x_min, x_max, y_min, y_max)
# Generate the grid points
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
# Split the plotting area into two columns
f, axarr = plt.subplots(nrows=1, ncols=2, sharex='col', sharey='row', figsize=(8, 3))
for idx, clf, tt in zip([0, 1], [tree, bag], ['Decision Tree', 'Bagging']):
    clf.fit(X_train, y_train)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Subplot to draw on
    ax = axarr[idx]
    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X_train[y_train==0, 0], X_train[y_train==0, 1], c='blue', marker='^')
    ax.scatter(X_train[y_train==1, 0], X_train[y_train==1, 1], c='red', marker='o')
    ax.set_title(tt)
axarr[0].set_ylabel('Alcohol', fontsize=12)
plt.text(10.2, -1.2, s='Hue', ha='center', va='center', fontsize=12)
plt.show()
In [24]:
from sklearn.ensemble import AdaBoostClassifier
tree = DecisionTreeClassifier(criterion='entropy', max_depth=1, random_state=0)
ada = AdaBoostClassifier(base_estimator=tree, n_estimators=500, learning_rate=0.1, random_state=0)
tree = tree.fit(X_train, y_train)
y_train_pred = tree.predict(X_train)
y_test_pred = tree.predict(X_test)
tree_train = accuracy_score(y_train, y_train_pred)
tree_test = accuracy_score(y_test, y_test_pred)
print('Decision tree train/test accuracies {:.3f}/{:.3f}'.format(tree_train, tree_test))
In [25]:
ada = ada.fit(X_train, y_train)
y_train_pred = ada.predict(X_train)
y_test_pred = ada.predict(X_test)
ada_train = accuracy_score(y_train, y_train_pred)
ada_test = accuracy_score(y_test, y_test_pred)
print('AdaBoost train/test accuracies {:.3f}/{:.3f}'.format(ada_train, ada_test))
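As a side check (not in the original notebook), the boosting process can be traced round by round with AdaBoostClassifier's staged_predict, which yields the ensemble's predictions after each of the 500 boosting iterations; a minimal sketch, assuming ada has been fit as above:

# Plot test accuracy as a function of the number of boosting rounds
staged_acc = [accuracy_score(y_test, y_pred)
              for y_pred in ada.staged_predict(X_test)]
plt.plot(range(1, len(staged_acc) + 1), staged_acc)
plt.xlabel('Number of boosting rounds')
plt.ylabel('Test accuracy')
plt.show()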
In [26]:
# Compare the decision regions of the decision tree and the AdaBoost classifier
import numpy as np
import matplotlib.pyplot as plt
# Compute the min and max values that bound the decision regions
x_min = X_train[:, 0].min() - 1
x_max = X_train[:, 0].max() + 1
y_min = X_train[:, 1].min() - 1
y_max = X_train[:, 1].max() + 1
print(x_min, x_max, y_min, y_max)
# Generate the grid points
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
# Split the plotting area into two columns
f, axarr = plt.subplots(nrows=1, ncols=2, sharex='col', sharey='row', figsize=(8, 3))
for idx, clf, tt in zip([0, 1], [tree, ada], ['Decision Tree', 'AdaBoost']):
    clf.fit(X_train, y_train)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    # Subplot to draw on
    ax = axarr[idx]
    ax.contourf(xx, yy, Z, alpha=0.3)
    ax.scatter(X_train[y_train==0, 0], X_train[y_train==0, 1], c='blue', marker='^')
    ax.scatter(X_train[y_train==1, 0], X_train[y_train==1, 1], c='red', marker='o')
    ax.set_title(tt)
axarr[0].set_ylabel('Alcohol', fontsize=12)
plt.text(10.2, -1.2, s='Hue', ha='center', va='center', fontsize=12)
plt.show()