In [17]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()

X = iris.data
y = iris.target

In [18]:
# Voting

from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC()

voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='hard'
)

voting_clf.fit(X, y)


Out[18]:
VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='warn',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='warn', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('rf',
                              RandomForestClassifier(bootstrap=True,
                                                     class_weight=None,
                                                     criterion='gini',...
                                                     oob_score=False,
                                                     random_state=None,
                                                     verbose=0,
                                                     warm_start=False)),
                             ('svc',
                              SVC(C=1.0, cache_size=200, class_weight=None,
                                  coef0=0.0, decision_function_shape='ovr',
                                  degree=3, gamma='auto_deprecated',
                                  kernel='rbf', max_iter=-1, probability=False,
                                  random_state=None, shrinking=True, tol=0.001,
                                  verbose=False))],
                 flatten_transform=True, n_jobs=None, voting='hard',
                 weights=None)

In [22]:
voting_clf.predict(X[-1:])


Out[22]:
array([2])
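
Hard voting counts each classifier's predicted class label. As a complementary sketch (not part of the original run), soft voting averages the predicted class probabilities instead, which requires every estimator to expose predict_proba (hence probability=True for SVC). The hyperparameters below are illustrative only.

In [ ]:
# Hedged sketch: soft voting averages class probabilities instead of counting labels.
# Reuses X, y and the imports from the cells above; settings are illustrative.
log_clf_soft = LogisticRegression(solver='lbfgs', max_iter=1000)
rnd_clf_soft = RandomForestClassifier(n_estimators=100)
svm_clf_soft = SVC(gamma='scale', probability=True)  # probability=True enables predict_proba

soft_voting_clf = VotingClassifier(
    estimators=[('lr', log_clf_soft), ('rf', rnd_clf_soft), ('svc', svm_clf_soft)],
    voting='soft'
)
soft_voting_clf.fit(X, y)
print(soft_voting_clf.predict(X[-1:]))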

In [23]:
# Bagging

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    max_samples=100, bootstrap=True, n_jobs=-1, oob_score=True
)

bag_clf.fit(X, y)
print(bag_clf.predict(X[-1:]))

# Out-of-bag score: accuracy estimated on the training instances each predictor never saw
print(bag_clf.oob_score_)


[2]
0.96
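
Because the bagging classifier above was built with oob_score=True, it also keeps per-instance out-of-bag class probabilities. The short sketch below is an addition for illustration, reusing bag_clf from the previous cell.

In [ ]:
# Hedged sketch: inspect the out-of-bag class probabilities,
# one row per training instance (only available because oob_score=True above).
print(bag_clf.oob_decision_function_[:5])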

In [30]:
# Random Forests ≈ bagging of decision trees, plus extra randomness: each split only considers a random subset of features
# (a roughly equivalent BaggingClassifier is sketched after this cell's output)

from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1)
rnd_clf.fit(X, y)
print(rnd_clf.predict(X[-20:]))


[2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
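
To back up the comment that a random forest behaves like bagging of decision trees, the sketch below (an addition, with illustrative settings) builds a BaggingClassifier that is roughly equivalent to the RandomForestClassifier above: each tree considers a random subset of features at every split and is trained on a bootstrap sample.

In [ ]:
# Hedged sketch: a BaggingClassifier roughly equivalent to the random forest above.
bag_equiv_clf = BaggingClassifier(
    DecisionTreeClassifier(max_features='sqrt', max_leaf_nodes=16),
    n_estimators=500, max_samples=1.0, bootstrap=True, n_jobs=-1
)
bag_equiv_clf.fit(X, y)
print(bag_equiv_clf.predict(X[-20:]))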

In [25]:
rnd_clf.fit(iris['data'], y)

for name, score in zip(iris['feature_names'], rnd_clf.feature_importances_):
    print(name, score)


sepal length (cm) 0.09740842914877314
sepal width (cm) 0.023936688951077444
petal length (cm) 0.4210851087151574
petal width (cm) 0.457569773184992

In [32]:
# Boosting

## AdaBoost (adaptive boosting)

from sklearn.ensemble import AdaBoostClassifier

ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1), n_estimators=200,
    algorithm='SAMME.R', learning_rate=0.5
)

ada_clf.fit(X, y)
print(ada_clf.predict(X[-20:]), y[-20:])


[2 2 2 1 1 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2] [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
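
AdaBoost also exposes staged predictions, which makes it easy to watch accuracy evolve as boosting rounds are added. The sketch below is an addition for illustration, reusing the ada_clf fitted above; accuracy is measured on the training set here, so it only shows the fitting behaviour, not generalisation.

In [ ]:
# Hedged sketch: training accuracy after each boosting round, using the fitted ada_clf.
from sklearn.metrics import accuracy_score

staged_acc = [accuracy_score(y, y_stage) for y_stage in ada_clf.staged_predict(X)]
print(staged_acc[:5], staged_acc[-1])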

In [38]:
## Gradient Boosting
## each new predictor fits the residual errors made by the previous one

from sklearn.tree import DecisionTreeRegressor


# first regressor fits the original target
tree_reg1 = DecisionTreeRegressor(max_depth=2)
tree_reg1.fit(X, y)

# second regressor fits the residual errors of the first
y2 = y - tree_reg1.predict(X)

tree_reg2 = DecisionTreeRegressor(max_depth=2)
tree_reg2.fit(X, y2)

# the ensemble predicts by summing the predictions of all trees
y_pred = sum(tree.predict(X) for tree in (tree_reg1, tree_reg2))

from sklearn.ensemble import GradientBoostingRegressor

gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120, learning_rate=1.0)

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

X_train, X_val, y_train, y_val = train_test_split(X, y)

gbrt.fit(X_train, y_train)

errors = [mean_squared_error(y_val, y_pred) for y_pred in gbrt.staged_predict(X_val)]
print(errors)
best_n_estimators = np.argmin(errors) + 1  # argmin returns a zero-based stage index, so add 1 to get the tree count
print(best_n_estimators)


[0.07303974221267455, 0.08022899074725032, 0.08098567318219749, 0.08209984855372128, 0.0854189580727079, 0.08515105833272861, 0.0852302566491742, 0.08505579304353011, 0.08478050807014986, 0.08542962604448674, 0.08540486110324295, 0.08498140671213517, 0.08527929375120978, 0.08530233687209562, 0.08547430536045572, 0.08552439841428615, 0.08560412671631466, 0.08531940656194066, 0.08536788108224565, 0.08536889723786337, 0.08603223879985877, 0.08634888905635055, 0.08635062629625095, 0.08628332306831957, 0.08633166401519414, 0.08634062661284556, 0.08626494659869503, 0.08620602214819373, 0.08622066949681403, 0.08619643726143772, 0.08616490943400529, 0.08612759595796733, 0.08613305798004461, 0.08610633711153905, 0.08609750900880718, 0.08594110375826418, 0.08602594173136416, 0.08603229045365769, 0.08605319272769119, 0.08605245066021856, 0.08605836166518477, 0.08605472482537385, 0.08611468823370229, 0.08612178940516342, 0.08612087111458133, 0.08612813064471606, 0.08610293211989166, 0.08612111773730911, 0.08611821638212018, 0.08609126745700324, 0.08618681524310762, 0.0860920648569453, 0.08600587894901439, 0.0859919040749938, 0.08599574944410207, 0.08598533447880628, 0.08598939387971039, 0.08591559759266493, 0.08591924993878816, 0.08590146342588317, 0.08590935035838869, 0.08589104169875991, 0.08581895851040723, 0.08582207300646273, 0.08582093923441665, 0.08582384488295608, 0.0858245740169624, 0.08582653235547053, 0.08588968377223714, 0.08591140424313526, 0.08590801217429385, 0.08591345517038153, 0.08589815337754528, 0.0858990740864639, 0.08590092959025356, 0.08590274154629939, 0.08589972137880329, 0.08587462269454767, 0.08587466232541174, 0.08587400218365236, 0.08588828587293744, 0.08587940603491126, 0.08588635627846325, 0.08588668278999814, 0.08587558740433188, 0.08576925947411405, 0.08576693583584523, 0.08575856795307957, 0.08575818174848797, 0.08576272200557408, 0.08576054539975536, 0.08574607390678382, 0.08574546656434857, 0.08574697760491674, 0.08574923064228303, 0.08575335841632263, 0.08575880263714761, 0.08576000862531605, 0.0857330954596985, 0.08570092295523024, 0.08570274058849216, 0.08570307435393067, 0.08570502700834631, 0.08568885084193235, 0.08568857081883084, 0.08569490245024694, 0.08569239868830276, 0.08569359196774391, 0.08567860619721168, 0.08568084739736387, 0.08567593614070153, 0.08568128944285465, 0.08567585258192612, 0.08567728679188702, 0.08567833219831474, 0.08567961095054684, 0.0856790607868201, 0.08567968209020378, 0.08569817085661871, 0.08569530614247794]
1
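
With the validation curve above, the model can be retrained using only the best number of trees (a simple form of early stopping). The sketch below is an addition, reusing the same train/validation split and hyperparameters.

In [ ]:
# Hedged sketch: retrain with the estimator count that minimised the validation error.
gbrt_best = GradientBoostingRegressor(
    max_depth=2, n_estimators=int(best_n_estimators), learning_rate=1.0
)
gbrt_best.fit(X_train, y_train)
print(mean_squared_error(y_val, gbrt_best.predict(X_val)))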

In [ ]:
# Stacking
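
The stacking cell was left empty. As a hedged sketch: scikit-learn 0.22+ ships a StackingClassifier that trains a final estimator (the blender) on the out-of-fold predictions of the base estimators; on the older version used above this would have to be coded by hand with cross_val_predict. The estimators and settings below are illustrative.

In [ ]:
# Hedged sketch: stacking with StackingClassifier (requires scikit-learn >= 0.22).
# The base estimators' out-of-fold predictions become the training set of the blender.
from sklearn.ensemble import StackingClassifier

stack_clf = StackingClassifier(
    estimators=[
        ('rf', RandomForestClassifier(n_estimators=100)),
        ('svc', SVC(gamma='scale'))
    ],
    final_estimator=LogisticRegression(max_iter=1000),
    cv=5
)
stack_clf.fit(X, y)
print(stack_clf.predict(X[-1:]))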