Bagging and random forest — comparing a single decision tree, bagged trees, and random-forest-style feature subsampling on the scikit-learn digits dataset (10-fold CV accuracy).



In [1]:
import numpy as np
import sklearn as skl
import pandas as pd
from matplotlib import pyplot as plt

In [3]:
%matplotlib inline

In [5]:
from sklearn.datasets import load_digits
# Load the handwritten-digits dataset as a Bunch (dict-like with 'data' and 'target').
data = load_digits()

In [7]:
# Split the Bunch into labels and the feature matrix.
# NOTE: `data` is rebound here — from the dict-like Bunch to its
# (n_samples, n_features) feature array. Tuple assignment evaluates the
# right-hand side first, so the result is identical to the two-step form.
target, data = data['target'], data['data']

In [36]:
# Feature matrix shape — (1797, 64): 1797 samples, 64 features per sample.
data.shape


Out[36]:
(1797, 64)

In [10]:
# Integer class labels (digits 0-9), one per sample.
target


Out[10]:
array([0, 1, 2, ..., 8, 9, 8])

In [11]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; `cross_val_score` now lives in `sklearn.model_selection` with the
# same signature, so no other cell needs to change.
from sklearn.model_selection import cross_val_score

In [12]:
# Task 1: baseline — a single decision tree with default settings,
# evaluated by 10-fold cross-validated accuracy.
from sklearn import tree

estimator = tree.DecisionTreeClassifier()
tree_scores = cross_val_score(estimator=estimator, X=data, y=target,
                              n_jobs=-1, cv=10, scoring='accuracy')
score = tree_scores.mean()
score


Out[12]:
0.82540576364715579

In [15]:
# Task 2: bagging — an ensemble of 100 trees, each trained on a bootstrap
# sample of the data. Expected to beat the single tree from Task 1.
from sklearn.ensemble import BaggingClassifier

# `estimator` is the DecisionTreeClassifier created in the Task 1 cell.
bagg_estimator = BaggingClassifier(base_estimator=estimator, n_estimators=100)
bagg_scores = cross_val_score(estimator=bagg_estimator, X=data, y=target,
                              n_jobs=-1, cv=10, scoring='accuracy')
score = bagg_scores.mean()
score


Out[15]:
0.9232059501168548

In [37]:
#Task 3
ans3 = 0
for _ in range(10):
    estimator = tree.DecisionTreeClassifier()
    bagg_estimator = BaggingClassifier(base_estimator=estimator, max_features=int(np.sqrt(data.shape[1])), n_estimators=100)
    scores = cross_val_score(estimator=bagg_estimator, X=data, y=target, cv=10, scoring='accuracy', n_jobs=-1)
    score = scores.mean()
    ans3 += score
print(ans3 / 10)


0.925554227306

In [38]:
#Task 4
ans4 = 0
for _ in range(10):
    estimator = tree.DecisionTreeClassifier(max_features=int(np.sqrt(data.shape[1])))
    bagg_estimator = BaggingClassifier(base_estimator=estimator, n_estimators=100)
    scores = cross_val_score(estimator=bagg_estimator, X=data, y=target, cv=10, scoring='accuracy', n_jobs=-1)
    score = scores.mean()
    ans4 += score
print(ans4 / 10)


0.955545923539

In [ ]: