In [1]:
import numpy as np
import sklearn as skl
import pandas as pd
from matplotlib import pyplot as plt
In [3]:
%matplotlib inline
In [5]:
from sklearn.datasets import load_digits
# Load scikit-learn's bundled digits dataset; the returned container exposes
# the feature matrix under 'data' and the labels under 'target'.
data = load_digits()
In [7]:
# Unpack the feature matrix and the label vector straight from the loader.
# Indexing into `data` and then rebinding `data` to the array made this cell
# fail on a second run (the array cannot be indexed by 'target'); asking the
# loader for the (X, y) pair keeps the cell idempotent and yields the same
# `data` / `target` arrays.
data, target = load_digits(return_X_y=True)
In [36]:
# Show the dimensions of the feature matrix; data.shape[1] is used further
# down as the feature count when deriving max_features.
data.shape
Out[36]:
In [10]:
# Display the label vector that is passed as `y` to cross_val_score below.
target
Out[10]:
In [11]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; `cross_val_score` now lives in `sklearn.model_selection`.
# Fall back to the legacy module only for very old installations.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import cross_val_score
In [12]:
# Task 1: baseline -- a single decision tree with default settings, scored by
# mean accuracy over 10-fold cross-validation.
from sklearn import tree

estimator = tree.DecisionTreeClassifier()
scores = cross_val_score(
    estimator,
    data,
    target,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,  # use every available core
)
score = scores.mean()
score
Out[12]:
In [15]:
# Task 2: bagging over 100 copies of the Task 1 decision tree, scored by mean
# accuracy over 10-fold cross-validation.
from sklearn.ensemble import BaggingClassifier

# The base model is passed positionally: the keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 (old name removed in
# 1.4), while the first positional slot means the same thing in every release.
bagg_estimator = BaggingClassifier(estimator, n_estimators=100)
scores = cross_val_score(
    estimator=bagg_estimator,
    X=data,
    y=target,
    n_jobs=-1,
    cv=10,
    scoring='accuracy',
)
score = scores.mean()
score
Out[15]:
In [37]:
# Task 3: bagging of 100 decision trees where `max_features` is set on the
# bagging wrapper, so each base tree is fitted on a random subset of
# sqrt(n_features) columns. The procedure is stochastic, so the 10-fold CV
# accuracy is averaged over several independent repetitions.
n_runs = 10
n_sqrt_features = int(np.sqrt(data.shape[1]))  # loop-invariant, computed once

ans3 = 0
for _ in range(n_runs):
    estimator = tree.DecisionTreeClassifier()
    # Base model passed positionally: the `base_estimator` keyword was renamed
    # to `estimator` in scikit-learn 1.2 and removed in 1.4.
    bagg_estimator = BaggingClassifier(
        estimator,
        n_estimators=100,
        max_features=n_sqrt_features,
    )
    scores = cross_val_score(
        estimator=bagg_estimator,
        X=data,
        y=target,
        cv=10,
        scoring='accuracy',
        n_jobs=-1,
    )
    score = scores.mean()
    ans3 += score  # running sum of per-repetition mean accuracies

print(ans3 / n_runs)
In [38]:
# Task 4: bagging of 100 decision trees where `max_features` is set on the
# tree itself, so the sqrt(n_features) limit applies inside each tree (per the
# scikit-learn docs, to the features considered at every split) rather than
# to the columns handed to the tree. The procedure is stochastic, so the
# 10-fold CV accuracy is averaged over several independent repetitions.
n_runs = 10
n_sqrt_features = int(np.sqrt(data.shape[1]))  # loop-invariant, computed once

ans4 = 0
for _ in range(n_runs):
    estimator = tree.DecisionTreeClassifier(max_features=n_sqrt_features)
    # Base model passed positionally: the `base_estimator` keyword was renamed
    # to `estimator` in scikit-learn 1.2 and removed in 1.4.
    bagg_estimator = BaggingClassifier(estimator, n_estimators=100)
    scores = cross_val_score(
        estimator=bagg_estimator,
        X=data,
        y=target,
        cv=10,
        scoring='accuracy',
        n_jobs=-1,
    )
    score = scores.mean()
    ans4 += score  # running sum of per-repetition mean accuracies

print(ans4 / n_runs)
In [ ]: