Bagging and random forest — comparing a single decision tree, bagged trees, and random-forest-style feature subsampling on the scikit-learn digits dataset (10-fold CV accuracy).



In [1]:
import numpy as np
import sklearn as skl
import pandas as pd
from matplotlib import pyplot as plt

In [3]:
%matplotlib inline

In [5]:
from sklearn.datasets import load_digits
# Load the handwritten-digits dataset as a Bunch (dict-like with 'data' and 'target').
data = load_digits()

In [7]:
# Split the Bunch into labels and the feature matrix.
# NOTE: `data` is rebound here — from the dict-like Bunch to its
# (n_samples, n_features) feature array. Tuple assignment evaluates the
# right-hand side first, so the result is identical to the two-step form.
target, data = data['target'], data['data']

In [36]:
# Feature matrix shape — (1797, 64): 1797 samples, 64 features per sample.
data.shape


Out[36]:
(1797, 64)

In [10]:
# Integer class labels (digits 0-9), one per sample.
target


Out[10]:
array([0, 1, 2, ..., 8, 9, 8])

In [11]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; `cross_val_score` now lives in `sklearn.model_selection` with the
# same signature, so no other cell needs to change.
from sklearn.model_selection import cross_val_score

In [12]:
# Task 1: baseline — a single decision tree with default settings,
# evaluated by 10-fold cross-validated accuracy.
from sklearn import tree

estimator = tree.DecisionTreeClassifier()
tree_scores = cross_val_score(estimator=estimator, X=data, y=target,
                              n_jobs=-1, cv=10, scoring='accuracy')
score = tree_scores.mean()
score


Out[12]:
0.82540576364715579

In [15]:
# Task 2: bagging — an ensemble of 100 trees, each trained on a bootstrap
# sample of the data. Expected to beat the single tree from Task 1.
from sklearn.ensemble import BaggingClassifier

# `estimator` is the DecisionTreeClassifier created in the Task 1 cell.
bagg_estimator = BaggingClassifier(base_estimator=estimator, n_estimators=100)
bagg_scores = cross_val_score(estimator=bagg_estimator, X=data, y=target,
                              n_jobs=-1, cv=10, scoring='accuracy')
score = bagg_scores.mean()
score


Out[15]:
0.9232059501168548

In [37]:
#Task 3
ans3 = 0
for _ in range(10):
    estimator = tree.DecisionTreeClassifier()
    bagg_estimator = BaggingClassifier(base_estimator=estimator, max_features=int(np.sqrt(data.shape[1])), n_estimators=100)
    scores = cross_val_score(estimator=bagg_estimator, X=data, y=target, cv=10, scoring='accuracy', n_jobs=-1)
    score = scores.mean()
    ans3 += score
print(ans3 / 10)


0.925554227306

In [38]:
#Task 4
ans4 = 0
for _ in range(10):
    estimator = tree.DecisionTreeClassifier(max_features=int(np.sqrt(data.shape[1])))
    bagg_estimator = BaggingClassifier(base_estimator=estimator, n_estimators=100)
    scores = cross_val_score(estimator=bagg_estimator, X=data, y=target, cv=10, scoring='accuracy', n_jobs=-1)
    score = scores.mean()
    ans4 += score
print(ans4 / 10)


0.955545923539

In [ ]: