In [2]:
from sklearn import cross_validation, model_selection, linear_model, tree, ensemble
from sklearn.datasets import load_digits

import numpy as np
import pandas as pd

In [3]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [4]:
# Load the digits dataset shipped with scikit-learn and unpack it into
# X (digits.data) and y (digits.target) for the experiments below.
digits = load_digits()
X, y = digits.data, digits.target

In [5]:
# Quality estimation: score a default Lasso model with 10-fold
# cross-validation and report the mean over the folds.
# NOTE(review): Lasso is a regression model, so the score reported here is
# presumably R^2 rather than classification accuracy -- confirm that this
# baseline is intended.
lasso = linear_model.Lasso()
val_cross_score = model_selection.cross_val_score(lasso, X, y, cv=10)
# print(...) with a single argument behaves the same on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(val_cross_score.mean())


0.423640007073

In [6]:
# Task 1: create a DecisionTreeClassifier with the default settings and
# measure its quality as the mean score over 10-fold cross-validation.
estimator_DTC = tree.DecisionTreeClassifier()
cross_score_DTC = model_selection.cross_val_score(estimator_DTC, X, y, cv=10)
ans1 = cross_score_DTC.mean()
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(ans1)


0.833098485775

In [7]:
# Write the task 1 score to ans1.txt as text.
with open('ans1.txt', 'w') as answer_file:
    answer_file.write(str(ans1))

In [8]:
# Task 2: wrap estimator_DTC in a BaggingClassifier with 100 estimators and
# score the ensemble with 10-fold cross-validation (n_jobs=6).
bagging = ensemble.BaggingClassifier(estimator_DTC, n_estimators=100)
cross_score_badding = model_selection.cross_val_score(
    bagging, X, y, cv=10, n_jobs=6)
ans2 = cross_score_badding.mean()
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(ans2)


0.921574884943

In [9]:
# Write the task 2 score to ans2.txt as text.
with open('ans2.txt', 'w') as answer_file:
    answer_file.write(str(ans2))

In [10]:
# Task 3: the same 100-estimator bagging ensemble, but with
# BaggingClassifier's max_features set to d, the integer part of the square
# root of X.shape[1] (the number of columns of X).
d = int(np.sqrt(X.shape[1]))
bagging_2 = ensemble.BaggingClassifier(
    estimator_DTC, n_estimators=100, max_features=d)
cross_score_badding_2 = model_selection.cross_val_score(
    bagging_2, X, y, cv=10, n_jobs=6)
ans3 = cross_score_badding_2.mean()
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(ans3)


0.928183695662

In [11]:
# Write the task 3 score to ans3.txt as text.
with open('ans3.txt', 'w') as answer_file:
    answer_file.write(str(ans3))

In [12]:
# Task 4: put the feature limit on the tree itself (max_features=d on
# DecisionTreeClassifier) and bag 100 such trees with default bagging settings.
# `d` is the value computed in the task 3 cell.
#
# The restricted tree gets its own name instead of rebinding estimator_DTC:
# overwriting estimator_DTC here would silently change what the task 1-3
# cells evaluate if any of them were re-run after this cell.
estimator_DTC_sqrt = tree.DecisionTreeClassifier(max_features=d)
bagging_3 = ensemble.BaggingClassifier(estimator_DTC_sqrt, n_estimators=100)
cross_score_badding_3 = model_selection.cross_val_score(
    bagging_3, X, y, cv=10, n_jobs=6)
ans4 = cross_score_badding_3.mean()
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(ans4)


0.956608454456

In [13]:
# Write the task 4 score to ans4.txt as text.
with open('ans4.txt', 'w') as answer_file:
    answer_file.write(str(ans4))

In [14]:
# Per-fold cross-validation scores of the task 4 ensemble, drawn with a grid
# on a y-axis fixed to the [0, 1] range.
pylab.grid(True)
pylab.ylim(0.0, 1.0)
pylab.plot(cross_score_badding_3)


Out[14]:
[<matplotlib.lines.Line2D at 0xd75e160>]

In [15]:
# Random forest with max_depth=15 and a fixed random_state, scored with
# 10-fold cross-validation; the mean over folds is stored in ans5.
# NOTE(review): ans5 is reassigned to a string in a later cell, so this
# float is only meaningful until then.
random_forest = ensemble.RandomForestClassifier(max_depth=15, random_state=1)
cross_score_random = model_selection.cross_val_score(
    random_forest, X, y, cv=10, n_jobs=6)
ans5 = cross_score_random.mean()
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(ans5)


0.919929754031

In [16]:
# Per-fold cross-validation scores of the most recently evaluated random
# forest (cross_score_random), drawn with a grid on a y-axis fixed to [0, 1].
pylab.grid(True)
pylab.ylim(0.0, 1.0)
pylab.plot(cross_score_random)


Out[16]:
[<matplotlib.lines.Line2D at 0xdb34e48>]

In [17]:
# Task 6: collect the 10-fold cross-validation scores of a max_depth=5 random
# forest for each forest size in `trees` (1, 6, 11, ..., 96).
trees = np.arange(1, 100, 5)
res_val_trees = []
for n_trees in trees:
    random_forest = ensemble.RandomForestClassifier(
        n_estimators=n_trees, max_depth=5, random_state=1)
    cross_score_random = model_selection.cross_val_score(
        random_forest, X, y, cv=10, n_jobs=6)
    # One array of per-fold scores per forest size.
    res_val_trees.append(cross_score_random)

In [18]:
# Mean cross-validation score (averaged over the folds of each run) plotted
# against the number of trees.
pylab.grid(True)
# Average with a plain ndarray reduction: np.matrix / np.asmatrix is no longer
# recommended by NumPy, and a 1-D vector of means is all the plot needs.
pylab.plot(trees, np.mean(res_val_trees, axis=1), marker='*')
# Title: "Dependence on the number of trees" (typo in the last word fixed).
pylab.title(u'Зависимость от кол-ва деревьев')
pylab.xlabel('trees')
pylab.ylabel('accuracy')


Out[18]:
<matplotlib.text.Text at 0xca605f8>

In [19]:
# Collect the 10-fold cross-validation scores of a 60-tree, max_depth=5
# random forest while max_features runs over the fractions 0.1, 0.2, ..., 1.0
# of X.shape[1], truncated to an integer.
# NOTE(review): a float-step arange relies on rounding to exclude the stop
# value; np.linspace(0.1, 1.0, 10) would pin the endpoints explicitly.
d2 = X.shape[1]
res_val_features = []
for fraction in np.arange(0.1, 1.1, 0.1):
    n_features = int(fraction * d2)
    random_forest = ensemble.RandomForestClassifier(
        n_estimators=60, max_depth=5, random_state=1,
        max_features=n_features)
    cross_score_random = model_selection.cross_val_score(
        random_forest, X, y, cv=10, n_jobs=6)
    # One array of per-fold scores per feature fraction.
    res_val_features.append(cross_score_random)

In [20]:
# Mean cross-validation score (averaged over the folds of each run) plotted
# against the feature fraction used for max_features.
pylab.grid(True)
# Average with a plain ndarray reduction: np.matrix / np.asmatrix is no longer
# recommended by NumPy, and a 1-D vector of means is all the plot needs.
pylab.plot(np.arange(0.1, 1.1, 0.1),
           np.mean(res_val_features, axis=1),
           marker='*')
pylab.title(u'Зависимость от выборки')
pylab.xlabel('features')
pylab.ylabel('accuracy')


Out[20]:
<matplotlib.text.Text at 0xe279128>

In [21]:
# Random forest with 60 trees, max_depth=15 and a fixed random_state; the
# cell displays the mean of its 10-fold cross-validation scores.
random_forest = ensemble.RandomForestClassifier(
    n_estimators=60, max_depth=15, random_state=1)
cross_score_random = model_selection.cross_val_score(
    random_forest, X, y, cv=10, n_jobs=6)
cross_score_random.mean()


Out[21]:
0.95668140849751171

In [24]:
# Store the answer string in ans5 and write it to ans5.txt.
# ans5 is already a str, so it is written as-is.
ans5 = '2 3 4 7'
with open("ans5.txt", "w") as answer_file:
    answer_file.write(ans5)

In [ ]: