In [1]:
from sklearn import cross_validation
In [2]:
from sklearn import datasets
In [3]:
# Load the iris dataset that ships with scikit-learn; the cells below read its
# `.data` (feature matrix) and `.target` (class labels) attributes.
iris = datasets.load_iris()
In [21]:
# Split the samples into a training part and a held-out test part.
# Only the first two feature columns are kept; about one third of the rows
# (test_size=0.3333) go to the test split, and random_state=0 pins the shuffle
# so the split is the same on every run.
# NOTE: `sklearn.cross_validation` was removed in scikit-learn 0.20; on newer
# versions the same function lives in `sklearn.model_selection`.
(data_train, data_test,
 target_train, target_test) = cross_validation.train_test_split(
    iris.data[:, :2],
    iris.target,
    test_size=0.3333,
    random_state=0
)
In [14]:
from sklearn import tree
In [23]:
# Decision tree classifier with all hyperparameters left at their defaults.
clf = tree.DecisionTreeClassifier()
In [24]:
# Train the tree on the training split only; the test split stays unseen.
clf = clf.fit(data_train, target_train)
In [25]:
# Predicted class labels for the held-out test samples.
pred = clf.predict(data_test)
In [26]:
# Test-set error rate: the fraction of held-out samples whose predicted label
# differs from the true label (mean of the element-wise mismatch mask).
(pred != target_test).mean()
Out[26]:
In [27]:
# Score of the fitted tree on the held-out test split (for a classifier this
# is the mean accuracy, i.e. the complement of the error rate).
clf.score(data_test, target_test)
Out[27]:
In [28]:
# 5-fold cross-validation of the tree using the training split only;
# `scores` holds one score per fold.
scores = cross_validation.cross_val_score(clf, data_train, target_train, cv=5)
In [29]:
# Show the per-fold cross-validation scores.
# Written as a call with one argument so it prints the same on Python 2 and 3.
print(scores)
In [30]:
# Show the cross-validation score averaged over the folds.
# Written as a call with one argument so it prints the same on Python 2 and 3.
print(scores.mean())
In [48]:
# Search for the pruning level that gives the best cross-validated accuracy
# on the training split.
#
# Each candidate tree is pruned through `min_samples_split` (a node holding
# fewer samples than this is not split further), tried for every value from 2
# up to len(target_train) - 1.  The number of folds is held at 5 for every
# candidate so that differences in the mean score come from the pruning level
# alone, not from a changing evaluation protocol.
#
# NOTE(review): `min_samples_split` is assumed to be the intended pruning
# parameter because the search range starts at 2, its smallest valid integer
# value -- confirm (`max_depth` / `min_samples_leaf` are the alternatives).
#
# Results:
#   bestAccuracy -- highest mean cross-validation score seen
#   bestPruning  -- the `min_samples_split` value that achieved it
#                   (stays -1 if no candidate scores above 0)
bestPruning = -1
bestAccuracy = 0
for min_split in range(2, len(target_train)):
    clf = tree.DecisionTreeClassifier(min_samples_split=min_split)
    scores = cross_validation.cross_val_score(clf, data_train, target_train, cv=5)
    mean_accuracy = scores.mean()
    # Strict '>' means that among tied candidates the first (smallest) value
    # tried is the one that is kept.
    if mean_accuracy > bestAccuracy:
        bestAccuracy = mean_accuracy
        bestPruning = min_split
In [49]:
# Report the outcome of the search: best mean cross-validation accuracy first,
# then the parameter value that produced it.
# Written as calls with one argument so they print the same on Python 2 and 3.
print(bestAccuracy)
print(bestPruning)
In [47]:
In [ ]: