Evaluation scores vary across data sets

License

This notebook:

  • is a simple example on a standard data set that shows that the error estimates in cross-validation can vary (greatly) within a single split.

In [66]:
# plotting library
%matplotlib inline
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.model_selection import cross_val_score

# some classifiers
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
import numpy

In [67]:
# load the breast cancer (Wisconsin diagnostic) data set bundled with scikit-learn
data = datasets.load_breast_cancer()

In [68]:
# Linear SVM, scored with 10-fold cross-validation on weighted F1.
c_svm = svm.SVC(kernel='linear', C=1)
scores1 = cross_val_score(c_svm, data.data, data.target, cv=10, scoring='f1_weighted')
print(scores1)
# f-string produces the same text as the original str() concatenation
print(f"SVM -- mean: {numpy.mean(scores1)}, standard deviation: {numpy.std(scores1)}")


[0.98267623 0.92940109 0.92797784 0.94708706 0.96491228 0.98236235
 0.92982456 0.9466633  0.96457219 0.96428571]
SVM -- mean: 0.9539762621662449, standard deviation: 0.019828554681133054

In [69]:
# 3-nearest-neighbours classifier, same 10-fold CV protocol as the SVM cell.
# n_neighbors named explicitly instead of passed positionally for readability.
c_knn = KNeighborsClassifier(n_neighbors=3)
scores2 = cross_val_score(c_knn, data.data, data.target, cv=10, scoring='f1_weighted')
print(scores2)
# f-string produces the same text as the original str() concatenation
print(f"3-NN -- mean: {numpy.mean(scores2)}, standard deviation: {numpy.std(scores2)}")


[0.91236333 0.85648471 0.89473684 0.94708706 0.94708706 0.94708706
 0.96450925 0.94615252 0.91169154 0.92857143]
3-NN -- mean: 0.92557708159848, standard deviation: 0.0307056240140939

In [70]:
# Decision tree, same 10-fold CV protocol as the cells above.
# random_state is fixed so Restart & Run All reproduces the reported numbers:
# DecisionTreeClassifier breaks ties between equally good splits at random,
# so an unseeded tree gives different scores on every run.
c_dt = tree.DecisionTreeClassifier(random_state=42)
scores3 = cross_val_score(c_dt, data.data, data.target, cv=10, scoring='f1_weighted')
print(scores3)
# f-string produces the same text as the original str() concatenation
print(f"DT -- mean: {numpy.mean(scores3)}, standard deviation: {numpy.std(scores3)}")


[0.94848632 0.86064883 0.92901849 0.87775613 0.96491228 0.89473684
 0.84126119 0.9466633  0.92951252 0.98205084]
DT -- mean: 0.9175046737000159, standard deviation: 0.044322127764291355