Iris Classifier Comparison

This is a companion notebook for the new Data Science Solutions book.

This notebook takes the classifier accuracy comparison coded in Gender Classifier by Naresh and extends it to the Iris dataset.


In [155]:
%matplotlib inline
from sklearn import datasets

from sklearn import tree
from sklearn.svm import SVC
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

# Load the bundled Iris dataset and put features and label side by side
# in one DataFrame for inspection.
iris = datasets.load_iris()

# Append the label vector as a fifth column; stacking it next to the
# float measurements makes the label column float as well.
features_with_target = np.column_stack((iris.data, iris.target))
column_names = list(iris.feature_names) + ['target']

df = pd.DataFrame(features_with_target, columns=column_names)

df.head()


Out[155]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
0 5.1 3.5 1.4 0.2 0.0
1 4.9 3.0 1.4 0.2 0.0
2 4.7 3.2 1.3 0.2 0.0
3 4.6 3.1 1.5 0.2 0.0
4 5.0 3.6 1.4 0.2 0.0

In [156]:
# Peek at the last five rows (the default for tail()) to check the end of the frame.
df.tail()


Out[156]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
145 6.7 3.0 5.2 2.3 2.0
146 6.3 2.5 5.0 1.9 2.0
147 6.5 3.0 5.2 2.0 2.0
148 6.2 3.4 5.4 2.3 2.0
149 5.9 3.0 5.1 1.8 2.0

In [157]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
# `target` holds class labels, so its mean/std/quartiles are not
# meaningful measurements.
df.describe()


Out[157]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) target
count 150.000000 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.054000 3.758667 1.198667 1.000000
std 0.828066 0.433594 1.764420 0.763161 0.819232
min 4.300000 2.000000 1.000000 0.100000 0.000000
25% 5.100000 2.800000 1.600000 0.300000 0.000000
50% 5.800000 3.000000 4.350000 1.300000 1.000000
75% 6.400000 3.300000 5.100000 1.800000 2.000000
max 7.900000 4.400000 6.900000 2.500000 2.000000

In [158]:
# Feature matrix: one row per sample, one column per measurement.
# Take the whole array instead of a hard-coded 0:150 slice so the cell
# does not silently truncate if the dataset size ever differs.
X = iris.data
X.shape


Out[158]:
(150, 4)

In [159]:
# Label vector: one integer class id per sample.
# Take the whole array instead of a hard-coded 0:150 slice so the cell
# does not silently truncate if the dataset size ever differs.
Y = iris.target
Y.shape


Out[159]:
(150,)

In [160]:
# Fixed seed so the stochastic estimators give the same numbers on every
# Restart & Run All: Perceptron and SGDClassifier shuffle the training
# data each epoch, and the decision tree randomly permutes features when
# choosing splits.
SEED = 42

model_tree = tree.DecisionTreeClassifier(random_state=SEED)
model_svm = SVC()
model_per = Perceptron(random_state=SEED)
model_sgd = SGDClassifier(random_state=SEED)
model_KNN = KNeighborsClassifier()
model_GNB = GaussianNB()

# (display name, estimator) in the order the accuracies are reported.
named_models = [
    ('DecisionTree', model_tree),
    ('SVM', model_svm),
    ('Perceptron', model_per),
    ('SGD', model_sgd),
    ('KNN', model_KNN),
    ('GaussianNB', model_GNB),
]

# Fit each model and score it on the SAME data it was trained on.
# NOTE: this is training accuracy, not generalisation accuracy; a model
# that memorises the data (e.g. an unpruned decision tree) will look
# best here regardless of how it performs on unseen samples.
predictions = {}
accuracies = {}
for name, model in named_models:
    model.fit(X, Y)
    predictions[name] = model.predict(X)
    accuracies[name] = accuracy_score(Y, predictions[name]) * 100
    print('Accuracy for {0}: {1:.0f}'.format(name, accuracies[name]))

# Keep the per-model names available for any later cells that use them.
pred_tree, pred_svm, pred_per, pred_sgd, pred_KNN, pred_GNB = (
    predictions[name] for name, _ in named_models)
acc_tree, acc_svm, acc_per, acc_sgd, acc_KNN, acc_GNB = (
    accuracies[name] for name, _ in named_models)


# The best classifier
# Labels and scores live in one list so the index -> name mapping cannot
# drift out of step with the order of the scores passed to argmax.
# On ties argmax returns the first entry in this order.
ranking = [
    ('SVM', acc_svm),
    ('Perceptron', acc_per),
    ('KNN', acc_KNN),
    ('DecisionTree', acc_tree),
    ('GNB', acc_GNB),
    ('SGD', acc_sgd),
]
classifiers = dict(enumerate(name for name, _ in ranking))
best = np.argmax([score for _, score in ranking])
print('Best iris classifier is {}'.format(classifiers[best]))


Accuracy for DecisionTree: 100
Accuracy for SVM: 99
Accuracy for Perceptron: 67
Accuracy for SGD: 67
Accuracy for KNN: 97
Accuracy for GaussianNB: 96
Best iris classifier is DecisionTree