Iris Classifiers Compare

This is a companion notebook for the new Data Science Solutions book.

This notebook takes the classifier accuracy comparison coded in Gender Classifier by Naresh and extends it to the Iris dataset.



In [155]:

    
%matplotlib inline
from sklearn import datasets

from sklearn import tree
from sklearn.svm import SVC
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

# Load the Iris dataset; the returned object exposes 'data', 'target'
# and 'feature_names' entries.
iris = datasets.load_iris()

# Put the measurements and the class label side by side in one table.
# Stacking them into a single array means the label column is stored as
# floats (0.0, 1.0, 2.0) alongside the measurements.
features_with_target = np.column_stack((iris['data'], iris['target']))
column_names = iris['feature_names'] + ['target']
df = pd.DataFrame(data=features_with_target, columns=column_names)

# Preview the first rows.
df.head()









    Out[155]:






  
    
      
      sepal length (cm)
      sepal width (cm)
      petal length (cm)
      petal width (cm)
      target
    
  
  
    
      0
      5.1
      3.5
      1.4
      0.2
      0.0
    
    
      1
      4.9
      3.0
      1.4
      0.2
      0.0
    
    
      2
      4.7
      3.2
      1.3
      0.2
      0.0
    
    
      3
      4.6
      3.1
      1.5
      0.2
      0.0
    
    
      4
      5.0
      3.6
      1.4
      0.2
      0.0



In [156]:

    
# Peek at the last five rows of the frame.
df.tail()









    Out[156]:






  
    
      
      sepal length (cm)
      sepal width (cm)
      petal length (cm)
      petal width (cm)
      target
    
  
  
    
      145
      6.7
      3.0
      5.2
      2.3
      2.0
    
    
      146
      6.3
      2.5
      5.0
      1.9
      2.0
    
    
      147
      6.5
      3.0
      5.2
      2.0
      2.0
    
    
      148
      6.2
      3.4
      5.4
      2.3
      2.0
    
    
      149
      5.9
      3.0
      5.1
      1.8
      2.0



In [157]:

    
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()









    Out[157]:






  
    
      
      sepal length (cm)
      sepal width (cm)
      petal length (cm)
      petal width (cm)
      target
    
  
  
    
      count
      150.000000
      150.000000
      150.000000
      150.000000
      150.000000
    
    
      mean
      5.843333
      3.054000
      3.758667
      1.198667
      1.000000
    
    
      std
      0.828066
      0.433594
      1.764420
      0.763161
      0.819232
    
    
      min
      4.300000
      2.000000
      1.000000
      0.100000
      0.000000
    
    
      25%
      5.100000
      2.800000
      1.600000
      0.300000
      0.000000
    
    
      50%
      5.800000
      3.000000
      4.350000
      1.300000
      1.000000
    
    
      75%
      6.400000
      3.300000
      5.100000
      1.800000
      2.000000
    
    
      max
      7.900000
      4.400000
      6.900000
      2.500000
      2.000000



In [158]:

    
# Feature matrix: the first 150 rows of the measurements, every column.
X = iris.data[:150]
X.shape









    Out[158]:





(150, 4)



In [159]:

    
# Label vector: the class labels for the same first 150 rows.
Y = iris.target[:150]
Y.shape









    Out[159]:





(150,)



In [160]:

    
# Fixed seed so the estimators that use randomness during fitting give the
# same result on every Restart & Run All.
SEED = 0


def training_accuracy(model, features, labels):
    """Fit ``model`` and score it on the very same data.

    Parameters
    ----------
    model : estimator exposing ``fit`` and ``predict``
    features : samples passed to ``fit`` / ``predict``
    labels : true class labels for ``features``

    Returns
    -------
    float
        Accuracy as a percentage (``accuracy_score`` times 100).
    """
    model.fit(features, labels)
    return accuracy_score(labels, model.predict(features)) * 100


model_tree = tree.DecisionTreeClassifier(random_state=SEED)
model_svm = SVC()
model_per = Perceptron(random_state=SEED)
model_sgd = SGDClassifier(random_state=SEED)
model_KNN = KNeighborsClassifier()
model_GNB = GaussianNB()

# Single source of truth for display name -> estimator. Insertion order is
# the order in which results are printed.
models = {
    'DecisionTree': model_tree,
    'SVM': model_svm,
    'Perceptron': model_per,
    'SGD': model_sgd,
    'KNN': model_KNN,
    'GaussianNB': model_GNB,
}

# Testing using the same data: these are training accuracies, so they say
# how well each model fits X/Y, not how well it generalises to unseen data.
accuracies = {}
for name, model in models.items():
    accuracies[name] = training_accuracy(model, X, Y)
    print('Accuracy for {}: {:.0f}'.format(name, accuracies[name]))

# The best classifier (on a tie, the model listed first in `models` wins).
best_name = max(accuracies, key=accuracies.get)
print('Best iris classifier is {}'.format(best_name))









    



Accuracy for DecisionTree: 100
Accuracy for SVM: 99
Accuracy for Perceptron: 67
Accuracy for SGD: 67
Accuracy for KNN: 97
Accuracy for GaussianNB: 96
Best iris classifier is DecisionTree

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	target
145	6.7	3.0	5.2	2.3	2.0
146	6.3	2.5	5.0	1.9	2.0
147	6.5	3.0	5.2	2.0	2.0
148	6.2	3.4	5.4	2.3	2.0
149	5.9	3.0	5.1	1.8	2.0

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	target
count	150.000000	150.000000	150.000000	150.000000	150.000000
mean	5.843333	3.054000	3.758667	1.198667	1.000000
std	0.828066	0.433594	1.764420	0.763161	0.819232
min	4.300000	2.000000	1.000000	0.100000	0.000000
25%	5.100000	2.800000	1.600000	0.300000	0.000000
50%	5.800000	3.000000	4.350000	1.300000	1.000000
75%	6.400000	3.300000	5.100000	1.800000	2.000000
max	7.900000	4.400000	6.900000	2.500000	2.000000

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	target
145	6.7	3.0	5.2	2.3	2.0
146	6.3	2.5	5.0	1.9	2.0
147	6.5	3.0	5.2	2.0	2.0
148	6.2	3.4	5.4	2.3	2.0
149	5.9	3.0	5.1	1.8	2.0

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	target
145	6.7	3.0	5.2	2.3	2.0
146	6.3	2.5	5.0	1.9	2.0
147	6.5	3.0	5.2	2.0	2.0
148	6.2	3.4	5.4	2.3	2.0
149	5.9	3.0	5.1	1.8	2.0