Iris Decision Tree

This is a companion notebook for the new Data Science Solutions book. The code is explained in the book.



In [75]:

    
%matplotlib inline
from sklearn import datasets
from sklearn import tree
import pandas as pd
import numpy as np
import seaborn as sns
import random as rnd

# Load the bundled iris dataset (feature matrix, class labels and metadata).
iris = datasets.load_iris()

# Put the four measurements and the class label side by side in one table.
# The label column becomes float because it shares an array with the features.
feature_and_target = np.column_stack((iris.data, iris.target))
column_names = list(iris.feature_names) + ['target']
df = pd.DataFrame(data=feature_and_target, columns=column_names)

df.head()









    Out[75]:






  
    
      
      sepal length (cm)
      sepal width (cm)
      petal length (cm)
      petal width (cm)
      target
    
  
  
    
      0
      5.1
      3.5
      1.4
      0.2
      0.0
    
    
      1
      4.9
      3.0
      1.4
      0.2
      0.0
    
    
      2
      4.7
      3.2
      1.3
      0.2
      0.0
    
    
      3
      4.6
      3.1
      1.5
      0.2
      0.0
    
    
      4
      5.0
      3.6
      1.4
      0.2
      0.0



In [76]:

    
# Show the last five rows of the table.
df.tail(n=5)









    Out[76]:






  
    
      
      sepal length (cm)
      sepal width (cm)
      petal length (cm)
      petal width (cm)
      target
    
  
  
    
      145
      6.7
      3.0
      5.2
      2.3
      2.0
    
    
      146
      6.3
      2.5
      5.0
      1.9
      2.0
    
    
      147
      6.5
      3.0
      5.2
      2.0
      2.0
    
    
      148
      6.2
      3.4
      5.4
      2.3
      2.0
    
    
      149
      5.9
      3.0
      5.1
      1.8
      2.0



In [77]:

    
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
df.describe(percentiles=[0.25, 0.5, 0.75])









    Out[77]:






  
    
      
      sepal length (cm)
      sepal width (cm)
      petal length (cm)
      petal width (cm)
      target
    
  
  
    
      count
      150.000000
      150.000000
      150.000000
      150.000000
      150.000000
    
    
      mean
      5.843333
      3.054000
      3.758667
      1.198667
      1.000000
    
    
      std
      0.828066
      0.433594
      1.764420
      0.763161
      0.819232
    
    
      min
      4.300000
      2.000000
      1.000000
      0.100000
      0.000000
    
    
      25%
      5.100000
      2.800000
      1.600000
      0.300000
      0.000000
    
    
      50%
      5.800000
      3.000000
      4.350000
      1.300000
      1.000000
    
    
      75%
      6.400000
      3.300000
      5.100000
      1.800000
      2.000000
    
    
      max
      7.900000
      4.400000
      6.900000
      2.500000
      2.000000



In [78]:

    
# Pairwise scatter plots of all columns, coloured by the class label.
sns.set_style(style='whitegrid')
pair_grid = sns.pairplot(data=df, hue='target')
pair_grid









    Out[78]:





<seaborn.axisgrid.PairGrid at 0x12a2eaf10>



In [79]:

    
# Feature matrix: the first 150 rows of the measurements, all columns.
X = iris.data[:150]
X.shape









    Out[79]:





(150, 4)



In [80]:

    
# Label vector: the class label for each of the first 150 rows.
Y = iris.target[:150]
Y.shape









    Out[80]:





(150,)



In [81]:

    
# Hold out one random setosa sample (rows 0-49 of the dataset) for testing.
# randrange() excludes its upper bound, so the stop value must be 50 for
# row 49 to be eligible.
setosa_index = rnd.randrange(0, 50)
test_setosa = [iris.data[setosa_index, :]]
# Build the training arrays from the full dataset rather than shrinking X/Y
# in place, so re-running this cell cannot remove a second row.
X = np.delete(iris.data, setosa_index, 0)
Y = np.delete(iris.target, setosa_index, 0)
test_setosa, iris.target_names[iris.target[setosa_index]], X.shape, Y.shape









    Out[81]:





([array([ 4.4,  2.9,  1.4,  0.2])], 'setosa', (149, 4), (149,))



In [82]:

    
# Hold out one random virginica sample (rows 100-149 of the dataset) for testing.
virginica_index = rnd.randrange(100, 150)
test_virginica = [iris.data[virginica_index, :]]
# virginica_index is a row number in the full dataset, but X and Y have
# already lost the setosa row, so their row positions are shifted by one.
# Deleting by this index from X would drop a different sample (and is out of
# bounds for row 149), leaving the test sample in the training data.
# Remove every held-out row from the full arrays instead.
held_out = [setosa_index, virginica_index]
X = np.delete(iris.data, held_out, 0)
Y = np.delete(iris.target, held_out, 0)
test_virginica, iris.target_names[iris.target[virginica_index]], X.shape, Y.shape









    Out[82]:





([array([ 5.8,  2.7,  5.1,  1.9])], 'virginica', (148, 4), (148,))



In [83]:

    
# Hold out one random versicolor sample (rows 50-99 of the dataset) for testing.
# randrange() excludes its upper bound, so the stop value must be 100 for
# row 99 to be eligible.
versicolor_index = rnd.randrange(50, 100)
test_versicolor = [iris.data[versicolor_index, :]]
# versicolor_index is a row number in the full dataset, while X and Y have
# already had rows removed, so the same number points at a different sample
# there. Remove every held-out row from the full arrays instead, so that no
# test sample remains in the training data.
held_out = [setosa_index, virginica_index, versicolor_index]
X = np.delete(iris.data, held_out, 0)
Y = np.delete(iris.target, held_out, 0)
test_versicolor, iris.target_names[iris.target[versicolor_index]], X.shape, Y.shape









    Out[83]:





([array([ 6.2,  2.2,  4.5,  1.5])], 'versicolor', (147, 4), (147,))



In [84]:

    
# Create the decision tree classifier and train it on the remaining samples.
model_tree = tree.DecisionTreeClassifier().fit(X, Y)

# Classify the held-out setosa sample and report the predicted species name.
pred_tree_setosa = model_tree.predict(test_setosa)
setosa_label = iris.target_names[pred_tree_setosa]
print('Decision Tree predicts {} for test_setosa'.format(setosa_label))









    



Decision Tree predicts ['setosa'] for test_setosa



In [85]:

    
# Classify the held-out virginica sample and report the predicted species name.
pred_tree_virginica = model_tree.predict(test_virginica)
virginica_label = iris.target_names[pred_tree_virginica]
print('Decision Tree predicts {} for test_virginica'.format(virginica_label))









    



Decision Tree predicts ['virginica'] for test_virginica



In [86]:

    
# Classify the held-out versicolor sample and report the predicted species name.
pred_tree_versicolor = model_tree.predict(test_versicolor)
versicolor_label = iris.target_names[pred_tree_versicolor]
print('Decision Tree predicts {} for test_versicolor'.format(versicolor_label))









    



Decision Tree predicts ['versicolor'] for test_versicolor

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	target
145	6.7	3.0	5.2	2.3	2.0
146	6.3	2.5	5.0	1.9	2.0
147	6.5	3.0	5.2	2.0	2.0
148	6.2	3.4	5.4	2.3	2.0
149	5.9	3.0	5.1	1.8	2.0

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	target
count	150.000000	150.000000	150.000000	150.000000	150.000000
mean	5.843333	3.054000	3.758667	1.198667	1.000000
std	0.828066	0.433594	1.764420	0.763161	0.819232
min	4.300000	2.000000	1.000000	0.100000	0.000000
25%	5.100000	2.800000	1.600000	0.300000	0.000000
50%	5.800000	3.000000	4.350000	1.300000	1.000000
75%	6.400000	3.300000	5.100000	1.800000	2.000000
max	7.900000	4.400000	6.900000	2.500000	2.000000

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	target
145	6.7	3.0	5.2	2.3	2.0
146	6.3	2.5	5.0	1.9	2.0
147	6.5	3.0	5.2	2.0	2.0
148	6.2	3.4	5.4	2.3	2.0
149	5.9	3.0	5.1	1.8	2.0

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)	target
145	6.7	3.0	5.2	2.3	2.0
146	6.3	2.5	5.0	1.9	2.0
147	6.5	3.0	5.2	2.0	2.0
148	6.2	3.4	5.4	2.3	2.0
149	5.9	3.0	5.1	1.8	2.0