notebook.community

Edit and run



In [230]:

    
# Read all the comments, the first approach is taken to highlight a wrong approach
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
%matplotlib inline



In [231]:

    
iris = datasets.load_iris()
iris_pd=pd.DataFrame(iris.data)



In [232]:

    
iris_pd.head()



In [233]:

    
iris.keys()









    Out[233]:





dict_keys(['target', 'target_names', 'DESCR', 'feature_names', 'data'])



In [234]:

    
iris_pd.columns=iris.feature_names
iris_pd.head()









    Out[234]:






  
    
      
      sepal length (cm)
      sepal width (cm)
      petal length (cm)
      petal width (cm)
    
  
  
    
      0
      5.1
      3.5
      1.4
      0.2
    
    
      1
      4.9
      3.0
      1.4
      0.2
    
    
      2
      4.7
      3.2
      1.3
      0.2
    
    
      3
      4.6
      3.1
      1.5
      0.2
    
    
      4
      5.0
      3.6
      1.4
      0.2



In [235]:

    
# The Iris Setosa from wiki pulled out
from IPython.display import Image
url = 'http://upload.wikimedia.org/wikipedia/commons/5/56/Kosaciec_szczecinkowaty_Iris_setosa.jpg'
Image(url,width=300, height=300)









    Out[235]:



In [236]:

    
# The Iris Versicolor from Wiki pull out
from IPython.display import Image
url = 'http://upload.wikimedia.org/wikipedia/commons/4/41/Iris_versicolor_3.jpg'
Image(url,width=300, height=300)









    Out[236]:



In [237]:

    
# The Iris Virginica from wiki pull out
from IPython.display import Image
url = 'http://upload.wikimedia.org/wikipedia/commons/9/9f/Iris_virginica.jpg'
Image(url,width=300, height=300)









    Out[237]:



In [238]:

    
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)



In [239]:

    
# Incorrect approch as we are not seperating the predicted variable
# from sklearn import svm
from sklearn.svm import SVC
svc_model = SVC()
svc_model.fit(X_train,y_train)









    Out[239]:





SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)



In [240]:

    
predictions = svc_model.predict(X_test)
from sklearn.metrics import classification_report,confusion_matrix
print(confusion_matrix(y_test,predictions))









    



[[13  0  0]
 [ 0 13  0]
 [ 0  0 12]]



In [241]:

    
# Print confusion Matrix



In [242]:

    
print(classification_report(y_test,predictions))
# Wrong prediction as we have not dropped the predicted variable









    



             precision    recall  f1-score   support

     setosa       1.00      1.00      1.00        13
 versicolor       1.00      1.00      1.00        13
  virginica       1.00      1.00      1.00        12

avg / total       1.00      1.00      1.00        38



In [243]:

    
# Correct approach below
#import seaborn as sns
iris = sns.load_dataset('iris')



In [244]:

    
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline



In [245]:

    
# Setosa is the most separable. 
sns.pairplot(iris,hue='species',palette='Dark2')









    Out[245]:





<seaborn.axisgrid.PairGrid at 0x22085864b00>



In [246]:

    
from sklearn.model_selection import train_test_split



In [247]:

    
import seaborn as sns
iris = sns.load_dataset('iris')
X = iris.drop('species',axis=1)
y = iris['species']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)



In [248]:

    
from sklearn.svm import SVC



In [249]:

    
svc_model = SVC()



In [250]:

    
svc_model.fit(X_train,y_train)









    Out[250]:





SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)



In [251]:

    
predictions = svc_model.predict(X_test)



In [252]:

    
from sklearn.metrics import classification_report,confusion_matrix



In [253]:

    
print(confusion_matrix(y_test,predictions))









    



[[21  0  0]
 [ 0 13  1]
 [ 0  0 10]]



In [254]:

    
print(classification_report(y_test,predictions))









    



             precision    recall  f1-score   support

     setosa       1.00      1.00      1.00        21
 versicolor       1.00      0.93      0.96        14
  virginica       0.91      1.00      0.95        10

avg / total       0.98      0.98      0.98        45



In [ ]:

    
# Good prediction after column dropped at 98%

	0	1	2	3
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	0	1	2	3
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	0	1	2	3
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2

	sepal length (cm)	sepal width (cm)	petal length (cm)	petal width (cm)
0	5.1	3.5	1.4	0.2
1	4.9	3.0	1.4	0.2
2	4.7	3.2	1.3	0.2
3	4.6	3.1	1.5	0.2
4	5.0	3.6	1.4	0.2