In [230]:
# Read all the comments: the first approach below is included deliberately to illustrate a wrong approach.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
%matplotlib inline
In [231]:
# Load scikit-learn's Iris data set and wrap its feature matrix in a DataFrame
# (column labels are assigned in a later cell).
iris = datasets.load_iris()
iris_pd = pd.DataFrame(data=iris.data)
In [232]:
# Preview the first five rows of the feature frame.
iris_pd.head(n=5)
Out[232]:
In [233]:
# Show which fields the object returned by load_iris() exposes.
iris.keys()
Out[233]:
In [234]:
# Label the columns with the feature names shipped alongside the data,
# then preview the frame again to confirm the new headers.
iris_pd.columns = list(iris.feature_names)
iris_pd.head(n=5)
Out[234]:
In [235]:
# Show a reference photo of Iris setosa hosted on Wikimedia Commons.
from IPython.display import Image
url = 'http://upload.wikimedia.org/wikipedia/commons/5/56/Kosaciec_szczecinkowaty_Iris_setosa.jpg'
Image(data=url, height=300, width=300)
Out[235]:
In [236]:
# Show a reference photo of Iris versicolor hosted on Wikimedia Commons.
from IPython.display import Image
url = 'http://upload.wikimedia.org/wikipedia/commons/4/41/Iris_versicolor_3.jpg'
Image(data=url, height=300, width=300)
Out[236]:
In [237]:
# Show a reference photo of Iris virginica hosted on Wikimedia Commons.
from IPython.display import Image
url = 'http://upload.wikimedia.org/wikipedia/commons/9/9f/Iris_virginica.jpg'
Image(data=url, height=300, width=300)
Out[237]:
In [238]:
from sklearn.model_selection import train_test_split

# Build the feature matrix and label vector from the scikit-learn data loaded
# above, so this cell does not rely on `X` / `y` left over from another cell
# or an earlier kernel session (Restart & Run All would otherwise fail here).
X = iris.data
y = iris.target
# Default test size; the fixed seed makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
In [239]:
# First attempt, which the author labels the "incorrect" approach because the
# predicted variable is said not to have been separated from the features.
# NOTE(review): whether that is true depends on how X was built -- confirm.
from sklearn.svm import SVC

# Support-vector classifier with library-default hyper-parameters.
svc_model = SVC()
svc_model.fit(X=X_train, y=y_train)
Out[239]:
In [240]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict the held-out rows with the fitted model, then print the
# confusion matrix of true labels against predicted labels.
predictions = svc_model.predict(X=X_test)
print(confusion_matrix(y_true=y_test, y_pred=predictions))
In [241]:
# The confusion matrix for the first attempt is printed by the cell above.
In [242]:
# Classification report for the first attempt. The author regards these
# predictions as wrong because the predicted variable was reportedly not dropped.
# NOTE(review): confirm against how X was constructed.
print(classification_report(y_true=y_test, y_pred=predictions))
In [243]:
# Correct approach below
# seaborn must be imported before its first use on the next line; with the
# import commented out this cell raised NameError on a fresh kernel.
import seaborn as sns
# Load seaborn's copy of Iris, which carries the label in a 'species' column.
iris = sns.load_dataset('iris')
In [244]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [245]:
# Pair-wise scatter plots of every feature, coloured by species.
# Author's observation from this plot: setosa is the most separable class.
sns.pairplot(data=iris, hue='species', palette='Dark2')
Out[245]:
In [246]:
from sklearn.model_selection import train_test_split
In [247]:
import seaborn as sns

# Reload seaborn's copy of Iris so this cell is self-contained.
iris = sns.load_dataset('iris')
# Separate the feature columns from the label column being predicted.
X = iris.drop('species', axis=1)
y = iris['species']
# Hold out 30% of the rows for testing. The fixed seed makes the split
# reproducible, so the metrics reported further down can be regenerated.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)
In [248]:
from sklearn.svm import SVC
In [249]:
# Second attempt: a fresh SVC built with no arguments, i.e. library defaults.
svc_model = SVC()
In [250]:
# Fit the classifier on the training portion of the split.
svc_model.fit(X=X_train, y=y_train)
Out[250]:
In [251]:
# Predict labels for the held-out test rows.
predictions = svc_model.predict(X=X_test)
In [252]:
from sklearn.metrics import classification_report,confusion_matrix
In [253]:
# Confusion matrix of true labels against predicted labels on the test set.
print(confusion_matrix(y_true=y_test, y_pred=predictions))
In [254]:
# Classification report for the test-set predictions.
print(classification_report(y_true=y_test, y_pred=predictions))
In [ ]:
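# With the target column dropped from the features, the model reached roughly 98% accuracy in the author's run (the exact figure depends on the train/test split).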