This is a companion notebook for the new Data Science Solutions book. The code is explained in the book.
In [75]:
%matplotlib inline
from sklearn import datasets
from sklearn import tree
import pandas as pd
import numpy as np
import seaborn as sns
import random as rnd
# Seed Python's random module (imported as `rnd`) so that any random draws made
# with it after this cell are identical on every Restart & Run All.
rnd.seed(42)

# Load the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()

# Build a DataFrame from the feature matrix with the class labels appended as a
# trailing 'target' column (np.c_ concatenates column-wise).
df = pd.DataFrame(
    data=np.c_[iris['data'], iris['target']],
    columns=iris['feature_names'] + ['target'],
)
df.head()
Out[75]:
In [76]:
# Show the last five rows of the frame (five is the default for tail()).
df.tail(n=5)
Out[76]:
In [77]:
# Per-column summary statistics for the frame, displayed as the cell output.
summary_stats = df.describe()
summary_stats
Out[77]:
In [78]:
# Use the white grid theme, then draw pairwise plots of the columns with the
# points coloured by the 'target' column.
sns.set_style(style='whitegrid')
sns.pairplot(data=df, hue='target')
Out[78]:
In [79]:
# Feature matrix: the first 150 rows of iris.data, all columns.
X = iris.data[:150]
X.shape
Out[79]:
In [80]:
# Label vector: the first 150 entries of iris.target.
Y = iris.target[:150]
Y.shape
Out[80]:
In [81]:
# Hold out one random setosa sample as a test case.
# The setosa rows occupy indices 0..49 of iris.data; randrange() excludes its
# stop value, so the stop must be 50 for row 49 to be selectable.
setosa_index = rnd.randrange(0, 50)
test_setosa = [iris.data[setosa_index, :]]

# Remove the held-out sample from the training data. X and Y are still aligned
# with iris.data at this point, so the same index applies to both.
X = np.delete(X, setosa_index, 0)
Y = np.delete(Y, setosa_index, 0)

# Display the held-out sample, its true class name, and the new training shapes.
test_setosa, iris.target_names[iris.target[setosa_index]], X.shape, Y.shape
Out[81]:
In [82]:
# Hold out one random virginica sample (indices 100..149 of iris.data).
virginica_index = rnd.randrange(100, 150)
test_virginica = [iris.data[virginica_index, :]]

# X and Y have already lost one setosa row (original index below 50), so every
# later sample now sits one position earlier than in iris.data. Deleting at
# virginica_index itself would remove the *next* sample, leave the held-out one
# in the training data, and raise IndexError for index 149.
virginica_row = virginica_index - 1
assert np.array_equal(X[virginica_row], iris.data[virginica_index]), (
    'X is out of sync with iris.data; re-run the notebook from the top')
X = np.delete(X, virginica_row, 0)
Y = np.delete(Y, virginica_row, 0)

# Display the held-out sample, its true class name, and the new training shapes.
test_virginica, iris.target_names[iris.target[virginica_index]], X.shape, Y.shape
Out[82]:
In [83]:
# Hold out one random versicolor sample (indices 50..99 of iris.data).
# randrange() excludes its stop value, so the stop must be 100 for row 99 to be
# selectable.
versicolor_index = rnd.randrange(50, 100)
test_versicolor = [iris.data[versicolor_index, :]]

# Exactly one previously removed row (the setosa sample, original index below
# 50) lies before the versicolor range; the removed virginica row lies after
# it. The sample therefore sits one position earlier in X/Y than in iris.data.
# Deleting at versicolor_index itself would drop the wrong sample and leave the
# held-out one in the training data.
versicolor_row = versicolor_index - 1
assert np.array_equal(X[versicolor_row], iris.data[versicolor_index]), (
    'X is out of sync with iris.data; re-run the notebook from the top')
X = np.delete(X, versicolor_row, 0)
Y = np.delete(Y, versicolor_row, 0)

# Display the held-out sample, its true class name, and the new training shapes.
test_versicolor, iris.target_names[iris.target[versicolor_index]], X.shape, Y.shape
Out[83]:
In [84]:
# Decision Tree Classifier Model
# A fixed random_state makes the fitted tree deterministic for a given training
# set; without it scikit-learn may break ties between equally good splits
# differently from run to run.
model_tree = tree.DecisionTreeClassifier(random_state=0)

# Training the model on the samples that remain after the hold-outs.
model_tree.fit(X, Y)

# Classify the held-out setosa sample. predict() returns an array of class
# indices, which is mapped to class names through iris.target_names.
pred_tree_setosa = model_tree.predict(test_setosa)
print('Decision Tree predicts {} for test_setosa'
      .format(iris.target_names[pred_tree_setosa]))
In [85]:
# Classify the held-out virginica sample and report the predicted class name(s).
pred_tree_virginica = model_tree.predict(test_virginica)
virginica_label = iris.target_names[pred_tree_virginica]
print('Decision Tree predicts {} for test_virginica'.format(virginica_label))
In [86]:
# Classify the held-out versicolor sample and report the predicted class name(s).
pred_tree_versicolor = model_tree.predict(test_versicolor)
versicolor_label = iris.target_names[pred_tree_versicolor]
print('Decision Tree predicts {} for test_versicolor'.format(versicolor_label))