This is a companion notebook for the new Data Science Solutions book. The code is explained in the book.
In [2]:
%matplotlib inline
from sklearn import datasets
from sklearn import tree
import pandas as pd
import numpy as np
import seaborn as sns
import random as rnd
# Load the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()
# Place the target next to the feature columns so one frame holds both;
# stacking with the float features yields a single float array.
features_with_target = np.column_stack((iris['data'], iris['target']))
column_labels = [*iris['feature_names'], 'target']
df = pd.DataFrame(data=features_with_target, columns=column_labels)
df.head()
Out[2]:
In [3]:
# Preview the last rows of the frame.
df.tail()
Out[3]:
In [4]:
# Summary statistics for the columns of df.
df.describe()
Out[4]:
In [5]:
# Use seaborn's 'whitegrid' style for the figure drawn below.
sns.set_style('whitegrid')
# Pairwise plots of the columns in df, colored by the 'target' column.
sns.pairplot(df, hue='target')
Out[5]:
In [6]:
# Feature matrix: the first 150 rows of the iris measurements, all columns.
X = iris.data[:150]
X.shape
Out[6]:
In [7]:
# Label vector: the first 150 entries of the iris targets.
Y = iris.target[:150]
Y.shape
Out[7]:
In [8]:
# Hold out one random setosa sample for testing.
# randrange() excludes its stop value, so the stop must be 50 for the last
# setosa row (index 49) to be eligible.
setosa_index = rnd.randrange(0, 50)
test_setosa = [iris.data[setosa_index, :]]
# Build the training arrays from the full dataset rather than shrinking X/Y in
# place, so re-running this cell does not remove an additional row each time.
X = np.delete(iris.data, setosa_index, 0)
Y = np.delete(iris.target, setosa_index, 0)
# Display the held-out sample, its species name, and the training shapes.
test_setosa, iris.target_names[iris.target[setosa_index]], X.shape, Y.shape
Out[8]:
In [9]:
# Hold out one random virginica sample for testing.
virginica_index = rnd.randrange(100, 150)
test_virginica = [iris.data[virginica_index, :]]
# virginica_index refers to a row of the ORIGINAL dataset. X has already lost
# the setosa row, so deleting X[virginica_index] would drop the wrong row
# (leaving the test sample in the training data) and would raise IndexError
# for index 149. Rebuild from the full data with every held-out index instead.
held_out_indices = [setosa_index, virginica_index]
X = np.delete(iris.data, held_out_indices, 0)
Y = np.delete(iris.target, held_out_indices, 0)
# Display the held-out sample, its species name, and the training shapes.
test_virginica, iris.target_names[iris.target[virginica_index]], X.shape, Y.shape
Out[9]:
In [10]:
# Hold out one random versicolor sample for testing.
# randrange() excludes its stop value, so the stop must be 100 for the last
# versicolor row (index 99) to be eligible.
versicolor_index = rnd.randrange(50, 100)
test_versicolor = [iris.data[versicolor_index, :]]
# versicolor_index refers to a row of the ORIGINAL dataset, but X has already
# had earlier rows removed, so X[versicolor_index] is a different sample.
# Rebuild the training arrays from the full data with all held-out indices so
# exactly the three test samples are excluded.
held_out_indices = [setosa_index, virginica_index, versicolor_index]
X = np.delete(iris.data, held_out_indices, 0)
Y = np.delete(iris.target, held_out_indices, 0)
# Display the held-out sample, its species name, and the training shapes.
test_versicolor, iris.target_names[iris.target[versicolor_index]], X.shape, Y.shape
Out[10]:
In [11]:
# Build a decision tree classifier and train it on the remaining samples.
model_tree = tree.DecisionTreeClassifier()
model_tree.fit(X, Y)
# Classify the held-out setosa sample and report the predicted species name.
pred_tree_setosa = model_tree.predict(test_setosa)
setosa_prediction_name = iris.target_names[pred_tree_setosa]
print('Decision Tree predicts {} for test_setosa'.format(setosa_prediction_name))
In [12]:
# Classify the held-out virginica sample and report the predicted species name.
pred_tree_virginica = model_tree.predict(test_virginica)
virginica_prediction_name = iris.target_names[pred_tree_virginica]
print('Decision Tree predicts {} for test_virginica'.format(virginica_prediction_name))
In [13]:
# Classify the held-out versicolor sample and report the predicted species name.
pred_tree_versicolor = model_tree.predict(test_versicolor)
versicolor_prediction_name = iris.target_names[pred_tree_versicolor]
print('Decision Tree predicts {} for test_versicolor'.format(versicolor_prediction_name))