In [10]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.tree import export_graphviz

import pydot
import graphviz

In [2]:
dataset_iris = load_iris()

df = pd.DataFrame(data=dataset_iris.data, columns=dataset_iris.feature_names)

In [3]:
df.head(5)


Out[3]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2

In [4]:
X_train, X_test, Y_train, Y_test = train_test_split(dataset_iris.data, 
                                                    dataset_iris.target, 
                                                    test_size = 0.33)

In [5]:
clf = DecisionTreeClassifier(max_depth= None,
                             max_features = None,
                             criterion = 'entropy',
                             min_samples_leaf = 1,
                             min_samples_split = 2)

model = clf.fit(X_train, Y_train)
predicts = model.predict(X_test)

In [6]:
print(confusion_matrix(Y_test, predicts))


[[18  0  0]
 [ 0 13  0]
 [ 0  2 17]]

In [7]:
print('Acuracia com dados de teste: ', accuracy_score(Y_test, predicts))


Acuracia com dados de teste:  0.96

In [8]:
output = '/home/avinicius/workspace/machine-learning/tree.dot'

In [9]:
export_graphviz(clf, out_file=output, feature_names=dataset_iris.feature_names)

with open(output) as file:
    dot_graph = file.read()
    
graphviz.Source(dot_graph)


Out[9]:
Tree 0 petal length (cm) <= 2.6 entropy = 1.581 samples = 100 value = [32, 37, 31] 1 entropy = 0.0 samples = 32 value = [32, 0, 0] 0->1 True 2 petal length (cm) <= 4.75 entropy = 0.994 samples = 68 value = [0, 37, 31] 0->2 False 3 entropy = 0.0 samples = 34 value = [0, 34, 0] 2->3 4 petal length (cm) <= 5.15 entropy = 0.431 samples = 34 value = [0, 3, 31] 2->4 5 sepal width (cm) <= 3.1 entropy = 0.881 samples = 10 value = [0, 3, 7] 4->5 14 entropy = 0.0 samples = 24 value = [0, 0, 24] 4->14 6 petal width (cm) <= 1.7 entropy = 0.764 samples = 9 value = [0, 2, 7] 5->6 13 entropy = 0.0 samples = 1 value = [0, 1, 0] 5->13 7 petal length (cm) <= 4.95 entropy = 1.0 samples = 4 value = [0, 2, 2] 6->7 12 entropy = 0.0 samples = 5 value = [0, 0, 5] 6->12 8 entropy = 0.0 samples = 1 value = [0, 1, 0] 7->8 9 petal width (cm) <= 1.55 entropy = 0.918 samples = 3 value = [0, 1, 2] 7->9 10 entropy = 0.0 samples = 2 value = [0, 0, 2] 9->10 11 entropy = 0.0 samples = 1 value = [0, 1, 0] 9->11

In [ ]: