In [1]:
import os
import pandas as pd
import math
import numpy as np
from sklearn.tree import DecisionTreeClassifier
In [2]:
# Column names for the raw file; the CSV ships without a header row, so the
# names are supplied explicitly. The last column ("class") is the target.
headers = ["buying", "maint", "doors", "persons", "lug_boot", "safety", "class"]
data = pd.read_csv("car_data.csv", header=None, names=headers)

# Shuffle the rows before the positional train/test split done further down.
# A fixed random_state keeps the shuffle (and therefore every downstream
# score) identical across "Restart & Run All".
data = data.sample(frac=1, random_state=42).reset_index(drop=True)
In [3]:
# Replace the labels of every column (features and target alike) with integer
# category codes.
# NOTE: pandas assigns codes following the sorted order of the labels, which
# is not necessarily their semantic ranking.
for h in headers:
    data[h] = data[h].astype("category").cat.codes

# Move the encoded target out of the columns and into the index, so that the
# frame holds only the six feature columns; later cells pass the frame as X
# and its index as y. Assignment is used instead of inplace=True.
data = data.set_index("class")
data.head()
Out[3]:
In [4]:
# Positional 70/30 hold-out: the first 70% of the rows (rounded down) are used
# for training, the remainder for testing.
size = data.shape[0]
train_size = int(size * 0.7)
train_data, test_data = data.iloc[:train_size], data.iloc[train_size:]
In [5]:
# Using the Gini impurity criterion.
# random_state is fixed so that ties between equally good splits are broken
# the same way on every run, making the fitted tree reproducible.
d_tree = DecisionTreeClassifier(criterion="gini", random_state=42)
# The frame holds only feature columns; the class labels live in its index.
d_tree.fit(train_data, train_data.index)
Out[5]:
In [6]:
# Keep the predicted class codes in a variable instead of discarding them
# (only the last expression of a cell is displayed). The whole frame is passed
# because it contains nothing but the feature columns.
gini_predictions = d_tree.predict(test_data)
# Mean accuracy of the Gini tree on the held-out rows.
d_tree.score(test_data, test_data.index)
Out[6]:
In [7]:
# Same model as above, but splitting on information gain (entropy).
# random_state is fixed so that ties between equally good splits are broken
# the same way on every run, making the fitted tree reproducible.
d_tree2 = DecisionTreeClassifier(criterion="entropy", random_state=42)
# The frame holds only feature columns; the class labels live in its index.
d_tree2.fit(train_data, train_data.index)
Out[7]:
In [8]:
# Keep the predicted class codes in a variable instead of discarding them
# (only the last expression of a cell is displayed). The whole frame is passed
# because it contains nothing but the feature columns.
entropy_predictions = d_tree2.predict(test_data)
# Mean accuracy of the entropy tree on the held-out rows.
d_tree2.score(test_data, test_data.index)
Out[8]:
In [ ]: