In [1]:
import os
import sys

# Resolve the directory three levels above the working directory and put it
# on the import path, so the project-local `Util` package imported below can
# be found.
root_path = os.path.abspath("../../../")
if root_path not in sys.path:
    sys.path.append(root_path)

import numpy as np
from sklearn.preprocessing import OneHotEncoder

from Util.Util import DataUtil

# Value handed to `get_dataset` as `n_train`.
train_num = 6000

# Load the mushroom data as a (train, test) pair of (features, labels).
# NOTE(review): `tar_idx=0` presumably selects column 0 as the label column
# -- confirm against DataUtil.get_dataset.
(x_train, y_train), (x_test, y_test) = DataUtil.get_dataset(
    "mushroom",
    "../../../_Data/mushroom.txt",
    n_train=train_num,
    tar_idx=0,
)

# Quantize the training split. `wc`, `feat_dicts` and `label_dict` returned
# here are passed to `transform_data` so the test split is processed with the
# artefacts derived from the training split.
(
    x_train,
    y_train,
    wc,
    features,
    feat_dicts,
    label_dict,
) = DataUtil.quantize_data(x_train, y_train)
x_test, y_test = DataUtil.transform_data(
    x_test, y_test, wc, feat_dicts, label_dict
)

# Fit the one-hot encoder on the training features only, then apply that
# fitted encoder to the test features.
enc = OneHotEncoder()
x_train_one_hot = enc.fit_transform(x_train)
x_test_one_hot = enc.transform(x_test)
In [2]:
from sklearn.naive_bayes import MultinomialNB

# Train multinomial naive Bayes on the one-hot encoded training features
# (`fit` returns the estimator itself, so `clf` is the fitted model) ...
clf = MultinomialNB().fit(x_train_one_hot, y_train)

# ... and print the fraction of test labels it predicts correctly.
print(
    np.mean(
        y_test == clf.predict(x_test_one_hot)
    )
)
In [3]:
from sklearn.tree import DecisionTreeClassifier

# Fix the seed so the printed accuracies are repeatable across kernel
# restarts: scikit-learn randomly permutes the candidate features at every
# split, so an unseeded tree may break ties between equally good splits
# differently from run to run.
clf = DecisionTreeClassifier(random_state=0)

# Test accuracy when training on the quantized features.
clf.fit(x_train, y_train)
print(np.mean(y_test == clf.predict(x_test)))

# Refit the same estimator on the one-hot encoded features and print the
# test accuracy again for comparison.
clf.fit(x_train_one_hot, y_train)
print(np.mean(y_test == clf.predict(x_test_one_hot)))
In [4]:
from sklearn.svm import SVC

# Score a default-parameter SVC twice, printing the test accuracy each time:
# first on the quantized features, then on their one-hot encoding. The same
# estimator object is refit for the second pass, so `clf` ends up fitted on
# the one-hot features.
clf = SVC()
for fit_x, eval_x in ((x_train, x_test), (x_train_one_hot, x_test_one_hot)):
    clf.fit(fit_x, y_train)
    print(np.mean(y_test == clf.predict(eval_x)))
In [5]:
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression()

# `fit` returns the estimator, so each statement below trains `clf` and then
# prints the fraction of test labels predicted correctly.

# Quantized features.
print(
    np.mean(y_test == clf.fit(x_train, y_train).predict(x_test))
)

# One-hot encoded features (same estimator object, refit).
print(
    np.mean(
        y_test == clf.fit(x_train_one_hot, y_train).predict(x_test_one_hot)
    )
)