This notebook compares the classification algorithms on the measurements taken with the following parameters:
noise amp grc = 10m gaussian
Please refer to the other notebooks to see the results of the comparison for different measurement parameters.
In [2]:
%load_ext autoreload
In [3]:
%autoreload 2
In [4]:
import setup_dataset
In [37]:
# Load the "with_dc" measurements through setup_iterables.
# NOTE(review): this cell carries execution count 37 while its neighbours sit in
# the 2-25 range, and a later cell rebinds `data`/`labels` from
# setup_simple_iterables("with_dc") - confirm which loader the analysis needs.
data, labels = setup_dataset.setup_iterables("with_dc")
In [ ]:
# Sanity-check the dataset loaded above: its size, one sample and that sample's label.
print(len(data))
print(data[1])
print(labels[1])
# Count the samples labelled 1 in *this* dataset. The cell previously read
# `labels_test`, which is only created by a later cell, so a fresh
# Restart-and-Run-All stopped here with a NameError.
print(len([x for x in labels if x == 1]))
# Show 20 consecutive samples starting at offset 4500 * mul
# (4500 is presumably the number of samples per class - TODO confirm).
mul = 0
for i in data[4500*mul:(4500*mul)+20]:
    print(i)
In [16]:
# Load data_test / labels_test with setup_simple_iterables.
# NOTE(review): the path is "with_dc/" here but "with_dc" in the other loader
# calls - confirm both spellings resolve to the same directory.
data_test, labels_test = setup_dataset.setup_simple_iterables("with_dc/")
In [17]:
# Inspect data_test / labels_test: sizes, one sample and the number of label-3 entries.
print(len(data_test))
print(data_test[1])
print(len(labels_test))
print(sum(1 for lbl in labels_test if lbl == 3))
# Dump the first 20 samples.
for sample in data_test[:20]:
    print(sample)
In [18]:
# Load data and labels with setup_simple_iterables. This rebinds the names that
# the earlier setup_iterables("with_dc") cell assigned, so on a top-to-bottom run
# everything below works on the output of this loader.
data, labels = setup_dataset.setup_simple_iterables("with_dc")
In [19]:
# Split data/labels into train and test parts with the project helper; no split
# options are passed, so ratio and ordering are whatever slice_data decides.
X_train, X_test, y_train, y_test = setup_dataset.slice_data(data, labels)
In [20]:
# Hyper-parameter grids ("complexities") swept for each algorithm.

# k-NN: number of neighbors.
knn_c = (2, 4, 10, 50)

# Decision tree: maximum depth.
dtc_c = (2, 5, 10, 50)

# SVC: complexities for the rbf kernel.
svc_c = (1, 10 ** 3, 10 ** 6)

# Random forest: number of estimators, 10**0 through 10**5.
rfc_c = tuple(10 ** exp for exp in range(6))

# Gaussian process classifier: number of iterations.
gpc_c = (20, 50, 100)

# Number of parallel jobs (CPU) handed to the random forest and Gaussian process runs.
rfc_jobs = (3, -2)
gpc_jobs = (3, -2)
In [21]:
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import StandardScaler
# Produce scaled copies of the train/test features by handing a fresh
# StandardScaler to the project helper.
# NOTE(review): MaxAbsScaler is imported above but not used in this notebook view.
X_train_scaled, X_test_scaled = setup_dataset.scale_sliced_data(X_train, X_test, StandardScaler())
In [22]:
# k-NN on the unscaled split, sweeping the neighbor counts in knn_c.
# The four results are unpacked as accuracies, predictions, prediction times and fit times.
(knn_accs, knn_pred, knn_pred_times, knn_fit_times) = setup_dataset.run_knn(
    X_train, X_test, y_train, y_test, knn_c
)
In [23]:
# Pass the true test labels, the unscaled k-NN predictions and knn_c to compute_cm
# (presumably a confusion-matrix helper - confirm in setup_dataset).
setup_dataset.compute_cm(y_test, knn_pred, knn_c)
In [24]:
# k-NN again, this time on the scaled features; same knn_c sweep and same labels.
(
    knn_accs_scaled,
    knn_pred_scaled,
    knn_pred_times_scaled,
    knn_fit_times_scaled,
) = setup_dataset.run_knn(X_train_scaled, X_test_scaled, y_train, y_test, knn_c)
In [ ]:
# Same compute_cm call as for the unscaled run, fed with the scaled k-NN predictions
# (compute_cm is presumably a confusion-matrix helper - confirm in setup_dataset).
setup_dataset.compute_cm(y_test, knn_pred_scaled, knn_c)
In [25]:
# Print the k-NN accuracy rows: unscaled first, then a separator, then scaled.
for accs_row in knn_accs:
    print(accs_row)
print("====================")
for accs_row in knn_accs_scaled:
    print(accs_row)
In [ ]:
# Decision tree on the unscaled split, sweeping the maximum depths in dtc_c.
(dtc_accs, dtc_pred, dtc_pred_times, dtc_fit_times) = setup_dataset.run_decision_tree(
    X_train, X_test, y_train, y_test, dtc_c
)
In [ ]:
# NOTE(review): this cell repeats the preceding decision-tree cell verbatim and
# overwrites its results with a second run - consider deleting one of the two.
(dtc_accs, dtc_pred, dtc_pred_times, dtc_fit_times) = setup_dataset.run_decision_tree(
    X_train, X_test, y_train, y_test, dtc_c
)
In [ ]:
# Decision tree on the scaled features; same dtc_c sweep and same labels.
(
    dtc_accs_scaled,
    dtc_pred_scaled,
    dtc_pred_times_scaled,
    dtc_fit_times_scaled,
) = setup_dataset.run_decision_tree(X_train_scaled, X_test_scaled, y_train, y_test, dtc_c)
In [ ]:
# Pass the true test labels, the unscaled decision-tree predictions and dtc_c to
# compute_cm (presumably a confusion-matrix helper - confirm in setup_dataset).
setup_dataset.compute_cm(y_test, dtc_pred, dtc_c)
In [ ]:
# Same compute_cm call, fed with the decision-tree predictions from the scaled run.
setup_dataset.compute_cm(y_test, dtc_pred_scaled, dtc_c)
In [ ]:
# Naive Bayes on the unscaled split. The complexity argument is the one-element
# tuple (1,) - presumably a placeholder because nothing is swept here; confirm.
(nbc_accs, nbc_pred, nbc_pred_times, nbc_fit_times) = setup_dataset.run_naive_bayes(
    X_train, X_test, y_train, y_test, (1,)
)
In [ ]:
# Naive Bayes on the scaled features, again with the single complexity (1,).
(
    nbc_accs_scaled,
    nbc_pred_scaled,
    nbc_pred_times_scaled,
    nbc_fit_times_scaled,
) = setup_dataset.run_naive_bayes(X_train_scaled, X_test_scaled, y_train, y_test, (1,))
In [ ]:
# Pass the true test labels and the unscaled naive-Bayes predictions to compute_cm;
# [1] mirrors the single complexity (1,) given to the run (as a list here).
setup_dataset.compute_cm(y_test, nbc_pred, [1])
In [ ]:
# Same compute_cm call, fed with the naive-Bayes predictions from the scaled run.
setup_dataset.compute_cm(y_test, nbc_pred_scaled, [1])
In [ ]:
# AdaBoost on the unscaled split with the single complexity (1,)
# (how run_adaboost interprets that value is not visible here - confirm).
(abc_accs, abc_pred, abc_pred_times, abc_fit_times) = setup_dataset.run_adaboost(
    X_train, X_test, y_train, y_test, (1,)
)
In [ ]:
# AdaBoost on the scaled features, again with the single complexity (1,).
(
    abc_accs_scaled,
    abc_pred_scaled,
    abc_pred_times_scaled,
    abc_fit_times_scaled,
) = setup_dataset.run_adaboost(X_train_scaled, X_test_scaled, y_train, y_test, (1,))
In [ ]:
# Pass the true test labels and the unscaled AdaBoost predictions to compute_cm;
# [1] mirrors the single complexity (1,) given to the run (as a list here).
setup_dataset.compute_cm(y_test, abc_pred, [1])
In [ ]:
# Same compute_cm call, fed with the AdaBoost predictions from the scaled run.
setup_dataset.compute_cm(y_test, abc_pred_scaled, [1])
In [ ]:
# run_quadratic on the unscaled split with the single complexity (1,)
# (the qda_ prefix suggests quadratic discriminant analysis - confirm in setup_dataset).
(qda_accs, qda_pred, qda_pred_times, qda_fit_times) = setup_dataset.run_quadratic(
    X_train, X_test, y_train, y_test, (1,)
)
In [ ]:
# run_quadratic on the scaled features, again with the single complexity (1,).
(
    qda_accs_scaled,
    qda_pred_scaled,
    qda_pred_times_scaled,
    qda_fit_times_scaled,
) = setup_dataset.run_quadratic(X_train_scaled, X_test_scaled, y_train, y_test, (1,))
In [ ]:
# Pass the true test labels and the unscaled run_quadratic predictions to compute_cm;
# [1] mirrors the single complexity (1,) given to the run (as a list here).
setup_dataset.compute_cm(y_test, qda_pred, [1])
In [ ]:
# Same compute_cm call, fed with the run_quadratic predictions from the scaled run.
setup_dataset.compute_cm(y_test, qda_pred_scaled, [1])
In [ ]:
# NOTE(review): the identical decision-tree run already appears earlier in this
# notebook; this cell re-runs it and overwrites dtc_accs / dtc_pred / timing results.
(dtc_accs, dtc_pred, dtc_pred_times, dtc_fit_times) = setup_dataset.run_decision_tree(
    X_train, X_test, y_train, y_test, dtc_c
)
In [ ]:
# NOTE(review): the identical scaled decision-tree run already appears earlier in
# this notebook; this cell re-runs it and overwrites the *_scaled results.
(
    dtc_accs_scaled,
    dtc_pred_scaled,
    dtc_pred_times_scaled,
    dtc_fit_times_scaled,
) = setup_dataset.run_decision_tree(X_train_scaled, X_test_scaled, y_train, y_test, dtc_c)
In [ ]:
# compute_cm for the unscaled decision-tree predictions.
# NOTE(review): the same call already appears earlier in this notebook.
setup_dataset.compute_cm(y_test, dtc_pred, dtc_c)
In [ ]:
# compute_cm for the scaled decision-tree predictions.
# NOTE(review): the same call already appears earlier in this notebook.
setup_dataset.compute_cm(y_test, dtc_pred_scaled, dtc_c)
In [ ]:
# SVC on the unscaled split, sweeping the rbf-kernel complexities in svc_c.
(svc_accs, svc_pred, svc_pred_times, svc_fit_times) = setup_dataset.run_svc(
    X_train, X_test, y_train, y_test, svc_c
)
In [ ]:
# SVC on the scaled features; same svc_c sweep and same labels.
(
    svc_accs_scaled,
    svc_pred_scaled,
    svc_pred_times_scaled,
    svc_fit_times_scaled,
) = setup_dataset.run_svc(X_train_scaled, X_test_scaled, y_train, y_test, svc_c)
In [ ]:
# Pass the true test labels, the unscaled SVC predictions and svc_c to compute_cm
# (presumably a confusion-matrix helper - confirm in setup_dataset).
setup_dataset.compute_cm(y_test, svc_pred, svc_c)
In [ ]:
# Same compute_cm call, fed with the SVC predictions from the scaled run.
setup_dataset.compute_cm(y_test, svc_pred_scaled, svc_c)
In [ ]:
# Print the SVC accuracy rows: unscaled first, then a separator, then scaled.
for accs_row in svc_accs:
    print(accs_row)
print("====================")
for accs_row in svc_accs_scaled:
    print(accs_row)
In [ ]:
# WARNING: THIS MAKES THE KERNEL CRASH!
# Random forest on the unscaled split, sweeping the estimator counts in rfc_c with
# the job counts in rfc_jobs. rfc_c goes up to 100000 estimators - presumably the
# reason for the crash; TODO confirm and trim the grid if so.
(rfc_accs, rfc_pred, rfc_pred_times, rfc_fit_times) = setup_dataset.run_random_forest(
    X_train, X_test, y_train, y_test, rfc_c, rfc_jobs
)
In [ ]:
# Random forest on the scaled features; same rfc_c sweep and rfc_jobs as the
# unscaled run (which is flagged in this notebook as crashing the kernel).
(
    rfc_accs_scaled,
    rfc_pred_scaled,
    rfc_pred_times_scaled,
    rfc_fit_times_scaled,
) = setup_dataset.run_random_forest(
    X_train_scaled, X_test_scaled, y_train, y_test, rfc_c, rfc_jobs
)
In [ ]:
# Pass the true test labels, the unscaled random-forest predictions and rfc_c to
# compute_cm (presumably a confusion-matrix helper - confirm in setup_dataset).
setup_dataset.compute_cm(y_test, rfc_pred, rfc_c)
In [ ]:
# Same compute_cm call, fed with the random-forest predictions from the scaled run.
setup_dataset.compute_cm(y_test, rfc_pred_scaled, rfc_c)
In [ ]:
# Gaussian process classifier on the unscaled split, sweeping the iteration
# counts in gpc_c with the job counts in gpc_jobs.
(gpc_accs, gpc_pred, gpc_pred_times, gpc_fit_times) = setup_dataset.run_gaussian(
    X_train, X_test, y_train, y_test, gpc_c, gpc_jobs
)
In [ ]:
# Gaussian process classifier on the scaled features, sweeping gpc_c.
# The job counts now come from gpc_jobs, matching the unscaled run. The call
# previously passed rfc_jobs (a copy-paste slip); both tuples hold (3, -2)
# today, so results are unchanged, but the two runs would silently diverge as
# soon as the random-forest job setting is tuned on its own.
(
    gpc_accs_scaled,
    gpc_pred_scaled,
    gpc_pred_times_scaled,
    gpc_fit_times_scaled,
) = setup_dataset.run_gaussian(
    X_train_scaled, X_test_scaled, y_train, y_test, gpc_c, gpc_jobs
)
In [ ]:
# Pass the true test labels, the unscaled Gaussian-process predictions and gpc_c to
# compute_cm (presumably a confusion-matrix helper - confirm in setup_dataset).
setup_dataset.compute_cm(y_test, gpc_pred, gpc_c)
In [ ]:
# Same compute_cm call, fed with the Gaussian-process predictions from the scaled run.
setup_dataset.compute_cm(y_test, gpc_pred_scaled, gpc_c)
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
# Plot one curve per row of knn_accs (accuracies of the unscaled k-NN run).
plt.figure()
# Iterate over the rows themselves. The previous loop ran over
# range(len(knn_accs[0])) while indexing knn_accs[i], i.e. it used the length of
# a row as the number of rows: that raises IndexError when a row is longer than
# the table and silently drops curves when it is shorter.
for i, accs in enumerate(knn_accs):
    plt.plot(np.arange(len(accs)), accs, linestyle='-', label="complexity {}".format(i))
# Legend sits above the axes, spread over two columns.
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
           ncol=2, mode="expand", borderaxespad=0.)
plt.show()
In [ ]:
# Plot one curve per row of knn_fit_times (fit times of the unscaled k-NN run).
plt.figure()
# Iterate over the rows themselves. The previous loop ran over
# range(len(knn_fit_times[0])) while indexing knn_fit_times[i], i.e. it used the
# length of a row as the number of rows: that raises IndexError when a row is
# longer than the table and silently drops curves when it is shorter.
for i, fit_times in enumerate(knn_fit_times):
    plt.plot(np.arange(len(fit_times)), fit_times, linestyle='-',
             label="complexity {}".format(i))
# Legend sits above the axes, spread over two columns.
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
           ncol=2, mode="expand", borderaxespad=0.)
plt.show()
In [ ]:
# Plot one curve per row of knn_accs_scaled (accuracies of the scaled k-NN run).
plt.figure()
# Iterate over the rows themselves. The previous loop ran over
# range(len(knn_accs_scaled[0])) while indexing knn_accs_scaled[i], i.e. it used
# the length of a row as the number of rows: that raises IndexError when a row is
# longer than the table and silently drops curves when it is shorter.
for i, accs in enumerate(knn_accs_scaled):
    plt.plot(np.arange(len(accs)), accs, linestyle='-', label="complexity {}".format(i))
# Legend sits above the axes, spread over two columns.
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
           ncol=2, mode="expand", borderaxespad=0.)
plt.show()
In [ ]:
# Plot one curve per row of svc_accs (accuracies of the unscaled SVC run).
plt.figure()
# Iterate over the rows themselves. The previous loop ran over
# range(len(svc_accs[0])) while indexing svc_accs[i], i.e. it used the length of
# a row as the number of rows: that raises IndexError when a row is longer than
# the table and silently drops curves when it is shorter.
for i, accs in enumerate(svc_accs):
    plt.plot(np.arange(len(accs)), accs, linestyle='-', label="complexity {}".format(i))
# Legend sits above the axes, spread over two columns.
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
           ncol=2, mode="expand", borderaxespad=0.)
plt.show()
In [ ]:
# Plot one curve per row of svc_accs_scaled (accuracies of the scaled SVC run).
plt.figure()
# Iterate over the rows themselves. The previous loop ran over
# range(len(svc_accs_scaled[0])) while indexing svc_accs_scaled[i], i.e. it used
# the length of a row as the number of rows: that raises IndexError when a row is
# longer than the table and silently drops curves when it is shorter.
for i, accs in enumerate(svc_accs_scaled):
    plt.plot(np.arange(len(accs)), accs, linestyle='-', label="complexity {}".format(i))
# Legend sits above the axes, spread over two columns.
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
           ncol=2, mode="expand", borderaxespad=0.)
plt.show()
In [ ]: