In [ ]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

from sklearn.base import clone
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import (RandomForestClassifier, ExtraTreesClassifier,
                              AdaBoostClassifier)
from sklearn.tree import DecisionTreeClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

from helper_functions import *
# Load all pre-processed data sets if available.
data_list = get_data_list(divs=[4,10,20])
# Parameters
n_classes = 5
n_estimators = 15
RANDOM_SEED = 42  # fixed seed so every train/test split is reproducible
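`get_data_list` comes from the local `helper_functions` module, which is not shown here. If that module is unavailable, a hypothetical stand-in like the one below can synthesize DataFrames with the schema the later cells expect (feature columns plus `Target` and `label`); the function name and column layout are assumptions inferred from how `data_list` is used below, not part of the original helpers.
In [ ]:
# Hypothetical fallback, not from helper_functions: build DataFrames with
# the same schema the cells below consume ('Target' and 'label' columns).
def make_fake_data_list(divs=[4, 10, 20], n_samples=500):
    fake_list = []
    for div in divs:
        X, y = make_classification(n_samples=n_samples, n_features=div,
                                   n_informative=min(div, n_classes),
                                   n_redundant=0, n_classes=n_classes,
                                   n_clusters_per_class=1,
                                   random_state=RANDOM_SEED)
        df = pd.DataFrame(X, columns=["f{}".format(i) for i in range(div)])
        df['Target'] = y
        df['label'] = y  # duplicated here only to satisfy the expected schema
        fake_list.append(df)
    return fake_list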
In [ ]:
names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Poly SVM",
         "Gaussian Process", "Decision Tree", "Random Forest",
         "Neural Net", "Neural Net (10,10)", "Neural Net (10,10,10)",
         "AdaBoost", "Naive Bayes", "QDA"]
classifiers = [
    KNeighborsClassifier(5),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    SVC(kernel="poly", gamma=2, C=1, degree=4),  # degree only applies to the poly kernel
    GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
    DecisionTreeClassifier(max_depth=None),
    RandomForestClassifier(max_depth=None, n_estimators=n_estimators,
                           max_features="sqrt"),  # sqrt of the feature count, not of n_estimators
    MLPClassifier(),
    MLPClassifier(hidden_layer_sizes=(10, 10)),
    MLPClassifier(hidden_layer_sizes=(10, 10, 10)),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]
divs = [4, 10, 20]
combis = get_combis(divs)
for index, version in enumerate(data_list):
    feature_size = combis[index]
    X = version.drop(['Target', 'label'], axis=1).values
    y = version['Target']
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=RANDOM_SEED)
    # iterate over classifiers, fitting a fresh clone on each data set
    for name, clf in zip(names, classifiers):
        clf = clone(clf).fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        print("{} with features {} has a score of {}".format(name, feature_size, score))
In [ ]:
models = [DecisionTreeClassifier(max_depth=None),
          RandomForestClassifier(n_estimators=n_estimators),
          ExtraTreesClassifier(n_estimators=n_estimators),
          AdaBoostClassifier(DecisionTreeClassifier(max_depth=3),
                             n_estimators=n_estimators)]
for index, version in enumerate(data_list):
    feature_size = combis[index]
    X = version.drop(['Target', 'label'], axis=1).values
    y = version['Target']
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=RANDOM_SEED)
    # iterate over the ensemble models, fitting a fresh clone each time
    for model in models:
        clf = clone(model).fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        # Build a readable title by slicing the class name out of str(type(...)),
        # e.g. "<class '...DecisionTreeClassifier'>" -> "DecisionTree"
        model_title = str(type(clf)).split(".")[-1][:-2][:-len("Classifier")]
        model_details = model_title
        if hasattr(clf, "estimators_"):
            model_details += " with {} estimators".format(len(clf.estimators_))
        print("{} with features {} has a score of {}".format(model_details, feature_size, score))
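A single train/test split can give noisy scores on small data sets. As a sketch of one possible extension (not in the original notebook), `cross_val_score` averages accuracy over several folds; it reuses the `X` and `y` arrays left over from the last loop iteration above.
In [ ]:
# Sketch, not from the original: 5-fold cross-validation for a more
# stable accuracy estimate than one split. Uses X, y from the cell above.
from sklearn.model_selection import cross_val_score

for model in models:
    cv_scores = cross_val_score(clone(model), X, y, cv=5)
    print("{}: mean CV accuracy {:.3f} (+/- {:.3f})".format(
        str(type(model)).split(".")[-1][:-2], cv_scores.mean(), cv_scores.std()))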