Notebook by Youssef Emad El-Din (GitHub ID: youssef-emad)
This notebook demonstrates different classification methods in Shogun. The goal is to compare and visualize the decision boundaries of different classifiers on two datasets: one that is linearly separable and one that is not.
In [ ]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import os
SHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')
from shogun import *
In [ ]:
# Lists used to collect the trained classifiers for the final comparison plot
classifiers_linear = []
classifiers_non_linear = []
classifiers_names = []
fadings = []
The features are converted to Shogun format using the RealFeatures and BinaryLabels classes.
In [ ]:
shogun_feats_linear = RealFeatures(CSVFile(os.path.join(SHOGUN_DATA_DIR, 'toy/classifier_binary_2d_linear_features_train.dat')))
shogun_labels_linear = BinaryLabels(CSVFile(os.path.join(SHOGUN_DATA_DIR, 'toy/classifier_binary_2d_linear_labels_train.dat')))
shogun_feats_non_linear = RealFeatures(CSVFile(os.path.join(SHOGUN_DATA_DIR, 'toy/classifier_binary_2d_nonlinear_features_train.dat')))
shogun_labels_non_linear = BinaryLabels(CSVFile(os.path.join(SHOGUN_DATA_DIR, 'toy/classifier_binary_2d_nonlinear_labels_train.dat')))
feats_linear = shogun_feats_linear.get_feature_matrix()
labels_linear = shogun_labels_linear.get_labels()
feats_non_linear = shogun_feats_non_linear.get_feature_matrix()
labels_non_linear = shogun_labels_non_linear.get_labels()
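Since the rest of the notebook works on these arrays, a quick sanity check of shapes and label values with plain numpy:
In [ ]:
# Sanity check: feature matrices are 2 x n_samples, labels are -1/+1
print(feats_linear.shape, feats_non_linear.shape)
print(np.unique(labels_linear), np.unique(labels_non_linear))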
Data visualization methods.
In [ ]:
def plot_binary_data(plot, X_train, y_train):
    """
    This function plots 2D binary data with different colors for different labels.
    """
    plot.xlabel(r"$x$")
    plot.ylabel(r"$y$")
    plot.plot(X_train[0, np.argwhere(y_train == 1)], X_train[1, np.argwhere(y_train == 1)], 'ro')
    plot.plot(X_train[0, np.argwhere(y_train == -1)], X_train[1, np.argwhere(y_train == -1)], 'bo')
In [ ]:
def compute_plot_isolines(classifier, features, size=200, fading=True):
    """
    This function classifies the points of a regular grid
    to obtain the decision boundaries used in plotting.
    """
    x1 = np.linspace(1.2 * min(features[0]), 1.2 * max(features[0]), size)
    x2 = np.linspace(1.2 * min(features[1]), 1.2 * max(features[1]), size)
    x, y = np.meshgrid(x1, x2)
    plot_features = RealFeatures(np.array((np.ravel(x), np.ravel(y))))
    if fading:
        plot_labels = classifier.apply(plot_features).get_values()
    else:
        plot_labels = classifier.apply(plot_features).get_labels()
    z = plot_labels.reshape((size, size))
    return x, y, z
In [ ]:
def plot_model(plot, classifier, features, labels, fading=True):
    """
    This function plots the decision surface of a trained classifier
    together with the training data.
    """
    x, y, z = compute_plot_isolines(classifier, features, fading=fading)
    plot.pcolor(x, y, z, cmap='RdBu_r')
    plot.contour(x, y, z, linewidths=1, colors='black')
    plot_binary_data(plot, features, labels)
In [ ]:
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("Linear Features")
plot_binary_data(plt,feats_linear, labels_linear)
plt.subplot(122)
plt.title("Non Linear Features")
plot_binary_data(plt,feats_non_linear, labels_non_linear)
Shogun provides LibLinear, a library for large-scale linear learning focused on SVMs, which is used here for classification.
In [ ]:
plt.figure(figsize=(15,5))
c = 0.5
epsilon = 1e-3
svm_linear = LibLinear(c,shogun_feats_linear,shogun_labels_linear)
svm_linear.set_liblinear_solver_type(L2R_L2LOSS_SVC)
svm_linear.set_epsilon(epsilon)
svm_linear.train()
classifiers_linear.append(svm_linear)
classifiers_names.append("SVM Linear")
fadings.append(True)
plt.subplot(121)
plt.title("Linear SVM - Linear Features")
plot_model(plt,svm_linear,feats_linear,labels_linear)
svm_non_linear = LibLinear(c,shogun_feats_non_linear,shogun_labels_non_linear)
svm_non_linear.set_liblinear_solver_type(L2R_L2LOSS_SVC)
svm_non_linear.set_epsilon(epsilon)
svm_non_linear.train()
classifiers_non_linear.append(svm_non_linear)
plt.subplot(122)
plt.title("Linear SVM - Non Linear Features")
plot_model(plt,svm_non_linear,feats_non_linear,labels_non_linear)
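The plots show the shape of the boundary; for a numeric summary, training accuracy can be computed as well. A minimal sketch, assuming Shogun's AccuracyMeasure evaluator and the apply_binary method:
In [ ]:
# Sketch: training accuracy of the linear SVM (assumes AccuracyMeasure is available)
evaluator = AccuracyMeasure()
print("linear data:", evaluator.evaluate(svm_linear.apply_binary(shogun_feats_linear), shogun_labels_linear))
print("non-linear data:", evaluator.evaluate(svm_non_linear.apply_binary(shogun_feats_non_linear), shogun_labels_non_linear))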
Shogun provides many options for using kernel functions. Kernels in Shogun build on two base classes: CKernel and CKernelMachine.
In [ ]:
gaussian_c=0.7
gaussian_kernel_linear=GaussianKernel(shogun_feats_linear, shogun_feats_linear, 100)
gaussian_svm_linear=LibSVM(gaussian_c, gaussian_kernel_linear, shogun_labels_linear)
gaussian_svm_linear.train()
classifiers_linear.append(gaussian_svm_linear)
fadings.append(True)
gaussian_kernel_non_linear=GaussianKernel(shogun_feats_non_linear, shogun_feats_non_linear, 100)
gaussian_svm_non_linear=LibSVM(gaussian_c, gaussian_kernel_non_linear, shogun_labels_non_linear)
gaussian_svm_non_linear.train()
classifiers_non_linear.append(gaussian_svm_non_linear)
classifiers_names.append("SVM Gaussian Kernel")
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("SVM Gaussian Kernel - Linear Features")
plot_model(plt,gaussian_svm_linear,feats_linear,labels_linear)
plt.subplot(122)
plt.title("SVM Gaussian Kernel - Non Linear Features")
plot_model(plt,gaussian_svm_non_linear,feats_non_linear,labels_non_linear)
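The third argument of GaussianKernel is the kernel width (100 above). Smaller widths yield more flexible boundaries that can overfit; larger widths approach a linear separator. A short sketch of this sensitivity, with illustrative width values:
In [ ]:
# Sketch: sensitivity of the Gaussian-kernel SVM to the kernel width
for width in [10, 100, 1000]:  # illustrative widths
    kernel = GaussianKernel(shogun_feats_non_linear, shogun_feats_non_linear, width)
    svm = LibSVM(gaussian_c, kernel, shogun_labels_non_linear)
    svm.train()
    predicted = svm.apply(shogun_feats_non_linear).get_labels()
    print("width %6.1f -> training accuracy %.3f" % (width, np.mean(predicted == labels_non_linear)))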
In [ ]:
sigmoid_c = 0.9
sigmoid_kernel_linear = SigmoidKernel(shogun_feats_linear,shogun_feats_linear,200,1,0.5)
sigmoid_svm_linear = LibSVM(sigmoid_c, sigmoid_kernel_linear, shogun_labels_linear)
sigmoid_svm_linear.train()
classifiers_linear.append(sigmoid_svm_linear)
classifiers_names.append("SVM Sigmoid Kernel")
fadings.append(True)
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("SVM Sigmoid Kernel - Linear Features")
plot_model(plt,sigmoid_svm_linear,feats_linear,labels_linear)
sigmoid_kernel_non_linear = SigmoidKernel(shogun_feats_non_linear,shogun_feats_non_linear,400,2.5,2)
sigmoid_svm_non_linear = LibSVM(sigmoid_c, sigmoid_kernel_non_linear, shogun_labels_non_linear)
sigmoid_svm_non_linear.train()
classifiers_non_linear.append(sigmoid_svm_non_linear)
plt.subplot(122)
plt.title("SVM Sigmoid Kernel - Non Linear Features")
plot_model(plt,sigmoid_svm_non_linear,feats_non_linear,labels_non_linear)
In [ ]:
poly_c = 0.5
degree = 4
poly_kernel_linear = PolyKernel(shogun_feats_linear, shogun_feats_linear, degree, True)
poly_svm_linear = LibSVM(poly_c, poly_kernel_linear, shogun_labels_linear)
poly_svm_linear.train()
classifiers_linear.append(poly_svm_linear)
classifiers_names.append("SVM Polynomial kernel")
fadings.append(True)
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("SVM Polynomial Kernel - Linear Features")
plot_model(plt,poly_svm_linear,feats_linear,labels_linear)
poly_kernel_non_linear=PolyKernel(shogun_feats_non_linear, shogun_feats_non_linear, degree, True)
poly_svm_non_linear = LibSVM(poly_c, poly_kernel_non_linear, shogun_labels_non_linear)
poly_svm_non_linear.train()
classifiers_non_linear.append(poly_svm_non_linear)
plt.subplot(122)
plt.title("SVM Polynomial Kernel - Non Linear Features")
plot_model(plt,poly_svm_non_linear,feats_non_linear,labels_non_linear)
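As with the Gaussian kernel width, the polynomial degree controls the flexibility of the boundary. A sketch that reuses plot_model to compare two illustrative degrees on the non-linear data:
In [ ]:
# Sketch: comparing two polynomial degrees (illustrative values)
plt.figure(figsize=(15, 5))
for i, d in enumerate([2, 8]):
    kernel = PolyKernel(shogun_feats_non_linear, shogun_feats_non_linear, d, True)
    svm = LibSVM(poly_c, kernel, shogun_labels_non_linear)
    svm.train()
    plt.subplot(1, 2, i + 1)
    plt.title("Polynomial Kernel - degree %d" % d)
    plot_model(plt, svm, feats_non_linear, labels_non_linear)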
In [ ]:
# Naive Bayes and QDA are multiclass machines, so the binary labels
# {-1, +1} are remapped to {0, 1} for MulticlassLabels
multiclass_labels_linear = shogun_labels_linear.get_labels()
multiclass_labels_linear[multiclass_labels_linear == -1] = 0
multiclass_labels_non_linear = shogun_labels_non_linear.get_labels()
multiclass_labels_non_linear[multiclass_labels_non_linear == -1] = 0
shogun_multiclass_labels_linear = MulticlassLabels(multiclass_labels_linear)
shogun_multiclass_labels_non_linear = MulticlassLabels(multiclass_labels_non_linear)
naive_bayes_linear = GaussianNaiveBayes()
naive_bayes_linear.set_features(shogun_feats_linear)
naive_bayes_linear.set_labels(shogun_multiclass_labels_linear)
naive_bayes_linear.train()
classifiers_linear.append(naive_bayes_linear)
classifiers_names.append("Naive Bayes")
fadings.append(False)
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("Naive Bayes - Linear Features")
plot_model(plt,naive_bayes_linear,feats_linear,labels_linear,fading=False)
naive_bayes_non_linear = GaussianNaiveBayes()
naive_bayes_non_linear.set_features(shogun_feats_non_linear)
naive_bayes_non_linear.set_labels(shogun_multiclass_labels_non_linear)
naive_bayes_non_linear.train()
classifiers_non_linear.append(naive_bayes_non_linear)
plt.subplot(122)
plt.title("Naive Bayes - Non Linear Features")
plot_model(plt,naive_bayes_non_linear,feats_non_linear,labels_non_linear,fading=False)
In [ ]:
number_of_neighbors = 10
distances_linear = EuclideanDistance(shogun_feats_linear, shogun_feats_linear)
knn_linear = KNN(number_of_neighbors,distances_linear,shogun_labels_linear)
knn_linear.train()
classifiers_linear.append(knn_linear)
classifiers_names.append("Nearest Neighbors")
fadings.append(False)
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("Nearest Neighbors - Linear Features")
plot_model(plt,knn_linear,feats_linear,labels_linear,fading=False)
distances_non_linear = EuclideanDistance(shogun_feats_non_linear, shogun_feats_non_linear)
knn_non_linear = KNN(number_of_neighbors,distances_non_linear,shogun_labels_non_linear)
knn_non_linear.train()
classifiers_non_linear.append(knn_non_linear)
plt.subplot(122)
plt.title("Nearest Neighbors - Non Linear Features")
plot_model(plt,knn_non_linear,feats_non_linear,labels_non_linear,fading=False)
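The neighborhood size controls the smoothness of the KNN boundary: k = 1 memorizes the training set (trivially perfect training accuracy), while large k averages over wide regions. A sketch with illustrative values of k:
In [ ]:
# Sketch: effect of the neighborhood size k on KNN (illustrative values)
for k in [1, 10, 50]:
    knn = KNN(k, EuclideanDistance(shogun_feats_non_linear, shogun_feats_non_linear), shogun_labels_non_linear)
    knn.train()
    predicted = knn.apply(shogun_feats_non_linear).get_labels()
    print("k = %2d -> training accuracy %.3f" % (k, np.mean(predicted == labels_non_linear)))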
In [ ]:
gamma = 0.1
lda_linear = LDA(gamma, shogun_feats_linear, shogun_labels_linear)
lda_linear.train()
classifiers_linear.append(lda_linear)
classifiers_names.append("LDA")
fadings.append(True)
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("LDA - Linear Features")
plot_model(plt,lda_linear,feats_linear,labels_linear)
lda_non_linear = LDA(gamma, shogun_feats_non_linear, shogun_labels_non_linear)
lda_non_linear.train()
classifiers_non_linear.append(lda_non_linear)
plt.subplot(122)
plt.title("LDA - Non Linear Features")
plot_model(plt,lda_non_linear,feats_non_linear,labels_non_linear)
In [ ]:
qda_linear = QDA(shogun_feats_linear, shogun_multiclass_labels_linear)
qda_linear.train()
classifiers_linear.append(qda_linear)
classifiers_names.append("QDA")
fadings.append(False)
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("QDA - Linear Features")
plot_model(plt,qda_linear,feats_linear,labels_linear,fading=False)
qda_non_linear = QDA(shogun_feats_non_linear, shogun_multiclass_labels_non_linear)
qda_non_linear.train()
classifiers_non_linear.append(qda_non_linear)
plt.subplot(122)
plt.title("QDA - Non Linear Features")
plot_model(plt,qda_non_linear,feats_non_linear,labels_non_linear,fading=False)
Next, a Gaussian process classifier with a logit likelihood is trained; Shogun's CLogitLikelihood and CEPInferenceMethod classes are used.
In [ ]:
# create Gaussian kernel with width = 2.0
kernel = GaussianKernel(10, 2)
# create zero mean function
zero_mean = ZeroMean()
# create logit likelihood model
likelihood = LogitLikelihood()
# specify EP approximation inference method
inference_model_linear = EPInferenceMethod(kernel, shogun_feats_linear, zero_mean, shogun_labels_linear, likelihood)
# create and train the GP classifier, which uses the EP approximation
gaussian_logit_linear = GaussianProcessClassification(inference_model_linear)
gaussian_logit_linear.train()
classifiers_linear.append(gaussian_logit_linear)
classifiers_names.append("Gaussian Process Logit")
fadings.append(True)
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("Gaussian Process - Logit - Linear Features")
plot_model(plt,gaussian_logit_linear,feats_linear,labels_linear)
inference_model_non_linear = EPInferenceMethod(kernel, shogun_feats_non_linear, zero_mean,
shogun_labels_non_linear, likelihood)
gaussian_logit_non_linear = GaussianProcessClassification(inference_model_non_linear)
gaussian_logit_non_linear.train()
classifiers_non_linear.append(gaussian_logit_non_linear)
plt.subplot(122)
plt.title("Gaussian Process - Logit - Non Linear Features")
plot_model(plt,gaussian_logit_non_linear,feats_non_linear,labels_non_linear)
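Since the GP classifier is probabilistic, it can output class membership probabilities rather than only hard labels. A minimal sketch, assuming the get_probabilities method of GaussianProcessClassification:
In [ ]:
# Sketch: posterior class probabilities from the GP classifier
# (assumes GaussianProcessClassification exposes get_probabilities)
probabilities = gaussian_logit_linear.get_probabilities(shogun_feats_linear)
print(probabilities[:5])  # p(y = +1 | x) for the first five training points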
The same Gaussian process setup with a probit likelihood: Shogun's CProbitLikelihood class is used.
In [ ]:
likelihood = ProbitLikelihood()
inference_model_linear = EPInferenceMethod(kernel, shogun_feats_linear, zero_mean, shogun_labels_linear, likelihood)
gaussian_probit_linear = GaussianProcessClassification(inference_model_linear)
gaussian_probit_linear.train()
classifiers_linear.append(gaussian_probit_linear)
classifiers_names.append("Gaussian Process Probit")
fadings.append(True)
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("Gaussian Process - Probit - Linear Features")
plot_model(plt,gaussian_probit_linear,feats_linear,labels_linear)
inference_model_non_linear = EPInferenceMethod(kernel, shogun_feats_non_linear,
zero_mean, shogun_labels_non_linear, likelihood)
gaussian_probit_non_linear = GaussianProcessClassification(inference_model_non_linear)
gaussian_probit_non_linear.train()
classifiers_non_linear.append(gaussian_probit_non_linear)
plt.subplot(122)
plt.title("Gaussian Process - Probit - Non Linear Features")
plot_model(plt,gaussian_probit_non_linear,feats_non_linear,labels_non_linear)
In [ ]:
figure = plt.figure(figsize=(30, 9))
plt.subplot(2, 11, 1)
plot_binary_data(plt, feats_linear, labels_linear)
for i in range(10):
    plt.subplot(2, 11, i + 2)
    plt.title(classifiers_names[i])
    plot_model(plt, classifiers_linear[i], feats_linear, labels_linear, fading=fadings[i])
plt.subplot(2, 11, 12)
plot_binary_data(plt, feats_non_linear, labels_non_linear)
for i in range(10):
    plt.subplot(2, 11, 13 + i)
    plot_model(plt, classifiers_non_linear[i], feats_non_linear, labels_non_linear, fading=fadings[i])