Visual Comparison Between Different Classification Methods in Shogun

Notebook by Youssef Emad El-Din (Github ID: youssef-emad)

This notebook demonstrates different classification methods in Shogun. The goal is to compare and visualize the decision boundaries of different classifiers on two datasets, one linearly separable and one not.

  1. Data Generation and Visualization
  2. Support Vector Machine
    1. Linear SVM
    2. Gaussian Kernel
    3. Sigmoid Kernel
    4. Polynomial Kernel
  3. Naive Bayes
  4. Nearest Neighbors
  5. Linear Discriminant Analysis
  6. Quadratic Discriminant Analysis
  7. Gaussian Process
    1. Logit Likelihood model
    2. Probit Likelihood model
  8. Putting It All Together

In [ ]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import os
SHOGUN_DATA_DIR=os.getenv('SHOGUN_DATA_DIR', '../../../data')
from shogun import *
import shogun as sg

In [ ]:
# Lists that collect the trained models for the final comparison plot
classifiers_linear = []
classifiers_non_linear = []
classifiers_names = []
fadings = []

The features and labels are loaded into Shogun format using the features factory and the BinaryLabels class.


In [ ]:
shogun_feats_linear = features(CSVFile(os.path.join(SHOGUN_DATA_DIR, 'toy/classifier_binary_2d_linear_features_train.dat')))
shogun_labels_linear = BinaryLabels(CSVFile(os.path.join(SHOGUN_DATA_DIR, 'toy/classifier_binary_2d_linear_labels_train.dat')))

shogun_feats_non_linear = features(CSVFile(os.path.join(SHOGUN_DATA_DIR, 'toy/classifier_binary_2d_nonlinear_features_train.dat')))
shogun_labels_non_linear = BinaryLabels(CSVFile(os.path.join(SHOGUN_DATA_DIR, 'toy/classifier_binary_2d_nonlinear_labels_train.dat')))

feats_linear = shogun_feats_linear.get('feature_matrix')
labels_linear = shogun_labels_linear.get('labels')

feats_non_linear = shogun_feats_non_linear.get('feature_matrix')
labels_non_linear = shogun_labels_non_linear.get('labels')

Helper functions for visualizing the data and the classifiers' decision boundaries.


In [ ]:
def plot_binary_data(plot, X_train, y_train):
    """
    Plot 2D binary-labeled data, with red markers for the +1 class
    and blue markers for the -1 class.
    """
    plot.xlabel(r"$x$")
    plot.ylabel(r"$y$")
    plot.plot(X_train[0, np.argwhere(y_train == 1)], X_train[1, np.argwhere(y_train == 1)], 'ro')
    plot.plot(X_train[0, np.argwhere(y_train == -1)], X_train[1, np.argwhere(y_train == -1)], 'bo')

In [ ]:
def compute_plot_isolines(classifier, feats, size=200, fading=True):
    """
    Evaluate the classifier on a regular grid covering the data and
    return the grid coordinates plus predictions for contour plotting.
    With fading=True the classifier's real-valued outputs are used,
    giving smoothly fading decision regions; otherwise the discrete
    labels are used, giving hard two-tone regions.
    """
    x1 = np.linspace(1.2 * min(feats[0]), 1.2 * max(feats[0]), size)
    x2 = np.linspace(1.2 * min(feats[1]), 1.2 * max(feats[1]), size)

    x, y = np.meshgrid(x1, x2)

    plot_features = features(np.array((np.ravel(x), np.ravel(y))))

    if fading:
        plot_labels = classifier.apply(plot_features).get('current_values')
    else:
        plot_labels = classifier.apply(plot_features).get('labels')
    z = plot_labels.reshape((size, size))
    return x, y, z

In [ ]:
def plot_model(plot, classifier, features, labels, fading=True):
    """
    Plot the decision regions of a trained classifier together with
    the training data.
    """
    x, y, z = compute_plot_isolines(classifier, features, fading=fading)
    plot.pcolor(x, y, z, cmap='RdBu_r')
    plot.contour(x, y, z, linewidths=1, colors='black')
    plot_binary_data(plot, features, labels)

In [ ]:
plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("Linear Features")
plot_binary_data(plt,feats_linear, labels_linear)
plt.subplot(122)
plt.title("Non Linear Features")
plot_binary_data(plt,feats_non_linear, labels_non_linear)

Shogun provides LibLinear, a library for large-scale linear learning focused on SVMs, which is used here as a linear SVM classifier.


In [ ]:
plt.figure(figsize=(15,5))
c = 0.5
epsilon = 1e-3

svm_linear = LibLinear(c,shogun_feats_linear,shogun_labels_linear)
svm_linear.put('liblinear_solver_type', L2R_L2LOSS_SVC) 
svm_linear.put('epsilon', epsilon)
svm_linear.train()
classifiers_linear.append(svm_linear)
classifiers_names.append("SVM Linear")
fadings.append(True)

plt.subplot(121)
plt.title("Linear SVM - Linear Features")
plot_model(plt,svm_linear,feats_linear,labels_linear)

svm_non_linear = LibLinear(c,shogun_feats_non_linear,shogun_labels_non_linear)
svm_non_linear.put('liblinear_solver_type', L2R_L2LOSS_SVC) 
svm_non_linear.put('epsilon', epsilon)
svm_non_linear.train()
classifiers_non_linear.append(svm_non_linear)

plt.subplot(122)
plt.title("Linear SVM - Non Linear Features")
plot_model(plt,svm_non_linear,feats_non_linear,labels_non_linear)

SVM - Kernels

Shogun provides many kernel functions. Kernels in Shogun are built on two base classes: Kernel, from which all kernels derive, and KernelMachine, from which kernelized learners such as LibSVM derive. The first example below uses the Gaussian (RBF) kernel, whose width is specified on a log scale via the log_width parameter.
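
Any Shogun kernel can also be evaluated directly. As a minimal sketch (assuming the get_kernel_matrix accessor is available on the kernel object), one can initialize a kernel on the training features and extract the full kernel matrix as a NumPy array:

In [ ]:
# Sketch: compute a Gaussian kernel matrix on the linear toy data
# (assumes get_kernel_matrix is available on the kernel object)
demo_kernel = sg.kernel("GaussianKernel", log_width=np.log(2))
demo_kernel.init(shogun_feats_linear, shogun_feats_linear)
K = demo_kernel.get_kernel_matrix()   # (n_samples, n_samples) NumPy array
print(K.shape)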


In [ ]:
gaussian_c=0.7

gaussian_kernel_linear=sg.kernel("GaussianKernel", log_width=np.log(100))
gaussian_svm_linear=sg.machine('LibSVM', C1=gaussian_c, C2=gaussian_c, kernel=gaussian_kernel_linear, labels=shogun_labels_linear)
gaussian_svm_linear.train(shogun_feats_linear)
classifiers_linear.append(gaussian_svm_linear)
fadings.append(True)

gaussian_kernel_non_linear=sg.kernel("GaussianKernel", log_width=np.log(100))
gaussian_svm_non_linear=sg.machine('LibSVM', C1=gaussian_c, C2=gaussian_c, kernel=gaussian_kernel_non_linear, labels=shogun_labels_non_linear)
gaussian_svm_non_linear.train(shogun_feats_non_linear)
classifiers_non_linear.append(gaussian_svm_non_linear)
classifiers_names.append("SVM Gaussian Kernel")

plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("SVM Gaussian Kernel - Linear Features")
plot_model(plt,gaussian_svm_linear,feats_linear,labels_linear)

plt.subplot(122)
plt.title("SVM Gaussian Kernel - Non Linear Features")
plot_model(plt,gaussian_svm_non_linear,feats_non_linear,labels_non_linear)
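
The sigmoid (hyperbolic tangent) kernel is $k(x, y) = \tanh(\gamma \, x \cdot y + c)$. In the SigmoidKernel constructor below, the three numeric arguments are, per the standard Shogun signature, the kernel cache size followed by $\gamma$ and $c$.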

In [ ]:
sigmoid_c = 0.9

sigmoid_kernel_linear = SigmoidKernel(shogun_feats_linear,shogun_feats_linear,200,1,0.5)
sigmoid_svm_linear = sg.machine('LibSVM', C1=sigmoid_c, C2=sigmoid_c, kernel=sigmoid_kernel_linear, labels=shogun_labels_linear)
sigmoid_svm_linear.train()
classifiers_linear.append(sigmoid_svm_linear)
classifiers_names.append("SVM Sigmoid Kernel")
fadings.append(True)

plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("SVM Sigmoid Kernel - Linear Features")
plot_model(plt,sigmoid_svm_linear,feats_linear,labels_linear)

sigmoid_kernel_non_linear = SigmoidKernel(shogun_feats_non_linear,shogun_feats_non_linear,400,2.5,2)
sigmoid_svm_non_linear = sg.machine('LibSVM', C1=sigmoid_c, C2=sigmoid_c, kernel=sigmoid_kernel_non_linear, labels=shogun_labels_non_linear)
sigmoid_svm_non_linear.train()
classifiers_non_linear.append(sigmoid_svm_non_linear)

plt.subplot(122)
plt.title("SVM Sigmoid Kernel - Non Linear Features")
plot_model(plt,sigmoid_svm_non_linear,feats_non_linear,labels_non_linear)
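
The (inhomogeneous) polynomial kernel is $k(x, y) = (x \cdot y + c)^d$; below, the degree $d$ is 4 and the constant $c$ is 1, so the classifier can pick up feature interactions up to fourth order.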

In [ ]:
poly_c = 0.5
degree = 4

poly_kernel_linear = sg.kernel('PolyKernel', degree=degree, c=1.0)
poly_kernel_linear.init(shogun_feats_linear, shogun_feats_linear)
poly_svm_linear = sg.machine('LibSVM', C1=poly_c, C2=poly_c, kernel=poly_kernel_linear, labels=shogun_labels_linear)
poly_svm_linear.train()
classifiers_linear.append(poly_svm_linear)
classifiers_names.append("SVM Polynomial kernel")
fadings.append(True)

plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("SVM Polynomial Kernel - Linear Features")
plot_model(plt,poly_svm_linear,feats_linear,labels_linear)

poly_kernel_non_linear = sg.kernel('PolyKernel', degree=degree, c=1.0)
poly_kernel_non_linear.init(shogun_feats_non_linear, shogun_feats_non_linear)
poly_svm_non_linear = sg.machine('LibSVM', C1=poly_c, C2=poly_c, kernel=poly_kernel_non_linear, labels=shogun_labels_non_linear)
poly_svm_non_linear.train()
classifiers_non_linear.append(poly_svm_non_linear)

plt.subplot(122)
plt.title("SVM Polynomial Kernel - Non Linear Features")
plot_model(plt,poly_svm_non_linear,feats_non_linear,labels_non_linear)
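
GaussianNaiveBayes models each feature within each class as an independent Gaussian. It is a multiclass machine, so it expects MulticlassLabels with classes indexed from 0; the binary labels are therefore remapped from {-1, +1} to {0, 1} first (the same remapped labels are reused later for QDA).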

In [ ]:
multiclass_labels_linear = shogun_labels_linear.get('labels')
multiclass_labels_linear[multiclass_labels_linear == -1] = 0

multiclass_labels_non_linear = shogun_labels_non_linear.get('labels')
multiclass_labels_non_linear[multiclass_labels_non_linear == -1] = 0


shogun_multiclass_labels_linear = MulticlassLabels(multiclass_labels_linear)
shogun_multiclass_labels_non_linear = MulticlassLabels(multiclass_labels_non_linear)

naive_bayes_linear = GaussianNaiveBayes()
naive_bayes_linear.put('features', shogun_feats_linear)
naive_bayes_linear.put('labels', shogun_multiclass_labels_linear)
naive_bayes_linear.train()
classifiers_linear.append(naive_bayes_linear)
classifiers_names.append("Naive Bayes")
fadings.append(False)

plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("Naive Bayes - Linear Features")
plot_model(plt,naive_bayes_linear,feats_linear,labels_linear,fading=False)

naive_bayes_non_linear = GaussianNaiveBayes()
naive_bayes_non_linear.put('features', shogun_feats_non_linear)
naive_bayes_non_linear.put('labels', shogun_multiclass_labels_non_linear)
naive_bayes_non_linear.train()
classifiers_non_linear.append(naive_bayes_non_linear)

plt.subplot(122)
plt.title("Naive Bayes - Non Linear Features")
plot_model(plt,naive_bayes_non_linear,feats_non_linear,labels_non_linear,fading=False)
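
K-nearest neighbors classifies each point by a majority vote among its k closest training points under a chosen distance; here Euclidean distance with k = 10 is used.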

In [ ]:
number_of_neighbors = 10

distances_linear = sg.distance('EuclideanDistance')
distances_linear.init(shogun_feats_linear, shogun_feats_linear)
knn_linear = KNN(number_of_neighbors,distances_linear,shogun_labels_linear)
knn_linear.train()
classifiers_linear.append(knn_linear)
classifiers_names.append("Nearest Neighbors")
fadings.append(False)

plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("Nearest Neighbors - Linear Features")
plot_model(plt,knn_linear,feats_linear,labels_linear,fading=False)

distances_non_linear = sg.distance('EuclideanDistance')
distances_non_linear.init(shogun_feats_non_linear, shogun_feats_non_linear)
knn_non_linear = KNN(number_of_neighbors,distances_non_linear,shogun_labels_non_linear)
knn_non_linear.train()
classifiers_non_linear.append(knn_non_linear)

plt.subplot(122)
plt.title("Nearest Neighbors - Non Linear Features")
plot_model(plt,knn_non_linear,feats_non_linear,labels_non_linear,fading=False)
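
Linear discriminant analysis fits a Gaussian to each class with a shared covariance matrix, which yields a linear decision boundary; the gamma parameter regularizes the covariance estimate.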

In [ ]:
gamma = 0.1

lda_linear = sg.machine('LDA', gamma=gamma, labels=shogun_labels_linear)
lda_linear.train(shogun_feats_linear)
classifiers_linear.append(lda_linear)
classifiers_names.append("LDA")
fadings.append(True)

plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("LDA - Linear Features")
plot_model(plt,lda_linear,feats_linear,labels_linear)

lda_non_linear = sg.machine('LDA', gamma=gamma, labels=shogun_labels_non_linear)
lda_non_linear.train(shogun_feats_non_linear)
classifiers_non_linear.append(lda_non_linear)

plt.subplot(122)
plt.title("LDA - Non Linear Features")
plot_model(plt,lda_non_linear,feats_non_linear,labels_non_linear)
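
Quadratic discriminant analysis is similar, but estimates a separate covariance matrix per class, which produces quadratic decision boundaries. Like GaussianNaiveBayes, Shogun's QDA is a multiclass machine, so the remapped MulticlassLabels from above are used.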

In [ ]:
qda_linear = QDA(shogun_feats_linear, shogun_multiclass_labels_linear)
qda_linear.train()
classifiers_linear.append(qda_linear)
classifiers_names.append("QDA")
fadings.append(False)

plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("QDA - Linear Features")
plot_model(plt,qda_linear,feats_linear,labels_linear,fading=False)

qda_non_linear = QDA(shogun_feats_non_linear, shogun_multiclass_labels_non_linear)
qda_non_linear.train()
classifiers_non_linear.append(qda_non_linear)

plt.subplot(122)
plt.title("QDA - Non Linear Features")
plot_model(plt,qda_non_linear,feats_non_linear,labels_non_linear,fading=False)

A Gaussian process classifier places a GP prior on a latent function and maps it to class probabilities through a likelihood. Here Shogun's LogitLikelihood model is combined with expectation propagation (EPInferenceMethod) for approximate inference.


In [ ]:
# create Gaussian kernel (width specified on a log scale via log_width)
kernel = sg.kernel("GaussianKernel", log_width=np.log(2))
# create zero mean function
zero_mean = ZeroMean()
# create logit likelihood model
likelihood = LogitLikelihood()
# specify EP approximation inference method
inference_model_linear = EPInferenceMethod(kernel, shogun_feats_linear, zero_mean, shogun_labels_linear, likelihood)
# create and train GP classifier, which uses the EP approximation
gaussian_logit_linear = GaussianProcessClassification(inference_model_linear)
gaussian_logit_linear.train()
classifiers_linear.append(gaussian_logit_linear)
classifiers_names.append("Gaussian Process Logit")
fadings.append(True)

plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("Gaussian Process - Logit - Linear Features")
plot_model(plt,gaussian_logit_linear,feats_linear,labels_linear)

inference_model_non_linear = EPInferenceMethod(kernel, shogun_feats_non_linear, zero_mean,
                                               shogun_labels_non_linear, likelihood)
gaussian_logit_non_linear = GaussianProcessClassification(inference_model_non_linear)
gaussian_logit_non_linear.train()
classifiers_non_linear.append(gaussian_logit_non_linear)

plt.subplot(122)
plt.title("Gaussian Process - Logit - Non Linear Features")
plot_model(plt,gaussian_logit_non_linear,feats_non_linear,labels_non_linear)
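
Because a GP classifier is probabilistic, one can also inspect predictive class probabilities instead of hard labels. A minimal sketch, assuming the get_probabilities method of GaussianProcessClassification:

In [ ]:
# Sketch: predictive probability of the +1 class at the training points
# (assumes GaussianProcessClassification.get_probabilities)
probabilities = gaussian_logit_linear.get_probabilities(shogun_feats_linear)
print(probabilities[:5])   # values in [0, 1]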

The same construction is repeated with Shogun's ProbitLikelihood class, which maps the latent function through the standard normal CDF.


In [ ]:
likelihood = ProbitLikelihood()

inference_model_linear = EPInferenceMethod(kernel, shogun_feats_linear, zero_mean, shogun_labels_linear, likelihood)
gaussian_probit_linear = GaussianProcessClassification(inference_model_linear)
gaussian_probit_linear.train()
classifiers_linear.append(gaussian_probit_linear)
classifiers_names.append("Gaussian Process Probit")
fadings.append(True)

plt.figure(figsize=(15,5))
plt.subplot(121)
plt.title("Gaussian Process - Probit - Linear Features")
plot_model(plt,gaussian_probit_linear,feats_linear,labels_linear)

inference_model_non_linear = EPInferenceMethod(kernel, shogun_feats_non_linear,
                                               zero_mean, shogun_labels_non_linear, likelihood)
gaussian_probit_non_linear = GaussianProcessClassification(inference_model_non_linear)
gaussian_probit_non_linear.train()
classifiers_non_linear.append(gaussian_probit_non_linear)

plt.subplot(122)
plt.title("Gaussian Process - Probit - Non Linear Features")
plot_model(plt,gaussian_probit_non_linear,feats_non_linear,labels_non_linear)
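
Finally, all ten classifiers are shown side by side: the first column holds the raw data, followed by one column per classifier, with the linearly separable dataset in the top row and the non-linear one in the bottom row.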

In [ ]:
figure = plt.figure(figsize=(30,9))
plt.subplot(2,11,1)
plot_binary_data(plt,feats_linear, labels_linear)
for i in range(0,10):
    plt.subplot(2,11,i+2)
    plt.title(classifiers_names[i])
    plot_model(plt,classifiers_linear[i],feats_linear,labels_linear,fading=fadings[i])

plt.subplot(2,11,12)
plot_binary_data(plt,feats_non_linear, labels_non_linear)

for i in range(0,10):
    plt.subplot(2,11,13+i)
    plot_model(plt,classifiers_non_linear[i],feats_non_linear,labels_non_linear,fading=fadings[i])
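
The plots give a qualitative comparison; for a quantitative one, each classifier's predictions can be scored. A minimal sketch for a single classifier, assuming Shogun's AccuracyMeasure evaluation class:

In [ ]:
# Sketch: training-set accuracy of the linear SVM
# (assumes the AccuracyMeasure evaluation class)
predictions = svm_linear.apply(shogun_feats_linear)
evaluator = AccuracyMeasure()
print("Linear SVM training accuracy: %.3f" % evaluator.evaluate(predictions, shogun_labels_linear))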