Support Vector Machines have really nice math behind them. Let's see how this algorithm fares on our classification problem.
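
For reference, the estimator used below (LinearSVC with an l1 penalty and squared hinge loss, solved in the primal) optimizes roughly the following objective — a sketch of the liblinear formulation, not taken from this notebook:

$$\min_{w,\,b} \; \|w\|_1 + C \sum_{i=1}^{n} \max\big(0,\; 1 - y_i\,(w^{\top} x_i + b)\big)^2$$

The l1 penalty on $w$ pushes many coefficients to exactly zero, and $C$ trades that sparsity off against the squared hinge loss; for the 10-class digits problem this is applied one-vs-rest.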


In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets, svm
from sklearn.cross_validation import train_test_split, KFold
from sklearn.grid_search import GridSearchCV

digits = datasets.load_digits()

# split dataset into train and test
X_train, X_test, y_train, y_test = train_test_split( digits.data, 
                                                    digits.target,
                                                    test_size=0.33 )

clf = svm.LinearSVC(penalty='l1', loss='squared_hinge',
                    dual=False, tol=1e-3 )

# parameters for grid search
param_grid = dict( C=np.logspace(-2.3, -1.3, 10) )

# we will use grid search to choose "optimal" hyperparameters
grid = GridSearchCV(clf, refit=False, param_grid=param_grid,
                            cv=KFold( len( X_train ), 30 ) )
grid.fit( X_train, y_train )


Out[1]:
GridSearchCV(cv=sklearn.cross_validation.KFold(n=1203, n_folds=30, shuffle=False, random_state=None),
       error_score='raise',
       estimator=LinearSVC(C=1.0, class_weight=None, dual=False, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l1', random_state=None, tol=0.001,
     verbose=0),
       fit_params={}, iid=True, loss_func=None, n_jobs=1,
       param_grid={'C': array([ 0.00501,  0.00647,  0.00836,  0.0108 ,  0.01395,  0.01801,
        0.02326,  0.03005,  0.03881,  0.05012])},
       pre_dispatch='2*n_jobs', refit=False, score_func=None, scoring=None,
       verbose=0)
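
Before refitting, it can be useful to see how the cross-validation score varies across the candidate values of C. A minimal sketch against the (old) grid_search API used above — with refit=False the best_params_, best_score_ and grid_scores_ attributes are still populated:

# best hyperparameter and its mean cross-validation score
print( grid.best_params_, grid.best_score_ )

# mean validation score for every candidate value of C
for score in grid.grid_scores_:
    print( score.parameters[ 'C' ], score.mean_validation_score )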

In [2]:
# fit over entire train dataset
clf.C = grid.best_params_[ 'C' ]
clf.fit( X_train, y_train )

# run tests
print( 'Test score:{0}'.format( clf.score( X_test, y_test ) ) )


Test score:0.959595959596
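
Since the model is l1-penalized, a fair share of the learned weights should be exactly zero, and it is worth seeing where the remaining ~4% of test errors land. A quick sketch using only the already-fitted clf (confusion_matrix comes from sklearn.metrics):

from sklearn.metrics import confusion_matrix

# fraction of exactly-zero weights across the 10 one-vs-rest classifiers
print( 'Sparsity: {0:.2f}'.format( np.mean( clf.coef_ == 0 ) ) )

# per-class breakdown of the test-set predictions
print( confusion_matrix( y_test, clf.predict( X_test ) ) )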