Support Vector Machines come with elegant math behind them. Let's see how this algorithm fares on our classification problem.
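For reference, the standard soft-margin SVM solves

$$\min_{w,\,b,\,\xi}\;\frac{1}{2}\|w\|_2^2 + C \sum_i \xi_i \quad \text{s.t.} \quad y_i\,(w \cdot x_i + b) \ge 1 - \xi_i,\;\; \xi_i \ge 0,$$

where C trades margin width against training errors; this is exactly the hyperparameter we grid-search below. With penalty='l1' and loss='squared_hinge', LinearSVC instead minimizes $\|w\|_1 + C \sum_i \max(0,\, 1 - y_i\, w^\top x_i)^2$, and the $\|w\|_1$ term drives many weights to exactly zero.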
In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, svm
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
digits = datasets.load_digits()
# split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(digits.data,
                                                    digits.target,
                                                    test_size=0.33)
# L1-penalized linear SVM; dual=False is required when penalty='l1'
clf = svm.LinearSVC(penalty='l1', loss='squared_hinge',
                    dual=False, tol=1e-3)
# candidate values of the regularization parameter C
param_grid = dict(C=np.logspace(-2.3, -1.3, 10))
# use grid search with 30-fold cross-validation to choose an "optimal" C
grid = GridSearchCV(clf, refit=False, param_grid=param_grid,
                    cv=KFold(n_splits=30))
grid.fit(X_train, y_train)
Out[1]:
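Before refitting, it is worth seeing how sensitive the validation accuracy is to C. A quick sketch, assuming the model_selection API where per-candidate scores are exposed via grid.cv_results_:

# plot mean cross-validated accuracy against each candidate C
Cs = param_grid['C']
mean_scores = grid.cv_results_['mean_test_score']
plt.semilogx(Cs, mean_scores, marker='o')
plt.xlabel('C')
plt.ylabel('mean CV accuracy')
plt.show()

A flat curve around the maximum would suggest the exact choice of C matters little over this range.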
In [2]:
# refit on the full training set with the best C found above
clf.C = grid.best_params_['C']
clf.fit(X_train, y_train)
# evaluate on the held-out test set
print('Test score: {0}'.format(clf.score(X_test, y_test)))
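A side effect of the l1 penalty is also worth checking: it should produce a sparse weight matrix. A minimal sketch using the fitted clf from above:

# with an L1 penalty, many learned weights should be exactly zero
sparsity = np.mean(clf.coef_ == 0)
print('Sparsity: {0:.1%} of the weights are zero'.format(sparsity))

For the 8x8 digit images this effectively selects a subset of informative pixels per class.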