In [1]:
from sklearn import svm
from sklearn import cross_validation as cv
import pandas as pd
# `plt` was used here without ever being imported, which raises NameError on
# a fresh kernel (Restart & Run All). Import it explicitly before use.
import matplotlib.pyplot as plt

# Apply the 'ggplot' style sheet to the figures drawn later in the notebook.
plt.style.use('ggplot')
from kcat.datasets import CongressionalVoting
from kcat.kernels import helpers as kh
from kcat.kernels import search as ks
In [2]:
# Load the dataset: instantiate the CongressionalVoting class imported from
# kcat.datasets. The experiment cell below draws its train/test splits from
# this object via dataset.train_test_split(...).
dataset = CongressionalVoting()
In [3]:
# Experiment settings (previously inline magic numbers).
N_REPETITIONS = 5   # number of train/test splits; the index is also used as random_state
TEST_SIZE = 0.33    # forwarded as test_size to dataset.train_test_split
N_FOLDS = 10        # second argument of cv.StratifiedKFold

# Kernel helper classes to compare. The tuple never changes between
# repetitions, so it is built once instead of on every outer iteration.
MODEL_CLASSES = (
    kh.RBF, kh.K0, kh.K1, kh.M3, kh.M4, kh.M5,
    kh.M6, kh.M7, kh.M9, kh.MC, kh.MD, kh.ME,
)

# One entry per (repetition, kernel) evaluation. Both lists are appended to
# together, so they stay aligned index-by-index.
data = {'Kernel': [], 'Test Error': []}

for i in range(N_REPETITIONS):
    print("Iteration {}".format(i))
    # Split train and test; random_state=i ties the split to the repetition index.
    X_train, X_test, y_train, y_test = dataset.train_test_split(
        test_size=TEST_SIZE, random_state=i)
    # Cross-validation object built from the training labels only.
    # NOTE(review): this is the legacy sklearn.cross_validation call form;
    # presumably model.train expects exactly this kind of object -- confirm
    # before porting to sklearn.model_selection.
    cvf = cv.StratifiedKFold(y_train, N_FOLDS)
    # Evaluate models
    for model_class in MODEL_CLASSES:
        model = model_class()
        print(model.name, end=', ')
        best_fit = model.train(cvf, X_train, y_train)
        results = model.test(best_fit, X_test, y_test)
        data['Kernel'].append(model.name)
        # Recorded as an error: 1 minus the reported 'test_score'.
        data['Test Error'].append(1 - results['test_score'])
    # Ends the comma-separated line of kernel names printed above.
    # NOTE(review): placed at the outer-loop level; the flattened export does
    # not show the original indentation -- confirm.
    print()
In [4]:
# Tabulate the collected results: one row per (repetition, kernel) evaluation,
# with the columns 'Kernel' and 'Test Error' taken from the keys of `data`.
df = pd.DataFrame(data)

# Mean test error of each kernel across all repetitions (shown as cell output).
error_by_kernel = df.groupby('Kernel')['Test Error']
error_by_kernel.mean()
Out[4]:
In [5]:
# Box plot of the test error, one box per kernel. 'Test Error' is the only
# column of df besides the grouping key, so naming it explicitly draws the
# same figure. The trailing semicolon suppresses the returned object's repr.
df.boxplot(column='Test Error', by='Kernel');