In [1]:
# Imports.
# NOTE: `sklearn.cross_validation` was deprecated in 0.18 and removed in 0.20;
# `model_selection` provides the same `cross_val_score`, so keeping the `cv`
# alias leaves the calls below unchanged.
# numpy and matplotlib were previously assumed from pylab mode — import them
# explicitly so the notebook survives Restart Kernel -> Run All.
import numpy as np
import matplotlib.pyplot as plt

from sklearn import model_selection as cv
from sklearn import svm

from kcat.datasets import Synthetic

plt.style.use('ggplot')
In [2]:
# Experiment grid: dataset sizes x values of p, each repeated `repeat` times.
sizes = (50, 100, 200, 400)
ps = np.linspace(0, 1, num=11)
repeat = 150

# results[size_idx, p_idx, trial] holds the mean 5-fold CV accuracy of an
# RBF-kernel SVC trained on a freshly generated synthetic dataset.
results = np.zeros((len(sizes), len(ps), repeat))
for size_idx, size in enumerate(sizes):
    for p_idx, p in enumerate(ps):
        print("{} {}".format(size, p), end=', ')
        for trial in range(repeat):
            # New random dataset each trial; only the categorical view (Xc)
            # is fed to the classifier here.
            Xq, Xc, y = Synthetic(size, n=25, c=2, p=p).data_arrays
            clf = svm.SVC(kernel='rbf')
            results[size_idx, p_idx, trial] = cv.cross_val_score(clf, Xc, y, cv=5).mean()

# Invert results to show error rate instead of success rate
results = 1.0 - results
In [3]:
# Plot mean classification error vs. p, one line style per dataset size.
# FIX: the original used pylab star-imported names (figure, plot, xlabel, ...)
# which are undefined on a fresh kernel; use the explicit pyplot/Axes API.
fig, ax = plt.subplots(figsize=(10, 5))
styles = (':', '-.', '--', '-')
for i, m in enumerate(sizes):
    # Average over the `repeat` trials for each value of p.
    ax.plot(ps, results[i].mean(axis=1), styles[i],
            linewidth=1.5, color=(0.7, 0.4, 0))
ax.set_xlabel("P")
ax.set_ylabel("Error")
ax.set_ylim(0, 0.5)
ax.legend(["Size {}".format(m) for m in sizes])
ax.set_title("Classification Error using RBF Kernel");
Out[3]: