Homework: https://work.caltech.edu/homework/hw8.pdf
✔ Answer key: https://work.caltech.edu/homework/hw8_sol.pdf
In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display
from sklearn import svm, cross_validation  # used by every classifier cell below
In [71]:
train = pd.read_fwf('http://www.amlbook.com/data/zip/features.train', header=None,
                    names=['digit', 'intensity', 'symmetry'])
test = pd.read_fwf('http://www.amlbook.com/data/zip/features.test', header=None,
                   names=['digit', 'intensity', 'symmetry'])
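If the amlbook.com mirror is unreachable, the same fixed-width files can be read from a local copy (the paths below are illustrative):

train = pd.read_fwf('features.train', header=None,
                    names=['digit', 'intensity', 'symmetry'])
test = pd.read_fwf('features.test', header=None,
                   names=['digit', 'intensity', 'symmetry'])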
In [72]:
display(train.describe())
display(train.head())
display(test.describe())
display(test.head())
In [75]:
X_in = train.iloc[:, 1:]
y_in = train.iloc[:, 0]
X_out = test.iloc[:, 1:]
y_out = test.iloc[:, 0]
In [83]:
def get_E_in(target, X_in, y_in):
    classifier = svm.SVC(C=0.01,             # penalty parameter C of the error term
                         kernel='poly',      # polynomial kernel
                         degree=2,           # degree Q of the polynomial kernel (ignored by other kernels)
                         gamma=1.0,          # kernel coefficient for 'rbf', 'poly' and 'sigmoid'
                         coef0=1.0,          # independent term in the kernel ('poly' and 'sigmoid' only)
                         shrinking=False,    # whether to use the shrinking heuristic
                         probability=False,  # probability estimates would slow down fit()
                         tol=0.001,          # tolerance for the stopping criterion
                         cache_size=200,     # kernel cache size (in MB)
                         class_weight=None,
                         verbose=False,
                         max_iter=-1,
                         decision_function_shape='ovr',  # one-vs-rest ('ovr') decision function of shape (n_samples, n_classes), rather than libsvm's one-vs-one ('ovo') of shape (n_samples, n_classes * (n_classes - 1) / 2)
                         random_state=None)
    # One-vs-all: relabel every digit other than the target as -1.
    y = y_in.copy()
    y[y != target] = -1
    classifier.fit(X_in, y)
    y_pred = classifier.predict(X_in)
    misclassified = y_pred != y
    return classifier, sum(misclassified)*100./len(misclassified)
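For reference, with gamma=1.0 and coef0=1.0 these settings reproduce the homework's polynomial kernel: sklearn's 'poly' kernel computes

$$K(x, x') = (\gamma\, x^\top x' + \text{coef0})^{\text{degree}} = (1 + x^\top x')^{Q}.$$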
In [84]:
for target in [0, 2, 4, 6, 8]:
    clf, E_in = get_E_in(target, X_in, y_in)
    print "E_in({}) = {:.2f}%, N_SVs = {}".format(target, E_in, clf.n_support_)
In [85]:
for target in [1, 3, 5, 7, 9]:
    clf, E_in = get_E_in(target, X_in, y_in)
    print "E_in({}) = {:.2f}%, N_SVs = {}".format(target, E_in, clf.n_support_)
In [86]:
print "Difference in support vectors is: {}".format(1090 + 1089 - 193 - 193)
In [102]:
def computeError(clf, X, y):
    y_pred = clf.predict(X)
    misclassified = y_pred != y
    return sum(misclassified)*100./len(misclassified)

def run_1_vs_5(train, test):
    # Restrict both sets to the digits 1 and 5 for the 1-vs-5 classifier.
    train_subset = train.loc[train.digit.isin([1, 5])]
    X_in = train_subset.iloc[:, 1:]
    y_in = train_subset.iloc[:, 0]
    test_subset = test.loc[test.digit.isin([1, 5])]
    X_out = test_subset.iloc[:, 1:]
    y_out = test_subset.iloc[:, 0]
    for Q_val in [2, 5]:
        for C_val in [0.0001, 0.001, 0.01, 0.1, 1]:
            # Same settings as the one-vs-all cell above, with Q and C swept.
            classifier = svm.SVC(C=C_val,
                                 kernel='poly',
                                 degree=Q_val,
                                 gamma=1.0,
                                 coef0=1.0,
                                 shrinking=False,
                                 probability=False,
                                 tol=0.001,
                                 cache_size=200,
                                 class_weight=None,
                                 verbose=False,
                                 max_iter=-1,
                                 decision_function_shape='ovr',
                                 random_state=None)
            classifier.fit(X_in, y_in)
            E_in = computeError(classifier, X_in, y_in)
            E_out = computeError(classifier, X_out, y_out)
            print "Q: {}, C: {:.4f}, SV_s: {}, E_in: {:.2f}, E_out: {:.2f}".format(Q_val, C_val, classifier.n_support_, E_in, E_out)

run_1_vs_5(train, test)
In [132]:
# Exploratory: 10-fold CV of the degree-2 polynomial SVM on the full ten-class training set.
classifier = svm.SVC(C=.01,
                     kernel='poly',
                     degree=2,
                     gamma=1.0,
                     coef0=1.0,
                     shrinking=False,
                     probability=False,
                     tol=0.001,
                     cache_size=200,
                     class_weight=None,
                     verbose=False,
                     max_iter=-1,
                     decision_function_shape='ovr',
                     random_state=None)
E_CV_s = []
skf = cross_validation.KFold(len(y_in), n_folds=10)
print len(X_in)
for train_ind, test_ind in skf:
    print len(train_ind), len(X_in.iloc[train_ind, :])
    print len(test_ind), len(X_in.iloc[test_ind, :])
    print
    classifier.fit(X_in.iloc[train_ind, :], y_in.iloc[train_ind])
    E_CV_s.append(computeError(classifier, X_in.iloc[test_ind, :], y_in.iloc[test_ind]))
print E_CV_s
E_cv = float(sum(E_CV_s))/len(E_CV_s)
print E_cv
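Note that KFold defaults to shuffle=False, so iterating over skf again would reproduce the identical partition. For a fresh random split per run, which the 100-run experiment below relies on, the folds can be shuffled:

skf = cross_validation.KFold(len(y_in), n_folds=10, shuffle=True)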
In [147]:
from sklearn import cross_validation

def computeError(clf, X, y):
    y_pred = clf.predict(X)
    misclassified = y_pred != y
    return sum(misclassified)*100./len(misclassified)

def run_1_vs_5_with_CV(train, test):
    train_subset = train.loc[train.digit.isin([1, 5])]
    X_in = train_subset.iloc[:, 1:]
    y_in = train_subset.iloc[:, 0]
    print "Len of train", len(train)
    print "Len of train subset", len(train_subset)
    print "Len of X_in and y_in", len(X_in), len(y_in)
    test_subset = test.loc[test.digit.isin([1, 5])]
    X_out = test_subset.iloc[:, 1:]
    y_out = test_subset.iloc[:, 0]
    Q_val = 2
    chosen = {}
    for i in range(100):
        chosen_C = None
        chosen_E_cv = None
        for C_val in [0.0001, 0.001, 0.01, 0.1, 1]:
            classifier = svm.SVC(C=C_val,
                                 kernel='poly',
                                 degree=Q_val,
                                 gamma=1.0,
                                 coef0=1.0,
                                 shrinking=False,
                                 probability=False,
                                 tol=0.001,
                                 cache_size=200,
                                 class_weight=None,
                                 verbose=False,
                                 max_iter=-1,
                                 decision_function_shape='ovr',
                                 random_state=None)
            E_CV_s = []
            # shuffle=True so each of the 100 runs uses a different random partition.
            skf = cross_validation.KFold(len(y_in), n_folds=10, shuffle=True)
            for train_ind, test_ind in skf:
                classifier.fit(X_in.iloc[train_ind, :], y_in.iloc[train_ind])
                E_CV_s.append(computeError(classifier, X_in.iloc[test_ind, :], y_in.iloc[test_ind]))
            E_cv = float(sum(E_CV_s))/len(E_CV_s)
            # Keep the C with the lowest E_cv; ties go to the smaller C seen first.
            if chosen_C is None or chosen_E_cv > E_cv:
                chosen_C = C_val
                chosen_E_cv = E_cv
        if chosen_C not in chosen:
            chosen[chosen_C] = []
        chosen[chosen_C].append(chosen_E_cv)
    for k, v in chosen.iteritems():
        print k, len(v), sum(v)*1./len(v)

run_1_vs_5_with_CV(train, test)
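The sklearn.cross_validation module was removed in scikit-learn 0.20. A minimal modern equivalent of one selection pass, assuming scikit-learn >= 0.20 and Python 3, might look like:

from sklearn import svm
from sklearn.model_selection import KFold, cross_val_score

# One 10-fold CV estimate of the error (in percent) per candidate C.
for C_val in [0.0001, 0.001, 0.01, 0.1, 1]:
    clf = svm.SVC(C=C_val, kernel='poly', degree=2, gamma=1.0, coef0=1.0)
    scores = cross_val_score(clf, X_in, y_in,
                             cv=KFold(n_splits=10, shuffle=True))  # scores accuracy by default
    print("C: {:.4f}, E_cv: {:.2f}%".format(C_val, 100 * (1 - scores.mean())))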
In [115]:
def computeError(clf, X, y):
    y_pred = clf.predict(X)
    misclassified = y_pred != y
    return sum(misclassified)*100./len(misclassified)

def run_1_vs_5_RBF(train, test):
    train_subset = train.loc[train.digit.isin([1, 5])]
    X_in = train_subset.iloc[:, 1:]
    y_in = train_subset.iloc[:, 0]
    test_subset = test.loc[test.digit.isin([1, 5])]
    X_out = test_subset.iloc[:, 1:]
    y_out = test_subset.iloc[:, 0]
    for C_val in [0.01, 1, 100, 10 ** 4, 10 ** 6]:
        classifier = svm.SVC(C=C_val,
                             kernel='rbf',   # RBF kernel with gamma fixed at 1
                             gamma=1.0,
                             shrinking=False,
                             probability=False,
                             decision_function_shape='ovr')
        classifier.fit(X_in, y_in)
        E_in = computeError(classifier, X_in, y_in)
        E_out = computeError(classifier, X_out, y_out)
        print "C: {:.4f}, SV_s: {}, E_in: {:.2f}, E_out: {:.2f}".format(C_val, classifier.n_support_, E_in, E_out)

run_1_vs_5_RBF(train, test)
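For reference, gamma=1.0 makes this the homework's Gaussian RBF kernel,

$$K(x, x') = \exp(-\gamma \lVert x - x' \rVert^2) = \exp(-\lVert x - x' \rVert^2),$$

and sweeping C trades margin width against violations: a larger C punishes margin violations more heavily, so E_in tends to fall as C grows while E_out need not.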