In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import sklearn.svm
import seaborn as sns
import scipy.io as sio
import matplotlib.pyplot as plt

In [2]:
mat = sio.loadmat('./data/ex6data1.mat')
print(mat.keys())
data = pd.DataFrame(mat.get('X'), columns=['X1', 'X2'])
data['y'] = mat.get('y')

data.head()


dict_keys(['__globals__', 'X', 'y', '__version__', '__header__'])
Out[2]:
X1 X2 y
0 1.9643 4.5957 1
1 2.2753 3.8589 1
2 2.9781 4.5651 1
3 2.9320 3.5519 1
4 3.5772 2.8560 1

Visualize the data.

Pay attention to the outlier at the left-hand side.


In [3]:
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(data['X1'], data['X2'], s=50, c=data['y'], cmap='Reds')
ax.set_title('Raw data')
ax.set_xlabel('X1')
ax.set_ylabel('X2')


Out[3]:
<matplotlib.text.Text at 0x112cee390>

In [4]:
svc1 = sklearn.svm.LinearSVC(C=1, loss='hinge')
svc1.fit(data[['X1', 'X2']], data['y'])
svc1.score(data[['X1', 'X2']], data['y'])


Out[4]:
0.98039215686274506
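
A score of ~0.98 means the $C=1$ model misclassifies exactly one of the 51 samples ($50/51 \approx 0.9804$). As a quick sanity check (a sketch using the standard `predict` method), we can locate that sample:

In [ ]:
# show the sample(s) where the C=1 model's prediction disagrees with the label
data[svc1.predict(data[['X1', 'X2']]) != data['y']]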

In [5]:
data['SVM1 Confidence'] = svc1.decision_function(data[['X1', 'X2']])
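
`decision_function` returns the signed margin $w^\top x + b$ for each sample: the sign gives the predicted class, and the magnitude indicates how far the sample lies from the separating hyperplane. As a sketch, the same values can be recomputed from the fitted `coef_` and `intercept_` attributes:

In [ ]:
# recompute w^T x + b by hand and compare with decision_function
manual = data[['X1', 'X2']].values @ svc1.coef_.ravel() + svc1.intercept_[0]
np.allclose(manual, data['SVM1 Confidence'])  # expect True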

In [6]:
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(data['X1'], data['X2'], s=50, c=data['SVM1 Confidence'], cmap='RdBu')
ax.set_title('SVM (C=1) Decision Confidence')


Out[6]:
<matplotlib.text.Text at 0x1151d5ac8>

Try $C=100$.

With a large $C$, the model penalizes margin violations heavily and tends to overfit, so the outlier at the left-hand side is now classified correctly.
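
For reference, `LinearSVC(loss='hinge')` minimizes the regularized hinge loss

$$\min_{w,\,b}\ \tfrac{1}{2}\|w\|^2 + C\sum_{i=1}^{m}\max\!\big(0,\ 1 - y^{(i)}\,(w^\top x^{(i)} + b)\big),$$

with labels $y^{(i)} \in \{-1, +1\}$, so $C$ acts like $1/\lambda$: larger values weight the misclassification penalty more heavily relative to the margin term.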


In [7]:
svc100 = sklearn.svm.LinearSVC(C=100, loss='hinge')
svc100.fit(data[['X1', 'X2']], data['y'])
svc100.score(data[['X1', 'X2']], data['y'])


Out[7]:
0.98039215686274506

In [8]:
data['SVM100 Confidence'] = svc100.decision_function(data[['X1', 'X2']])

In [9]:
fig, ax = plt.subplots(figsize=(8,6))
ax.scatter(data['X1'], data['X2'], s=50, c=data['SVM100 Confidence'], cmap='RdBu')
ax.set_title('SVM (C=100) Decision Confidence')


Out[9]:
<matplotlib.text.Text at 0x115247b00>
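
To see how $C$ moves the boundary, we can draw both separating lines directly. For a linear SVM the boundary is the set where $w_1 x_1 + w_2 x_2 + b = 0$, i.e. $x_2 = -(w_1 x_1 + b)/w_2$; the sketch below reads $w$ and $b$ from each fitted model:

In [ ]:
# overlay the C=1 and C=100 decision boundaries on the raw data
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(data['X1'], data['X2'], s=50, c=data['y'], cmap='Reds')
xs = np.linspace(data['X1'].min(), data['X1'].max(), 100)
for clf, label in [(svc1, 'C=1'), (svc100, 'C=100')]:
    w = clf.coef_.ravel()
    b = clf.intercept_[0]
    ax.plot(xs, -(w[0] * xs + b) / w[1], label=label)
ax.legend()
ax.set_title('Decision boundaries')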

In [10]:
data.head()


Out[10]:
X1 X2 y SVM1 Confidence SVM100 Confidence
0 1.9643 4.5957 1 0.799366 3.485448
1 2.2753 3.8589 1 0.381775 1.622359
2 2.9781 4.5651 1 1.374058 4.537510
3 2.9320 3.5519 1 0.519575 1.440891
4 3.5772 2.8560 1 0.333060 0.077828