In [1]:
    
%matplotlib inline
    
In [2]:
    
import sklearn.linear_model
import sklearn.svm
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
    
40 separable points
In [3]:
    
# to make sure we always get the same "random" data
np.random.seed(1)
X = np.r_[np.random.randn(20, 2), np.random.randn(20, 2) + [4, 4]]
# allows for graphs with width > height without distorting the aspect ratio
X[:, 1] = X[:, 1] / 2.0
y = np.r_[np.zeros(20), np.ones(20)]
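np.r_ stacks the two 20-sample Gaussian clusters row-wise: the first 20 rows sit near the origin (label 0) and the last 20 near [4, 4] (label 1), before the second column is halved. A minimal sanity check, not part of the original notebook:

# sanity check (illustrative only): 40 samples, 2 features, labels 0/1
print(X.shape, y.shape, np.unique(y))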
    
2 features
In [4]:
    
feature_1 = np.atleast_2d(X[:, 0]).T
feature_2 = np.atleast_2d(X[:, 1]).T
    
In [5]:
    
# Note the weak regularization: in scikit-learn, C is the inverse of the regularization strength, so C=10 regularizes only lightly
logistic = sklearn.linear_model.LogisticRegression(C=10)
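As a rough illustration (the C values below are arbitrary and not part of the original notebook), a smaller C shrinks the learned slope toward zero:

# illustrative sketch: compare the fitted slope for a strongly and a weakly
# regularized model (arbitrary C values, for intuition only)
for c in (0.01, 10):
    m = sklearn.linear_model.LogisticRegression(C=c).fit(feature_1, y)
    print(c, m.coef_[0][0])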
    
In [6]:
    
logistic.fit(feature_1, y)
    
    Out[6]:
In [7]:
    
logistic.intercept_[0], logistic.coef_[0]
    
    Out[7]:
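These two numbers fully determine the model: z = intercept_ + coef_ · x and g(z) = 1 / (1 + e^(-z)). A quick sketch (not from the original notebook) confirming that evaluating g(z) by hand matches predict_proba for the first sample:

# hand-computed probability for the first sample vs. sklearn's own prediction
z = logistic.intercept_[0] + logistic.coef_[0][0] * feature_1[0, 0]
print(1 / (1 + np.exp(-z)), logistic.predict_proba(feature_1[:1])[0, 1])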
In [34]:
    
xaxis = go.XAxis(title='Feature', zeroline=False)
yaxis = go.YAxis(title='y', range=[-0.5, 1.5], zeroline=False)
x_range = np.arange(np.min(feature_1), np.max(feature_1), 0.05)
z_func = logistic.intercept_[0] + (logistic.coef_[0][0] * x_range)
g_func = 1 / (1 + np.exp(-z_func))
samples_y0 = go.Scatter(
    x=feature_1[:20],
    y=y[:20],
    mode='markers',
    name='samples (y=0)',
    marker=go.Marker(
        color='red',
        size=8,
    ),
)
samples_y1 = go.Scatter(
    x=feature_1[20:],
    y=y[20:],
    mode='markers',
    name='samples (y=1)',
    marker=go.Marker(
        color='blue',
        size=8,
    ),
)
linear_function = go.Scatter(
    x=x_range,
    y=z_func,
    mode='lines',
    name='linear: z',
    line=go.Line(
        dash='dash',
        color='orange',
    ),
)
logistic_function = go.Scatter(
    x=x_range,
    y=g_func,
    mode='lines',
    name='logistic: g(z)',
    line=go.Line(color='rgb(44, 160, 44)'),
)
data = go.Data([linear_function, logistic_function, samples_y0, samples_y1])
layout = go.Layout(
    title='Logistic Regression: 1 Feature',
    legend=go.Legend(traceorder='reversed'),
    xaxis=xaxis,
    yaxis=yaxis,
    autosize=False,
    width=800,
    height=400,
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Logistic Regression: 1 Feature', height=400)
    
    Out[34]:
In [8]:
    
xaxis = go.XAxis(title='Feature 1', zeroline=False)
yaxis = go.YAxis(title='Feature 2', zeroline=False)
samples_y0 = go.Scatter(
    x=feature_1[:20],
    y=feature_2[:20],
    mode='markers',
    name='samples (y=0)',
    marker=go.Marker(
        color='red',
        size=8,
    ),
)
samples_y1 = go.Scatter(
    x=feature_1[20:],
    y=feature_2[20:],
    mode='markers',
    name='samples (y=1)',
    marker=go.Marker(
        color='blue',
        size=8,
    ),
)
data = go.Data([samples_y1, samples_y0])
layout = go.Layout(
    title='Data: 2 Features',
    hovermode='closest',
    xaxis=xaxis,
    yaxis=yaxis,
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Data: 2 Features')
    
    Out[8]:
In [9]:
    
logistic = sklearn.linear_model.LogisticRegression(C=10)
    
In [10]:
    
logistic.fit(X, y)
    
    Out[10]:
In [11]:
    
logistic.intercept_[0], logistic.coef_[0]
    
    Out[11]:
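With two features, the boundary between the classes is the line where z = intercept_ + w1·x1 + w2·x2 = 0, which is exactly where g(z) = 0.5. A minimal sketch (illustrative, with an arbitrary choice of x1) verifying that a point on that line is predicted with probability close to 0.5:

# pick an arbitrary x1, solve the boundary equation for x2, and check the probability
w = logistic.coef_[0]
x1 = 2.0
x2 = -(logistic.intercept_[0] + w[0] * x1) / w[1]
print(logistic.predict_proba([[x1, x2]])[0])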
In [13]:
    
def fxy(x, y):
    return (
        1 / (
            1 + np.exp(
                -(logistic.intercept_[0] + (logistic.coef_[0][0] * x) + (logistic.coef_[0][1] * y))
            )
        )
    )
xaxis = go.XAxis(title='Feature 1')
yaxis = go.YAxis(title='Feature 2')
zaxis = go.ZAxis(title='y', zeroline=False)
x_mesh = y_mesh = np.arange(np.min(X), np.max(X), 0.1)
y_mesh_column = y_mesh[:, np.newaxis]
z = fxy(x_mesh, y_mesh_column)
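# note: x_mesh is 1-D and y_mesh_column is a column vector, so the expression in
# fxy broadcasts to a full 2-D grid of predicted probabilities for the surface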
surface = go.Surface(
    z=z,
    x=x_mesh,
    y=y_mesh,
    name='logistic regression function surface',
    colorscale='Greys',
)
samples_y0 = go.Scatter3d(
    x=feature_1[:20],
    y=feature_2[:20],
    z=y[:20],
    mode='markers',
    name='y = 0',
    marker=go.Marker(
        size=8,
        color='red',
    ),
)
samples_y1 = go.Scatter3d(
    x=feature_1[20:],
    y=feature_2[20:],
    z=y[20:],
    mode='markers',
    name='y = 1',
    marker=go.Marker(
        size=8,
        color='blue',
    ),
)
data = go.Data([samples_y1, samples_y0, surface])
layout = go.Layout(
    title='Logistic Regression: 2 Features',
    scene=go.Scene(
        xaxis=xaxis,
        yaxis=yaxis,
        zaxis=zaxis,
    ),
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Logistic Regression: 2 Features')
    
    Out[13]:
In [14]:
    
def fxy(x, y):
    return (
        1 / (
            1 + np.exp(
                -(logistic.intercept_[0] + (logistic.coef_[0][0] * x) + (logistic.coef_[0][1] * y))
            )
        )
    )
xaxis = go.XAxis(title='Feature 1')
yaxis = go.YAxis(title='Feature 2')
zaxis = go.ZAxis(title='y', zeroline=False)
x_mesh = y_mesh = np.arange(np.min(X), np.max(X), 0.1)
y_mesh_column = y_mesh[:, np.newaxis]
z = fxy(x_mesh, y_mesh_column)
surface = go.Surface(
    z=z,
    x=x_mesh,
    y=y_mesh,
    name='logistic regression function surface',
    colorscale='Greys',
)
samples_y0 = go.Scatter3d(
    x=feature_1[:20],
    y=feature_2[:20],
    z=fxy(feature_1[:20], feature_2[:20]),
    mode='markers',
    name='y = 0',
    marker=go.Marker(
        size=8,
        color='red',
    ),
)
samples_y1 = go.Scatter3d(
    x=feature_1[20:],
    y=feature_2[20:],
    z=fxy(feature_1[20:], feature_2[20:]),
    mode='markers',
    name='y = 1',
    marker=go.Marker(
        size=8,
        color='blue',
    ),
)
data = go.Data([samples_y1, samples_y0, surface])
layout = go.Layout(
    title='Logistic Regression: 2 Features - Projected',
    scene=go.Scene(
        xaxis=xaxis,
        yaxis=yaxis,
        zaxis=zaxis,
    ),
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Logistic Regression: 2 Features - Projected')
    
    Out[14]:
In [15]:
    
linear_svm = sklearn.svm.SVC(kernel='linear', C=10)
    
In [16]:
    
linear_svm.fit(X, y)
    
    Out[16]:
In [17]:
    
linear_svm.support_vectors_
    
    Out[17]:
In [18]:
    
linear_svm.intercept_[0], linear_svm.coef_[0]
    
    Out[18]:
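The weight vector also gives the margin width, 2 / ||w||, and the support vectors are the samples sitting on (or inside) that margin, where the decision function is roughly ±1. A small sketch, not part of the original notebook:

# margin width from the learned weights, and the decision function at the support
# vectors (approximately +/-1 for support vectors that lie exactly on the margin)
w = linear_svm.coef_[0]
print('margin width:', 2 / np.linalg.norm(w))
print('f(x) at support vectors:', linear_svm.decision_function(linear_svm.support_vectors_))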
In [30]:
    
xaxis = go.XAxis(title='Feature 1', zeroline=False)
yaxis = go.YAxis(title='Feature 2', zeroline=False)
x_range = np.arange(np.min(feature_1), np.max(feature_1), 0.05)
a = linear_svm.coef_[0][0] / -linear_svm.coef_[0][1]
sv_1 = linear_svm.support_vectors_[0]
sv_2 = linear_svm.support_vectors_[-1]
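# a is the slope of the decision boundary (-w1 / w2); sv_1 and sv_2 are one support
# vector from each class, used below to draw the two margin-edge parallels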
samples_y0 = go.Scatter(
    x=feature_1[:20],
    y=feature_2[:20],
    mode='markers',
    name='samples (y=0)',
    marker=go.Marker(
        color='red',
        size=8,
    ),
)
samples_y1 = go.Scatter(
    x=feature_1[20:],
    y=feature_2[20:],
    mode='markers',
    name='samples (y=1)',
    marker=go.Marker(
        color='blue',
        size=8,
    ),
)
decision_boundary = go.Scatter(
    x=x_range,
    y=(
        (linear_svm.intercept_[0] / -linear_svm.coef_[0][1]) + 
        (linear_svm.coef_[0][0] / -linear_svm.coef_[0][1]) * x_range
    ),
    mode='lines',
    name='decision boundary',
)
parallel_sv1 = go.Scatter(
    x=x_range,
    y=(sv_1[1] - a * sv_1[0]) + a * x_range,
    mode='lines',
    line=go.Line(dash='dash', color='grey'),
    name='parallel through SV 1'
)
parallel_sv2 = go.Scatter(
    x=x_range,
    y=(sv_2[1] - a * sv_2[0]) + a * x_range,
    mode='lines',
    line=go.Line(dash='dash', color='grey'),
    name='parallel through SV 2',
)
support_vectors = go.Scatter(
    x=linear_svm.support_vectors_[:, 0],
    y=linear_svm.support_vectors_[:, 1],
    mode='markers',
    marker=go.Marker(
        size=12,
        color='grey',
        line=go.Line(dash='dash', color='grey'),
    ),
    name='support vectors',
)
data = go.Data([parallel_sv2, parallel_sv1, decision_boundary, support_vectors, samples_y0, samples_y1])
layout = go.Layout(
    title='Linear SVM: Decision Boundary',
    legend=go.Legend(traceorder='reversed'),
    hovermode='closest',
    xaxis=xaxis,
    yaxis=yaxis,
    autosize=False,
    width=800,
    height=600,
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Linear SVM: Decision Boundary', height=600)
    
    Out[30]:
In [28]:
    
xaxis = go.XAxis(title='Feature 1', zeroline=False)
yaxis = go.YAxis(title='Feature 2', zeroline=False)
x_range = np.arange(np.min(feature_1), np.max(feature_1), 0.05)
samples_y0 = go.Scatter(
    x=feature_1[:20],
    y=feature_2[:20],
    mode='markers',
    name='samples (y=0)',
    marker=go.Marker(
        color='red',
        size=8,
    ),
)
samples_y1 = go.Scatter(
    x=feature_1[20:],
    y=feature_2[20:],
    mode='markers',
    name='samples (y=1)',
    marker=go.Marker(
        color='blue',
        size=8,
    ),
)
decision_boundary_svm = go.Scatter(
    x=x_range,
    y=(
        (linear_svm.intercept_[0] / -linear_svm.coef_[0][1]) + 
        (linear_svm.coef_[0][0] / -linear_svm.coef_[0][1]) * x_range
    ),
    mode='lines',
    name='SVM',
    line=go.Line(dash='dash', color='rgb(44, 160, 44)'),
)
decision_boundary_lr = go.Scatter(
    x=x_range,
    # the logistic regression boundary is where z = 0, i.e. where g(z) = 0.5
    y=(
        (logistic.intercept_[0] / -logistic.coef_[0][1]) + 
        (logistic.coef_[0][0] / -logistic.coef_[0][1]) * x_range
    ),
    mode='lines',
    name='Logistic Regression',
)
data = go.Data([samples_y1, samples_y0, decision_boundary_lr, decision_boundary_svm])
layout = go.Layout(
    title='Decision Boundaries: Logistic Regression vs. Linear SVM',
    hovermode='closest',
    xaxis=xaxis,
    yaxis=yaxis,
    autosize=False,
    width=800,
    height=500,
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Decision Boundaries: Logistic Regression vs. Linear SVM', height=500)
    
    Out[28]:
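Both models separate this toy data set, so their boundaries differ only slightly. A closing sketch (not in the original notebook) comparing training accuracy:

# both classifiers should score perfectly (or nearly so) on this separable data
print('logistic regression:', logistic.score(X, y))
print('linear SVM:', linear_svm.score(X, y))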