In [1]:
%matplotlib inline
In [2]:
import sklearn.linear_model
import sklearn.svm
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
40 separable points
In [3]:
# Seed the global generator so every run draws the same "random" sample.
np.random.seed(1)
# Two clouds of 20 standard-normal points each; the second is shifted by (4, 4).
cluster_0 = np.random.randn(20, 2)
cluster_1 = np.random.randn(20, 2) + [4, 4]
X = np.vstack((cluster_0, cluster_1))
# Halve the second feature so plots that are wider than tall keep their
# aspect ratio undistorted.
X[:, 1] /= 2.0
# Labels follow the stacking order: first 20 rows are class 0, last 20 class 1.
y = np.concatenate((np.zeros(20), np.ones(20)))
2 features
In [4]:
# Each feature as its own column vector (one row per sample, one column),
# the 2-D layout scikit-learn estimators expect for their input.
feature_1 = X[:, 0].reshape(-1, 1)
feature_2 = X[:, 1].reshape(-1, 1)
In [5]:
# Notice the very relaxed regularization: in scikit-learn, C is the inverse of
# the regularization strength, so C=10 penalizes the weights less than the
# default C=1.0.
logistic = sklearn.linear_model.LogisticRegression(C=10)
In [6]:
# Train on the single column `feature_1` only, so the model learns one weight
# plus an intercept.
logistic.fit(feature_1, y)
Out[6]:
In [7]:
# Show the learned parameters: the intercept and the weight vector.
logistic.intercept_[0], logistic.coef_[0]
Out[7]:
In [34]:
# Evaluate the fitted one-feature model on a grid spanning the observed
# feature values: z is the linear score, g(z) its logistic transform.
x_range = np.arange(np.min(feature_1), np.max(feature_1), 0.05)
z_func = (logistic.coef_[0][0] * x_range) + logistic.intercept_[0]
g_func = 1 / (np.exp(-z_func) + 1)

# Training samples, one trace per class, drawn at their 0/1 label.
samples_y0 = go.Scatter(
    x=feature_1[:20],
    y=y[:20],
    name='samples (y=0)',
    mode='markers',
    marker=go.Marker(color='red', size=8),
)
samples_y1 = go.Scatter(
    x=feature_1[20:],
    y=y[20:],
    name='samples (y=1)',
    mode='markers',
    marker=go.Marker(color='blue', size=8),
)

# Model curves: the raw linear score (dashed) and the sigmoid applied to it.
linear_function = go.Scatter(
    x=x_range,
    y=z_func,
    name='linear: z',
    mode='lines',
    line=go.Line(dash='dash', color='orange'),
)
logistic_function = go.Scatter(
    x=x_range,
    y=g_func,
    name='logistic: g(z)',
    mode='lines',
    line=go.Line(color='rgb(44, 160, 44)'),
)

# The y axis is pinned to a fixed window around the 0/1 labels.
xaxis = go.XAxis(title='Feature', zeroline=False)
yaxis = go.YAxis(title='y', range=[-0.5, 1.5], zeroline=False)
layout = go.Layout(
    title='Logistic Regression: 1 Feature',
    legend=go.Legend(traceorder='reversed'),
    xaxis=xaxis,
    yaxis=yaxis,
    autosize=False,
    width=800,
    height=400,
)

data = go.Data([linear_function, logistic_function, samples_y0, samples_y1])
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Logistic Regression: 1 Feature', height=400)
Out[34]:
In [8]:
# Scatter plot of the raw data in feature space, one trace per class.
samples_y0 = go.Scatter(
    x=feature_1[:20],
    y=feature_2[:20],
    name='samples (y=0)',
    mode='markers',
    marker=go.Marker(color='red', size=8),
)
samples_y1 = go.Scatter(
    x=feature_1[20:],
    y=feature_2[20:],
    name='samples (y=1)',
    mode='markers',
    marker=go.Marker(color='blue', size=8),
)

xaxis = go.XAxis(title='Feature 1', zeroline=False)
yaxis = go.YAxis(title='Feature 2', zeroline=False)
layout = go.Layout(
    title='Data: 2 Features',
    hovermode='closest',
    xaxis=xaxis,
    yaxis=yaxis,
)

data = go.Data([samples_y1, samples_y0])
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Data: 2 Features')
Out[8]:
In [9]:
# Fresh model with the same C=10 setting. Rebinding `logistic` replaces the
# model that was fitted on `feature_1` alone.
logistic = sklearn.linear_model.LogisticRegression(C=10)
In [10]:
# Train on the full matrix X, i.e. on both feature columns.
logistic.fit(X, y)
Out[10]:
In [11]:
# Show the learned parameters: the intercept and the weight vector.
logistic.intercept_[0], logistic.coef_[0]
Out[11]:
In [13]:
def fxy(x, y):
    """Return the fitted logistic model's output g(z) at the point(s) (x, y).

    z = intercept + w0 * x + w1 * y, read from the module-level `logistic`
    at call time. `x` and `y` may be scalars or numpy arrays; arrays are
    combined with normal numpy broadcasting.
    """
    z = (
        logistic.intercept_[0]
        + (logistic.coef_[0][0] * x)
        + (logistic.coef_[0][1] * y)
    )
    return 1 / (1 + np.exp(-z))


xaxis = go.XAxis(title='Feature 1')
yaxis = go.YAxis(title='Feature 2')
zaxis = go.ZAxis(title='y', zeroline=False)

# One shared grid for both axes, spanning the overall min/max of X.
x_mesh = y_mesh = np.arange(np.min(X), np.max(X), 0.1)
# Broadcasting a row of x values against a column of y values gives
# z[i][j] = fxy(x_mesh[j], y_mesh[i]).
y_mesh_column = y_mesh[:, np.newaxis]
z = fxy(x_mesh, y_mesh_column)

surface = go.Surface(
    z=z,
    x=x_mesh,
    y=y_mesh,
    name='logistic regression function surface',
    colorscale='Greys',
)

# Samples are drawn at the height of their true label (0 or 1).
samples_y0 = go.Scatter3d(
    x=feature_1[:20],
    y=feature_2[:20],
    z=y[:20],
    mode='markers',
    name='y = 0',
    marker=go.Marker(
        size=8,
        color='red',
    ),
)
samples_y1 = go.Scatter3d(
    x=feature_1[20:],
    y=feature_2[20:],
    z=y[20:],
    mode='markers',
    name='y = 1',
    marker=go.Marker(
        size=8,
        color='blue',
    ),
)

data = go.Data([samples_y1, samples_y0, surface])
layout = go.Layout(
    title='Logistic Regression: 2 Features',
    scene=go.Scene(
        xaxis=xaxis,
        yaxis=yaxis,
        zaxis=zaxis,
    ),
)
fig = go.Figure(data=data, layout=layout)
# Plot the figure, not the bare trace list: passing `data` alone would drop
# the layout, and with it the title and the axis titles.
py.iplot(fig, filename='Logistic Regression: 2 Features')
Out[13]:
In [14]:
def fxy(x, y):
    """Return the fitted logistic model's output g(z) at the point(s) (x, y).

    z = intercept + w0 * x + w1 * y, read from the module-level `logistic`
    at call time. `x` and `y` may be scalars or numpy arrays; arrays are
    combined with normal numpy broadcasting.
    """
    z = (
        logistic.intercept_[0]
        + (logistic.coef_[0][0] * x)
        + (logistic.coef_[0][1] * y)
    )
    return 1 / (1 + np.exp(-z))


xaxis = go.XAxis(title='Feature 1')
yaxis = go.YAxis(title='Feature 2')
zaxis = go.ZAxis(title='y', zeroline=False)

# One shared grid for both axes, spanning the overall min/max of X.
x_mesh = y_mesh = np.arange(np.min(X), np.max(X), 0.1)
# Broadcasting a row of x values against a column of y values gives
# z[i][j] = fxy(x_mesh[j], y_mesh[i]).
y_mesh_column = y_mesh[:, np.newaxis]
z = fxy(x_mesh, y_mesh_column)

surface = go.Surface(
    z=z,
    x=x_mesh,
    y=y_mesh,
    name='logistic regression function surface',
    colorscale='Greys',
)

# Here each sample is drawn at the model's output for it (projected onto the
# surface) instead of at its true 0/1 label. `fxy` is reused so the markers
# and the surface come from the same formula.
samples_y0 = go.Scatter3d(
    x=feature_1[:20],
    y=feature_2[:20],
    z=fxy(feature_1[:20], feature_2[:20]),
    mode='markers',
    name='y = 0',
    marker=go.Marker(
        size=8,
        color='red',
    ),
)
samples_y1 = go.Scatter3d(
    x=feature_1[20:],
    y=feature_2[20:],
    z=fxy(feature_1[20:], feature_2[20:]),
    mode='markers',
    name='y = 1',
    marker=go.Marker(
        size=8,
        color='blue',
    ),
)

data = go.Data([samples_y1, samples_y0, surface])
layout = go.Layout(
    title='Logistic Regression: 2 Features - Projected',
    scene=go.Scene(
        xaxis=xaxis,
        yaxis=yaxis,
        zaxis=zaxis,
    ),
)
fig = go.Figure(data=data, layout=layout)
# Plot the figure, not the bare trace list: passing `data` alone would drop
# the layout, and with it the title and the axis titles.
py.iplot(fig, filename='Logistic Regression: 2 Features - Projected')
Out[14]:
In [15]:
# Support vector classifier with a linear kernel, using the same C=10 as the
# logistic regression models.
linear_svm = sklearn.svm.SVC(kernel='linear', C=10)
In [16]:
# Train on the full matrix X, i.e. on both feature columns.
linear_svm.fit(X, y)
Out[16]:
In [17]:
# Show the training points the fitted SVM retained as support vectors.
linear_svm.support_vectors_
Out[17]:
In [18]:
# Show the intercept and weight vector of the separating line
# (scikit-learn exposes `coef_` on SVC only for the linear kernel).
linear_svm.intercept_[0], linear_svm.coef_[0]
Out[18]:
In [30]:
# The separating line w0*x + w1*y + b = 0 is rewritten as y = a*x + offset,
# with slope a = -w0/w1, so it can be drawn over a grid of x values.
x_range = np.arange(np.min(feature_1), np.max(feature_1), 0.05)
a = -(linear_svm.coef_[0][0] / linear_svm.coef_[0][1])

# First and last support vector; the dashed lines below pass through them.
# NOTE(review): presumably these lie on opposite sides of the boundary,
# which relies on the ordering of `support_vectors_` - confirm.
sv_1 = linear_svm.support_vectors_[0]
sv_2 = linear_svm.support_vectors_[-1]

# Training samples, one trace per class.
samples_y0 = go.Scatter(
    x=feature_1[:20],
    y=feature_2[:20],
    name='samples (y=0)',
    mode='markers',
    marker=go.Marker(color='red', size=8),
)
samples_y1 = go.Scatter(
    x=feature_1[20:],
    y=feature_2[20:],
    name='samples (y=1)',
    mode='markers',
    marker=go.Marker(color='blue', size=8),
)

decision_boundary = go.Scatter(
    x=x_range,
    y=a * x_range + (linear_svm.intercept_[0] / -linear_svm.coef_[0][1]),
    name='decision boundary',
    mode='lines',
)

# Lines with the boundary's slope, shifted to pass through sv_1 and sv_2.
parallel_sv1 = go.Scatter(
    x=x_range,
    y=a * x_range + (sv_1[1] - a * sv_1[0]),
    name='parallel through SV 1',
    mode='lines',
    line=go.Line(dash='dash', color='grey'),
)
parallel_sv2 = go.Scatter(
    x=x_range,
    y=a * x_range + (sv_2[1] - a * sv_2[0]),
    name='parallel through SV 2',
    mode='lines',
    line=go.Line(dash='dash', color='grey'),
)

# All support vectors, drawn with larger grey markers.
support_vectors = go.Scatter(
    x=linear_svm.support_vectors_[:, 0],
    y=linear_svm.support_vectors_[:, 1],
    name='support vectors',
    mode='markers',
    marker=go.Marker(
        size=12,
        color='grey',
        line=go.Line(dash='dash', color='grey'),
    ),
)

xaxis = go.XAxis(title='Feature 1', zeroline=False)
yaxis = go.YAxis(title='Feature 2', zeroline=False)
layout = go.Layout(
    title='Linear SVM: Decision Boundary',
    legend=go.Legend(traceorder='reversed'),
    hovermode='closest',
    xaxis=xaxis,
    yaxis=yaxis,
    autosize=False,
    width=800,
    height=600,
)

data = go.Data([
    parallel_sv2,
    parallel_sv1,
    decision_boundary,
    support_vectors,
    samples_y0,
    samples_y1,
])
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Linear SVM: Decision Boundary', height=600)
Out[30]:
In [28]:
# Compare the decision boundaries of the linear SVM and the two-feature
# logistic regression on top of the training samples.
xaxis = go.XAxis(title='Feature 1', zeroline=False)
yaxis = go.YAxis(title='Feature 2', zeroline=False)
x_range = np.arange(np.min(feature_1), np.max(feature_1), 0.05)

samples_y0 = go.Scatter(
    x=feature_1[:20],
    y=feature_2[:20],
    mode='markers',
    name='samples (y=0)',
    marker=go.Marker(
        color='red',
        size=8,
    ),
)
samples_y1 = go.Scatter(
    x=feature_1[20:],
    y=feature_2[20:],
    mode='markers',
    name='samples (y=1)',
    marker=go.Marker(
        color='blue',
        size=8,
    ),
)

# SVM boundary: w0*x + w1*y + b = 0, solved for y.
decision_boundary_svm = go.Scatter(
    x=x_range,
    y=(
        (linear_svm.intercept_[0] / -linear_svm.coef_[0][1]) +
        (linear_svm.coef_[0][0] / -linear_svm.coef_[0][1]) * x_range
    ),
    mode='lines',
    name='SVM',
    line=go.Line(dash='dash', color='rgb(44, 160, 44)'),
)

# Logistic regression boundary: the predicted class flips where g(z) = 0.5,
# which is exactly where the linear score z = b + w0*x + w1*y equals 0.
# The 0.5 threshold applies to the probability, not to z, so it must not be
# subtracted from the intercept.
decision_boundary_lr = go.Scatter(
    x=x_range,
    y=(
        (logistic.intercept_[0] / -logistic.coef_[0][1]) +
        (logistic.coef_[0][0] / -logistic.coef_[0][1]) * x_range
    ),
    mode='lines',
    name='Logistic Regression',
)

data = go.Data([samples_y1, samples_y0, decision_boundary_lr, decision_boundary_svm])
layout = go.Layout(
    title='Logistic Regression: Decision Boundary',
    hovermode='closest',
    xaxis=xaxis,
    yaxis=yaxis,
    autosize=False,
    width=800,
    height=500,
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Logistic Regression: Decision Boundary', height=500)
Out[28]: