In [1]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm
from sklearn import metrics
import sklearn
import re

import plotly.plotly as py
import plotly.graph_objs as go

In [2]:
np.random.seed(0)
# create 400 points randomly distributed between -3 and 3 (along x and y)
X = np.random.uniform(-3,3,(400,2))

# points within a circle centered at (1.5, 0) of radius 1 are labeled class +1,
# and -1 otherwise
shift = 1.5
radius = 1
X_shift = X[:,0]-shift
R_shift = np.sqrt(X_shift**2 + X[:,1]**2)

# class labels
Y = np.asarray(map(lambda x: 1 if x else -1, R_shift < radius))

In [3]:
def map_func(x):
    """
    Map d-dimensional data to d+1-dimensions.  Return a numpy array of data points. 
    
    x : matrix, a collection of data points, shape = [n_samples, n_features]
    """
    if len(x.shape) == 1:
        # x contains only a single data point
        return np.r_[x, calc_z(x)]
    else:
        z = [np.r_[j, calc_z(j)] for j in x]
        return np.array(z)

def calc_z(x):
    """
    Take the dot product of an array with itself.
    
    x : array, shape = [1, n_features]
    """
    return np.dot(x, x.T)

def my_kernel(x1, x2):
    """
    Custom kernel to pass to sklearn svm.SVC via its kernel parameter
    
    x1 : matrix, a collection of data points, shape = [n_samples, n_features]
    x2 : matrix, a collection of data points, shape = [n_samples, n_features]
    """
    return np.dot(map_func(x1), map_func(x2).T)

def calc_plane_norm(sv, coef):
    """
    Calculate the normal to the hyperplane (in mapped space)
    
    sv : matrix, contains mapped points, shape = [n_supportvectors, n_mappedfeatures]
    coef : array of floats, shape = [n_supportvectors, 1]
    """ 
    components = coef[:, np.newaxis]*sv
    return np.sum(components, axis = 0)

def calc_z_plane(x):
    """
    Calculate z-coordinates of the decision plane
    
    x: matrix, shape = [n_samples, n_features]
    """
    return (-w[0]*x[0] - w[1]*x[1] - b)/w[2]

In [4]:
model = svm.SVC(kernel=my_kernel, C=1000, tol=1e-3)
model.fit(X,Y)

# coef : dual coefficients alpha_i*y_i per support vector in decision function
coef = model.dual_coef_[0]
# intercept in decision function
b = model.intercept_

# bug in sklearn versions before 0.16: https://github.com/scikit-learn/scikit-learn/issues/4262
# The first class to appear in the dataset, i.e. Y[0], is mapped to the class +1, even if it is labeled -1
version_check = re.match(r"0\.(\d+).", sklearn.__version__).group(1)
if np.sign(Y[0]) == -1 and int(version_check) < 16:
    coef = -coef

In [5]:
# create mapped data points (from 2d to 3d)
Xm = map_func(X)

# Xm_sv is an array of the support vectors (in 3d)
# model.support_ contains the indices of the support vectors
Xm_sv = np.array([Xm[i] for i in model.support_])
w = calc_plane_norm(Xm_sv, coef)

In [6]:
# zgrid corresponds to z coordinates of paraboloid in the mapped space
xgrid, ygrid = np.meshgrid(np.arange(-3,3,0.1), np.arange(-3,3,0.1))
zgrid = np.array(map(calc_z, np.c_[xgrid.ravel(), ygrid.ravel()]))
zgrid = zgrid.reshape(xgrid.shape)

# zplane is the z coordinates of the hyperplane
zplane = np.array(map(calc_z_plane, np.c_[xgrid.ravel(), ygrid.ravel()]))
zplane = zplane.reshape(xgrid.shape)

In [7]:
Y_ind = (Y == 1)

In [8]:
# subscripts: 1->points inside circle, 2-> points outside circle
#x1 = x_all[Y_ind]
#x2 = x_all[~Y_ind]
#y1 = y_all[Y_ind]
#y2 = y_all[~Y_ind]
x1 = [k[1] for k in enumerate(Xm[:,0]) if Y_ind[k[0]]]
x2 = [k[1] for k in enumerate(Xm[:,0]) if ~Y_ind[k[0]]]
y1 = [k[1] for k in enumerate(Xm[:,1]) if Y_ind[k[0]]]
y2 = [k[1] for k in enumerate(Xm[:,1]) if ~Y_ind[k[0]]]
z1 = [k[1] for k in enumerate(Xm[:,2]) if Y_ind[k[0]]]
z2 = [k[1] for k in enumerate(Xm[:,2]) if ~Y_ind[k[0]]]

In [26]:
# 2-d points inside circle - scatter plot

scatter2d_in = go.Scatter3d(
    x = x1,
    y = y1,
    z = np.array([0]*len(x1)),
    mode = 'markers',
    marker = dict(
               size=3,
               symbol='dot',
               color='rgb(177,40,40)',
               opacity = 0.5
    ),
    showlegend = False
)

# 2-d points outside circle - scatter plot

scatter2d_out = go.Scatter3d(
    x = x2,
    y = y2,
    z = np.array([0]*len(x2)),
    mode = 'markers',
    marker = dict(
               size=3,
               symbol='dot',
               color='rgb(83,101,227)',
               opacity = 0.5
    ),
    showlegend = False
)

# 3-d points inside circle - scatter plot
scatter3d_in = go.Scatter3d(
    x = x1,
    y = y1,
    z = z1,
    mode = 'markers',
    marker = dict(
               size=3,
               symbol='dot',
               color='rgb(177,40,40)',
               opacity = 1
    ),
    showlegend = False
)

# 3-d points outside circle - scatter plot
scatter3d_out = go.Scatter3d(
    x = x2,
    y = y2,
    z = z2,
    mode = 'markers',
    marker = dict(
               size=3,
               symbol='dot',
               color='rgb(83,101,227)',
               opacity = 1
    ),
    showlegend = False
)

# hyperplane - surface plot
surface_plane = go.Surface(
    z = zplane,
    x = xgrid,
    y = ygrid,
    name = 'hyperplane',
    opacity = 1,
    showlegend = False,
    zauto = False,
    showscale = False,
    zmin = 0,
    zmax = 1,
    colorscale = [[0, 'rgb(70,70,70)'],[1, 'rgb(70,70,70)']]
)

# paraboloid - surface plot
surface_paraboloid = go.Surface(
    z = zgrid,
    x = xgrid,
    y = ygrid,
    name = 'paraboloid',
    opacity = 0.99,
    showlegend = False,
    zauto = False,
    showscale = False,
    zmin = 0,
    zmax = 1,
    colorscale = [[0, 'rgb(255,255,0)'],[1, 'rgb(255,255,0)']]
)

In [27]:
data = go.Data([scatter2d_in, scatter2d_out, scatter3d_in, scatter3d_out, surface_plane, surface_paraboloid])

In [28]:
layout = go.Layout(
    title='SVM classifier',
    autosize=False,
    width=800,
    height=800,
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0
    ),
    scene = go.Scene(
               zaxis = dict(
                         autorange = False,
                         range = [-2,18])
    )
)

In [29]:
fig = go.Figure(data=data, layout=layout)
plot_url = py.plot(fig, filename='SVM-classifier')

In [13]:
plot_url


Out[13]:
u'https://plot.ly/~frangipane/35'