Primal Slab SVM

Let $K \in \mathbb{R}^{m \times m}$ with $K_{ij} = \texttt{kernel}(x_i,x_j)$, and let $K_{i}$ denote the $i^{th}$ column of $K$.

The primal minimization objective is $$\min_{\beta \in \mathbb{R}^m,\,\rho \in \mathbb{R}} \; \beta^T K \beta + \frac{1}{\nu m} \sum_i \texttt{loss}(K_i^T \beta, \rho)$$

Let $F$ be the objective function. $$F(\beta,\rho) = \beta^T K \beta + \frac{1}{\nu m} \sum_i \texttt{loss}(K_i^T \beta, \rho)$$

Gradients: $$\nabla_\beta F(\beta,\rho) = 2K\beta + \frac {1}{\nu m} \sum_i K_i \, \frac{\partial}{\partial t}\texttt{loss}(t, \rho)\Big|_{t = K_i^T \beta}$$ $$\nabla_\rho F(\beta,\rho) = \frac {1}{\nu m} \sum_i \frac{\partial}{\partial \rho}\texttt{loss}(K_i^T \beta, \rho)$$

Hessians: $$H_\beta = 2K + \frac {1}{\nu m} \sum_i K_i K_i^T \, \frac{\partial^2}{\partial t^2}\texttt{loss}(t, \rho)\Big|_{t = K_i^T \beta}$$ $$H_\rho = \frac {1}{\nu m} \sum_i \frac{\partial^2}{\partial \rho^2}\texttt{loss}(K_i^T \beta, \rho)$$

We consider two losses, a hinge and a squared hinge on a slab of half-width $\delta$: $$\texttt{loss}_{hinge}(t,\rho) = \max(~0,~ |~\rho - t~| - \delta ~)$$ $$\texttt{loss}_{square-hinge}(t,\rho) = \max(~0,~ |~\rho - t~| - \delta ~)^2$$
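
To make the objective concrete, the following is a minimal NumPy sketch that evaluates $F(\beta,\rho)$ for either loss directly from $K$ (the function and argument names here are illustrative, not the implementation used below):

In [ ]:
import numpy as np

def slab_objective(K, beta, rho, nu, delta, squared=True):
    # F(beta, rho) = beta^T K beta + 1/(nu*m) * sum_i loss(K_i^T beta, rho)
    m = K.shape[0]
    t = K.dot(beta)                                   # t_i = K_i^T beta (K is symmetric)
    slack = np.maximum(0.0, np.abs(rho - t) - delta)  # hinge slab loss
    loss = slack ** 2 if squared else slack           # squared hinge if requested
    return beta.dot(K.dot(beta)) + loss.sum() / (nu * m)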

Loss Gradients: $$\frac{d}{dt}\texttt{loss}_{hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ -1, & \mbox{if } ~\rho - t~ \gt \delta \\ 1, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$

$$\frac{d}{dt}\texttt{loss}_{square-hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ -2\left(\rho-t-\delta\right), & \mbox{if } ~\rho - t~ \gt \delta \\ 2\left(-\rho+t-\delta\right), & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$
$$\frac{d}{d\rho}\texttt{loss}_{hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 1, & \mbox{if } ~\rho - t~ \gt \delta \\ -1, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$$$\frac{d}{d\rho}\texttt{loss}_{square-hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 2\left(\rho-t-\delta\right), & \mbox{if } ~\rho - t~ \gt \delta \\ -2\left(-\rho+t-\delta\right), & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$
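
The closed-form derivatives above are easy to sanity-check with a central finite difference; a minimal sketch for the squared hinge at a differentiable point (names are illustrative):

In [ ]:
def sq_hinge(t, rho, delta):
    return max(0.0, abs(rho - t) - delta) ** 2

def sq_hinge_dt(t, rho, delta):
    # piecewise d/dt formula from above
    if abs(rho - t) <= delta:
        return 0.0
    return -2.0 * (rho - t - delta) if rho - t > delta else 2.0 * (t - rho - delta)

t, rho, delta, eps = 0.7, 1.0, 0.01, 1e-6
numeric = (sq_hinge(t + eps, rho, delta) - sq_hinge(t - eps, rho, delta)) / (2 * eps)
print abs(numeric - sq_hinge_dt(t, rho, delta))  # should be ~0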

Loss Hessians: $$\frac{d^2}{(dt)^2}\texttt{loss}_{hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 0, & \mbox{if } ~\rho - t~ \gt \delta \\ 0, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$

$$\frac{d^2}{(dt)^2}\texttt{loss}_{square-hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 2, & \mbox{if } ~\rho - t~ \gt \delta \\ 2, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$
$$\frac{d^2}{(d\rho)^2}\texttt{loss}_{hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 0, & \mbox{if } ~\rho - t~ \gt \delta \\ 0, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$$$\frac{d^2}{(d\rho)^2}\texttt{loss}_{square-hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 2, & \mbox{if } ~\rho - t~ \gt \delta \\ 2, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$
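
Putting the pieces together for the squared hinge, the Newton direction in $\beta$ solves $H_\beta d = \nabla_\beta F$. Below is a minimal dense sketch of that step, with illustrative names and assuming $K$ is symmetric; it is not the training code used later in this notebook:

In [ ]:
import numpy as np

def newton_direction_beta(K, beta, rho, nu, delta):
    # gradient and Hessian of F in beta for the squared-hinge slab loss
    m = K.shape[0]
    t = K.dot(beta)                    # t_i = K_i^T beta
    out = np.abs(rho - t) > delta      # points outside the slab (nonzero loss)
    lp = np.where(rho - t > delta, -2.0 * (rho - t - delta),
                  np.where(t - rho > delta, 2.0 * (t - rho - delta), 0.0))
    grad = 2.0 * K.dot(beta) + K[out].T.dot(lp[out]) / (nu * m)
    hess = 2.0 * K + 2.0 * K[out].T.dot(K[out]) / (nu * m)
    return np.linalg.solve(hess, grad)  # Newton update: beta <- beta - step * direction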

Evaluation at a test point $x$: $$ \langle \Phi(x), w\rangle = \sum_k \beta_k K(x_k, x) $$ The learned surface is the zero level set of $$ \langle \Phi(x), w\rangle -\rho = \sum_k \beta_k K(x_k, x) -\rho $$
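
Given a fitted $(\beta,\rho)$ and the same kernel that built $K$, evaluating a test point takes one kernel row; points with $|\langle \Phi(x), w\rangle - \rho| \le \delta$ lie inside the slab. A minimal sketch assuming the RBF kernel used later in this notebook (names are illustrative):

In [ ]:
import numpy as np

def rbf_row(X_train, x_test, sigma):
    # k(x_k, x_test) for every training point x_k
    d2 = np.sum((X_train - x_test) ** 2, axis=1)
    return np.exp(-d2 / (2.0 * sigma ** 2))

def decision_value(X_train, beta, rho, x_test, sigma):
    # <Phi(x_test), w> - rho = sum_k beta_k k(x_k, x_test) - rho
    return np.dot(beta, rbf_row(X_train, x_test, sigma)) - rho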


In [ ]:
%matplotlib inline

import numpy as np
from numpy import linalg, random, ones, zeros, matrix, eye, dot
from numpy.linalg import norm, cholesky, inv
from sklearn.cross_validation import train_test_split
import mosek
import math
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import sys
import time
import scipy.linalg
from collections import namedtuple

v = .00001                      # nu: the loss term is weighted by 1/(nu*m)
delta = 0.01                    # slab half-width
sigma = 100                     # RBF kernel bandwidth
initial_rho = 1
max_iter = 100
initial_step_size = .1
timer_thresh = .1               # only print timings that exceed this many seconds
ep = .0001                      # convergence / finite-difference tolerance
points_count = 1000
points_std_from_surface = 0.0001  # noise of the sample points around the unit sphere


def pivot(A, k, n):
    # full pivoting: locate the largest |entry| in the trailing submatrix A[k:, k:]
    sub = np.absolute(A[k:n, k:n])
    ipiv, jpiv = np.unravel_index(np.argmax(sub), sub.shape)
    ipiv = ipiv + k  # global row index of the pivot
    jpiv = jpiv + k  # global column index of the pivot
    Pk = eye(n)      # row permutation: swap rows k and ipiv
    Pk[ipiv, ipiv] = 0
    Pk[k, k] = 0
    Pk[k, ipiv] = 1
    Pk[ipiv, k] = 1
    Qk = eye(n)      # column permutation: swap columns k and jpiv
    Qk[jpiv, jpiv] = 0
    Qk[k, k] = 0
    Qk[k, jpiv] = 1
    Qk[jpiv, k] = 1
    return Pk, Qk


def incomplete_LU_decomp(A):
    start = time.time()

    assert A.shape[0] == A.shape[1]
    n = A.shape[0]
    for k in range(n - 1):
        Pk, Qk = pivot(A, k, n)
        A = dot(dot(Pk, A), Qk)
        # print A  # debug: printing the full matrix each step is very verbose
        for i in range(k + 1, n):
            if A[i, k] != 0:
                if A[k, k] == 0:
                    return 'Error: Null Pivot'
                A[i, k] = A[i, k] / A[k, k]
                for j in range(k + 1, n):
                    if A[i, j] != 0:
                        A[i, j] = A[i, j] - (A[i, k] * A[k, j])  # LU update uses a product, not a quotient

    end = time.time()
    if end - start > timer_thresh:
        print 'incomplete_LU_decomp:', end - start, 'sec'
    return A


def get_K_LU(K):
    K_LU = scipy.linalg.cholesky(K, lower=True)
    K_LU2 = incomplete_LU_decomp(K.copy())

    assert K_LU.shape == K_LU2.shape
    for k in range(K_LU.shape[0]):
        for i in range(K_LU.shape[1]):
            assert abs(K_LU[k, i] - K_LU2[k, i]) < ep


def loss_der_der(t, rho):
    if g_loss_type == 'hinge':
        return 0
    if g_loss_type == 'square-hinge':
        if abs(rho - t) < delta:
            return 0
        else:
            return 2
    raise Exception(g_loss_type, t, rho, delta)


def loss_der_vect(grad, t, rho, opt_on):
    if g_loss_type == 'hinge':
        grad[np.absolute(rho - t) <= delta] = 0
        if opt_on == 'b':
            grad[rho - t > delta] = -1
            grad[-rho + t > delta] = 1
        if opt_on == 'rho':
            # the derivative w.r.t. rho has the opposite sign of the derivative w.r.t. t
            grad[rho - t > delta] = 1
            grad[-rho + t > delta] = -1
        return grad
    if g_loss_type == 'square-hinge':
        grad[np.absolute(rho - t) <= delta] = 0
        if opt_on == 'b':
            grad[rho - t > delta] = -2.0 * (rho - t[rho - t > delta] - delta)
            grad[-rho + t > delta] = 2.0 * (-rho + t[-rho + t > delta] - delta)
            return grad
        if opt_on == 'rho':
            grad[rho - t > delta] = 2 * (rho - t[rho - t > delta] - delta)
            grad[-rho + t > delta] = -2 * (-rho + t[-rho + t > delta] - delta)
            return grad
    raise Exception(grad, g_loss_type, t, rho, delta)


def kernel(x1, x2):
    return math.exp(-1 * math.pow(norm(x1 - x2), 2
                                  ) / (2 * math.pow(sigma, 2)))


def kernel_vect(x_list, x2):
    return np.exp(-1 * np.power(norm(x_list - x2, axis=1), 2) / (2 * math.pow(sigma, 2)))


def loss_vect(t, rho):
    if g_loss_type == 'hinge':
        return np.maximum(np.zeros(t.shape), np.absolute(rho - t) - delta)
    if g_loss_type == 'square-hinge':
        return np.power(np.maximum(np.zeros(t.shape), np.absolute(rho - t) - delta), 2)


def loss_matrix_rho_vect(t, rho):
    rho = np.matrix(rho).T
    t = np.matrix(t)
    if g_loss_type == 'hinge':
        return np.maximum(np.zeros((t.shape[0], rho.shape[1])), np.absolute(t - rho) - delta)
    if g_loss_type == 'square-hinge':
        return np.power(np.maximum(np.zeros((t.shape[0], rho.shape[1])), np.absolute(t - rho) - delta), 2)


def loss_matrix_t_matrix(t, rho):
    if g_loss_type == 'hinge':
        return np.maximum(np.zeros(t.shape), np.absolute(t - rho) - delta)
    if g_loss_type == 'square-hinge':
        return np.power(np.maximum(np.zeros(t.shape), np.absolute(t - rho) - delta), 2)


def get_K():
    start = time.time()

    K = np.zeros((len(g_x), len(g_x)))
    for i in range(len(g_x)):
        K[i, :] = kernel_vect(g_x, g_x[i])

    end = time.time()
    if end - start > timer_thresh:
        print 'get_K:', end - start, 'sec'
    return K
        
class Slab_SVM:
    def H(self, beta, rho, loss_vect_list, opt_on):
        start = time.time()

        # Hessian for the square-hinge loss, using only the active (out-of-slab)
        # points in loss_vect_list, where loss''(t, rho) = 2
        K_active = self.K[self.support_vectors, :][loss_vect_list, :]
        if opt_on == 'b':
            ret = 2 * self.K + 2.0 / (v * self.m) * np.dot(K_active.T, K_active)
        elif opt_on == 'rho':
            ret = 2.0 * K_active.shape[0] / (v * self.m)

        end = time.time()
        if end - start > timer_thresh:
            print 'H:', end - start, 'sec'
        return ret

    def obj_funct(self, beta, rho):
        start = time.time()

        # F(beta, rho) = beta^T K beta + 1/(nu*m) * sum_i loss(K_i^T beta, rho)
        obj = np.dot(beta, np.dot(self.K, beta)) + 1.0 / (v * self.m) * np.sum(
            loss_vect(
                np.dot(self.K[self.support_vectors, :],
                       beta),
                rho))

        end = time.time()
        if end - start > timer_thresh:
            print 'obj_funct:', end - start, 'sec'
        return obj

    def obj_funct_beta_matrix(self, beta, rho):
        start = time.time()

        # same objective as obj_funct, evaluated for several candidate betas (columns);
        # beta^T K beta per column is a sum, not a norm
        obj = np.asarray(np.sum(np.multiply(beta, np.dot(self.K, beta)), axis=0)).ravel() \
              + 1.0 / (v * self.m) * np.sum(np.asarray(loss_matrix_t_matrix(np.dot(self.K[self.support_vectors, :],
                                                                                   beta),
                                                                            rho)
                                                        ),
                                            axis=0)

        end = time.time()
        if end - start > timer_thresh:
            print 'obj_funct_beta_matrix:', end - start, 'sec'
        return obj

    def obj_funct_rho_vect(self, beta, rho):
        start = time.time()
        
        obj = np.dot(beta, np.dot(self.K, beta)) + 1.0 / (v * self.m) * np.sum(np.asarray(
            loss_matrix_rho_vect(
                np.dot(
                    self.K[self.support_vectors, :], beta),
                rho)), axis=1)
        
        end = time.time()
        if end - start > timer_thresh:
            print 'obj_funct_rho_vect:', end - start, 'sec'
        return obj

    def f(self, x_test, beta, rho):
        start = time.time()

        w = np.dot(beta, kernel_vect(self.x_data, x_test)) - rho

        end = time.time()
        if end - start > timer_thresh:
            print 'f:', end - start, 'sec'
        return w

    def step(self, element, step_size, resid):
        return element - (step_size * resid)

    def backtrack_step_size_rho(self, step_size, obj, resid, beta, rho):
        start = time.time()
        number_of_steps = 2
        if step_size == ep ** 2:
            step_size = initial_step_size
        else:
            step_size *= 4.0

        iter_count = 0

        if obj > (self.obj_funct(beta, self.step(rho, step_size, resid))):
            end = time.time()
            if end - start > timer_thresh:
                print 'backtrack_step_size_rho:', end - start, 'sec', iter_count, 'times'
#             print 'backtrack_step_size_rho iter',iter_count
            return step_size

        while True:
            iter_count += 1
            steps = step_size * np.logspace(-1, -number_of_steps, num=number_of_steps, base=2.0)
            obj_many_steps = np.array(self.obj_funct_rho_vect(beta, self.step(rho, steps, resid))).ravel()

            if np.where(obj - obj_many_steps > 0)[0].shape[0] > 0:
                end = time.time()
                if end - start > timer_thresh:
                    print 'backtrack_step_size_rho:', end - start, 'sec', iter_count, 'times'
#                 print 'backtrack_step_size_rho iter',iter_count
                return steps[np.where(obj - obj_many_steps > 0)[0][0]]

            step_size = steps[-1]
            if step_size < ep ** 2:
                #             print 'WARNING: step size not found'
                step_size = ep ** 2
                end = time.time()
                if end - start > timer_thresh:
                    print 'backtrack_step_size_rho:', end - start, 'sec', iter_count, 'times'
#                 print 'backtrack_step_size_rho iter',iter_count,'Step size too small'
                return step_size
            number_of_steps *= 2

        assert False

    def backtrack_step_size_beta(self, step_size, obj, resid, beta, rho):
        start = time.time()
        number_of_steps = 2
        if step_size == ep ** 3:
            step_size = initial_step_size
        else:
            step_size *= 2.0

        iter_count = 0

        if obj > (self.obj_funct(self.step(beta, step_size, resid), rho)):
            end = time.time()
            if end - start > timer_thresh:
                print 'backtrack_step_size_beta:', end - start, 'sec', iter_count, 'times'
            print 'backtrack_step_size_beta iter',iter_count
            assert obj > (self.obj_funct(self.step(beta, step_size, resid), rho))
            return step_size

        while True:
            iter_count += 1
            steps = step_size * np.logspace(-1, -number_of_steps, num=number_of_steps, base=2.0)
            obj_many_steps = (self.obj_funct_beta_matrix(np.asarray(self.step(np.matrix(beta),
                                                                              np.matrix(steps).T,
                                                                              np.matrix(resid)).T
                                                                    ),
                                                         rho))
            if np.where(obj - obj_many_steps > 0)[0].shape[0] > 0:
                chosen = np.where(obj - obj_many_steps > 0)[0][0]
                end = time.time()
                if end - start > timer_thresh:
                    print 'backtrack_step_size_beta:', end - start, 'sec', iter_count, 'times'
                return steps[chosen]

            step_size = steps[-1]
            if step_size < ep ** 3:
                #             print 'WARNING: step size not found'
                step_size = ep ** 3
                end = time.time()
                if end - start > timer_thresh:
                    print 'backtrack_step_size_beta:', end - start, 'sec', iter_count, 'times'
                print 'backtrack_step_size_beta iter',iter_count,'Step size too small'
#                 assert abs(obj - (self.obj_funct(self.step(beta, step_size, resid), rho)))<.0001
                return step_size
            number_of_steps *= 10

        assert False

    def numer_grad(self, beta, rho, ep, direct=0, opt_on=''):  # const
        # central finite difference of the objective
        if opt_on == 'rho':
            return (self.obj_funct(beta, rho + ep) \
                    - self.obj_funct(beta, rho - ep)) / (2 * ep)
        return (self.obj_funct(beta + (ep * direct), rho) \
                - self.obj_funct(beta - (ep * direct), rho)) / (2 * ep)

    def grad_checker(self, beta, rho, ep, opt_on):  # const
        start = time.time()

        if opt_on == 'rho':
            return self.numer_grad(beta, rho, ep, opt_on=opt_on)

        d = len(beta)
        w = np.zeros(d)
        for i in range(d):
            direct = np.zeros(beta.shape)
            direct[i] = 1
            w[i] = (self.numer_grad(beta, rho, ep, direct=direct, opt_on=opt_on))

        end = time.time()
        if end - start > timer_thresh:
            print 'grad_checker:', end - start, 'sec'
        return w

    def get_resid(self, beta, rho, grad, loss_vect_list, opt_on):
        start = time.time()

        if opt_on == 'b':

            # H_LU = scipy.linalg.cholesky(self.H(beta,rho,loss_vect_list,opt_on), lower=True)

            resid = linalg.solve(self.H(beta, rho, loss_vect_list, opt_on), grad)
        else:
            resid = grad / self.H(beta, rho, loss_vect_list, opt_on)

        end = time.time()
        if end - start > timer_thresh:
            print 'get_resid:', end - start, 'sec'
        return resid

    def grad_des_iterate(self, iterations, opt_on='b'):
        start = time.time()
        loss_vect_list = np.where(np.absolute(self.rho - \
                                              np.dot(self.K[self.support_vectors, :], self.beta)) \
                                  >= delta)[0]

        end = time.time()
        if end - start > timer_thresh:
            print 'find sv:', end - start, 'sec'

        obj = self.obj_funct(self.beta, self.rho)
        self.obj_array[iterations] = (obj)

        if opt_on == 'b':
            # grad_beta F = 2*K*beta + 1/(nu*m) * sum_i loss'(K_i^T beta, rho) * K_i
            # (K is symmetric, so row i of K[support_vectors, :] is the column K_i)
            self.grad = 2.0 * np.dot(self.K, self.beta) + 1.0 / (v * self.m) * np.dot(
                loss_der_vect(
                    self.grad_buffer,
                    np.dot(self.K[self.support_vectors, :], self.beta),
                    self.rho,
                    opt_on),
                self.K[self.support_vectors, :])
        elif opt_on == 'rho':
            self.grad = 1.0 / (v * self.m) * np.sum(loss_der_vect(
                self.grad_buffer,
                np.dot(self.K[self.support_vectors, :], self.beta), self.rho, opt_on))

        self.obj_grad_array[iterations] = norm(self.grad)
        #     obj_grad_check_array[iterations]=norm((grad-grad_checker(beta,rho,ep,opt_on)))

        if obj < ep:
            print 'Stopping crit: obj small', obj
            return True

        if norm(self.grad) < ep:
            print 'Stopping crit: norm(grad) small', norm(self.grad)
            return True
        
        
        if g_loss_type == 'square-hinge' and g_method == 'Newton':
            resid = self.get_resid(self.beta, self.rho, self.grad, loss_vect_list, opt_on)
        else:
            resid = self.grad

        if opt_on == 'rho':
            self.step_size_rho = self.backtrack_step_size_rho(self.step_size_rho, obj, resid, self.beta, self.rho)
            self.rho = self.step(self.rho, self.step_size_rho, resid)  # Update
        else:
            self.step_size_beta = self.backtrack_step_size_beta(self.step_size_beta, obj, resid, self.beta, self.rho)
            self.beta = self.step(self.beta, self.step_size_beta, resid)  # Update

        end = time.time()
        if end - start > timer_thresh:
            print 'grad_des_iterate:', end - start, 'sec'

        return False

    
    def grad_des(self, prev_run=None):
        start = time.time()

        if prev_run is None:
            self.beta = zeros(self.m)
            self.rho = initial_rho
        else:
            self.beta = prev_run.beta
            self.rho = prev_run.rho

        print 'grad_des, sv',len(np.where(np.absolute(self.rho - \
                                                      np.dot(self.K[self.support_vectors, :], self.beta)) \
                                          >= delta)[0])
        
        if len(np.where(np.absolute(self.rho - np.dot(self.K[self.support_vectors, :], self.beta)) \
                                          >= delta)[0])==0: # No support vectors (out of bounds vectors)
            return Run(None,None,None,self.beta, self.rho, 0)
        
        self.obj_array = -1 * np.ones(max_iter)
        self.obj_grad_array = np.zeros((max_iter))
        self.obj_grad_check_array = np.zeros(max_iter)

        self.grad_buffer = zeros(len(self.support_vectors))
        self.step_size_beta = initial_step_size
        self.step_size_rho = initial_step_size
        
        end = time.time()
        if end - start > timer_thresh:
            print 'grad_des alloc mem:', end - start, 'sec'

        iterations = 0
        for i in range(max_iter):

            converged = self.grad_des_iterate(iterations, opt_on='b')
            if converged:
                break

            print 'grad',norm(self.grad)

            converged = self.grad_des_iterate(iterations, opt_on='rho')
            if converged:
                break

            print 'grad',norm(self.grad)
            
            if i == max_iter - 1:
                print 'WARNING: Did not converge'

            iterations += 1

        print 'Desc iterations', iterations
        print 'Desc rho', self.rho
            
        end = time.time()
        if end - start > timer_thresh:
            print 'grad_des:', end - start, 'sec'
        return Run(self.obj_array, self.obj_grad_array, self.obj_grad_check_array, self.beta, self.rho, iterations)

    def get_K_inv(self, K):
        start = time.time()

        K_inv = inv(K)

        end = time.time()
        if end - start > timer_thresh:
            print 'get_K_inv:', end - start, 'sec'
        return K_inv

    def get_K_cond(self, K):
        start = time.time()

        K_cond = linalg.cond(K)

        end = time.time()
        if end - start > timer_thresh:
            print 'get_K_cond:', end - start, 'sec'
        return K_cond

    def pre_comp_K(self):
        start = time.time()

        K = get_K()

        end = time.time()
        if end - start > timer_thresh:
            print 'pre_comp_K:', end - start, 'sec'
        return K  # , K_inv

    def __init__(self, K, x_data, support_vectors=None):
        self.K = K
        self.x_data = x_data
        self.m = len(self.x_data)
        if support_vectors is None:
            self.support_vectors = np.arange(0, self.m)
        else:
            self.support_vectors = support_vectors
        print 'Slab_SVM',len(self.support_vectors)

    def run(self):
        if len(self.support_vectors) <= 500:
            return self.grad_des()
        this_run = Slab_SVM(self.K, self.x_data, random.choice(self.support_vectors, 
                                                               max(500,len(self.support_vectors) - 500),
                                                               replace=False)).run()
        return self.grad_des(this_run)


def get_data_points():
    start = time.time()
    points = random.random((points_count, 2)) * 2 * np.pi

    x = np.zeros((points_count, 3))
    for p in range(points_count):
        if points_std_from_surface > 0:
            r = random.normal(loc=1, scale=points_std_from_surface)
        else:
            r = 1
        z_cord = r * np.sin(points[p][1])

        r_temp = r * np.cos(points[p][1])
        y_cord = r_temp * np.sin(points[p][0])
        x_cord = r_temp * np.cos(points[p][0])

        x[p] = np.asarray([x_cord, y_cord, z_cord])

    end = time.time()
    if end - start > timer_thresh:
        print 'get_data_points:', end - start, 'sec'
    return x


g_x = get_data_points()

fig = plt.figure(figsize=(10, 12))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(g_x[:, 0], g_x[:, 1], g_x[:, 2])
plt.show()

Run = namedtuple('Run', ['obj_array', 'obj_grad_array', 'obj_grad_check_array', 'beta', 'rho',
                         'iterations'])

g_K = get_K()

import warnings

with warnings.catch_warnings(record=True) as w:
    g_Desc = {}
    g_counter = 0
    for g_loss_type in ['square-hinge', 'hinge']:
        for g_method in ['Newton', '']:
            print '-----------------------------------'
            print 'loss_type', g_loss_type
            print 'method', g_method
            g_Desc[g_counter] = Slab_SVM(g_K, g_x).run()
            print '-----------------------------------'
            print

            g_counter += 1
        # only the square-hinge runs (Newton vs. steepest descent) are compared below
        break

Newton_Desc = g_Desc[0]    # square-hinge, Newton's method
Steepest_Desc = g_Desc[1]  # square-hinge, steepest descent

In [ ]:
grid_steps = 25

def pop_data_grid(beta,rho):
    start = time.time()
    data = np.zeros((grid_steps,grid_steps,grid_steps))

    x0_range = np.linspace(-2, 2, grid_steps)
    x1_range = np.linspace(-2, 2, grid_steps)
    x2_range = np.linspace(-2, 2, grid_steps)
    end = time.time()
    if end - start > timer_thresh:
        print 'pop_data_grid alloc mem:',end - start

    for i in range(grid_steps):
        for j in range(grid_steps):
            for k in range(grid_steps):
                data[i,j,k] = f(np.asarray([x0_range[i],
                                x1_range[j],
                                x2_range[k]]), beta,rho)
                
    end = time.time()
    if end - start > timer_thresh:
        print 'pop_data_grid:',end - start,'sec'
    return data

def proc_data(beta,rho,data):
    start = time.time()

    print 'delta',delta
    print 'np.abs(data - delta) < .1 -> ',(np.where(np.abs(data - delta) < .1)[0].shape)
    print 'np.abs(data - delta) < .01 -> ',(np.where(np.abs(data - delta) < .01)[0].shape)
    print 'np.abs(data - delta) < .001 -> ',(np.where(np.abs(data - delta) < .001)[0].shape)
    print 'np.abs(data - delta) < .0001 -> ',(np.where(np.abs(data - delta) < .0001)[0].shape)
    print 'data < delta -> ',(np.where(data < delta )[0].shape)
    print 'data > delta -> ',(np.where(data > delta )[0].shape)
    print 'data < 0 -> ',(np.where( data < 0)[0].shape)
    print 'data == 0 -> ',(np.where( data == 0)[0].shape)
    print 'data > 0 -> ',(np.where( data > 0)[0].shape)
    print 'min -> ',(np.amin( data ))
    print 'max -> ',(np.amax( data ))
#     print 'data:',data
    
    end = time.time()
    if end - start > timer_thresh:
        print 'proc_results:',end - start

rho = g_Desc[0].rho
beta = g_Desc[0].beta

def f(x_test, beta, rho):
    # decision value: <Phi(x_test), w> - rho = sum_k beta_k K(x_k, x_test) - rho
    return np.dot(beta, kernel_vect(g_x, x_test)) - rho

losses = []
for i in range(len(g_x)):
    losses.append(f(g_x[i], beta, rho))

data = pop_data_grid(beta, rho)

In [ ]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import sys
from mpl_toolkits.mplot3d.art3d import Poly3DCollection

from skimage import measure
from skimage.draw import ellipsoid

# Use marching cubes to obtain a triangular mesh of the zero level set of f
verts, faces = measure.marching_cubes(data, 0)

# Display resulting triangular mesh using Matplotlib. This can also be done
# with mayavi (see skimage.measure.marching_cubes docstring).
fig = plt.figure(figsize=(10, 12))
ax = fig.add_subplot(111, projection='3d')

# Fancy indexing: `verts[faces]` to generate a collection of triangles
mesh = Poly3DCollection(verts[faces])
ax.add_collection3d(mesh)

ax.set_xlabel("x-axis")
ax.set_ylabel("y-axis")
ax.set_zlabel("z-axis")

ax.set_xlim(-0, 30)  
ax.set_ylim(-0, 30)  
ax.set_zlim(-0, 30)  

plt.show()

In [ ]:
%matplotlib nbagg
plt.clf()
plt.cla()

ax = plt.subplot(1,1,1)

ax.scatter(range(1,Newton_Desc.iterations+1),
           Newton_Desc.obj_array[0:Newton_Desc.iterations],marker='^',
           label='Non-Stochastic Newtons Method')
ax.scatter(range(1,Steepest_Desc.iterations+1),
           Steepest_Desc.obj_array[0:Steepest_Desc.iterations],marker='*',
           label='Non-Stochastic Steepest Descent')

handles, labels = ax.get_legend_handles_labels()
plt.legend(handles, labels)
plt.title('Objective Function over iterations')
plt.ylabel('F (w)')
plt.xlabel('Iteration')

In [ ]:
%matplotlib nbagg
plt.clf()
plt.cla()

from numpy.linalg import norm

ax = plt.subplot(1,1,1)

ax.scatter(range(1,(Newton_Desc.iterations)+1),
           Newton_Desc.obj_grad_array[0:Newton_Desc.iterations],
           marker='^',
           label='Non-Stochastic Newtons Method')

ax.scatter(range(1,(Steepest_Desc.iterations)+1),
           Steepest_Desc.obj_grad_array[0:Steepest_Desc.iterations],
           marker='*', 
           label='Non-Stochastic Steepest Descent')

handles, labels = ax.get_legend_handles_labels()
plt.legend(handles, labels)
plt.title('Gradient Norm over iterations')
plt.ylabel('norm(d/dw F (w))')
plt.xlabel('Iteration')

In [ ]:
%matplotlib nbagg
plt.clf()
plt.cla()

from numpy.linalg import norm

ax = plt.subplot(1,1,1)

ax.scatter(range(1,(Steepest_Desc.iterations)+1),
           Steepest_Desc.obj_grad_check_array[0:Steepest_Desc.iterations],
           marker='*',
           label='Non-Stochastic Steepest Descent')

handles, labels = ax.get_legend_handles_labels()
plt.legend(handles, labels)
plt.title('Gradient Norm and Approx. Gradient Norm Difference \n over iterations')
plt.ylabel('Difference')
plt.xlabel('Iteration')

In [ ]: