Primal Slab SVM

Let $K \in R^{m \times m}$ with $K_{ij} = \texttt{kernel}(x_i,x_j)$, and let $K_{i}$ denote the $i^{th}$ column of $K$.

Evaluation: $$ \langle \Phi(x), w\rangle = \sum_k \beta_k k(x_k, x) $$

The primal minimization objective is then: $$\min_{\beta \in R^m,\rho \in R} \beta^T K \beta + \frac{1}{\nu m} \sum_i \texttt{loss}(\langle \Phi(x_i), w\rangle, \rho) - \rho$$ Since $\langle \Phi(x_i), w\rangle = K_i^T \beta$, this is equivalent to: $$\min_{\beta \in R^m,\rho \in R} \beta^T K \beta + \frac{1}{\nu m} \sum_i \texttt{loss}(K_i^T \beta, \rho) - \rho$$

Let $F$ be the objective function. $$F(\beta,\rho) = \beta^T K \beta + \frac{1}{\nu m} \sum_i \texttt{loss}(K_i^T \beta, \rho) - \rho$$
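A minimal numpy sketch of $F$, assuming an RBF kernel and the square-hinge slab loss defined below (the helper names and toy data are illustrative, not part of the notebook's API):

import numpy as np

def rbf_kernel_matrix(X, sigma=1.0):
    # K[i, j] = exp(-||x_i - x_j||^2 / (2 sigma^2))
    sq = np.sum(X**2, axis=1)
    d2 = sq[:, None] + sq[None, :] - 2.0 * np.dot(X, X.T)
    return np.exp(-d2 / (2.0 * sigma**2))

def slab_loss(t, rho, delta=0.0):
    # square-hinge slab loss: max(0, |rho - t| - delta)^2
    return np.maximum(0.0, np.abs(rho - t) - delta)**2

def objective(beta, rho, K, nu=0.1, delta=0.0):
    m = K.shape[0]
    t = np.dot(K, beta)                                  # t_i = K_i^T beta
    return np.dot(beta, t) + np.sum(slab_loss(t, rho, delta)) / (nu * m) - rho

X_toy = np.random.randn(20, 3)
K_toy = rbf_kernel_matrix(X_toy)
print(objective(np.zeros(20), 1.0, K_toy))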

Gradients: $$\nabla_\beta F(\beta,\rho) = 2K\beta + \frac {1}{\nu m} \sum_i K_i \, \frac{d}{dt}\texttt{loss}(t, \rho)\Big|_{t=K_i^T \beta}$$ $$\nabla_\rho F(\beta,\rho) = \frac {1}{\nu m} \sum_i \frac{d}{d\rho}\texttt{loss}(K_i^T \beta, \rho)-1$$

Hessians: $$H_\beta = 2K + \frac {1}{\nu m} \sum_i K_i K_i^T \, \frac{d^2}{dt^2}\texttt{loss}(t, \rho)\Big|_{t=K_i^T \beta}$$ $$H_\rho = \frac {1}{\nu m} \sum_i \frac{d^2}{d\rho^2}\texttt{loss}(K_i^T \beta, \rho)$$
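The closed-form $\nabla_\beta F$ for the square-hinge loss can be spot-checked against central finite differences; this sketch reuses `objective` and `K_toy` (illustrative names) from the snippet above:

def grad_beta(beta, rho, K, nu=0.1, delta=0.0):
    # 2*K*beta + 1/(nu*m) * sum_i K_i * dloss/dt(K_i^T beta, rho)
    m = K.shape[0]
    r = rho - np.dot(K, beta)
    dloss_dt = np.where(r > delta, -2.0 * (r - delta),
               np.where(-r > delta, 2.0 * (-r - delta), 0.0))
    return 2.0 * np.dot(K, beta) + np.dot(K, dloss_dt) / (nu * m)

def numeric_grad(F, beta, eps=1e-6):
    g = np.zeros_like(beta)
    for i in range(len(beta)):
        e = np.zeros_like(beta); e[i] = eps
        g[i] = (F(beta + e) - F(beta - e)) / (2.0 * eps)
    return g

beta_toy = 0.1 * np.random.randn(K_toy.shape[0])
diff = grad_beta(beta_toy, 1.0, K_toy) - numeric_grad(lambda b: objective(b, 1.0, K_toy), beta_toy)
print(np.max(np.abs(diff)))  # expected to be tiny (finite-difference noise)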

We consider losses: $$\texttt{loss}_{hinge}(t,\rho) = \max(~0,~ |~\rho - t~| - \delta ~)$$ $$\texttt{loss}_{square-hinge}(t,\rho) = \max(~0,~ |~\rho - t~| - \delta ~)^2$$

Loss Gradients: $$\frac{d}{dt}\texttt{loss}_{hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ -1, & \mbox{if } ~\rho - t~ \gt \delta \\ 1, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$

$$\frac{d}{dt}\texttt{loss}_{square-hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ -2\left(\rho-t-\delta\right), & \mbox{if } ~\rho - t~ \gt \delta \\ 2\left(-\rho+t-\delta\right), & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$
$$\frac{d}{d\rho}\texttt{loss}_{hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 1, & \mbox{if } ~\rho - t~ \gt \delta \\ -1, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$

$$\frac{d}{d\rho}\texttt{loss}_{square-hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 2\left(\rho-t-\delta\right), & \mbox{if } ~\rho - t~ \gt \delta \\ -2\left(-\rho+t-\delta\right), & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$

Loss Hessians: $$\frac{d^2}{(dt)^2}\texttt{loss}_{hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 0, & \mbox{if } ~\rho - t~ \gt \delta \\ 0, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$

$$\frac{d^2}{(dt)^2}\texttt{loss}_{square-hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 2, & \mbox{if } ~\rho - t~ \gt \delta \\ 2, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$
$$\frac{d^2}{(d\rho)^2}\texttt{loss}_{hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 0, & \mbox{if } ~\rho - t~ \gt \delta \\ 0, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$

$$\frac{d^2}{(d\rho)^2}\texttt{loss}_{square-hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 2, & \mbox{if } ~\rho - t~ \gt \delta \\ 2, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$
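A quick numerical spot-check of the $\rho$-derivative cases for the hinge loss (a standalone sketch with illustrative names and an arbitrary $\delta$):

import numpy as np

def hinge(t, rho, delta=0.25):
    return np.maximum(0.0, np.abs(rho - t) - delta)

def dhinge_drho(t, rho, delta=0.25):
    # +1 when rho - t > delta, -1 when t - rho > delta, 0 inside the slab
    return np.where(rho - t > delta, 1.0, np.where(t - rho > delta, -1.0, 0.0))

rho, eps = 0.5, 1e-6
for t in (-1.0, 0.4, 2.0):   # below the slab, inside it, above it
    numeric = (hinge(t, rho + eps) - hinge(t, rho - eps)) / (2.0 * eps)
    print(t, dhinge_drho(t, rho), numeric)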

Evaluation: $$ \langle \Phi(x), w\rangle = \sum_k \beta_k k(x_k, x) $$ The decision surface is the zero level set of: $$ \langle \Phi(x), w\rangle -\rho = \sum_k \beta_k k(x_k, x) -\rho $$
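A minimal sketch of evaluating the decision value for a new point, assuming an RBF kernel (`rbf_vect` and `decision` are illustrative names); points with $|f(x)| \le \delta$ lie inside the slab around the surface:

import numpy as np

def rbf_vect(X, x, sigma=1.0):
    # k(x_k, x) for every training point x_k
    return np.exp(-np.sum((X - x)**2, axis=1) / (2.0 * sigma**2))

def decision(x, X, beta, rho, sigma=1.0):
    # f(x) = sum_k beta_k k(x_k, x) - rho
    return np.dot(beta, rbf_vect(X, x, sigma)) - rho

X_toy = np.random.randn(20, 3)
beta_toy = 0.1 * np.random.randn(20)
print(decision(np.zeros(3), X_toy, beta_toy, rho=0.0))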


In [ ]:
%matplotlib inline

import numpy as np
from numpy import linalg, random, ones, zeros, matrix, eye, dot
from numpy.linalg import norm,cholesky,inv
import math
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import sys
import time 
from collections import namedtuple

v = .001                     # nu: slab SVM regularization parameter
delta = 0.0                  # slab half-width
sigma = 10                   # RBF kernel bandwidth
initial_rho = 1
max_iter = 100
initial_step_size = .1
timer_thresh = .1            # only print timings that exceed this many seconds
ep = 1e-8                    # convergence / numerical tolerance
points_count = 1000
points_std_from_surface = 0  # std of sampled points around the unit sphere (0 = exactly on it)

def incomplete_cholesky_decomp(K,G):
    start = time.time()

    assert K.shape[0] == K.shape[1]
    n = K.shape[0]

    k=-1

    np.fill_diagonal(G[0:n,0:n], np.diagonal(K[0:n,0:n]))
    
    for i in range(n):
        if np.sum(np.diagonal(G[i:n,i:n])) > .5:
            j = np.argmax(np.diagonal(G[i:n,i:n]))+i

            temp = K.T[0:n,i].copy()
            K.T[0:n,i] = K.T[0:n,j]
            K.T[0:n,j] = temp

            temp = K.T[i,0:n].copy()
            K.T[i,0:n] = K.T[j,0:n]
            K.T[j,0:n] = temp

            temp = G[i,0:i+1].copy()
            G[i,0:i+1] = G[j,0:i+1]
            G[j,0:i+1] = temp

            G[i,i] = math.sqrt(K[i,i])

            G[i+1:n,i] = 1/G[i,i]*(K[i+1:n,i]- np.dot(G[i+1:n,0:i],G[i,0:i]) )

            
            np.fill_diagonal(G[i:n,i:n], np.diagonal(K[i:n,i:n]) - np.sum(np.power(G[i:n,0:i],2),axis=1))
        else:
            k=i
            break

    end = time.time()
    if end - start > timer_thresh:
        print 'incomplete_cholesky_decomp:', end - start, 'sec'
    return G,k
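# Note: incomplete_cholesky_decomp above is a pivoted (incomplete) Cholesky
# factorization: it greedily pivots on the largest remaining residual diagonal,
# builds the lower-triangular factor G column by column (applying the symmetric
# pivots to K in place), and stops once the residual trace falls below 0.5,
# returning G and the stopping index k (k == -1 means no early stop).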


# class Primal_Opt:
class Slab_SVM:
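    # Primal slab SVM optimizer. H() returns the block Hessian used for Newton
    # steps with the square-hinge loss; `loss_vect_list` indexes the points
    # outside the slab, the only ones with a nonzero loss second derivative.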

    def H(self,beta,rho,loss_vect_list,opt_on):
        start = time.time()

    #     assert g_loss_type != 'hinge'  # the hinge loss has a zero second derivative

        if opt_on=='b':
            # H_beta = 2K + 2/(nu*m) * sum_{i outside slab} K_i K_i^T
            K_sv = self.K[:,loss_vect_list]
            ret = 2*self.K + 2.0/(v*self.m)*np.dot(K_sv,K_sv.T)
        elif opt_on == 'rho':
            # H_rho = 2*|points outside slab|/(nu*m); guard against an empty set
            ret = 2.0*max(len(loss_vect_list),1)/(v*self.m)

        end = time.time()
        if end - start > timer_thresh:
            print 'H:',end - start,'sec'
        return ret

    def loss_der_der(self,t,rho):
        if g_loss_type == 'hinge':
            return 0
        if g_loss_type == 'square-hinge':
            if abs(rho - t) < delta:
                return 0
            else:
                return 2
        raise Exception(g_loss_type,t,rho,delta)

    def loss_der_vect(self,grad,t,rho,opt_on):
        # Vectorized loss derivative; opt_on == 'b' gives d/dt, opt_on == 'rho' gives d/drho.
        grad.fill(0)
        if g_loss_type == 'hinge':
            grad[ np.absolute(rho - t) <= delta ] = 0
            if opt_on=='b':
                grad[ rho - t > delta ] = -1
                grad[ -rho + t > delta ] = 1
            if opt_on=='rho':
                grad[ rho - t > delta ] = 1
                grad[ -rho + t > delta ] = -1
            return grad
        if g_loss_type == 'square-hinge':
            grad[ np.absolute(rho - t) <= delta ] = 0
            if opt_on=='b':
                grad[ rho - t > delta ] = -2.0*(rho - t[rho - t > delta] - delta)
                grad[ -rho + t > delta ] = 2.0*(-rho + t[-rho + t > delta] - delta)
                return grad
            if opt_on=='rho':
                grad[ rho - t > delta ] = 2*(rho - t[rho - t > delta] - delta)
                grad[ -rho + t > delta ] = -2*(-rho + t[-rho + t > delta] - delta)
                return grad
        raise Exception(grad,g_loss_type,t,rho,delta)

    def z(self,x1,w,b):
#         w = random.normal(0, 1.0/sigma, size=(D,len(x1)))
#         b = random.uniform(0,2*np.pi,size=D)
        return math.sqrt(2.0/D) * np.cos(np.dot(w, x1) + b)

    def kernel(self,x1,x2):
        return math.exp(-1*math.pow(norm(x1-x2),2)/(2*math.pow(sigma, 2)))

    def kernel_vect(self,x_list,x2):
        return np.exp(-1*np.power(norm(x_list-x2,axis=1),2 )/(2*math.pow(sigma, 2)))

    def loss_vect(self,t,rho):
        if g_loss_type == 'hinge':
            return np.maximum(np.zeros(t.shape), np.absolute(rho - t) - delta )
        if g_loss_type == 'square-hinge':
            return np.power(np.maximum(np.zeros(t.shape), np.absolute(rho - t) - delta ),2)

#     def loss_matrix_rho_vect(self,t,rho):
#         rho = np.matrix(rho).T
#         t = np.matrix(t)
#         if g_loss_type == 'hinge':
#             return np.maximum(np.zeros((t.shape[0],rho.shape[1])), np.absolute(t - rho) - delta )
#         if g_loss_type == 'square-hinge':
#             return np.power(np.maximum(np.zeros((t.shape[0],rho.shape[1])), np.absolute(t - rho) - delta ),2)

#     def loss_matrix_t_matrix(self,t,rho):
#         if g_loss_type == 'hinge':
#             return np.maximum(np.zeros(t.shape), np.absolute(t - rho) - delta )
#         if g_loss_type == 'square-hinge':
#             return np.power(np.maximum(np.zeros(t.shape), np.absolute(t - rho) - delta ),2)

    def obj_funct(self,beta,rho):
        start = time.time()

        # F(beta,rho) = beta^T K beta + 1/(nu*m) * sum_i loss(K_i^T beta, rho) - rho
        obj = np.dot(beta,np.dot(self.K,beta)) \
              + 1.0/(v*self.m)*np.sum(self.loss_vect(np.dot(self.K,beta),rho)) \
              - rho

        end = time.time()
        if end - start > timer_thresh:
            print 'obj_funct:',end - start,'sec'
        return obj

#     def obj_funct_beta_matrix(self,beta,rho):
#         start = time.time()

#         obj = 1.0/2*(norm(np.multiply(beta,np.dot(self.K,beta)),axis=0)) \
#                 + 1.0 / (v*self.m) * np.sum(self.loss_matrix_t_matrix(np.dot(self.K,beta),rho),axis=0)

#         end = time.time()
#         if end - start > timer_thresh:
#             print 'obj_funct_beta_matrix:',end - start,'sec'
#         return obj

#     def obj_funct_rho_vect(self,beta,rho):
#         start = time.time()

#         obj = 1.0/2*np.dot(beta,np.dot(self.K,beta)) + 1.0 / (v*self.m) * np.sum(
#                                                                     self.loss_matrix_rho_vect(np.dot(self.K,beta),
#                                                                                                  rho),axis=1)
#     #     assert len(obj) == len(rho)

#         end = time.time()
#         if end - start > timer_thresh:
#             print 'obj_funct_rho_vect:',end - start,'sec'
#         return obj

    def f(self,x_test, beta,rho):
        start = time.time()

        w = np.dot(beta,self.kernel_vect(self.x_data,x_test)) - rho

        end = time.time()
        if end - start > timer_thresh:
            print 'f:',end - start,'sec'
        return w

    def step(self,element,step_size,resid):
        return element - (step_size * resid)
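    # Backtracking line search used for both blocks below: retry from the last
    # accepted step size (grown by a constant factor); if the objective does not
    # decrease, scan a geometrically shrinking grid of candidate steps and return
    # the first one that decreases F, flooring the step size at ep**2.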

    def backtrack_step_size_rho(self,step_size,obj,resid,beta,rho):
        start = time.time()
        number_of_steps = 2
        if step_size == ep**2:
            step_size = initial_step_size
        else:
            step_size *= 4.0

        iter_count=0

        if obj > (self.obj_funct( beta, self.step(rho,step_size,resid)) ):
            end = time.time()
            if end - start > timer_thresh:
                print 'backtrack_step_size_rho:',end - start,'sec',iter_count,'times'
            return step_size

        while True:
            iter_count += 1
            steps = step_size*np.logspace(-1,-number_of_steps,num = number_of_steps,base=2.0)
#             obj_many_steps = np.array(self.obj_funct_rho_vect( beta, self.step(rho,steps,resid))).ravel()
            obj_many_steps = np.zeros(steps.shape)
            for i in range(number_of_steps):
                obj_many_steps[i] = self.obj_funct( beta, self.step(rho,steps[i],resid))

            if np.where(obj - obj_many_steps >= 0)[0].shape[0] > 0:
                end = time.time()
                if end - start > timer_thresh:
                    print 'backtrack_step_size_rho:',end - start,'sec',iter_count,'times'
                return steps[np.where(obj - obj_many_steps >= 0)[0][0]]

            step_size = steps[-1]
            if step_size < ep**2:
    #             print 'WARNING: step size not found'
                step_size = ep**2
                end = time.time()
                if end - start > timer_thresh:
                    print 'backtrack_step_size_rho:',end - start,'sec',iter_count,'times'
                return step_size
            number_of_steps *= 2

        assert False

    def backtrack_step_size_beta(self,step_size,obj,resid,beta,rho):
        start = time.time()
        number_of_steps = 2
        if step_size == ep**2:
            step_size = initial_step_size
        else:
            step_size *= 2.0

        iter_count=0

        if obj > (self.obj_funct( self.step(beta,step_size,resid),rho) ):
            end = time.time()
            if end - start > timer_thresh:
                print 'backtrack_step_size_beta:',end - start,'sec',iter_count,'times'
            return step_size

        while True:
            iter_count += 1
            steps = step_size*np.logspace(-1,-number_of_steps,num = number_of_steps,base=2.0)
#             obj_many_steps = (self.obj_funct_beta_matrix(np.asarray(self.step(np.matrix(beta),
#                                                                     np.matrix(steps).T,
#                                                                     np.matrix(resid)).T
#                                                               ), 
#                                                     rho))
            obj_many_steps = np.zeros(steps.shape)
            for i in range(number_of_steps):
                obj_many_steps[i] = self.obj_funct( self.step(beta,steps[i],resid),rho)


            if np.where(obj - obj_many_steps >= 0)[0].shape[0] > 0:
                end = time.time()
                if end - start > timer_thresh:
                    print 'backtrack_step_size_beta:',end - start,'sec',iter_count,'times'
                return steps[np.where(obj - obj_many_steps >= 0)[0][0]]

            step_size = steps[-1]
            if step_size < ep**2:
    #             print 'WARNING: step size not found'
                step_size = ep**2
                end = time.time()
                if end - start > timer_thresh:
                    print 'backtrack_step_size_beta:',end - start,'sec',iter_count,'times'
                return step_size
            number_of_steps *= 10

        assert False

    def numer_grad(self,beta,rho,ep,direct=0,opt_on=''): # const
        if opt_on == 'rho':
            return (self.obj_funct(beta,rho+ep) \
                   -self.obj_funct(beta,rho-ep))/(2*ep)
        return (self.obj_funct(beta+(ep*direct),rho) \
               -self.obj_funct(beta-(ep*direct),rho))/(2*ep)

    def grad_checker(self,beta,rho,ep,opt_on): # const
        start = time.time()

        if opt_on == 'rho':
            return self.numer_grad(beta,rho,ep,opt_on=opt_on)

        d=len(beta)
        w=np.zeros(d)
        for i in range(d):
            direct=np.zeros(beta.shape)
            direct[i] = 1
            w[i]=(self.numer_grad(beta,rho,ep,direct=direct,opt_on=opt_on))

        end = time.time()
        if end - start > timer_thresh:
            print 'grad_checker:',end - start,'sec'        
        return w
    
    def timed_solve(self,A,B):
        start = time.time()
        
        ret = linalg.solve(A,B)
        
        end = time.time()
        if end - start > timer_thresh:
            print 'timed_solve:',end - start,'sec'
        return ret

    def get_resid(self,beta,rho,grad,loss_vect_list,opt_on):
        start = time.time()

        if opt_on=='b':
            # Newton direction for beta: solve H_beta * resid = grad.
            # (An incomplete-Cholesky preconditioned solve was also prototyped here,
            # via incomplete_cholesky_decomp, but the direct solve is the one used.)
            resid = self.timed_solve(self.H(beta,rho,loss_vect_list,opt_on),grad)
        else:
            resid = grad/self.H(beta,rho,loss_vect_list,opt_on)

        end = time.time()
        if end - start > timer_thresh:
            print 'get_resid:',end - start,'sec'
        return resid

    def grad_des_iterate(self,iterations,opt_on='b'):
        start = time.time()
        loss_vect_list = np.where(np.absolute(self.rho - np.dot(self.K,self.beta)) >= delta)[0]            
        end = time.time()
        if end - start > timer_thresh:
            print 'find sv:',end - start,'sec'

        obj = self.obj_funct(self.beta,self.rho)
    #     print 'obj',obj
        self.obj_array[iterations]=(obj)

        if opt_on == 'b':
            # grad_beta = 2*K*beta + 1/(nu*m) * sum_i K_i * dloss/dt(K_i^T beta, rho)
            self.grad = 2.0*np.dot(self.K,self.beta) + 1.0/(v*self.m)*np.dot(
                                                                self.K,
                                                                self.loss_der_vect(self.grad_buffer,
                                                                                   np.dot(self.K,self.beta),
                                                                                   self.rho,
                                                                                   opt_on))
        elif opt_on == 'rho':
            # grad_rho = 1/(nu*m) * sum_i dloss/drho(K_i^T beta, rho) - 1
            self.grad = 1.0/(v*self.m)*np.sum(self.loss_der_vect(self.grad_buffer,
                                                                 np.dot(self.K,self.beta),
                                                                 self.rho,
                                                                 opt_on)) - 1.0

        self.obj_grad_array[iterations]=norm(self.grad)
    #     obj_grad_check_array[iterations]=norm((grad-grad_checker(beta,rho,ep,opt_on)))

        if iterations > 0 and abs(self.obj_array[iterations-1] - obj) < ep:
            print 'Stopping crit: obj change small',obj,'opt_on',opt_on
            return True
        if norm(self.grad) < ep:
            print 'Stopping crit: norm(grad) small',norm(self.grad),'opt_on',opt_on
            return True

        if g_loss_type == 'square-hinge' and g_method == 'Newton':
            resid = self.get_resid(self.beta,self.rho,self.grad,loss_vect_list,opt_on)
        else:
            resid = self.grad

        if opt_on == 'rho':
            self.step_size_rho = self.backtrack_step_size_rho(self.step_size_rho,obj,resid,self.beta,self.rho)
            self.rho = self.step(self.rho,self.step_size_rho,resid) # Update
        else:
            self.step_size_beta = self.backtrack_step_size_beta(self.step_size_beta,obj,resid,self.beta,self.rho)
            self.beta = self.step(self.beta,self.step_size_beta,resid) # Update

        end = time.time()
        if end - start > timer_thresh:
            print 'grad_des_iterate:',end - start,'sec'

        return False
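    # grad_des alternates block-coordinate updates: each outer iteration takes one
    # step in beta (a Newton step when g_method == 'Newton' and the loss is
    # square-hinge, otherwise steepest descent) followed by one step in rho, and
    # stops once both blocks report convergence or max_iter is reached.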

    def grad_des(self):
        start = time.time()

        self.obj_array = -1*np.ones(max_iter)
        self.obj_grad_array = np.zeros((max_iter))
        self.obj_grad_check_array = np.zeros(max_iter)

        self.beta = zeros(self.m)
        self.rho = initial_rho

        self.grad_buffer = zeros(self.beta.shape)
        self.step_size_beta = initial_step_size
        self.step_size_rho = initial_step_size
        self.iterations = 0
        for i in range(max_iter):
            
            converged_b = self.grad_des_iterate(self.iterations,opt_on='b')

            converged_rho = self.grad_des_iterate(self.iterations,opt_on='rho')
            
            if converged_b and converged_rho:
                break

            if i == max_iter-1:
                print 'WARNING: Did not converge'

            self.iterations += 1

        end = time.time()
#         if end - start > timer_thresh:
        print 'grad_des:',end - start,'sec'
#         return Run(self.obj_array,self.obj_grad_array,self.obj_grad_check_array,self.beta,self.rho,iterations)

    def pop_K(self):
        start = time.time()

        self.K=np.zeros((self.m,self.m))
        if Fourier_Feature:
            z_cache = np.zeros((self.m,D))
            w = random.normal(0, 1.0/sigma, size=(self.m*D,len(self.x_data[0])))
            b = random.uniform(0,2*np.pi,size=self.m*D)
            for i in range(self.m):
                z_cache[i]=self.z(self.x_data[i],w[i:i+D,:],b[i:i+D])
            end = time.time()
            if end - start > timer_thresh:
                print 'z_cache:',end - start,'sec'

            for i in range(self.m):
#                 for j in range(self.m):
                self.K[i,:] = np.dot(z_cache,z_cache[i])
#                         self.z(self.x_data[i]),self.z(self.x_data[j]))
        else:
            for i in range(self.m):
                self.K[i,:] = self.kernel_vect(self.x_data,self.x_data[i])

        if Fourier_Feature:
            K_test=np.zeros((self.m,self.m))
            for i in range(self.m):
                K_test[i,:] = self.kernel_vect(self.x_data,self.x_data[i])

            print 'Fourier norm diff', norm(K_test-self.K)
            
        end = time.time()
        if end - start > timer_thresh:
            print 'pop_K:',end - start,'sec'

    def get_K_inv(self,K):
        start = time.time()

        K_inv = inv(K)

        end = time.time()
        if end - start > timer_thresh:
            print 'get_K_inv:',end - start,'sec'
        return K_inv

    def get_K_cond(self,K):
        start = time.time()

        K_cond = linalg.cond(K)

        end = time.time()
        if end - start > timer_thresh:
            print 'get_K_cond:',end - start,'sec'
        return K_cond

    def pre_comp_K(self):
        start = time.time()

        self.pop_K()

        end = time.time()
        if end - start > timer_thresh:
            print 'pre_comp_K:',end - start,'sec'
        return self.K

    def __init__(self, x_data):
        self.x_data = x_data
        self.m = len(self.x_data)
        self.pop_K()
        self.grad_des()
        
def get_data_points():
    start = time.time()
    points = random.random((points_count,2))*2*np.pi

    x = np.zeros((points_count,3))
    for p in range(points_count):
        radius = 1
        if points_std_from_surface > 0:
            r = random.normal(loc=radius,scale=points_std_from_surface)
        else:
            r = radius
        z_cord = r * np.sin(points[p][1])

        r_temp = r * np.cos(points[p][1])
        y_cord = r_temp * np.sin(points[p][0])
        x_cord = r_temp * np.cos(points[p][0])

        x[p] = np.asarray([x_cord, y_cord, z_cord])

    end = time.time()
    if end - start > timer_thresh:
        print 'get_data_points:',end - start,'sec'
    return x
    
g_x = get_data_points()        

fig = plt.figure(figsize=(10, 12))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(g_x[:,0],g_x[:,1],g_x[:,2])
plt.show()

# Run = namedtuple('Run', ['obj_array','obj_grad_array','obj_grad_check_array','beta','rho',
#                          'iterations'])

Fourier_Feature = False

g_Desc = {}
g_counter=0
for g_loss_type in ['square-hinge', 'hinge']:
    for g_method in ['Newton', '']:

        print '-----------------------------------'
        print 'loss_type',g_loss_type
        print 'method',g_method        
        g_Desc[g_counter] = Slab_SVM(g_x)
        g_Desc[g_counter].D = 0
        print 'Desc iterations',g_Desc[g_counter].iterations
        print 'Desc rho',g_Desc[g_counter].rho
        g_counter += 1
        print '-----------------------------------'
        print 

#         Fourier_Feature = True

#         for i in range(13):# [1.,2.,50.,100.,500.,750.,800.,900.,1000.]:
#             D = 2**i
#             print '-----------------------------------'
#             print 'D',D
#             print 'loss_type',g_loss_type
#             print 'method',g_method        
#             g_Desc[g_counter] = Slab_SVM(g_x)
#             g_Desc[g_counter].D = D
#             print 'Desc iterations',g_Desc[g_counter].iterations
#             print 'Desc rho',g_Desc[g_counter].rho
#             g_counter += 1
#             print '-----------------------------------'
#             print 
        
        break
    break
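# Note: the two `break` statements above stop the sweep after the first
# configuration (square-hinge loss with Newton steps); remove them to run all
# four loss/method combinations. The commented-out inner loop sketches a sweep
# over the random-Fourier-feature dimension D when Fourier_Feature is True.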

In [ ]:
grid_steps = 25

def pop_data_grid(beta,rho):
    start = time.time()
    data = np.zeros((grid_steps,grid_steps,grid_steps))

    x0_range = np.linspace(-2, 2, grid_steps)
    x1_range = np.linspace(-2, 2, grid_steps)
    x2_range = np.linspace(-2, 2, grid_steps)
    end = time.time()
    if end - start > timer_thresh:
        print 'alloc mem:',end - start

    for i in range(grid_steps):
        for j in range(grid_steps):
            for k in range(grid_steps):
                data[i,j,k] = f(np.asarray([x0_range[i],
                                x1_range[j],
                                x2_range[k]]), beta,rho)
                
    end = time.time()
    if end - start > timer_thresh:
        print 'pop_data_grid:',end - start,'sec'
    return data

def proc_data(beta,rho,data):
    start = time.time()

    print 'delta',delta
    print 'rho',rho
    print 'np.abs(data - delta) < .1 -> ',(np.where(np.abs(data - delta) < .1)[0].shape)
    print 'np.abs(data - delta) < .01 -> ',(np.where(np.abs(data - delta) < .01)[0].shape)
    print 'np.abs(data - delta) < .001 -> ',(np.where(np.abs(data - delta) < .001)[0].shape)
    print 'np.abs(data - delta) < .0001 -> ',(np.where(np.abs(data - delta) < .0001)[0].shape)
    print 'data < delta -> ',(np.where(data < delta )[0].shape)
    print 'data > delta -> ',(np.where(data > delta )[0].shape)
    print 'data < 0 -> ',(np.where( data < 0)[0].shape)
    print 'data == 0 -> ',(np.where( data == 0)[0].shape)
    print 'data > 0 -> ',(np.where( data > 0)[0].shape)
    print 'min -> ',(np.amin( data ))
    print 'max -> ',(np.amax( data ))
#     print 'data:',data
    
    end = time.time()
    if end - start > timer_thresh:
        print 'proc_data:',end - start

for counter in g_Desc:
    rho = g_Desc[counter].rho
    beta = g_Desc[counter].beta
    print 'obj_funct',g_Desc[counter].obj_funct(beta,rho)
    f = g_Desc[counter].f
    g_m = len(g_x)

    print 'D',g_Desc[counter].D
    scores = []
    for i in range(g_m):
        scores.append(f(g_x[i], beta, rho))
    scores = np.asarray(scores)
    print 'f(x) min -> ',(np.amin( scores ))
    print 'f(x) argmin -> ',(np.argmin( scores ))
    print 'point at argmin -> ',g_x[(np.argmin( scores ))]
    print 'f(x) max -> ',(np.amax( scores ))
    print 'f(x) argmax -> ',(np.argmax( scores ))
    print 'point at argmax -> ',g_x[(np.argmax( scores ))]

        
    data = pop_data_grid(beta,rho)
    proc_data(beta,rho,data)
#     break

    %matplotlib inline
    import numpy as np
    import matplotlib.pyplot as plt
    import sys
    from mpl_toolkits.mplot3d.art3d import Poly3DCollection

    from skimage import measure
    from skimage.draw import ellipsoid

    # Use marching cubes to extract a triangular mesh of the zero level set of f
    verts, faces = measure.marching_cubes(data, 0)

    # Display resulting triangular mesh using Matplotlib. This can also be done
    # with mayavi (see skimage.measure.marching_cubes docstring).
    fig = plt.figure(figsize=(10, 12))
    ax = fig.add_subplot(111, projection='3d')

    # Fancy indexing: `verts[faces]` to generate a collection of triangles
    mesh = Poly3DCollection(verts[faces])
    ax.add_collection3d(mesh)

    ax.set_xlabel("x-axis")
    ax.set_ylabel("y-axis")
    ax.set_zlabel("z-axis")

    ax.set_xlim(-0, 30)  
    ax.set_ylim(-0, 30)  
    ax.set_zlim(-0, 30)  

    plt.show()

In [ ]:
%matplotlib nbagg
plt.clf()
plt.cla()
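# Newton_Desc and Steepest_Desc below are assumed to be two Slab_SVM runs from
# the training loop above (e.g. g_method 'Newton' and '' with the breaks removed);
# map them from g_Desc before plotting, for example:
# Newton_Desc, Steepest_Desc = g_Desc[0], g_Desc[1]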

ax = plt.subplot(1,1,1)

ax.scatter(range(1,Newton_Desc.iterations+1),
           Newton_Desc.obj_array[0:Newton_Desc.iterations],marker='^',
           label="Non-Stochastic Newton's Method")
ax.scatter(range(1,Steepest_Desc.iterations+1),
           Steepest_Desc.obj_array[0:Steepest_Desc.iterations],marker='*',
           label='Non-Stochastic Steepest Descent')

handles, labels = ax.get_legend_handles_labels()
plt.legend(handles, labels)
plt.title('Objective Function over iterations')
plt.ylabel('F (w)')
plt.xlabel('Iteration')

In [ ]:
%matplotlib nbagg
plt.clf()
plt.cla()

from numpy.linalg import norm

ax = plt.subplot(1,1,1)

ax.scatter(range(1,(Newton_Desc.iterations)+1),
           Newton_Desc.obj_grad_array[0:Newton_Desc.iterations],
           marker='^',
           label="Non-Stochastic Newton's Method")

ax.scatter(range(1,(Steepest_Desc.iterations)+1),
           Steepest_Desc.obj_grad_array[0:Steepest_Desc.iterations],
           marker='*', 
           label='Non-Stochastic Steepest Descent')

handles, labels = ax.get_legend_handles_labels()
plt.legend(handles, labels)
plt.title('Gradient Norm over iterations')
plt.ylabel('norm(d/dw F (w))')
plt.xlabel('Iteration')

In [ ]:
%matplotlib nbagg
plt.clf()
plt.cla()

from numpy.linalg import norm

ax = plt.subplot(1,1,1)

ax.scatter(range(1,(Steepest_Desc.iterations)+1),
           Steepest_Desc.obj_grad_check_array[0:Steepest_Desc.iterations],
           marker='*',
           label='Non-Stochastic Steepest Descent')

handles, labels = ax.get_legend_handles_labels()
plt.legend(handles, labels)
plt.title('Gradient Norm and Approx. Gradient Norm Difference \n over iterations')
plt.ylabel('Difference')
plt.xlabel('Iteration')

In [ ]: