Let $K \in \mathbb{R}^{m \times m}$ with $K_{ij} = \texttt{kernel}(x_i,x_j)$, and let $K_{i}$ denote the $i^{th}$ column of $K$ (since $K$ is symmetric, this is also the $i^{th}$ row).
The primal minimization objective is $$\min_{\beta \in \mathbb{R}^m,\, \rho \in \mathbb{R}} \ \beta^T K \beta + \frac{1}{\nu m} \sum_i \texttt{loss}(K_i^T \beta, \rho)$$
Let $F$ be the objective function. $$F(\beta,\rho) = \beta^T K \beta + \frac{1}{\nu m} \sum_i \texttt{loss}(K_i^T \beta, \rho)$$
Gradients (writing $\texttt{loss}'$ and $\texttt{loss}''$ for the first and second derivatives of $\texttt{loss}$ with respect to its first argument $t$): $$\nabla_\beta F(\beta,\rho) = 2K\beta + \frac {1}{\nu m} \sum_i K_i \, \texttt{loss}'(K_i^T \beta, \rho)$$ $$\nabla_\rho F(\beta,\rho) = \frac {1}{\nu m} \sum_i \frac{\partial}{\partial\rho}\texttt{loss}(K_i^T \beta, \rho)$$
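The first term uses $\nabla_\beta\, \beta^T K \beta = 2K\beta$ (by symmetry of $K$); the loss term is the chain rule applied to each summand: $$\nabla_\beta\, \texttt{loss}(K_i^T \beta, \rho) = \texttt{loss}'(K_i^T \beta, \rho)\, \nabla_\beta \left(K_i^T \beta\right) = \texttt{loss}'(K_i^T \beta, \rho)\, K_i.$$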
Hessians: $$H_\beta = 2K + \frac {1}{\nu m} \sum_i K_i K_i^T \, \texttt{loss}''(K_i^T \beta, \rho)$$ $$H_\rho = \frac {1}{\nu m} \sum_i \frac{\partial^2}{(\partial\rho)^2}\texttt{loss}(K_i^T \beta, \rho)$$
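With these quantities, the (damped) Newton update used for the square-hinge loss is $$\beta \leftarrow \beta - \eta\, H_\beta^{-1} \nabla_\beta F(\beta,\rho), \qquad \rho \leftarrow \rho - \eta\, \frac{\nabla_\rho F(\beta,\rho)}{H_\rho},$$ with the step size $\eta$ chosen by backtracking line search; when Newton's method is not selected (or for the non-smooth hinge loss) the code below takes plain (sub)gradient steps instead.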
We consider losses: $$\texttt{loss}_{hinge}(t,\rho) = \max(~0,~ |~\rho - t~| - \delta ~)$$ $$\texttt{loss}_{square-hinge}(t,\rho) = \max(~0,~ |~\rho - t~| - \delta ~)^2$$
Loss Gradients: $$\frac{d}{dt}\texttt{loss}_{hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ -1, & \mbox{if } ~\rho - t~ \gt \delta \\ 1, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$
$$\frac{d}{dt}\texttt{loss}_{square-hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ -2\left(\rho-t-\delta\right), & \mbox{if } ~\rho - t~ \gt \delta \\ 2\left(-\rho+t-\delta\right), & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$
Loss Hessians: $$\frac{d^2}{(dt)^2}\texttt{loss}_{hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 0, & \mbox{if } ~\rho - t~ \gt \delta \\ 0, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$
$$\frac{d^2}{(dt)^2}\texttt{loss}_{square-hinge}(t,\rho) = \begin{cases} 0, & \mbox{if } |~\rho - t~| \lt \delta \\ 2, & \mbox{if } ~\rho - t~ \gt \delta \\ 2, & \mbox{if } ~-\rho + t~ \gt \delta \end{cases}$$
Evaluation: $$ \langle \Phi(x), w\rangle = \sum_k \beta_k K(x_k, x) $$
Surface: $$ \langle \Phi(x), w\rangle - \rho = \sum_k \beta_k K(x_k, x) - \rho $$
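The learned surface is therefore the zero level set $$\{\, x \;:\; \textstyle\sum_k \beta_k K(x_k, x) - \rho = 0 \,\},$$ and a training point incurs zero loss exactly when its decision value lies inside the slab $|\langle \Phi(x_i), w\rangle - \rho| \le \delta$. The grid-evaluation and marching-cubes cells below extract this zero level set.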
In [ ]:
%matplotlib inline
import numpy as np
from numpy import linalg, random, ones, zeros, matrix, eye, dot
from numpy.linalg import norm, cholesky, inv
from sklearn.cross_validation import train_test_split
import mosek
import math
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import sys
import time
import scipy
from collections import namedtuple
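# Hyperparameters (see the objective above): v is the nu in the 1/(nu*m) factor, delta is the
# slab half-width in the loss, sigma is the Gaussian-kernel bandwidth, ep is the convergence
# tolerance (also used for finite differences and minimum step sizes), and
# points_std_from_surface controls how far the synthetic points scatter from the unit sphere.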
v = .00001
delta = 0.01
sigma = 100
initial_rho = 1
max_iter = 100
initial_step_size = .1
timer_thresh = .1
ep = .0001
points_count = 1000
points_std_from_surface = 0.0001
def pivot(A, k, n):
# Full pivoting: locate the entry of largest magnitude in the trailing submatrix
# A[k:, k:] and build permutation matrices that move it to position (k, k):
# Pk swaps rows k and ipiv, Qk swaps columns k and jpiv.
sub = np.absolute(A[k:n, k:n])
ipiv, jpiv = np.unravel_index(np.argmax(sub), sub.shape)
ipiv += k
jpiv += k
Pk = eye(n)
Pk[ipiv, ipiv] = 0
Pk[k, k] = 0
Pk[k, ipiv] = 1
Pk[ipiv, k] = 1
Qk = eye(n)
Qk[jpiv, jpiv] = 0
Qk[k, k] = 0
Qk[k, jpiv] = 1
Qk[jpiv, k] = 1
return Pk, Qk
def incomplete_LU_decomp(A):
start = time.time()
assert A.shape[0] == A.shape[1]
n = A.shape[0]
for k in range(n - 1):
Pk, Qk = pivot(A, k, n)
A = dot(dot(Pk, A), Qk)
for i in range(k + 1, n):
if A[i, k] != 0:
if A[k, k] == 0:
return 'Error: Null Pivot'
A[i, k] = A[i, k] / A[k, k]
for j in range(k + 1, n):
if A[i, j] != 0:
A[i, j] = A[i, j] - A[i, k] * A[k, j]
end = time.time()
if end - start > timer_thresh:
print 'incomplete_LU_decomp:', end - start, 'sec'
return A
def get_K_LU(K):
# Compare numpy's Cholesky factor of K with the incomplete LU factorization above.
K_LU = cholesky(K)
K_LU2 = incomplete_LU_decomp(K.copy())
assert K_LU.shape == K_LU2.shape
for k in range(K_LU.shape[0]):
for i in range(K_LU.shape[1]):
assert abs(K_LU[k, i] - K_LU2[k, i]) < ep
def loss_der_der(t, rho):
if g_loss_type == 'hinge':
return 0
if g_loss_type == 'square-hinge':
if abs(rho - t) < delta:
return 0
else:
return 2
raise Exception(g_loss_type, t, rho, delta)
def loss_der_vect(grad, t, rho, opt_on):
if g_loss_type == 'hinge':
grad[np.absolute(rho - t) <= delta] = 0
grad[rho - t > delta] = -1
grad[-rho + t > delta] = 1
return grad
if g_loss_type == 'square-hinge':
grad[np.absolute(rho - t) <= delta] = 0
if opt_on == 'b':
grad[rho - t > delta] = -2.0 * (rho - t[rho - t > delta] - delta)
grad[-rho + t > delta] = 2.0 * (-rho + t[-rho + t > delta] - delta)
return grad
if opt_on == 'rho':
grad[rho - t > delta] = 2 * (rho - t[rho - t > delta] - delta)
grad[-rho + t > delta] = -2 * (-rho + t[-rho + t > delta] - delta)
return grad
raise Exception(grad, g_loss_type, t, rho, delta)
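# Gaussian RBF kernel k(x1, x2) = exp(-||x1 - x2||^2 / (2 sigma^2)); kernel_vect
# evaluates it against every row of x_list at once.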
def kernel(x1, x2):
return math.exp(-1 * math.pow(norm(x1 - x2), 2
) / (2 * math.pow(sigma, 2)))
def kernel_vect(x_list, x2):
return np.exp(-1 * np.power(norm(x_list - x2, axis=1), 2) / (2 * math.pow(sigma, 2)))
def loss_vect(t, rho):
if g_loss_type == 'hinge':
return np.maximum(np.zeros(t.shape), np.absolute(rho - t) - delta)
if g_loss_type == 'square-hinge':
return np.power(np.maximum(np.zeros(t.shape), np.absolute(rho - t) - delta), 2)
def loss_matrix_rho_vect(t, rho):
rho = np.matrix(rho).T
t = np.matrix(t)
if g_loss_type == 'hinge':
return np.maximum(np.zeros((t.shape[0], rho.shape[1])), np.absolute(t - rho) - delta)
if g_loss_type == 'square-hinge':
return np.power(np.maximum(np.zeros((t.shape[0], rho.shape[1])), np.absolute(t - rho) - delta), 2)
def loss_matrix_t_matrix(t, rho):
if g_loss_type == 'hinge':
return np.maximum(np.zeros(t.shape), np.absolute(t - rho) - delta)
if g_loss_type == 'square-hinge':
return np.power(np.maximum(np.zeros(t.shape), np.absolute(t - rho) - delta), 2)
def get_K():
start = time.time()
K = np.zeros((len(g_x), len(g_x)))
for i in range(len(g_x)):
K[i, :] = kernel_vect(g_x, g_x[i])
end = time.time()
if end - start > timer_thresh:
print 'get_K:', end - start, 'sec'
return K
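# Slab_SVM: primal solver for the objective above. grad_des() alternates a beta step and a
# rho step; each step uses either the Newton direction (square-hinge loss with the 'Newton'
# method) or the raw gradient, with a backtracking line search over a geometric grid of step sizes.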
class Slab_SVM:
def H(self, beta, rho, loss_vect_list, opt_on):
start = time.time()
if opt_on == 'b':
# H_beta = 2K + (2 / (v m)) * sum over active points of K_i K_i^T
# (square-hinge loss: loss'' = 2 outside the slab, 0 inside it)
K_active = self.K[self.support_vectors, :][loss_vect_list, :]
ret = 2 * self.K + 2.0 / (v * self.m) * np.dot(K_active.T, K_active)
elif opt_on == 'rho':
# H_rho = (2 / (v m)) * number of active points (guarded against an empty active set)
ret = 2.0 / (v * self.m) * max(len(loss_vect_list), 1)
end = time.time()
if end - start > timer_thresh:
print 'H:', end - start, 'sec'
return ret
def obj_funct(self, beta, rho):
start = time.time()
obj = np.dot(beta, np.dot(self.K, beta)) + 1.0 / (v * self.m) * np.sum(
loss_vect(
np.dot(self.K[self.support_vectors, :],
beta),
rho))
end = time.time()
if end - start > timer_thresh:
print 'obj_funct:', end - start, 'sec'
return obj
def obj_funct_beta_matrix(self, beta, rho):
start = time.time()
obj = np.sum(np.multiply(beta, np.dot(self.K, beta)), axis=0) \
+ 1.0 / (v * self.m) * np.sum(np.asarray(loss_matrix_t_matrix(np.dot(self.K[self.support_vectors, :],
beta),
rho)
),
axis=0)
end = time.time()
if end - start > timer_thresh:
print 'obj_funct_beta_matrix:', end - start, 'sec'
return obj
def obj_funct_rho_vect(self, beta, rho):
start = time.time()
obj = np.dot(beta, np.dot(self.K, beta)) + 1.0 / (v * self.m) * np.sum(np.asarray(
loss_matrix_rho_vect(
np.dot(
self.K[self.support_vectors, :], beta),
rho)), axis=1)
end = time.time()
if end - start > timer_thresh:
print 'obj_funct_rho_vect:', end - start, 'sec'
return obj
def f(self, x_test, beta, rho):
# Decision function <w, Phi(x_test)> - rho = sum_k beta_k K(x_k, x_test) - rho.
start = time.time()
w = np.dot(beta, kernel_vect(self.x_data, x_test)) - rho
end = time.time()
if end - start > timer_thresh:
print 'f:', end - start, 'sec'
return w
def step(self, element, step_size, resid):
return element - (step_size * resid)
def backtrack_step_size_rho(self, step_size, obj, resid, beta, rho):
start = time.time()
number_of_steps = 2
if step_size == ep ** 2:
step_size = initial_step_size
else:
step_size *= 4.0
iter_count = 0
if obj > (self.obj_funct(beta, self.step(rho, step_size, resid))):
end = time.time()
if end - start > timer_thresh:
print 'backtrack_step_size_rho:', end - start, 'sec', iter_count, 'times'
# print 'backtrack_step_size_rho iter',iter_count
return step_size
while True:
iter_count += 1
steps = step_size * np.logspace(-1, -number_of_steps, num=number_of_steps, base=2.0)
obj_many_steps = np.array(self.obj_funct_rho_vect(beta, self.step(rho, steps, resid))).ravel()
if np.where(obj - obj_many_steps > 0)[0].shape[0] > 0:
end = time.time()
if end - start > timer_thresh:
print 'backtrack_step_size_rho:', end - start, 'sec', iter_count, 'times'
# print 'backtrack_step_size_rho iter',iter_count
return steps[np.where(obj - obj_many_steps > 0)[0][0]]
step_size = steps[-1]
if step_size < ep ** 2:
# print 'WARNING: step size not found'
step_size = ep ** 2
end = time.time()
if end - start > timer_thresh:
print 'backtrack_step_size_rho:', end - start, 'sec', iter_count, 'times'
# print 'backtrack_step_size_rho iter',iter_count,'Step size too small'
return step_size
number_of_steps *= 2
assert False
def backtrack_step_size_beta(self, step_size, obj, resid, beta, rho):
start = time.time()
number_of_steps = 2
if step_size == ep ** 3:
step_size = initial_step_size
else:
step_size *= 2.0
iter_count = 0
if obj > (self.obj_funct(self.step(beta, step_size, resid), rho)):
end = time.time()
if end - start > timer_thresh:
print 'backtrack_step_size_beta:', end - start, 'sec', iter_count, 'times'
print 'backtrack_step_size_beta iter',iter_count
assert obj > (self.obj_funct(self.step(beta, step_size, resid), rho))
return step_size
while True:
iter_count += 1
steps = step_size * np.logspace(-1, -number_of_steps, num=number_of_steps, base=2.0)
obj_many_steps = (self.obj_funct_beta_matrix(np.asarray(self.step(np.matrix(beta),
np.matrix(steps).T,
np.matrix(resid)).T
),
rho))
if np.where(obj - obj_many_steps > 0)[0].shape[0] > 0:
# print 'obj - obj_many_steps[np.where(obj - obj_many_steps > 0)]',\
# obj - obj_many_steps[np.where(obj - obj_many_steps > 0)]
assert np.min(obj - obj_many_steps[np.where(obj - obj_many_steps > 0)]) > 0
assert obj - obj_many_steps[np.where(obj - obj_many_steps > 0)[0][0]] > 0
assert abs(obj_many_steps[np.where(obj - obj_many_steps > 0)[0][0]] - \
self.obj_funct(self.step(beta,
steps[np.where(obj - obj_many_steps > 0)[0][0]],
resid), rho)) < .000000000001
end = time.time()
if end - start > timer_thresh:
print 'backtrack_step_size_beta:', end - start, 'sec', iter_count, 'times'
print 'backtrack_step_size_beta iter',iter_count
return steps[np.where(obj - obj_many_steps > 0)[0][0]]
step_size = steps[-1]
if step_size < ep ** 3:
# print 'WARNING: step size not found'
step_size = ep ** 3
end = time.time()
if end - start > timer_thresh:
print 'backtrack_step_size_beta:', end - start, 'sec', iter_count, 'times'
print 'backtrack_step_size_beta iter',iter_count,'Step size too small'
# assert abs(obj - (self.obj_funct(self.step(beta, step_size, resid), rho)))<.0001
return step_size
number_of_steps *= 10
assert False
def numer_grad(self, beta, rho, ep, direct=0, opt_on=''): # const
if opt_on == 'rho':
# central finite difference of the objective in the rho direction
return (self.obj_funct(beta, rho + ep) \
- self.obj_funct(beta, rho - ep)) / (2 * ep)
# central finite difference along the unit direction `direct` in beta
return (self.obj_funct(beta + (ep * direct), rho) \
- self.obj_funct(beta - (ep * direct), rho)) / (2 * ep)
def grad_checker(self, beta, rho, ep, opt_on): # const
start = time.time()
if opt_on == 'rho':
return self.numer_grad(beta, rho, ep, opt_on=opt_on)
d = len(beta)
w = np.zeros(d)
for i in range(d):
direct = np.zeros(beta.shape)
direct[i] = 1
w[i] = self.numer_grad(beta, rho, ep, direct=direct, opt_on=opt_on)
end = time.time()
if end - start > timer_thresh:
print 'grad_checker:', end - start, 'sec'
return w
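# get_resid: descent direction. For the Newton step this solves H d = grad
# (a scalar division for the rho step); the caller then moves against d.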
def get_resid(self, beta, rho, grad, loss_vect_list, opt_on):
start = time.time()
if opt_on == 'b':
# H_LU = scipy.linalg.cholesky(self.H(beta,rho,loss_vect_list,opt_on), lower=True)
resid = linalg.solve(self.H(beta, rho, loss_vect_list, opt_on), grad)
else:
resid = grad / self.H(beta, rho, loss_vect_list, opt_on)
end = time.time()
if end - start > timer_thresh:
print 'get_resid:', end - start, 'sec'
return resid
def grad_des_iterate(self, iterations, opt_on='b'):
start = time.time()
loss_vect_list = np.where(np.absolute(self.rho - \
np.dot(self.K[self.support_vectors, :], self.beta)) \
>= delta)[0]
end = time.time()
if end - start > timer_thresh:
print 'find sv:', end - start, 'sec'
obj = self.obj_funct(self.beta, self.rho)
self.obj_array[iterations] = (obj)
if opt_on == 'b':
# grad_beta F = 2*K*beta + (1/(v m)) * sum_i loss'(K_i^T beta, rho) * K_i:
# weight each support-vector row of K by its loss derivative and sum the rows.
self.grad = 2.0 * np.dot(self.K, self.beta) + 1.0 / (v * self.m) * np.dot(
loss_der_vect(self.grad_buffer,
np.dot(self.K[self.support_vectors, :], self.beta),
self.rho,
opt_on),
self.K[self.support_vectors, :])
elif opt_on == 'rho':
self.grad = 1 / (v * self.m) * np.sum(loss_der_vect(
self.grad_buffer,
np.dot(self.K[self.support_vectors, :], self.beta), self.rho, opt_on))
self.obj_grad_array[iterations] = norm(self.grad)
# obj_grad_check_array[iterations]=norm((grad-grad_checker(beta,rho,ep,opt_on)))
if obj < ep:
print 'Stopping crit: obj small', obj
return True
if norm(self.grad) < ep:
print 'Stopping crit: norm(grad) small', norm(self.grad)
return True
if g_loss_type == 'square-hinge' and g_method == 'Newton':
resid = self.get_resid(self.beta, self.rho, self.grad, loss_vect_list, opt_on)
else:
resid = self.grad
if opt_on == 'rho':
self.step_size_rho = self.backtrack_step_size_rho(self.step_size_rho, obj, resid, self.beta, self.rho)
self.rho = self.step(self.rho, self.step_size_rho, resid) # Update
else:
self.step_size_beta = self.backtrack_step_size_beta(self.step_size_beta, obj, resid, self.beta, self.rho)
self.beta = self.step(self.beta, self.step_size_beta, resid) # Update
end = time.time()
if end - start > timer_thresh:
print 'grad_des_iterate:', end - start, 'sec'
return False
def grad_des(self, prev_run=None):
start = time.time()
if prev_run == None:
self.beta = zeros(self.m)
self.rho = initial_rho
else:
self.beta = prev_run.beta
self.rho = prev_run.rho
print 'grad_des, sv',len(np.where(np.absolute(self.rho - \
np.dot(self.K[self.support_vectors, :], self.beta)) \
>= delta)[0])
if len(np.where(np.absolute(self.rho - np.dot(self.K[self.support_vectors, :], self.beta)) \
>= delta)[0])==0: # No support vectors (out of bounds vectors)
return Run(None,None,None,self.beta, self.rho, 0)
self.obj_array = -1 * np.ones(max_iter)
self.obj_grad_array = np.zeros((max_iter))
self.obj_grad_check_array = np.zeros(max_iter)
self.grad_buffer = zeros(len(self.support_vectors))  # reused by loss_der_vect; one entry per candidate support vector
self.step_size_beta = initial_step_size
self.step_size_rho = initial_step_size
end = time.time()
if end - start > timer_thresh:
print 'grad_des alloc mem:', end - start, 'sec'
iterations = 0
for i in range(max_iter):
converged = self.grad_des_iterate(iterations, opt_on='b')
if converged:
break
print 'grad',norm(self.grad)
converged = self.grad_des_iterate(iterations, opt_on='rho')
if converged:
break
print 'grad',norm(self.grad)
if i == max_iter - 1:
print 'WARNING: Did not converge'
iterations += 1
print 'Desc iterations', iterations
print 'Desc rho', self.rho
end = time.time()
if end - start > timer_thresh:
print 'grad_des:', end - start, 'sec'
return Run(self.obj_array, self.obj_grad_array, self.obj_grad_check_array, self.beta, self.rho, iterations)
def get_K_inv(K):
start = time.time()
K_inv = inv(K)
end = time.time()
if end - start > timer_thresh:
print 'get_K_inv:', end - start, 'sec'
return K_inv
def get_K_cond(K):
start = time.time()
K_cond = linalg.cond(K)
end = time.time()
if end - start > timer_thresh:
print 'get_K_cond:', end - start, 'sec'
return K_cond
def pre_comp_K():
start = time.time()
K = get_K()
end = time.time()
if end - start > timer_thresh:
print 'pre_comp_K:', end - start, 'sec'
return K # , K_inv
def __init__(self, K, x_data, support_vectors=None):
self.K = K
self.x_data = x_data
self.m = len(self.x_data)
if support_vectors is None:
self.support_vectors = np.arange(0, self.m)
else:
self.support_vectors = support_vectors
print 'Slab_SVM',len(self.support_vectors)
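# run(): if there are more than 500 candidate support vectors, recursively solve a
# subsampled problem first and use its (beta, rho) to warm-start the full problem.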
def run(self):
if len(self.support_vectors) <= 500:
return self.grad_des()
this_run = Slab_SVM(self.K, self.x_data, random.choice(self.support_vectors,
max(500,len(self.support_vectors) - 500),
replace=False)).run()
return self.grad_des(this_run)
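# Synthetic data: sample points on (or, for points_std_from_surface > 0, near)
# the unit sphere via spherical coordinates.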
def get_data_points():
start = time.time()
points = random.random((points_count, 2)) * 2 * np.pi
x = np.zeros((points_count, 3))
for p in range(points_count):
if points_std_from_surface > 0:
r = random.normal(loc=1, scale=points_std_from_surface)
else:
r = 1
z_cord = r * np.sin(points[p][1])
r_temp = r * np.cos(points[p][1])
y_cord = r_temp * np.sin(points[p][0])
x_cord = r_temp * np.cos(points[p][0])
x[p] = np.asarray([x_cord, y_cord, z_cord])
end = time.time()
if end - start > timer_thresh:
print 'get_data_points:', end - start, 'sec'
return x
g_x = get_data_points()
fig = plt.figure(figsize=(10, 12))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(g_x[:, 0], g_x[:, 1], g_x[:, 2])
plt.show()
Run = namedtuple('Run', ['obj_array', 'obj_grad_array', 'obj_grad_check_array', 'beta', 'rho',
'iterations'])
g_K = get_K()
import warnings
with warnings.catch_warnings(record=True) as w:
g_Desc = {}
g_counter = 0
for g_loss_type in ['square-hinge', 'hinge']:
for g_method in ['Newton', '']:
print '-----------------------------------'
# print 'loss_type', g_loss_type
# print 'method', g_method
g_Desc[g_counter] = Slab_SVM(g_K,g_x).run()
# print 'Desc iterations', g_Desc[g_counter].iterations
# print 'Desc rho', g_Desc[g_counter].rho
print '-----------------------------------'
print
g_counter += 1
In [ ]:
grid_steps = 25
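# Evaluate the decision function f(x) = sum_k beta_k K(x_k, x) - rho on a regular grid
# over [-2, 2]^3 so that its zero level set can be extracted with marching cubes below.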
def pop_data_grid(beta,rho):
start = time.time()
data = np.zeros((grid_steps,grid_steps,grid_steps))
x0_range = np.linspace(-2, 2, grid_steps)
x1_range = np.linspace(-2, 2, grid_steps)
x2_range = np.linspace(-2, 2, grid_steps)
end = time.time()
if end - start > timer_thresh:
print 'pop_data_grid alloc mem:',end - start
for i in range(grid_steps):
for j in range(grid_steps):
for k in range(grid_steps):
# decision value f(x) = sum_k beta_k K(x_k, x) - rho at the grid point
data[i, j, k] = np.dot(beta, kernel_vect(g_x, np.asarray([x0_range[i],
x1_range[j],
x2_range[k]]))) - rho
end = time.time()
if end - start > timer_thresh:
print 'pop_data_grid:',end - start,'sec'
return data
def proc_data(beta,rho,data):
start = time.time()
print 'delta',delta
print 'np.abs(data - delta) < .1 -> ',(np.where(np.abs(data - delta) < .1)[0].shape)
print 'np.abs(data - delta) < .01 -> ',(np.where(np.abs(data - delta) < .01)[0].shape)
print 'np.abs(data - delta) < .001 -> ',(np.where(np.abs(data - delta) < .001)[0].shape)
print 'np.abs(data - delta) < .0001 -> ',(np.where(np.abs(data - delta) < .0001)[0].shape)
print 'data < delta -> ',(np.where(data < delta )[0].shape)
print 'data > delta -> ',(np.where(data > delta )[0].shape)
print 'data < 0 -> ',(np.where( data < 0)[0].shape)
print 'data == 0 -> ',(np.where( data == 0)[0].shape)
print 'data > 0 -> ',(np.where( data > 0)[0].shape)
print 'min -> ',(np.amin( data ))
print 'max -> ',(np.amax( data ))
# print 'data:',data
end = time.time()
if end - start > timer_thresh:
print 'proc_results:',end - start
rho = g_Desc[0].rho
beta = g_Desc[0].beta
losses = []
for i in range(len(g_x)):
# decision value for each training point
losses.append(np.dot(beta, kernel_vect(g_x, g_x[i])) - rho)
data = pop_data_grid(beta,rho)
In [ ]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import sys
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from skimage import measure
# Use marching cubes to extract the zero level set of the decision function (the learned surface)
verts, faces = measure.marching_cubes(data, 0)
# Display resulting triangular mesh using Matplotlib. This can also be done
# with mayavi (see skimage.measure.marching_cubes docstring).
fig = plt.figure(figsize=(10, 12))
ax = fig.add_subplot(111, projection='3d')
# Fancy indexing: `verts[faces]` to generate a collection of triangles
mesh = Poly3DCollection(verts[faces])
ax.add_collection3d(mesh)
ax.set_xlabel("x-axis")
ax.set_ylabel("y-axis")
ax.set_zlabel("z-axis")
ax.set_xlim(-0, 30)
ax.set_ylim(-0, 30)
ax.set_zlim(-0, 30)
plt.show()
In [ ]:
%matplotlib nbagg
plt.clf()
plt.cla()
# Named handles for the runs from the training loop (assumes all four configurations ran:
# index 0 = square-hinge + Newton, index 1 = square-hinge + steepest descent).
Newton_Desc = g_Desc[0]
Steepest_Desc = g_Desc[1]
ax = plt.subplot(1,1,1)
ax.scatter(range(1,Newton_Desc.iterations+1),
Newton_Desc.obj_array[0:Newton_Desc.iterations],marker='^',
label='Non-Stochastic Newtons Method')
ax.scatter(range(1,Steepest_Desc.iterations+1),
Steepest_Desc.obj_array[0:Steepest_Desc.iterations],marker='*',
label='Non-Stochastic Steepest Descent')
handles, labels = ax.get_legend_handles_labels()
plt.legend(handles, labels)
plt.title('Objective Function over iterations')
plt.ylabel('F (w)')
plt.xlabel('Iteration')
In [ ]:
%matplotlib nbagg
plt.clf()
plt.cla()
from numpy.linalg import norm
ax = plt.subplot(1,1,1)
ax.scatter(range(1,(Newton_Desc.iterations)+1),
Newton_Desc.obj_grad_array[0:Newton_Desc.iterations],
marker='^',
label='Non-Stochastic Newtons Method')
ax.scatter(range(1,(Steepest_Desc.iterations)+1),
Steepest_Desc.obj_grad_array[0:Steepest_Desc.iterations],
marker='*',
label='Non-Stochastic Steepest Descent')
handles, labels = ax.get_legend_handles_labels()
plt.legend(handles, labels)
plt.title('Gradient Norm over iterations')
plt.ylabel('norm(d/dw F (w))')
plt.xlabel('Iteration')
In [ ]:
%matplotlib nbagg
plt.clf()
plt.cla()
from numpy.linalg import norm
ax = plt.subplot(1,1,1)
ax.scatter(range(1,(Steepest_Desc.iterations)+1),
Steepest_Desc.obj_grad_check_array[0:Steepest_Desc.iterations],
marker='*',
label='Non-Stochastic Steepest Descent')
handles, labels = ax.get_legend_handles_labels()
plt.legend(handles, labels)
plt.title('Gradient Norm and Approx. Gradient Norm Difference \n over iterations')
plt.ylabel('Difference')
plt.xlabel('Iteration')
In [ ]: