In [1]:
%matplotlib inline
# Future
from __future__ import print_function
# Numpy imports
import numpy as np
import numpy.random as random
import scipy as sp
import scipy.linalg as la
# Convex optimization package: CVXOPT
## Import the basic packages
from cvxopt import matrix, solvers
solvers.options['show_progress'] = False
## Import the CVXOPT version of LAPACK
from cvxopt import blas, lapack, sqrt, mul, cos, sin, log
# Import matplotlib.pyplot
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# My imports
from ellipsoidal_outlier_detector import FEllipsoid
EPS = sp.finfo(float).eps
In [ ]:
In [ ]:
In [ ]:
In [2]:
# Generate random samples for testing the algorithm
Ndim = 3
Na = Ndim
Nsamples = 500
mean = np.array(Ndim * [5.0,])
cov = sp.eye(Ndim) + 5.0
samples = random.multivariate_normal(mean, cov, Nsamples)
samples[:20] = random.laplace(5, scale =10.0001, size=(20, mean.size))
#plt.plot(samples[:, 0], samples[:, 1], 'or', markersize=4)
#plt.axis('equal')
# Make a 3D figure
fig = plt.figure(0)
fig.clear()
ax = fig.add_subplot(1,1,1, projection='3d')
ax.scatter(samples[:,0],
samples[:,1],
samples[:,2], marker='.')
## Problem size estimates
print("Print some rough problem size estimates:")
d = Ndim
M = Nsamples
Na = int(d * (d+1) / 2.0 + d)
print("d = {0}\nM = {1}\nNa= {2}".format(d, M, Na), end='\n\n')
print("d**2 = {0}".format(d**2))
print("Na**2 = {0}".format(Na**2))
In [3]:
F = FEllipsoid(samples)
# Test the initial ellipse
vm = sp.inner(F.A0, F.xarray) + F.b0[:, None]
gm = (vm*vm).sum(0) - 1
print(gm.max())
In [4]:
ztest = sp.rand(F.M+1)
# Test the faster hessian calculations for i.e. constraints
out_old = F.hess_f1_slow(F.a0, ztest)
out_new = F.hess_f1(F.a0, ztest)
relerr_new_hess = abs(out_old - out_new).sum() / abs(out_old).sum()
# Test the faster gradient calculations for i.e. constraints
out_old = F.grad_f1_slow(F.a0)
out_new = F.grad_f1(F.a0)
relerr_new_grad = abs(out_old - out_new).sum() / abs(out_old).sum()
print("Fast Hessian error : {0}.".format(relerr_new_hess))
print("Fast Gradient error: {0}.".format(relerr_new_grad))
print(F)
In [5]:
def get_optimal_ellipse(F, solvers=solvers):
"Call the solver."
sol = solvers.cp(F)
solx = np.array(sol['x']).flatten()
b = F.get_b(solx)
A = F.get_A(solx)
Ainv = la.inv(A)
return A, b, Ainv
%prun A, b, Ainv = get_optimal_ellipse(F)
print(A.shape)
print(Ainv.shape)
print(b.shape)
In [6]:
ztest = sp.rand(F.M+1)
print("\nHessian slow:")
%time out = F.hess_f1_slow(F.a0, ztest)
print("\nHessian fast:")
%time out = F.hess_f1(F.a0, ztest)
print("\nGradient slow:")
%time out = F.grad_f1_slow(F.a0)
print("\nGradient fast:")
%time out = F.grad_f1(F.a0)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [7]:
def xinxout(x, A=A, b=b):
"Define the signed distance to the boundary."
v = sp.dot(A, x.T) + b[:, None]
Iin = la.norm(v, axis=0) < (1 - 100 * sp.sqrt(EPS))
Iout = la.norm(v, axis=0) >= (1 - 100 * sp.sqrt(EPS))
xin = x[Iin, :]
xout = x[Iout, :]
return xin, xout
xin, xout = xinxout(samples)
print("Number of outliers : {0}".format(xout.shape[0]))
print("Number of inliers : {0}".format(xin.shape[0]))
In [8]:
def plot_xinxout(xin, xout, Ainv, b):
"Make a 3D scatter plot with some data"
# Make a 3D figure
fig = plt.figure()
ax = fig.add_subplot(1,1,1, projection='3d')
ax.scatter(xin[:,0], xin[:,1], xin[:,2], marker='.')
ax.scatter(xout[:,0], xout[:,1], xout[:,2], marker='o', c='r')
## Get points on a sphere
thetas = sp.linspace(100*EPS, (1.0-100*EPS) * sp.pi, 20)
phis = sp.linspace(0.0, 2*sp.pi, 20)
TT, PP = sp.meshgrid(thetas, phis, indexing='ij')
xx = sp.cos(PP) * sp.sin(TT)
yy = sp.sin(PP) * sp.sin(TT)
zz = sp.cos(TT)
Ainv_xx = Ainv[:,0, None, None] * (xx - b[None, None, 0])
Ainv_yy = Ainv[:,1, None, None] * (yy - b[None, None, 1])
Ainv_zz = Ainv[:,2, None, None] * (zz - b[None, None, 2])
xyz_ellipse = Ainv_xx + Ainv_yy + Ainv_zz
xe = xyz_ellipse[0]
ye = xyz_ellipse[1]
ze = xyz_ellipse[2]
ax.plot_wireframe(xe, ye, ze, linewidth=0.1)
return fig
plot_xinxout(xin, xout, Ainv, b)
## Problem size estimates
print("Print some rough problem size estimates:")
d = Ndim
M = Nsamples
Na = int(d * (d+1) / 2.0 + d)
print("d = {0}\nM = {1}\nNa= {2}".format(d, M, Na), end='\n\n')
print("d**2 = {0}".format(d**2))
print("Na**2 = {0}".format(Na**2))
In [ ]:
In [9]:
F_ellipsoid = FEllipsoid(samples)
def get_optimal_ellipse(F, solvers=solvers):
"Call the solver."
# Solve the convex optimization for ellipse
sol = solvers.cp(F)
astar = np.array(sol['x']).flatten()
# Compute the standard ellipse parameters
b = F.get_b(astar)
A = F.get_A(astar)
pstar = 1.0 / la.det(A)
return astar, pstar, A, b
def xinxout(F, A, b):
"Define the signed distance to the boundary."
x = F.xarray
v = sp.dot(A, x.T) + b[:, None]
Iin = (v**2).sum(0)-1 < - 10 * sp.sqrt(EPS)
Iout = (v**2).sum(0)-1 >= - 10 * sp.sqrt(EPS)
xin = x[Iin, :]
xout = x[Iout, :]
return xin, xout
def get_total_partition(xarray,
get_optimal_ellipse = get_optimal_ellipse,
xinxout = xinxout,
FEllipsoid = FEllipsoid):
"""
Compute outliers in sequance for all points.
in
--
xarray - array shaped (M, d) for M samples of d-dimensional vecs
out
---
xpart_list - list of partitions
volumes - array of ellipsoid volumes
A_arrs - A_arrs define the covarience of ellipse
b_vecs - b_vecs define the center of the ellipse
Fe - The interface to CVXOPT
"""
# Define an interface to cvxopt
Fe = FEllipsoid(xarray)
Mstart = Fe.M
# Initialize output
xout_list = []
xin_list = []
pstars = sp.zeros((Fe.M,))
A_arrs = sp.zeros((Fe.M, Fe.ndim, Fe.ndim))
b_vecs = sp.zeros((Fe.M, Fe.ndim))
# Loop until the set of points has been partitioned
for i in range(Mstart):
# Get the next ellipse
astar, pstar, A, b = get_optimal_ellipse(Fe)
# Separate points
xin, xout = xinxout(Fe, A, b)
# Make a new ellipsoid solver
Fe = FEllipsoid(xin, a0=astar)
# Store data for output
xout_list.append(xout)
xin_list.append(xin)
pstars[i] = pstar
A_arrs[i, :, :] = A
b_vecs[i, :] = b
if xin.shape[0] < Mstart / 2.0:
ibound = i + 1
break
# Prepare output
pstars = pstars[:ibound]
A_arrs = A_arrs[:ibound, :, :]
b_vecs = b_vecs[:ibound, :]
return xin_list, xout_list, pstars, A_arrs, b_vecs
In [10]:
out = get_total_partition(samples)
In [11]:
xins, xouts, ps, As, bs = out[:]
In [12]:
plt.semilogy(ps)
Out[12]:
In [13]:
nout = [len(_xout) for _xout in xouts]
nin = [len(_xin) for _xin in xins]
In [14]:
zip(nin, nout)
Out[14]:
In [ ]:
In [15]:
for i in range(len(xins))[:20]: #[0, 1, 2, 3, 10, 20]:
fig = plot_xinxout(xins[i], xouts[i], la.inv(As[i]), bs[i])
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: