In [ ]:
import time
import os
import numpy as np
from sklearn.grid_search import ParameterGrid
from sklearn.base import clone
from sklearn.gaussian_process import GaussianProcess
from scipy.stats import norm
from joblib import Parallel, delayed
from utils.state import _save
from utils.functions_1d import f6, pressure2, heaviside
from utils.functions import gaussian
from utils.conformal import RRCM, CRR
from utils.KRR import KRR_AB
In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
In [ ]:
np.seterr(all="ignore")
In [ ]:
n_jobs, verbose = -1, 0
parallel_ = Parallel(n_jobs=n_jobs, verbose=verbose)
## The levels and the random state
seeds_ = [0xB5066DBC, 0x98E8576F, 0x3161F88E, 0x08CCA9D9,]
random_state = np.random.RandomState(seeds_[3])
levels = np.linspace(0, 0.9, num=51) #np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]
## helpers
def _helper(y, A, B, proc=RRCM, levels=levels, parallel=None, n_jobs=1, verbose=0):
    if not isinstance(parallel, Parallel):
        parallel = Parallel(n_jobs=n_jobs, verbose=verbose)
    ## Conformal region of every test object, computed in parallel
    regions = parallel(delayed(proc)(A[k], B[k], levels=levels)
                       for k in range(y.shape[0]))
    ## Coverage indicator: does any interval of the region contain the target?
    hits_ = np.asarray(
        [[np.any((int_[:, 0] <= target) & (target <= int_[:, 1])).astype(float)
          for int_ in region]
         for target, region in zip(y, regions)])
    ## Total width and outer bounds of each region
    width_ = np.asarray(
        [[np.sum(int_[:, 1] - int_[:, 0]) for int_ in region] for region in regions])
    bounds_ = np.asarray(
        [[[int_[:, 0].min(), int_[:, 1].max()] for int_ in region] for region in regions])
    return hits_, width_, bounds_
## Define the grid
true_nugget = [1e-6, 1e-1,]
test_functions = [f6, heaviside, pressure2]
grid_ = ParameterGrid(dict(func=test_functions,
                           size=[25, 50, 100, 200, 400, 600, 800, 1000, 1200, 1400, 1600,],
                           nugget=true_nugget,
                           noise=true_nugget,
                           theta0=[1e+1, 1e+2, 1e+3, "auto"]))
## Initialize
kernel = 'rbf'
gp = GaussianProcess(beta0=0, normalize=False, corr='squared_exponential')
# Generate input
XX_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
# XX_test = np.concatenate([XX_test, np.logspace(0, 3, num=2001).reshape((-1, 1))], axis=0)
test_ = np.s_[:XX_test.shape[0]]
In [ ]:
size_, nugget_, theta0_ = 1600, 1e-6, float(100.0)
func_, noise_ = f6, 1e-1
In [ ]:
# true_theta = 100.0
# func_ = lambda XX: gaussian(XX, scale=1.0, nugget=noise_, metric=kernel,
# random_state=random_state,
# gamma=true_theta)
In [ ]:
## Draw random train sample
train_ = random_state.uniform(size=(size_, 1))
X = np.sort(train_, axis=0)
## Draw f(x)
XX = np.concatenate([XX_test, X], axis=0)
yy = func_(XX)
if yy.ndim == 1:
    yy = yy.reshape((-1, 1))
## Add Gaussian observation noise for the selected test functions
if noise_ > 0 and func_ in {f6, heaviside}:
    yy += random_state.normal(size=yy.shape) * noise_
In [ ]:
## Split the pooled sample
y, y_test = np.delete(yy, test_, axis=0), yy[test_]
## Fit a GPR
gp_ = clone(gp)
gp_.nugget = nugget_
if isinstance(theta0_, float):
    gp_.theta0 = theta0_
elif theta0_ == "auto":
    gp_.thetaL, gp_.thetaU, gp_.theta0 = 1.0, 1e4, float(size_)
gp_.fit(X, y)
## Compute the A, B matrices
A, B, y_hat_, MM, loo_, A_loo, B_loo = \
    KRR_AB(X, y, XX_test, forecast=True,
           nugget=gp_.nugget, metric=kernel, gamma=gp_.theta_[0])
del loo_
In [ ]:
gp_.theta_
In [ ]:
from scipy.linalg import cholesky, solve_triangular
from scipy.linalg.lapack import dtrtri
from sklearn.metrics.pairwise import pairwise_kernels
## Obtain AB vectors for the regression CCR problem
Kxx = np.asfortranarray(pairwise_kernels(X, metric=kernel, gamma=gp_.theta_[0]))
Kxx[np.diag_indices_from(Kxx)] += gp_.nugget
In [ ]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
ax.imshow(Kxx, interpolation="nearest")
In [ ]:
## Get the in-place lower triangular Cholesky decomposition (in-place works if
## the array is in fortran layout).
## K_{xx} + a I_x = C C' ; K_{zx} Q_X y = (C^{-1} K_{xz})' (C^{-1} y)
cholesky(Kxx, lower=True, overwrite_a=True)
Ci_y = solve_triangular(Kxx, y.copy('F'), lower=True, overwrite_b=True)
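In the notation of the comment above, with $\nu$ the nugget and $C$ the lower Cholesky factor computed in place, the two triangular solves implement

$$K_{xx} + \nu I = C C^{\top}, \qquad K_{zx}\,(K_{xx} + \nu I)^{-1} y = (C^{-1} K_{xz})^{\top} (C^{-1} y), \qquad \beta = (K_{xx} + \nu I)^{-1} y = C^{-\top} (C^{-1} y),$$

so the dual coefficients $\beta$ in the next cell require a second solve against $C^{\top}$, i.e. trans=1.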
In [ ]:
## Dual coefficients: a second triangular solve, against C' (trans=1), gives (K_xx + nugget I)^{-1} y
beta_ = solve_triangular(Kxx, Ci_y, lower=True, trans=1, overwrite_b=True)
plt.scatter(y, beta_)
In [ ]:
ass_ = np.concatenate(RRCM(A[0, -1], B[-1], levels=levels), axis=0)
plt.plot(ass_[:,1]-ass_[:,0])
In [ ]:
## Construct the CKRR confidence interval: RRCM
rrcm_hits_, rrcm_width_, rrcm_bounds_ = \
    _helper(y_test, A[0], B, proc=RRCM,
            levels=np.linspace(0, 0.9, num=51), parallel=parallel_)
In [ ]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
ax.imshow(rrcm_width_.T, interpolation="nearest")
In [ ]:
## Construct the GPR forecast interval
z_a = norm.ppf(1 - .5 * levels)
half_width_ = np.sqrt(MM * gp_.sigma2) * z_a[np.newaxis]
bf_bounds_ = np.stack([y_hat_ - half_width_, y_hat_ + half_width_], axis=-1)
bf_width_ = bf_bounds_[..., 1] - bf_bounds_[..., 0]
bf_hits_ = ((bf_bounds_[..., 0] <= y_test)
            & (y_test <= bf_bounds_[..., 1])).astype(float)
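For reference, and assuming (as the code above suggests) that MM * gp_.sigma2 is the GPR predictive variance at the test points, the cell above builds the symmetric Gaussian band

$$\hat{y}(x) \pm z_{1-\alpha/2}\,\sqrt{\hat{\sigma}^2 M(x)}, \qquad z_{1-\alpha/2} = \Phi^{-1}\!\left(1 - \tfrac{\alpha}{2}\right),$$

for every miscoverage level $\alpha$ in levels.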
In [ ]:
plt.plot(XX_test, (rrcm_bounds_[:, -1, 1] - y_hat_[:,0]) - (y_hat_[:,0] - rrcm_bounds_[:, -1, 0]))
In [ ]:
plt.plot(XX_test, np.abs(y_test-y_hat_))
In [ ]:
plt.plot(XX_test, rrcm_bounds_[:, 30, 1]-rrcm_bounds_[:, 30, 0])
In [ ]:
plt.plot(levels, rrcm_hits_.mean(axis=0)-(1-levels))
In [ ]:
1-levels
In [ ]:
rrcm_hits_.mean(axis=0)
In [ ]:
plt.hist(rrcm_hits_.mean(axis=0)-(1-levels))
In [ ]:
bf_hits_.mean(axis=0)
In [ ]:
plt.hist(bf_hits_.mean(axis=0)-(1-levels))
In [ ]:
j=1
plt.plot(XX_test, rrcm_bounds_[:, j, 0]-y_hat_[:, 0], c="r")
plt.plot(XX_test, rrcm_bounds_[:, j, 1]-y_hat_[:, 0], c="r")
plt.plot(XX_test, bf_bounds_[:, j, 0]-y_hat_[:, 0], c="b")
plt.plot(XX_test, bf_bounds_[:, j, 1]-y_hat_[:, 0], c="b")
# plt.plot(XX_test, y_hat_[:, 0]-y_hat_[:, 0], c="m")
plt.plot(XX_test, y_test[:, 0]-y_hat_[:, 0], c="k")
In [ ]:
plt.plot(XX_test, y_test[:, 0], c="k")
plt.plot(XX_test, y_hat_[:, 0], c="m")
Below are sample results obtained in this project. On the left are level sets of the nominal bivariate Gaussian mixture distribution used to illustrate the K-LPE algorithm. In the middle are the results of K-LPE with $K = 6$ and the Euclidean distance metric for $m = 150$ test points drawn from an equal mixture of a 2D uniform distribution and the nominal bivariate distribution; scores for the test points are based on 200 nominal training samples, and scores falling below the threshold 0.05 are declared anomalies. The dotted contour corresponds to the exact bivariate Gaussian density level set at level $\alpha = 0.05$. On the right is the empirical distribution of the test-point scores: for points drawn from the bivariate Gaussian it appears uniform, while scores for points drawn from the 2D uniform distribution cluster around zero.
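The K-LPE code itself is not part of this notebook, so the cell below is only a minimal sketch of the scoring rule described above, built on sklearn's NearestNeighbors; the helper name klpe_scores and the exact handling of the training-point distances are illustrative assumptions rather than the project's implementation.
In [ ]:
import numpy as np
from sklearn.neighbors import NearestNeighbors

def klpe_scores(X_nominal, X_test, k=6):
    ## K-NN index on the nominal (training) sample
    nn = NearestNeighbors(n_neighbors=k + 1, metric="euclidean").fit(X_nominal)
    ## K-NN distance of each training point within the nominal sample
    ## (query the sample itself and skip the zero self-distance in column 0)
    d_train = nn.kneighbors(X_nominal)[0][:, -1]
    ## K-NN distance of each test point to the nominal sample
    d_test = nn.kneighbors(X_test, n_neighbors=k)[0][:, -1]
    ## Score = empirical p-value of the test point's K-NN distance;
    ## nominal points get roughly uniform scores, anomalies score near zero
    return np.mean(d_train[np.newaxis, :] >= d_test[:, np.newaxis], axis=1)

## e.g.: scores_ = klpe_scores(X_nominal, X_test, k=6); flag scores_ < 0.05 as anomalies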
In [ ]:
import time
import os
import numpy as np
from sklearn.grid_search import ParameterGrid
from sklearn.base import clone
from sklearn.gaussian_process import GaussianProcess
from scipy.stats import norm
from joblib import Parallel, delayed
from utils.state import _save
from utils.functions_1d import f6, pressure2, heaviside
from utils.functions import gaussian
from utils.conformal import RRCM, CRR
from utils.KRR import KRR_AB
In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
In [ ]:
np.seterr(all="ignore")
random_state = np.random.RandomState(0x0B00B1E5)
In [ ]:
N, D, P = 1000, 2, 1
X = np.concatenate([random_state.normal(size=(N//4, D)) * 1 + np.array([[2, 2]]),
                    random_state.normal(size=(N//4, D)) * 2 + np.array([[-1, -2]]),
                    random_state.normal(size=(N//4, D)) * .5 + np.array([[-2, 1]]),
                    random_state.normal(size=(N//4, D)) * .75 + np.array([[2, -2]])], axis=0)
# X *= -np.log(random_state.uniform(size=(N, D)))*2.5
# W = random_state.normal(size=(D, P))
# F = random_state.normal(size=(N, P))
# X += np.dot(F, W.T)
X = X.reshape((-1, D))
In [ ]:
plt.scatter(X[:, 0], X[:, 1])
In [ ]:
from sklearn.cross_validation import train_test_split
In [ ]:
X_train, X_test = train_test_split(X, test_size=0.5, random_state=random_state)
In [ ]:
theta_ = 1
In [ ]:
from scipy.linalg import cholesky, solve_triangular
from scipy.linalg.lapack import dtrtri
from sklearn.metrics.pairwise import pairwise_kernels
## Create the kernel matrix
Kxx = pairwise_kernels(X_train, metric="rbf", gamma=theta_)
In [ ]:
Kxz = pairwise_kernels(X_train, X_test, metric="rbf", gamma=theta_)
delta_ = 1 + Kxz.sum(axis=0, keepdims=True) - Kxx.sum(axis=1, keepdims=True) - Kxz
pvalue_ = (np.sum(delta_ >= 0, axis=0) + 1) / (Kxx.shape[0] + 1.0)
# delta_ = (Kxx.sum(axis=1, keepdims=True) + Kxz) - (1 + Kxz.sum(axis=0, keepdims=True))
# pvalue_ = np.mean(delta_ >= 0, axis=0)
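The vectorized expression above is a conformal p-value based on a kernel-density typicality score: for the RBF kernel with $k(u, u) = 1$, the (unnormalized) density of a point $u$ in the augmented sample $\{x_1, \dots, x_n, z\}$ is $\hat{d}(u) = \sum_{j=1}^{n} k(u, x_j) + k(u, z)$, and since delta_[i, m] equals $\hat{d}(z_m) - \hat{d}(x_i)$ for the $m$-th test point,

$$p(z_m) = \frac{\#\{\, i : \hat{d}(x_i) \le \hat{d}(z_m) \,\} + 1}{n + 1}.$$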
In [ ]:
np.sum(pvalue_ <= 0.05)
In [ ]:
plt.scatter(X_train[:, 0], X_train[:, 1], c="black", alpha=0.5, lw=0,)
plt.scatter(X_test[:, 0], X_test[:, 1], s=50*(1-pvalue_), lw=0)
plt.scatter(X_test[pvalue_ < 0.05, 0], X_test[pvalue_ < 0.05, 1], s=100, c="m", lw=0)
In [ ]:
plt.imshow(Kxx, interpolation="nearest")
CKDE vs. ocSVM
In [ ]:
X_train, X_test = train_test_split(X, test_size=0.25, random_state=random_state)
In [ ]:
theta_, kernel, alpha = 1.0, "rbf", 0.250
kwargs = dict(gamma=theta_)
Kxx = pairwise_kernels(X_train, metric=kernel, **kwargs)
Kxz = pairwise_kernels(X_train, X_test, metric=kernel, **kwargs)
delta_ = 1 + Kxz.sum(axis=0, keepdims=True) - Kxx.sum(axis=1, keepdims=True) - Kxz
pvalue_ = (np.sum(delta_ >= 0, axis=0) + 1) / (Kxx.shape[0] + 1.0)
In [ ]:
from sklearn import svm
clf = svm.OneClassSVM(nu=alpha, kernel=kernel, gamma=theta_).fit(X_train)
ocsvm = clf.predict(X_test) # decision_function(X_test)[:,0]
In [ ]:
np.mean(ocsvm>0), np.mean(pvalue_<alpha)
In [ ]:
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(121)
ax.scatter(X_test[:,0], X_test[:,1], c=pvalue_>alpha, cmap=plt.cm.coolwarm_r, lw=0)
ax.set_title("$\\alpha=%g$ Conformal KDE (%s, $\\theta=%g$)"%(alpha, kernel, theta_,))
ax = fig.add_subplot(122)
ax.scatter(X_test[:,0], X_test[:,1], c=ocsvm, cmap=plt.cm.coolwarm_r, lw=0)
ax.set_title("ocSVM (%s, $\\theta=%g, \\nu=%g$)"%(kernel, theta_, alpha,))
Contour plot
In [ ]:
mesh_ = np.meshgrid(*(2*[np.linspace(-10, 10, num=151)]))
X_test_ = np.stack(mesh_, axis=-1).reshape((-1, 2))
In [ ]:
# theta_, kernel, alpha = 1.0, "rbf", 0.250
# kwargs = dict(gamma=theta_)
Kxx = pairwise_kernels(X_train, metric=kernel, **kwargs)
Kxz = pairwise_kernels(X_train, X_test_, metric=kernel, **kwargs)
delta_ = 1 + Kxz.sum(axis=0, keepdims=True) - Kxx.sum(axis=1, keepdims=True) - Kxz
pvalue_ = (np.sum(delta_ >= 0, axis=0) + 1) / (Kxx.shape[0] + 1.0)
In [ ]:
from sklearn import svm
# clf = svm.OneClassSVM(nu=alpha, kernel=kernel, gamma=theta_).fit(X_train)
ocsvm = clf.decision_function(X_test_).reshape(mesh_[0].shape)
In [ ]:
eta_ocsvm = np.zeros((X_train.shape[0], 1), dtype=float)
## dual_coef_ has shape (1, n_SV): flatten it before scattering into the column
eta_ocsvm[clf.support_, 0] = clf.dual_coef_.ravel()
In [ ]:
svmcm = np.apply_along_axis(lambda z, Z: np.mean(Z >= z[np.newaxis], axis=0),
                            1, eta_ocsvm, eta_ocsvm)
plt.scatter(X_train[:, 0], X_train[:, 1], lw=0,
            c=np.array(["b", "r"])[(svmcm[:, 0] < alpha).astype(int)])
In [ ]:
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(121)
ax.contourf(mesh_[0], mesh_[1], ocsvm.reshape(mesh_[0].shape),
            levels=np.linspace(ocsvm.min(), ocsvm.max(), num=51), cmap=plt.cm.coolwarm)
# ax.scatter(X_test_[:, 0], X_test_[:, 1], c="m", s=20*(ocsvm < 0), lw=0)
ax.scatter(X_train[:, 0], X_train[:, 1], c="k", s=5, lw=0)
ax.set_title("ocSVM")
ax = fig.add_subplot(122)
ax.contourf(mesh_[0], mesh_[1], pvalue_.reshape(mesh_[0].shape),
            levels=np.linspace(0, 1, num=51), cmap=plt.cm.coolwarm)
# ax.scatter(X_test_[:, 0], X_test_[:, 1], c="m", s=20*(pvalue_ < alpha), lw=0)
ax.scatter(X_train[:, 0], X_train[:, 1], c="k", s=5, lw=0)
ax.set_title("CKDE")
In [ ]: