In [ ]:
import time
import os

import numpy as np

from sklearn.grid_search import ParameterGrid
from sklearn.base import clone

from sklearn.gaussian_process import GaussianProcess

from scipy.stats import norm
from joblib import Parallel, delayed

from utils.state import _save
from utils.functions_1d import f6, pressure2, heaviside
from utils.functions import gaussian

from utils.conformal import RRCM, CRR
from utils.KRR import KRR_AB

In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt

In [ ]:
np.seterr(all="ignore")

In [ ]:
n_jobs, verbose = -1, 0
parallel_ = Parallel(n_jobs=n_jobs, verbose=verbose)

## The levels and the random state
seeds_ = [0xB5066DBC, 0x98E8576F, 0x3161F88E, 0x08CCA9D9,]
random_state = np.random.RandomState(seeds_[3])

levels = np.linspace(0, 0.9, num=51) #np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]

## helpers
def _helper(y, A, B, proc=RRCM, levels=levels, parallel=None, n_jobs=1, verbose=0):
    if not isinstance(parallel, Parallel):
        parallel = Parallel(n_jobs=n_jobs, verbose=verbose)

    regions = parallel(delayed(proc)(A[k], B[k], levels=levels)
                       for k in range(y.shape[0]))

    ## Coverage indicator: does any interval of the region contain the target?
    hits_ = np.asarray(
        [[np.any(((int_[:, 0] <= target) & (target <= int_[:, 1]))).astype(float)
          for int_ in region]
         for target, region in zip(y, regions)])

    ## Total width of the (possibly disconnected) conformal region
    width_ = np.asarray(
        [[np.sum(int_[:, 1] - int_[:, 0]) for int_ in region] for region in regions])

    ## Outer envelope of the region: [lowest lower endpoint, highest upper endpoint]
    bounds_ = np.asarray(
        [[[int_[:, 0].min(), int_[:, 1].max()] for int_ in region] for region in regions])

    return hits_, width_, bounds_

## Define the grid

true_nugget = [1e-6, 1e-1,]
test_functions = [f6, heaviside, pressure2]

grid_ = ParameterGrid(dict(func=test_functions,
                           size=[25, 50, 100, 200, 400, 600, 800, 1000, 1200, 1400, 1600,],
                           nugget=true_nugget,
                           noise=true_nugget,
                           theta0=[1e+1, 1e+2, 1e+3, "auto"]))
## Initialize
kernel = 'rbf'
gp = GaussianProcess(beta0=0, normalize=False, corr='squared_exponential')

# Generate input
XX_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
# XX_test = np.concatenate([XX_test,  np.logspace(0, 3, num=2001).reshape((-1, 1))], axis=0)
test_ = np.s_[:XX_test.shape[0]]

In [ ]:
size_, nugget_, theta0_ = 1600, 1e-6, 100.0
func_, noise_ = f6, 1e-1

In [ ]:
# true_theta = 100.0
# func_ = lambda XX: gaussian(XX, scale=1.0, nugget=noise_, metric=kernel,
#                             random_state=random_state,
#                             gamma=true_theta)

In [ ]:
## Draw random train sample
train_ = random_state.uniform(size=(size_, 1))
X = np.sort(train_, axis=0)

## Draw f(x)
XX = np.concatenate([XX_test, X], axis=0)
yy = func_(XX)
if yy.ndim == 1:
    yy = yy.reshape((-1, 1))
if noise_ > 0 and func_ in {f6, heaviside}:
    yy += random_state.normal(size=yy.shape) * noise_

In [ ]:
## Split the pooled sample
y, y_test = np.delete(yy, test_, axis=0), yy[test_]

## Fit a GPR
gp_ = clone(gp)
gp_.nugget = nugget_
if isinstance(theta0_, float):
    gp_.theta0 = theta0_
elif theta0_ == "auto":
    gp_.thetaL, gp_.thetaU, gp_.theta0 = 1.0, 1e4, float(size_)
gp_.fit(X, y)

## Compute the A, B matrices
A, B, y_hat_, MM, loo_, A_loo, B_loo = \
    KRR_AB(X, y, XX_test, forecast=True,
           nugget=gp_.nugget, metric=kernel, gamma=gp_.theta_[0])
del loo_

In [ ]:
gp_.theta_

In [ ]:
from scipy.linalg import cholesky, solve_triangular
from scipy.linalg.lapack import dtrtri
from sklearn.metrics.pairwise import pairwise_kernels
## Build the kernel matrix for the conformal ridge regression (CRR) problem
Kxx = np.asfortranarray(pairwise_kernels(X, metric=kernel, gamma=gp_.theta_[0]))
Kxx[np.diag_indices_from(Kxx)] += gp_.nugget

In [ ]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
ax.imshow(Kxx, interpolation="nearest")

In [ ]:
## Get the in-place lower triangular Cholesky decomposition (in-place works if
##  the array is in fortran layout).
## K_{xx} + a I_x = C C' ; K_{zx} Q_X y = (C^{-1} K_{xz})' (C^{-1} y)
cholesky(Kxx, lower=True, overwrite_a=True)
Ci_y = solve_triangular(Kxx, y.copy('F'), lower=True, overwrite_b=True)

In [ ]:
## beta_ = (K + nugget I)^{-1} y: the second solve must be against C' (trans='T')
beta_ = solve_triangular(Kxx, Ci_y, lower=True, trans='T', overwrite_b=True)
plt.scatter(y, beta_)
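
The comment above says `beta_` solves the ridge system $(K_{xx} + a I)\,\beta = y$. A quick sanity check (a sketch; it recomputes the kernel because `Kxx` now holds its Cholesky factor in its lower triangle):

In [ ]:
K_check = pairwise_kernels(X, metric=kernel, gamma=gp_.theta_[0])
K_check[np.diag_indices_from(K_check)] += gp_.nugget
## Should print True: beta_ = (K + nugget I)^{-1} y
print(np.allclose(K_check.dot(beta_), y))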

In [ ]:
ass_ = np.concatenate(RRCM(A[0, -1], B[-1], levels=levels), axis=0)
plt.plot(ass_[:, 1] - ass_[:, 0])

In [ ]:
## Construct the CKRR confidence interval: RRCM
rrcm_hits_, rrcm_width_, rrcm_bounds_ = \
    _helper(y_test, A[0], B, proc=RRCM,
            levels=np.linspace(0, 0.9, num=51), parallel=parallel_)

In [ ]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
ax.imshow(rrcm_width_.T, interpolation="nearest")

In [ ]:
## Construct the GPR forecast interval
z_a = norm.ppf(1 - .5 * levels)
half_width_ = np.sqrt(MM * gp_.sigma2) * z_a[np.newaxis]
bf_bounds_ = np.stack([y_hat_ - half_width_, y_hat_ + half_width_], axis=-1)
bf_width_ = bf_bounds_[..., 1] - bf_bounds_[..., 0]
bf_hits_ = ((bf_bounds_[..., 0] <= y_test)
            & (y_test <= bf_bounds_[..., 1])).astype(float)
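
For reference, the band above is the usual Gaussian forecast interval: at miscoverage level $\alpha$ (an entry of `levels`),

$$\hat{y}(x) \;\pm\; z_{1-\alpha/2}\,\sqrt{\hat{\sigma}^2\,M(x)},$$

where $M(x)$ is the per-point predictive variance term `MM` returned by `KRR_AB` and $\hat{\sigma}^2$ is the fitted GPR variance `gp_.sigma2`.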

In [ ]:
plt.plot(XX_test, (rrcm_bounds_[:, -1, 1] - y_hat_[:, 0])
                  - (y_hat_[:, 0] - rrcm_bounds_[:, -1, 0]))

In [ ]:
plt.plot(XX_test, np.abs(y_test - y_hat_))

In [ ]:
plt.plot(XX_test, rrcm_bounds_[:, 30, 1] - rrcm_bounds_[:, 30, 0])

In [ ]:
plt.plot(levels, rrcm_hits_.mean(axis=0) - (1 - levels))

In [ ]:
1 - levels

In [ ]:
rrcm_hits_.mean(axis=0)

In [ ]:
plt.hist(rrcm_hits_.mean(axis=0) - (1 - levels))

In [ ]:
bf_hits_.mean(axis=0)

In [ ]:
plt.hist(bf_hits_.mean(axis=0) - (1 - levels))

In [ ]:
j = 1

plt.plot(XX_test, rrcm_bounds_[:, j, 0] - y_hat_[:, 0], c="r")
plt.plot(XX_test, rrcm_bounds_[:, j, 1] - y_hat_[:, 0], c="r")
plt.plot(XX_test, bf_bounds_[:, j, 0] - y_hat_[:, 0], c="b")
plt.plot(XX_test, bf_bounds_[:, j, 1] - y_hat_[:, 0], c="b")
# plt.plot(XX_test, y_hat_[:, 0] - y_hat_[:, 0], c="m")
plt.plot(XX_test, y_test[:, 0] - y_hat_[:, 0], c="k")

In [ ]:
plt.plot(XX_test, y_test[:, 0], c="k")
plt.plot(XX_test, y_hat_[:, 0], c="m")

Conformal Anomaly Detector via RBF Kernel Embedding

Below are sample results obtained in this project. On the left are level sets of the nominal bivariate Gaussian mixture distribution used to illustrate the K-LPE algorithm. In the middle are results of K-LPE with $K = 6$ and the Euclidean distance metric for $m = 150$ test points drawn from an equal mixture of a 2D uniform distribution and the nominal bivariate distribution; scores for the test points are based on 200 nominal training samples, and scores falling below the threshold 0.05 are declared anomalies. The dotted contour corresponds to the exact bivariate Gaussian density level set at level $\alpha = 0.05$. On the right is the empirical distribution of the test scores: scores of test points drawn from the bivariate Gaussian appear uniformly distributed, while scores of test points drawn from the 2D uniform distribution cluster around zero.
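
A minimal sketch of that scoring rule (a hypothetical `klpe_score`; the exact statistic behind the figures is an assumption here): each test point is scored by the fraction of nominal training points whose own K-NN distance statistic is at least as large, and scores below 0.05 are flagged.

In [ ]:
from scipy.spatial.distance import cdist

def klpe_score(X_nominal, X_test, K=6):
    ## K-NN distance of each nominal point within the nominal sample
    ##  (column K of the sorted rows: column 0 is the zero self-distance)
    g_train = np.sort(cdist(X_nominal, X_nominal), axis=1)[:, K]
    ## K-NN distance of each test point to the nominal sample
    g_test = np.sort(cdist(X_test, X_nominal), axis=1)[:, K - 1]
    ## Share of nominal statistics at least as large: roughly uniform under
    ##  the nominal law, near zero for off-distribution points
    return np.mean(g_train[np.newaxis, :] >= g_test[:, np.newaxis], axis=1)

## anomalies_ = klpe_score(X_nominal, X_test, K=6) <= 0.05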


In [ ]:
np.seterr(all="ignore")
random_state = np.random.RandomState(0x0B00B1E5)

In [ ]:
N, D, P = 1000, 2, 1
## Draw a four-component Gaussian mixture in the plane
X = np.concatenate([random_state.normal(size=(N//4, D))*1 + np.array([[2, 2]]),
                    random_state.normal(size=(N//4, D))*2 + np.array([[-1, -2]]),
                    random_state.normal(size=(N//4, D))*.5 + np.array([[-2, 1]]),
                    random_state.normal(size=(N//4, D))*.75 + np.array([[2, -2]])], axis=0)
# X *= -np.log(random_state.uniform(size=(N, D)))*2.5

# W = random_state.normal(size=(D, P))
# F = random_state.normal(size=(N, P))
# X += np.dot(F, W.T)

X = X.reshape((-1, D))

In [ ]:
plt.scatter(X[:, 0], X[:, 1])

In [ ]:
from sklearn.cross_validation import train_test_split

In [ ]:
X_train, X_test = train_test_split(X, test_size=0.5, random_state=random_state)

In [ ]:
theta_ = 1

In [ ]:
from scipy.linalg import cholesky, solve_triangular
from scipy.linalg.lapack import dtrtri
from sklearn.metrics.pairwise import pairwise_kernels
## Create the kernel matrix
Kxx = pairwise_kernels(X_train, metric="rbf", gamma=theta_)

In [ ]:
Kxz = pairwise_kernels(X_train, X_test, metric="rbf", gamma=theta_)
delta_ = 1 + Kxz.sum(axis=0, keepdims=True) - Kxx.sum(axis=1, keepdims=True) - Kxz
pvalue_ = (np.sum(delta_ >= 0, axis=0) + 1) / (Kxx.shape[0] + 1.0)
# delta_ = (Kxx.sum(axis=1, keepdims=True) + Kxz) - (1 + Kxz.sum(axis=0, keepdims=True))
# pvalue_ = np.mean(delta_ >= 0, axis=0)
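
In words: with the conformity score $s(u) = \sum_{v} k(u, v)$ taken over the augmented sample $\{x_1, \dots, x_n, z\}$ (a kernel-density-style typicality measure, using $k(u, u) = 1$ for the RBF kernel), the cell computes for each test point $z$ the smoothed conformal p-value

$$p(z) = \frac{\#\{i : s(z) \ge s(x_i)\} + 1}{n + 1},$$

so a small $p(z)$ means $z$ sits in a lower-density region than almost every training point.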

In [ ]:
np.sum(pvalue_ <= 0.05)

In [ ]:
plt.scatter(X_train[:, 0], X_train[:, 1], c="black", alpha=0.5, lw=0,)
plt.scatter(X_test[:, 0], X_test[:, 1], s=50*(1-pvalue_), lw=0)
plt.scatter(X_test[pvalue_ < 0.05, 0], X_test[pvalue_ < 0.05, 1], s=100, c="m", lw=0)

In [ ]:
plt.imshow(Kxx, interpolation="nearest")

CKDE vs. ocSVM


In [ ]:
X_train, X_test = train_test_split(X, test_size=0.25, random_state=random_state)

In [ ]:
theta_, kernel, alpha = 1.0, "rbf", 0.250
kwargs = dict(gamma=theta_)

Kxx = pairwise_kernels(X_train, metric=kernel, **kwargs)
Kxz = pairwise_kernels(X_train, X_test, metric=kernel, **kwargs)
delta_ = 1 + Kxz.sum(axis=0, keepdims=True) - Kxx.sum(axis=1, keepdims=True) - Kxz
pvalue_ = (np.sum(delta_ >= 0, axis=0) + 1) / (Kxx.shape[0] + 1.0)

In [ ]:
from sklearn import svm
clf = svm.OneClassSVM(nu=alpha, kernel=kernel, gamma=theta_).fit(X_train)
ocsvm = clf.predict(X_test) # decision_function(X_test)[:,0]

In [ ]:
np.mean(ocsvm > 0), np.mean(pvalue_ < alpha)

In [ ]:
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(121)
ax.scatter(X_test[:, 0], X_test[:, 1], c=pvalue_ > alpha, cmap=plt.cm.coolwarm_r, lw=0)
ax.set_title("$\\alpha=%g$ Conformal KDE (%s, $\\theta=%g$)" % (alpha, kernel, theta_))

ax = fig.add_subplot(122)
ax.scatter(X_test[:, 0], X_test[:, 1], c=ocsvm, cmap=plt.cm.coolwarm_r, lw=0)
ax.set_title("ocSVM (%s, $\\theta=%g, \\nu=%g$)" % (kernel, theta_, alpha))

Contour plot


In [ ]:
mesh_ = np.meshgrid(*(2*[np.linspace(-10, 10, num=151)]))
X_test_ = np.stack(mesh_, axis=-1).reshape((-1, 2))

In [ ]:
# theta_, kernel, alpha = 1.0, "rbf", 0.250
# kwargs = dict(gamma=theta_)

Kxx = pairwise_kernels(X_train, metric=kernel, **kwargs)
Kxz = pairwise_kernels(X_train, X_test_, metric=kernel, **kwargs)
delta_ = 1 + Kxz.sum(axis=0, keepdims=True) - Kxx.sum(axis=1, keepdims=True) - Kxz
pvalue_ = (np.sum(delta_ >= 0, axis=0) + 1) / (Kxx.shape[0] + 1.0)

In [ ]:
from sklearn import svm
# clf = svm.OneClassSVM(nu=alpha, kernel=kernel, gamma=theta_).fit(X_train)
ocsvm = clf.decision_function(X_test_).reshape(mesh_[0].shape)

In [ ]:
## ocSVM dual coefficients as nonconformity scores (zero off the support vectors)
eta_ocsvm = np.zeros((X_train.shape[0], 1), dtype=float)
eta_ocsvm[clf.support_, 0] = clf.dual_coef_

In [ ]:
svmcm = np.apply_along_axis(lambda z, Z: np.mean(Z >= z[np.newaxis], axis=0),
                            1, eta_ocsvm, eta_ocsvm)
plt.scatter(X_train[:, 0], X_train[:, 1], lw=0,
            c=np.array(["b", "r"])[(svmcm[:, 0] < alpha).astype(int)])

In [ ]:
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(121)
ax.contourf(mesh_[0], mesh_[1], ocsvm.reshape(mesh_[0].shape),
            levels=np.linspace(ocsvm.min(), ocsvm.max(), num=51), cmap=plt.cm.coolwarm)
# ax.scatter(X_test_[:, 0], X_test_[:, 1], c="m", s=20*(ocsvm < 0), lw=0)
ax.scatter(X_train[:, 0], X_train[:, 1], c="k", s=5, lw=0)
ax.set_title("ocSVM")
ax = fig.add_subplot(122)
ax.contourf(mesh_[0], mesh_[1], pvalue_.reshape(mesh_[0].shape),
            levels=np.linspace(0, 1, num=51), cmap=plt.cm.coolwarm)
# ax.scatter(X_test_[:, 0], X_test_[:, 1], c="m", s=20*(pvalue_ < alpha), lw=0)
ax.scatter(X_train[:, 0], X_train[:, 1], c="k", s=5, lw=0)
ax.set_title("CKDE")

Kernel embeddings of time series


In [ ]: