Conformal Kernel Distribution Embedding

Conformal Anomaly Detector via RBF Kernel Embddeing

Below are shown sample results obtained in this project. On left are level sets of the nominal bivariate Gaussian mixture distribution used to illustrate the K-LPE algorithm. In the middle are results of K-LPE with K= 6 and Euclidean distance metric for $m = 150$ test points drawn from an equal mixture of 2D uniform and the (nominal) bivariate distributions. Scores for the test points are based on 200 nominal training samples. Scores falling below a threshold level 0.05 are declared as anomalies. The dotted contour corresponds to the exact bivariate Gaussian density level set at level alpha= 0.05. On right is the empirical distribution of the test point scores associated with the bivariate Gaussian that appears to be uniform while scores for the test points drawn from 2D uniform distribution cluster around zero.



In [ ]:

    
import time
import os

import numpy as np

from sklearn.grid_search import ParameterGrid
from sklearn.base import clone

from sklearn.gaussian_process import GaussianProcess

from scipy.stats import norm
from joblib import Parallel, delayed

from utils.state import _save
from utils.functions_1d import f6, pressure2, heaviside
from utils.functions import gaussian

from utils.conformal import RRCM, CRR
from utils.KRR import KRR_AB



In [ ]:

    
%matplotlib inline
import matplotlib.pyplot as plt



In [ ]:

    
np.seterr(all="ignore")
random_state = np.random.RandomState(0x0B00B1E5)



In [ ]:

    
N, D, P = 1000, 2, 1
X = np.concatenate([
                    random_state.normal(size=(N//4, D))*1 + np.array([[2, 2]]),
                    random_state.normal(size=(N//4, D))*.5 + np.array([[-2, 1]]),
                    random_state.normal(size=(N//4, D))*.75 + np.array([[2, -2]]),
                    random_state.normal(size=(N//4, D))*2 + np.array([[-1, -2]]),
                   ], axis=0)
X = X.reshape((-1, D))



In [ ]:

    
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111)
for i in range(4)[::-1]:
    X_ = X[(N//4)*i:(N//4)*(i+1)]
    ax.scatter(X_[:, 0], X_[:, 1], c="rbgk"[i], lw=0)
ax.set_ylabel("$x_2$")
ax.set_xlabel("$x_1$")
ax.set_title("Toy example")
ax.set_xlim(-5, 5) ; ax.set_ylim(-5, 5)
# ax.set_xlim(-10, 10) ; ax.set_ylim(-10, 10)
fig.savefig("../report/defense/toy_ckde.pdf", dpi=120)
plt.close()

train test splitting



In [ ]:

    
from sklearn.cross_validation import train_test_split
X_train, X_test = train_test_split(X, test_size=0.25,  random_state=random_state)



In [ ]:

    
theta_ = 1



In [ ]:

    
from scipy.linalg import cholesky, solve_triangular
from scipy.linalg.lapack import dtrtri
from sklearn.metrics.pairwise import pairwise_kernels
## Create the kernel matrix
Kxx = pairwise_kernels(X_train, metric="rbf", gamma=theta_)



In [ ]:

    
Kxz = pairwise_kernels(X_train, X_test, metric="rbf", gamma=theta_)
delta_ = 1 + Kxz.sum(axis=0, keepdims=True) - Kxx.sum(axis=1, keepdims=True) - Kxz
pvalue_ = (np.sum(delta_ >= 0, axis=0) + 1) / (Kxx.shape[0] + 1.0)
# delta_ = (Kxx.sum(axis=1, keepdims=True) + Kxz) - (1 + Kxz.sum(axis=0, keepdims=True))
# pvalue_ = np.mean(delta_ >= 0, axis=0)



In [ ]:

    
np.mean(pvalue_ <= 0.25)



In [ ]:

    
plt.scatter(X_train[:, 0], X_train[:, 1], c="black", alpha=0.5, lw=0,)
plt.scatter(X_test[:, 0], X_test[:, 1], s=50*(1-pvalue_), lw=0)
plt.scatter(X_test[pvalue_ < 0.05, 0], X_test[pvalue_ < 0.05, 1], s=100, c="m", lw=0)



In [ ]:

    
plt.imshow(Kxx, interpolation="nearest")

CKDE vs. ocSVM



In [ ]:

    
X_train, X_test = train_test_split(X, test_size=0.25,  random_state=random_state)



In [ ]:

    
theta_, kernel, alpha = 1.0, "rbf", 0.250
kwargs = dict(gamma=theta_)

Kxx = pairwise_kernels(X_train, metric=kernel, **kwargs)
Kxz = pairwise_kernels(X_train, X_test, metric=kernel, **kwargs)
delta_ = 1 + Kxz.sum(axis=0, keepdims=True) - Kxx.sum(axis=1, keepdims=True) - Kxz
pvalue_ = (np.sum(delta_ >= 0, axis=0) + 1) / (Kxx.shape[0] + 1.0)



In [ ]:

    
from sklearn import svm
clf = svm.OneClassSVM(nu=alpha, kernel=kernel, gamma=theta_).fit(X_train)
ocsvm = clf.predict(X_test) # decision_function(X_test)[:,0]



In [ ]:

    
np.mean(ocsvm>0), np.mean(pvalue_<alpha)



In [ ]:

    
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(121)
ax.scatter(X_test[:,0], X_test[:,1], c=pvalue_>alpha, cmap=plt.cm.coolwarm_r, lw=0)
ax.set_title("$\\alpha=%g$ Conformal KDE (%s, $\\theta=%g$)"%(alpha, kernel, theta_,))

ax = fig.add_subplot(122)
ax.scatter(X_test[:,0], X_test[:,1], c=ocsvm, cmap=plt.cm.coolwarm_r, lw=0)
ax.set_title("ocSVM (%s, $\\theta=%g, \\nu=%g$)"%(kernel, theta_, alpha,))

Contour plot



In [ ]:

    
mesh_ = np.meshgrid(*(2*[np.linspace(-10, 10, num=151)]))
X_test_ = np.stack(mesh_, axis=-1).reshape((-1, 2))



In [ ]:

    
theta_, kernel, alpha = 2.0, "rbf", 0.5
kwargs = dict(gamma=theta_)

Kxx = pairwise_kernels(X_train, metric=kernel, **kwargs)
Kxz = pairwise_kernels(X_train, X_test_, metric=kernel, **kwargs)
delta_ = 1 + Kxz.sum(axis=0, keepdims=True) - Kxx.sum(axis=1, keepdims=True) - Kxz
pvalue_ = (np.sum(delta_ >= 0, axis=0) + 1) / (Kxx.shape[0] + 1.0)



In [ ]:

    
from sklearn import svm
clf = svm.OneClassSVM(nu=alpha, kernel=kernel, gamma=theta_).fit(X_train)
ocsvm = np.exp(np.minimum(clf.decision_function(X_test_), 0)).reshape(mesh_[0].shape)



In [ ]:

    
eta_ocsvm = np.zeros((X_train.shape[0], 1), dtype=float)
eta_ocsvm[clf.support_, 0] = clf.dual_coef_



In [ ]:

    
svmcm = np.apply_along_axis(lambda z, Z: np.mean(Z>=z[np.newaxis], axis=0),
                            1, eta_ocsvm, eta_ocsvm)
plt.scatter(X_train[:, 0], X_train[:, 1], lw=0,
            c=np.array(["b","r"])[(svmcm[:,0]<alpha).astype(int)])



In [ ]:

    
plt.hist(np.exp(ocsvm.reshape(-1)))



In [ ]:

    
fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(111)
ax.contourf(mesh_[0], mesh_[1], ocsvm.reshape(mesh_[0].shape),
            levels=np.linspace(ocsvm.min(), ocsvm.max(), num=51), cmap=plt.cm.coolwarm_r)
# ax.scatter(X_test_[:, 0], X_test_[:, 1], c="m", s=20*(ocsvm < 0), lw=0)
ax.scatter(X_train[:, 0], X_train[:, 1], c="k", s=5, lw=0)
ax.set_title("ocSVM (%s, $\\theta=%g, \\nu=%g$)"%(kernel, theta_, alpha,))
ax.set_ylabel("$x_2$") ; ax.set_xlabel("$x_1$")
ax.set_xlim(-5, 5) ; ax.set_ylim(-5, 5)
fig.savefig("../report/defense/ocSVM.pdf", dpi=120)
plt.close()



In [ ]:

    
fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(111)
ax.contourf(mesh_[0], mesh_[1], pvalue_.reshape(mesh_[0].shape),
            levels=np.linspace(0, 1, num=51), cmap=plt.cm.coolwarm_r)
# ax.scatter(X_test_[:, 0], X_test_[:, 1], c="m", s=20*(pvalue_ < alpha), lw=0)
ax.scatter(X_train[:, 0], X_train[:, 1], c="k", s=5, lw=0)
ax.set_title("Conformal KDE (%s, $\\theta=%g$)"%(kernel, theta_,))
ax.set_ylabel("$x_2$") ; ax.set_xlabel("$x_1$")
ax.set_xlim(-5, 5) ; ax.set_ylim(-5, 5)
fig.savefig("../report/defense/ckde.pdf", dpi=120)
plt.close()



In [ ]:

    
kernel = "laplacian"
kwargs = dict(gamma=theta_)

Kxx = pairwise_kernels(X_train, metric=kernel, **kwargs)
Kxz = pairwise_kernels(X_train, X_test_, metric=kernel, **kwargs)
delta_ = 1 + Kxz.sum(axis=0, keepdims=True) - Kxx.sum(axis=1, keepdims=True) - Kxz
pvalue_ = (np.sum(delta_ >= 0, axis=0) + 1) / (Kxx.shape[0] + 1.0)
fig = plt.figure(figsize=(6, 6))
ax = fig.add_subplot(111)
ax.contourf(mesh_[0], mesh_[1], pvalue_.reshape(mesh_[0].shape),
            levels=np.linspace(0, 1, num=51), cmap=plt.cm.coolwarm_r)
# ax.scatter(X_test_[:, 0], X_test_[:, 1], c="m", s=20*(pvalue_ < alpha), lw=0)
ax.scatter(X_train[:, 0], X_train[:, 1], c="k", s=5, lw=0)
ax.set_title("Conformal KDE (%s, $\\theta=%g$)"%(kernel, theta_,))
ax.set_ylabel("$x_2$") ; ax.set_xlabel("$x_1$")
ax.set_xlim(-5, 5) ; ax.set_ylim(-5, 5)
fig.savefig("../report/defense/ckde-lap.pdf", dpi=120)
plt.close()

Kenrel embeddings of timeseries



In [ ]: