Nemirovsky procedure



In [ ]:

    
import os, time

%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

from scipy.linalg import cholesky
from sklearn.metrics import pairwise_kernels

from scipy.linalg import inv
from joblib import Parallel, delayed

from utils.functions_1d import triangle
from utils.functions import gaussian

from utils.conformal import RRCM

from scipy.stats import norm



In [ ]:

    
levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]
levels_ = pd.Index(["%2.1f"%(100*lvl,) for lvl in levels], name="alpha")



In [ ]:

    
random_state = np.random.RandomState(0x4A08F766)



In [ ]:

    
theta0, size = 100.0, 251
metric = 'rbf' # 'laplacian'
# X = np.linspace(-1, 1, num=size).reshape((-1, 1))
X = np.linspace(-.5, 1.5, num=size).reshape((-1, 1))
yy, labels_, filenames_ = list(), list(), list()
for noise_ in [1e-6, 1e-2, 5e-2, 1e-1, 5e-1]:
    y1 = gaussian(X, random_state=random_state, metric=metric, nugget=noise_, size=(2,), gamma=theta0)
    labels_.extend(y1.shape[1]*["gaussian ($\\theta=%g, \\gamma=%g$)"%(theta0, noise_,)])
    filenames_.extend("gaussian_%02d_%g_%g"%(j, theta0, noise_,)
                       for j in range(y1.shape[1]))
    
    y2 = triangle(X) + random_state.normal(size=(X.shape[0], 2)) * noise_
    labels_.extend(y2.shape[1]*["triangle ($\\gamma=%g$)"%(noise_,)])
    filenames_.extend("triangle_%02d_%g"%(j, noise_,)
                       for j in range(y1.shape[1]))
    
    yy.append(np.concatenate([y1, y2], axis=-1))

y = np.concatenate(yy, axis=-1)



In [ ]:

    
nugget_ = 1e-6
theta_grid = np.logspace(-2.0, 5.0, num=211)

loo_sigma_, A, B, y_hat_, y_hat_loo_ = list(), list(), list(), list(), list()
for theta_ in theta_grid:
    Kxx_ = np.asfortranarray(pairwise_kernels(X, metric=metric, gamma=theta_))
    Kxx_.flat[::Kxx_.shape[0] + 1] += nugget_

    inv(Kxx_, overwrite_a=True, check_finite=False)

    Mi_ = np.diag(Kxx_).reshape((1, -1))
    sigma2_ = np.diag(np.dot(y.T, np.dot(Kxx_, y))) / Kxx_.shape[0]
    loo_sigma2_ = sigma2_.reshape((-1, 1)) / Mi_

    yQM_ = np.dot(y.T, Kxx_)
    hat_loo_ = y.T - yQM_ / Mi_
    yQM_ *= nugget_
    hat_ = y.T - yQM_
    B_ = nugget_ * Kxx_

    A.append(yQM_[:, np.newaxis] - B_[np.newaxis] * y[np.newaxis].T)
    loo_sigma_.append(np.sqrt(loo_sigma2_))
    y_hat_loo_.append(hat_loo_)
    y_hat_.append(hat_)
    B.append(B_)

A, B = np.stack(A, axis=1), np.stack(B, axis=0)
loo_sigma_ = np.stack(loo_sigma_, axis=1)
y_hat_loo_ = np.stack(y_hat_loo_, axis=1)
y_hat_ = np.stack(y_hat_, axis=1)



In [ ]:

    
## Run the conformal procedure on each column of A-B
parallel_ = Parallel(n_jobs=-1, verbose=0)
results_ = parallel_(delayed(RRCM)(A[r, t, n], B[t, n], levels=levels, n=n)
                     for r in xrange(y.shape[1])
                     for t in xrange(len(theta_grid))
                     for n in xrange(X.shape[0]))

rrcm_intervals_ = np.reshape(np.array([(res_[0, 0], res_[-1, -1])
                                       for result_ in results_ for res_ in result_]),
                             (y.shape[1], len(theta_grid), X.shape[0], len(levels), 2))



In [ ]:

    
z_a = norm.ppf(1 - .5 * levels)
gpr_hwidth_ = loo_sigma_[..., np.newaxis] * z_a.reshape((1, 1, 1, -1))
gpr_intervals_ = np.stack([y_hat_loo_[..., np.newaxis] - gpr_hwidth_,
                           y_hat_loo_[..., np.newaxis] + gpr_hwidth_], axis=-1)



In [ ]:

    
def demo_int(intervals, y, y_hat, name, points, ax, a=-1, adaptive=True, plot_gpr=True):
    colors_ = plt.cm.rainbow(np.linspace(0, 1, num=len(points)))
    ax.set_xscale("log")
    for j, p in enumerate(points):
        if adaptive:
            ax.plot(theta_grid, intervals[:, p, a, 1] - y_hat[:, p],
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, intervals[:, p, a, 0] - y_hat[:, p],
                    c=colors_[j], marker="^", markersize=3)
            ax.plot(theta_grid, y[p] - y_hat[:, p], c=colors_[j],
                    linestyle="-", label="$y_{x_{%d}} - \\hat{y}_{\\theta}(x_{%d})$"%(j, j,), alpha=0.5)
        else:
            ax.plot(theta_grid, intervals[:, p, a, 1],
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, intervals[:, p, a, 0],
                    c=colors_[j], marker="^", markersize=3)
            ax.plot(theta_grid, y_hat[:, p], c=colors_[j],
                    linestyle="-", alpha=0.75)
            ax.axhline(y=y[p], c=colors_[j], linestyle="-", label="$y_{x_{%d}}$"%(j,), alpha=0.5)
    if adaptive:
        ax.set_title("loo-prediction centered %s%% %sintervals for %d arbitrary points"%(levels_[a], name, len(points)))
    else:
        ax.set_title("%s%% %sintervals for %d arbitrary points"%(levels_[a], name, len(points)))
    ax.set_xlabel("Gaussian kernel precision $\\theta$ : $K(x,x') = \\mathtt{exp}\\{-\\theta\\|x-x'\\|^2\\}$")
    ax.set_ylabel("Target $y_x$")
    ax.legend(loc="best", ncol=2)
    ax.grid()
    return ax



In [ ]:

    
## Intervals
points_ = [110, 125, 142, 189]
j, a, adaptive = 2, -1, False

fig = plt.figure(figsize=(16, 9))
ax = demo_int(rrcm_intervals_[j], y[:, j], y_hat_loo_[j], "RRCM ",
              points_, fig.add_subplot(121), a=a, adaptive=adaptive)
ax = demo_int(gpr_intervals_[j], y[:, j], y_hat_loo_[j], "GPR ",
              points_, fig.add_subplot(122, sharey=ax, sharex=ax), a=a, adaptive=adaptive)
plt.show()



In [ ]:

    
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
ax.plot(X, y[:, j], c="b")
# ax.plot(X, y[:, j, np.newaxis] - y_hat_loo_[j, :].T)
colors_ = plt.cm.rainbow(np.linspace(0, 1, num=len(points_)))
for c, x in zip(colors_, X[points_]):
    ax.axvline(x=x, c=c, linestyle="-", marker="o")
ax.set_title("Sample path of the studied function")
ax.set_xlabel("x")
ax.set_ylabel("$y_x$")



In [ ]:

    
## Compute the running intersection over the theta axis (1)
reverse = False
if not reverse:
    top_ = np.minimum.accumulate(rrcm_intervals_[..., 1], axis=1)
    bottom_ = np.maximum.accumulate(rrcm_intervals_[..., 0], axis=1)
else:
    top_ = np.minimum.accumulate(rrcm_intervals_[:, ::-1, ..., 1], axis=1)
    bottom_ = np.maximum.accumulate(rrcm_intervals_[:, ::-1, ..., 0], axis=1)

width_ = top_ - bottom_
width_[width_ < 0] = -1

## t - b is nonincreasing, that is why the argmin of |t-b| is either +\infty
## or the point of crossing the zero level (again over the theta axis(1)).
theta_index_ = np.argmin(width_, axis=1)

## Find the largest index (theta) below which all intervals of all sample
##  points have nonempty intersection.
theta_star = np.min(theta_index_, axis=1)
if reverse:
## Since we need the largest k, for which all intervals beyond it overlap
##  tkae the `max`.
    theta_star = theta_index_.shape[1] - theta_star



In [ ]:

    
if reverse:
    plt.plot((width_[0, ::-1, 125]))
else:
    plt.plot((width_[0, :, 125]))



In [ ]:

    
## Just take the index (theta) which minimizes both effects: the 
widths_ = rrcm_intervals_[..., 1] - rrcm_intervals_[..., 0]
theta_star = np.median(np.argmin(widths_, axis=1), axis=1).astype(int)
theta_opt = np.median(theta_grid[np.argmin(widths_, axis=1)], axis=1)



In [ ]:

    
from sklearn.gaussian_process import GaussianProcess
a = 1

for j in range(y.shape[1]):
    gp = GaussianProcess(thetaL=1e-5, thetaU=1e+7, theta0=100.0, nugget=nugget_, normalize=False,
                         regr='constant', beta0=0, corr='squared_exponential').fit(X, y[:, j])
    fig = plt.figure(figsize=(6, 4))
    ax = fig.add_subplot(111)
    ax.plot(X, y[:, j], c="r")
    ax.plot(X, gp.predict(X), c="b",
            label="MLE $\\hat{\\theta}=%2.5g$"%(gp.theta_[0],))

#     theta_inx_ = theta_star[j, a]
    theta_inx_ = np.searchsorted(theta_grid, theta_opt[j, a])
    ax.plot(X, y_hat_[j, theta_inx_], c="g",
            label="C-BS-%s%% $\\hat{\\theta}=%2.5g$"%(levels_[a], theta_opt[j, a]))
    
    if labels_[j].startswith("triangle"):
        ax.plot(X, triangle(X), c="k", zorder=-99)

#     gp_ = GaussianProcess(theta0=theta_, nugget=nugget_, normalize=False,
#                           regr='constant', beta0=0, corr='squared_exponential').fit(X, y[:, j])
#     plt.plot(gp_.predict(X), c="g")
    ax.set_xlabel("$x$")
    ax.set_ylabel("$y_x$")
    ax.set_title(labels_[j])
    ax.legend(loc="best")
    fig.savefig(os.path.join("..", "report", "defense", "cbs_images",
                             filenames_[j].replace(".", ",") + ".pdf"), dpi=120)
#     plt.show()
    plt.close()
    print "Nemirovsky:\t%2.5g,\nGP:\t\t%2.5g\n"%(theta_grid[theta_inx_], gp.theta_[0])



In [ ]:



In [ ]: