In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
In [ ]:
from scipy.linalg import cholesky
from sklearn.metrics import pairwise_kernels
In [ ]:
from utils.functions import gaussian
In [ ]:
from scipy.linalg import inv
from joblib import Parallel, delayed
In [ ]:
import os, time
In [ ]:
def mkdirifnot(path):
    """Create directory `path` if it does not exist yet and return `path`.

    Uses EAFP (attempt the mkdir, ignore "already exists") instead of the
    original exists()-then-mkdir sequence, which was racy: another process
    could create the directory between the check and the call.
    """
    import errno  # local import keeps the file-level import cells untouched
    try:
        os.mkdir(path)
    except OSError as err:
        # Only swallow "already exists"; propagate every other failure.
        if err.errno != errno.EEXIST:
            raise
    return path
In [ ]:
# Experiment root and its "output" subfolder (created on first use via mkdirifnot).
BASE_PATH = "./conf_prec_sel"
OUTPUT_PATH = mkdirifnot(os.path.join(BASE_PATH, "output"))
In [ ]:
# Miscoverage levels alpha ordered from the widest (25%) to the tightest (1%)
# interval, with a matching percent-formatted label index for the figures.
levels = np.asanyarray([0.25, 0.10, 0.05, 0.01])
levels_ = pd.Index(["%2.1f" % (lvl * 100,) for lvl in levels], name="alpha")
In [ ]:
def loo_intervals(X, y, nugget=1.0, proc=None, levels=(0.05,), n_jobs=1, metric=None, **kwargs):
    """Leave-one-out conformal interval hulls for a kernel ridge regressor.

    Parameters
    ----------
    X, y : design matrix and (column-stacked) targets.
    nugget : diagonal regularizer added to the kernel matrix.
    proc : conformal procedure called as proc(A[:, n], B[:, n], levels=..., n=n).
    levels : miscoverage levels forwarded to `proc`.
    n_jobs : joblib parallelism for the per-sample calls.
    metric, **kwargs : forwarded to sklearn's pairwise_kernels.

    Returns an array of (lower, upper) hull endpoints per sample and level.
    """
    Kxx = np.asfortranarray(pairwise_kernels(X, metric=metric, **kwargs))
    Kxx.flat[::Kxx.shape[0] + 1] += nugget
    # NOTE(review): relies on overwrite_a=True inverting Kxx *in place*
    # (hence the asfortranarray above); the returned inverse is discarded.
    inv(Kxx, overwrite_a=True, check_finite=False)
    ## Get the loo-residuals A and B matrices
    A, B = np.dot(Kxx, y) - Kxx * y.T, Kxx
    ## Run the conformal procedure on each column of A-B
    d_proc = delayed(proc)
    parallel_ = Parallel(n_jobs=n_jobs, verbose=0)
    # `range` instead of Py2-only `xrange`: identical iteration, Py2/Py3 compatible.
    results_ = parallel_(d_proc(A[:, n], B[:, n],
                                levels=levels, n=n)
                         for n in range(X.shape[0]))
    ## Return the convex hull of each confidence region
    return np.array([[(res_[0, 0], res_[-1, -1]) for res_ in result_]
                     for result_ in results_])
In [ ]:
from sklearn.cross_validation import train_test_split  # NOTE(review): renamed to sklearn.model_selection in sklearn >= 0.18
# Fixed seed so sampled paths and noise are reproducible across runs.
random_state = np.random.RandomState(0x4A04B766)
In [ ]:
from utils.functions_1d import triangle
In [ ]:
# Triangle test function on [-0.5, 1.5], observed through 5 independent
# Gaussian-noise corrupted draws.
theta0, noise, size = 100.0, 5e-1, 251
metric = 'rbf'  # alternative: 'laplacian'
# A GP sample path could be used instead of the triangle:
# X = np.linspace(-1, 1, num=size).reshape((-1, 1))
# y = gaussian(X, random_state=random_state, metric=metric, nugget=noise, size=(1,), gamma=theta0)
X = np.linspace(-.5, 1.5, num=size).reshape((-1, 1))
clean_ = triangle(X)
y = clean_ + noise * random_state.normal(size=(clean_.shape[0], 5))
Compute the GPR and conformal loo-interval widths
In [ ]:
np.allclose(Mi_.reshape((1, -1)), Mi_[np.newaxis])
In [ ]:
from utils.conformal import RRCM
# Diagonal regularizer and the log-spaced grid of kernel precisions theta.
nugget_ = 1e-6
theta_grid = np.logspace(-2.0, 5.0, num=71)
# theta_grid = np.logspace(-5.0, 7.0, num=73)
loo_sigma_, A, B, y_hat_ = list(), list(), list(), list()
for theta_ in theta_grid:
    # Build K + nugget*I and invert it in place (Fortran order lets LAPACK
    # overwrite the buffer; the function's return value is discarded).
    Kxx_ = np.asfortranarray(pairwise_kernels(X, metric=metric, gamma=theta_))
    Kxx_.flat[::Kxx_.shape[0] + 1] += nugget_
    inv(Kxx_, overwrite_a=True, check_finite=False)
    # Mi_: diagonal of the inverse, used as the per-sample normalizer below.
    Mi_ = np.diag(Kxx_).reshape((1, -1))
    if True:
        # Normalized (leave-one-out) variant: divide by the inverse diagonal.
        yQM_ = np.dot(y.T, Kxx_) / Mi_
        B_ = Kxx_ / Mi_
        A_ = yQM_[:, np.newaxis] - B_[np.newaxis] * y[np.newaxis].T
        hat_ = y.T - yQM_
    else:
        # Unnormalized variant, kept for comparison (dead branch by design).
        yQM_ = np.dot(y.T, Kxx_)
        B_ = Kxx_
        A_ = yQM_[:, np.newaxis] - B_[np.newaxis] * y[np.newaxis].T
        hat_ = y.T - nugget_ * yQM_
    # Per-draw variance estimate y' Q y / n, normalized per sample by Mi_.
    sigma2_ = np.diag(np.dot(y.T, np.dot(Kxx_, y))) / Kxx_.shape[0]
    loo_sigma_.append(sigma2_.reshape((-1, 1)) / Mi_)
    A.append(A_)
    B.append(B_)
    y_hat_.append(hat_)  # y_hat_.append(y.T - nugget_ * np.dot(y.T, Kxx_))
# Stack over the theta axis: A becomes (draw, theta, ...), B (theta, ...).
A, B = np.stack(A, axis=1), np.stack(B, axis=0)
loo_sigma_ = np.sqrt(np.stack(loo_sigma_, axis=1))  # standard deviations from here on
y_hat_ = np.stack(y_hat_, axis=1)
In [ ]:
# plt.plot(y - nugget_ * np.dot(Kxx_, y))
# Inspect the LOO predictions of the first draw at grid index 69
# (near the top of the 71-point theta grid).
plt.plot(y_hat_[0, 69].T);
In [ ]:
# Cross-check A against a direct per-column computation of the LOO residual
# matrix; uses Kxx_ as left over from the *last* theta of the loop above.
for j in range(y.shape[1]):
    MM_ = 1.0 / np.diag(Kxx_).reshape((-1, 1))
    AA_ = MM_ * (np.dot(Kxx_, y[:, j, np.newaxis]) - Kxx_ * y[:, j, np.newaxis].T)
    assert np.allclose(A[j, -1], AA_.T)
In [ ]:
## Run the conformal procedure on each column of A-B
# One RRCM task per (draw r, precision t, sample n) triple, fanned out
# over all cores.  `range` replaces Py2-only `xrange` (same iteration,
# Python 2/3 compatible).
parallel_ = Parallel(n_jobs=-1, verbose=0)
results_ = parallel_(delayed(RRCM)(A[r, t, n], B[t, n], levels=levels, n=n)
                     for r in range(y.shape[1])
                     for t in range(len(theta_grid))
                     for n in range(X.shape[0]))
In [ ]:
# Convex hull [lower, upper] of every confidence region, reshaped back to
# (draw, theta, sample, level, endpoint).
intervals_ = np.reshape(np.array([(res_[0, 0], res_[-1, -1]) for result_ in results_ for res_ in result_]),
                        (y.shape[1], len(theta_grid), X.shape[0], len(levels), 2))
In [ ]:
# Empirical coverage check: the fraction of samples inside their interval
# should be close to 1 - alpha for every level; report the worst deviation.
y_ = y.T.reshape(y.shape[1], 1, y.shape[0], 1)
mask_ = (intervals_[..., 0] <= y_) & (y_ <= intervals_[..., 1])
# Parenthesized print: identical output under Python 2 and 3 for one argument.
print(np.max(np.abs(1 - np.mean(mask_, axis=-2) - levels), axis=1).max(axis=0))
Create the color scheme
In [ ]:
# Indices of the sample points highlighted in the figures below.
points_ = [110, 125, 142, 189]
# [90, 101, 123, 124, 126, 142, 159] # [180, 203, 246, 249, 252, 285, 318] #[50, 100, 150, 200]
# One rainbow colour per highlighted point.
colors_ = plt.cm.rainbow(np.linspace(0, 1, num=len(points_)))
# Figure-name template tri_<size>_<noise>_<nugget>_<kind>.pdf ("%%s" survives as "%s").
filename_template_ = "./tri_%d_%g_%g_%%s.pdf"%(size, noise, nugget_,)
Compare the summarized widths.
In [ ]:
# Sample path of the first noisy draw, with the highlighted points marked.
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
ax.plot(X, y[:, 0], c="b")
for j, p in enumerate(points_):
    ax.axvline(x=X[p], c=colors_[j], linestyle="-", marker="o")
ax.set_title("Sample path of the studied function")
ax.set_xlabel("x")
ax.set_ylabel("$y_x$")
fig.savefig(filename_template_%("sample",), dpi=120)
plt.show()
In [ ]:
from scipy.stats import norm
def demo_gpr(ax, a=-1, adaptive=True):
    """Plot the +/- z_alpha * sigma GPR LOO-bands at `points_` against theta.

    Parameters
    ----------
    ax : matplotlib axes to draw on (returned for chaining).
    a : index into the global `levels` selecting the miscoverage level.
    adaptive : if True, plot bands centered at zero together with the
        prediction residual; otherwise plot the absolute bands and the
        true target as a horizontal line.

    Reads the globals points_, colors_, theta_grid, loo_sigma_, y_hat_,
    y, levels and levels_.
    """
    ax.set_xscale("log")
    # z-quantile of the selected level: loop-invariant, so computed once
    # (the original recomputed the full ppf vector for every point).
    z_a = norm.ppf(1 - .5 * levels)[a]
    for j, p in enumerate(points_):
        if adaptive:
            ax.plot(theta_grid, loo_sigma_[0, :, p] * z_a,
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, -loo_sigma_[0, :, p] * z_a,
                    c=colors_[j], marker="^", markersize=3)
            ax.plot(theta_grid, y[p, 0] - y_hat_[0, :, p], c=colors_[j],
                    linestyle="-", label="$y_{x_{%d}} - \\hat{y}_{\\theta}(x_{%d})$"%(j, j,), alpha=0.5)
        else:
            ax.plot(theta_grid, loo_sigma_[0, :, p] * z_a + y_hat_[0, :, p],
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, -loo_sigma_[0, :, p] * z_a + y_hat_[0, :, p],
                    c=colors_[j], marker="^", markersize=3)
            # Absolute-view reference line at the true target (placement
            # inferred: only meaningful in the non-centered mode).
            ax.axhline(y=y[p, 0], c=colors_[j], linestyle="-", label="$y_{x_{%d}}$"%(j,), alpha=0.5)
    if adaptive:
        ax.set_title("prediction-centered %s%% GPR LOO-intervals for %d arbitrary points"%(levels_[a], len(points_)))
    else:
        ax.set_title("%s%% GPR LOO-intervals for %d arbitrary points"%(levels_[a], len(points_)))
    ax.set_xlabel("Gaussian kernel precision $\\theta$ : $K(x,x') = \\mathtt{exp}\\{-\\theta\\|x-x'\\|^2\\}$")
    ax.set_ylabel("Target $y_x$")
    ax.legend(loc="best", ncol=2)
    ax.grid()
    return ax
In [ ]:
from scipy.stats import norm
def demo_conf(ax, a=-1, adaptive=True, plot_gpr=True):
    """Draw the RRCM LOO-intervals at the highlighted sample points vs. theta.

    `a` indexes the global `levels`; with `adaptive` the bands are re-centred
    at the prediction.  Reads the globals points_, colors_, theta_grid,
    intervals_, y_hat_, y and levels_.  Returns `ax`.
    """
    ax.set_xscale("log")
    for idx, pt in enumerate(points_):
        upper_ = intervals_[0, :, pt, a, 1]
        lower_ = intervals_[0, :, pt, a, 0]
        if adaptive:
            center_ = y_hat_[0, :, pt]
            ax.plot(theta_grid, upper_ - center_, c=colors_[idx], marker="v", markersize=3)
            ax.plot(theta_grid, lower_ - center_, c=colors_[idx], marker="^", markersize=3)
            ax.plot(theta_grid, y[pt, 0] - center_, c=colors_[idx], linestyle="-",
                    label="$y_{x_{%d}} - \\hat{y}_{\\theta}(x_{%d})$"%(idx, idx,), alpha=0.5)
        else:
            ax.plot(theta_grid, upper_, c=colors_[idx], marker="v", markersize=3)
            ax.plot(theta_grid, lower_, c=colors_[idx], marker="^", markersize=3)
            ax.axhline(y=y[pt, 0], c=colors_[idx], linestyle="-",
                       label="$y_{x_{%d}}$"%(idx,), alpha=0.5)
    title_ = "%s%% RRCM LOO-intervals for %d arbitrary points"%(levels_[a], len(points_))
    if adaptive:
        title_ = "prediction-centered " + title_
    ax.set_title(title_)
    ax.set_xlabel("Gaussian kernel precision $\\theta$ : $K(x,x') = \\mathtt{exp}\\{-\\theta\\|x-x'\\|^2\\}$")
    ax.set_ylabel("Target $y_x$")
    ax.legend(loc="best", ncol=2)
    ax.grid()
    return ax
In [ ]:
# Side-by-side comparison: RRCM (left) vs GPR (right) intervals at the 5%
# level (a=2 into the reversed `levels`), absolute view, shared axes.
a, adaptive = 2, False
fig = plt.figure(figsize=(16, 9))
ax = demo_conf(fig.add_subplot(121), a=a, adaptive=adaptive)
ax = demo_gpr(fig.add_subplot(122, sharey=ax, sharex=ax), a=a, adaptive=adaptive)
fig.savefig(filename_template_%("intervals",), dpi=120)
plt.show()
In [ ]:
from utils.state import _load
In [ ]:
# Earlier snapshots of the experiment, superseded by the two batches below:
# exp_triangle = _load("./conf_prec_sel/exp_triangle_1d0001-20160531_124611.gz")
# exp_triangle = _load("./conf_prec_sel/exp_triangle_1d0001-20160531_160409.gz")
# exp_triangle = _load("./conf_prec_sel/exp_triangle_1d0001-20160531_170831.gz")
# Concatenate the records of both experiment batches into one list.
exp_triangle = list()
for file_ in ['./conf_prec_sel/exp_triangle_1d0001-20160531_173311.gz',
              './conf_prec_sel/exp_triangle_1d0002-20160531_173805.gz']:
    exp_triangle.extend(_load(file_))
In [ ]:
def demo_int(intervals, y, y_hat, name, points, ax, a=-1, adaptive=True, plot_gpr=True):
    """Plot `intervals` at the chosen sample `points` against the theta grid.

    Variant for *loaded* experiment records: builds its own colour palette and
    shadows the module-level theta_grid with the 71-point grid that the stored
    experiments presumably used (confirm against the saving script).
    `a` indexes the global `levels`; `adaptive` re-centres at the prediction.
    Returns `ax`.
    """
    colors_ = plt.cm.rainbow(np.linspace(0, 1, num=len(points)))
    theta_grid = np.logspace(-2.0, 5.0, num=71)
    ax.set_xscale("log")
    for j, p in enumerate(points):
        if adaptive:
            ax.plot(theta_grid, intervals[0, :, p, a, 1] - y_hat[0, :, p],
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, intervals[0, :, p, a, 0] - y_hat[0, :, p],
                    c=colors_[j], marker="^", markersize=3)
            ax.plot(theta_grid, y[p, 0] - y_hat[0, :, p], c=colors_[j],
                    linestyle="-", label="$y_{x_{%d}} - \\hat{y}_{\\theta}(x_{%d})$"%(j, j,), alpha=0.5)
        else:
            ax.plot(theta_grid, intervals[0, :, p, a, 1],
                    c=colors_[j], marker="v", markersize=3)
            ax.plot(theta_grid, intervals[0, :, p, a, 0],
                    c=colors_[j], marker="^", markersize=3)
            # Absolute-view reference at the true target (inferred placement).
            ax.axhline(y=y[p, 0], c=colors_[j], linestyle="-", label="$y_{x_{%d}}$"%(j,), alpha=0.5)
    if adaptive:
        ax.set_title("prediction-centered %s%% %sintervals for %d arbitrary points"%(levels_[a], name, len(points)))
    else:
        ax.set_title("%s%% %sintervals for %d arbitrary points"%(levels_[a], name, len(points)))
    ax.set_xlabel("Gaussian kernel precision $\\theta$ : $K(x,x') = \\mathtt{exp}\\{-\\theta\\|x-x'\\|^2\\}$")
    ax.set_ylabel("Target $y_x$")
    ax.legend(loc="best", ncol=2)
    ax.grid()
    return ax
In [ ]:
# Re-generate the per-experiment figures (sample path + interval comparison)
# for every record loaded into exp_triangle.
for noise_, size_, nugget_, loo_, result_ in exp_triangle:
    X, y, y_hat_, gpr_intervals_, rrcm_intervals_ = result_
    # File name and figure title templates parameterized by the settings.
    filename_template_ = "./tri_%d_%g_%g_%%s.pdf"%(size_, noise_, nugget_,)
    title_template_ = "triangle ($n=%d$, $\\gamma=%g$): %%s"%(size_, noise_,)
    # Highlighted sample indices depend on the grid resolution.
    if size_ == 101:
        points_ = [50, 37, 62,]
    elif size_ == 251:
        points_ = [110, 125, 142, 189]
    else:
        points_ = [190, 200, 249, 295, 318]
    # output/<size>/<noise>/, created on demand.
    output_path_ = mkdirifnot(os.path.join(OUTPUT_PATH, "%d"%(size_,)))
    output_path_ = mkdirifnot(os.path.join(output_path_, "%g"%(noise_,)))
    colors_ = plt.cm.rainbow(np.linspace(0, 1, num=len(points_)))
    ## Profile: sample path of the first draw with the points marked.
    fig = plt.figure(figsize=(16, 9))
    ax = fig.add_subplot(111)
    ax.plot(X, y[:, 0], c="b")
    for j, p in enumerate(points_):
        ax.axvline(x=X[p], c=colors_[j], linestyle="-", marker="o")
    ax.set_title(title_template_%("sample path"))
    ax.set_xlabel("x")
    ax.set_ylabel("$y_x$")
    filename_ = os.path.join(output_path_, filename_template_%("sample",))
    fig.savefig(filename_, dpi=120)
    plt.close()
    ## Intervals: RRCM (left) vs GPR (right), absolute view, shared axes.
    a, adaptive = 2, False
    fig = plt.figure(figsize=(16, 9))
    ax = demo_int(rrcm_intervals_, y, y_hat_, "RRCM " + ("loo-" if loo_ else ""),
                  points_, fig.add_subplot(121), a=a, adaptive=adaptive)
    ax = demo_int(gpr_intervals_, y, y_hat_, "GPR " + ("loo-" if loo_ else ""),
                  points_, fig.add_subplot(122, sharey=ax, sharex=ax), a=a, adaptive=adaptive)
    filename_ = os.path.join(output_path_, filename_template_%(("loo-" if loo_ else "") + "intervals",))
    fig.savefig(filename_, dpi=120)
    plt.close()
    # break
In [ ]:
from scipy.stats import norm
# Interval widths per (draw, theta, sample, level).
z_a = norm.ppf(1 - .5 * levels)
loo_rrcm_width_ = intervals_[..., 1] - intervals_[..., 0]
# FIX: loo_sigma_ already holds standard deviations (it is sqrt'ed when
# stacked above), so the original's extra np.sqrt() squashed the GPR widths.
# width = 2 * z_alpha * sigma, consistent with gpr_hwidth_ further below.
loo_gpr_width_ = 2 * loo_sigma_[..., np.newaxis] * z_a.reshape((1, 1, 1, -1))
In [ ]:
loo_rrcm_width_.shape
In [ ]:
# Median (over sample points, then over draws) interval width per level.
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
colors_conf_ = plt.cm.rainbow(np.linspace(0, 1, 4)[::-1])
## Conformal and GPR: median width
for j, lvl_ in enumerate(levels_):
    ax.plot(theta_grid, np.median(np.median(loo_rrcm_width_[..., j], axis=-1), axis=0),
            c=colors_conf_[j], zorder=99, label="RRCM %s%%"%(lvl_,))
# Gaussian widths (label unified to lvl_, same value as levels_[j] before)
for j, lvl_ in enumerate(levels_):
    ax.plot(theta_grid, np.median(np.median(loo_gpr_width_[..., j], axis=-1), axis=0),
            c="k", linestyle="-", zorder=-99, label="GPR %s%%"%(lvl_,))
ax.set_xscale("log") ; ax.set_xlabel("Isotropic Gaussian kernel precision ($\\theta$, log-scale)")
# ax.set_yscale("log") ;
# Typos fixed ("reigion"/"predicitve"); "(log)" dropped since the y-scale is linear here.
ax.set_ylabel("Confidence region width")
ax.legend(loc="best", ncol=2)
ax.set_title("Median width of predictive regions across all sample points")
In [ ]:
loo_rrcm_width_.shape
In [ ]:
loo_rrcm_width_[0, :, 100, j].shape
In [ ]:
# Interval width per level at the single sample point 100 (first draw).
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
colors_conf_ = plt.cm.rainbow(np.linspace(0, 1, 4)[::-1])
## Conformal and GPR widths at one point
for j, lvl_ in enumerate(levels_):
    ax.plot(theta_grid, loo_rrcm_width_[0, :, 100, j],
            c=colors_conf_[j], zorder=99, label="RRCM %s%%"%(lvl_,))
# Gaussian widths (label unified to lvl_, same value as levels_[j] before)
for j, lvl_ in enumerate(levels_):
    ax.plot(theta_grid, loo_gpr_width_[0, :, 100, j],
            c="k", linestyle="-", zorder=-99, label="GPR %s%%"%(lvl_,))
ax.set_xscale("log") ; ax.set_xlabel("Isotropic Gaussian kernel precision ($\\theta$, log-scale)")
# ax.set_yscale("log") ;
ax.set_ylabel("Confidence region width")
ax.legend(loc="best", ncol=2)
# FIX: the original title (copy-pasted from the median plot) claimed a median
# over all sample points; this figure shows a single point.
ax.set_title("Width of predictive regions at sample point 100")
Inspect the leverage
In [ ]:
# NOTE(review): `leverage_` is not defined anywhere in this file -- this cell
# presumably relied on a variable from an earlier/removed cell; confirm it is
# computed before running this cell.
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
ax.plot(theta_grid, np.mean(leverage_, axis=1),
        c="k", zorder=99, label="mean")
ax.plot(theta_grid, np.max(leverage_, axis=1),
        c="k", marker="v", alpha=0.15, label="max")
ax.plot(theta_grid, np.min(leverage_, axis=1),
        c="k", marker="^", alpha=0.15, label="min")
ax.set_xscale("log")
ax.set_title("Leverage of the KRR")
ax.legend(loc="best")
In [ ]:
import os, time
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.linalg import cholesky
from sklearn.metrics import pairwise_kernels
from scipy.linalg import inv
from joblib import Parallel, delayed
from utils.functions_1d import triangle
from utils.functions import gaussian
from utils.conformal import RRCM
from scipy.stats import norm
In [ ]:
# Re-declared for the second, self-contained part of the file: miscoverage
# levels ordered widest-to-tightest, plus percent-formatted labels.
levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]
levels_ = pd.Index(["%2.1f"%(100*lvl,) for lvl in levels], name="alpha")
In [ ]:
random_state = np.random.RandomState(0x4A08F766)
In [ ]:
# Second experiment: for each noise level, GP sample paths (size=(2,) --
# presumably two draws, confirm against utils.functions.gaussian) plus two
# noisy triangle paths, all column-concatenated into y.
theta0, size = 100.0, 251
metric = 'rbf'  # 'laplacian'
# X = np.linspace(-1, 1, num=size).reshape((-1, 1))
X = np.linspace(-.5, 1.5, num=size).reshape((-1, 1))
yy = list()
for noise_ in [1e-6, 1e-2, 5e-2, 1e-1, 5e-1]:
    y1 = gaussian(X, random_state=random_state, metric=metric, nugget=noise_, size=(2,), gamma=theta0)
    y2 = triangle(X) + random_state.normal(size=(X.shape[0], 2)) * noise_
    yy.append(np.concatenate([y1, y2], axis=-1))
y = np.concatenate(yy, axis=-1)
In [ ]:
# Second pass over a finer theta grid, now also keeping both the ridge
# predictions (y_hat_) and the leave-one-out predictions (y_hat_loo_).
nugget_ = 1e-6
theta_grid = np.logspace(-2.0, 5.0, num=211)
loo_sigma_, A, B, y_hat_, y_hat_loo_ = list(), list(), list(), list(), list()
for theta_ in theta_grid:
    # Build K + nugget*I and invert in place (Fortran order for LAPACK).
    Kxx_ = np.asfortranarray(pairwise_kernels(X, metric=metric, gamma=theta_))
    Kxx_.flat[::Kxx_.shape[0] + 1] += nugget_
    inv(Kxx_, overwrite_a=True, check_finite=False)
    # Diagonal of the inverse: per-sample normalizer.
    Mi_ = np.diag(Kxx_).reshape((1, -1))
    # Per-draw variance estimate y' Q y / n, normalized per sample.
    sigma2_ = np.diag(np.dot(y.T, np.dot(Kxx_, y))) / Kxx_.shape[0]
    loo_sigma2_ = sigma2_.reshape((-1, 1)) / Mi_
    yQM_ = np.dot(y.T, Kxx_)
    # LOO predictions use the Mi_-normalized residual ...
    hat_loo_ = y.T - yQM_ / Mi_
    # ... while the ridge predictions use the nugget-scaled one.
    yQM_ *= nugget_
    hat_ = y.T - yQM_
    B_ = nugget_ * Kxx_
    A.append(yQM_[:, np.newaxis] - B_[np.newaxis] * y[np.newaxis].T)
    loo_sigma_.append(np.sqrt(loo_sigma2_))  # stored as standard deviations
    y_hat_loo_.append(hat_loo_)
    y_hat_.append(hat_)
    B.append(B_)
# Stack over theta: A is (draw, theta, ...), B is (theta, ...).
A, B = np.stack(A, axis=1), np.stack(B, axis=0)
loo_sigma_ = np.stack(loo_sigma_, axis=1)
y_hat_loo_ = np.stack(y_hat_loo_, axis=1)
y_hat_ = np.stack(y_hat_, axis=1)
In [ ]:
## Run the conformal procedure on each column of A-B
# One RRCM task per (draw r, precision t, sample n); `range` replaces the
# Py2-only `xrange` (identical iteration, Python 2/3 compatible).
parallel_ = Parallel(n_jobs=-1, verbose=0)
results_ = parallel_(delayed(RRCM)(A[r, t, n], B[t, n], levels=levels, n=n)
                     for r in range(y.shape[1])
                     for t in range(len(theta_grid))
                     for n in range(X.shape[0]))
# Convex hull [lower, upper] per region, back to
# (draw, theta, sample, level, endpoint).
rrcm_intervals_ = np.reshape(np.array([(res_[0, 0], res_[-1, -1])
                                       for result_ in results_ for res_ in result_]),
                             (y.shape[1], len(theta_grid), X.shape[0], len(levels), 2))
In [ ]:
## Gaussian (GPR) two-sided intervals: loo-prediction +/- z_alpha * sigma.
z_a = norm.ppf(1 - .5 * levels)
gpr_hwidth_ = loo_sigma_[..., np.newaxis] * z_a[np.newaxis, np.newaxis, np.newaxis, :]
centered_ = y_hat_loo_[..., np.newaxis]
gpr_intervals_ = np.stack([centered_ - gpr_hwidth_,
                           centered_ + gpr_hwidth_], axis=-1)
In [ ]:
def demo_int(intervals, y, y_hat, name, points, ax, a=-1, adaptive=True, plot_gpr=True):
    """Plot per-point `intervals` against the global theta_grid.

    Redefines the earlier demo_int for the second experiment's layout
    (no leading draw axis).  `a` indexes the global `levels`; with
    `adaptive` the bands are re-centred at the LOO prediction.
    Returns `ax`.
    """
    palette_ = plt.cm.rainbow(np.linspace(0, 1, num=len(points)))
    ax.set_xscale("log")
    for idx, pt in enumerate(points):
        upper_, lower_ = intervals[:, pt, a, 1], intervals[:, pt, a, 0]
        if adaptive:
            ax.plot(theta_grid, upper_ - y_hat[:, pt], c=palette_[idx], marker="v", markersize=3)
            ax.plot(theta_grid, lower_ - y_hat[:, pt], c=palette_[idx], marker="^", markersize=3)
            ax.plot(theta_grid, y[pt] - y_hat[:, pt], c=palette_[idx], linestyle="-",
                    label="$y_{x_{%d}} - \\hat{y}_{\\theta}(x_{%d})$"%(idx, idx,), alpha=0.5)
        else:
            ax.plot(theta_grid, upper_, c=palette_[idx], marker="v", markersize=3)
            ax.plot(theta_grid, lower_, c=palette_[idx], marker="^", markersize=3)
            # Prediction curve and true-target reference for the absolute view.
            ax.plot(theta_grid, y_hat[:, pt], c=palette_[idx], linestyle="-", alpha=0.75)
            ax.axhline(y=y[pt], c=palette_[idx], linestyle="-",
                       label="$y_{x_{%d}}$"%(idx,), alpha=0.5)
    title_ = "%s%% %sintervals for %d arbitrary points"%(levels_[a], name, len(points))
    if adaptive:
        title_ = "loo-prediction centered " + title_
    ax.set_title(title_)
    ax.set_xlabel("Gaussian kernel precision $\\theta$ : $K(x,x') = \\mathtt{exp}\\{-\\theta\\|x-x'\\|^2\\}$")
    ax.set_ylabel("Target $y_x$")
    ax.legend(loc="best", ncol=2)
    ax.grid()
    return ax
In [ ]:
## Intervals
# RRCM (left) vs GPR (right) for replicate j=2 at the tightest level
# (a=-1 -> 1%), absolute view, shared axes.
points_ = [110, 125, 142, 189]
j, a, adaptive = 2, -1, False
fig = plt.figure(figsize=(16, 9))
ax = demo_int(rrcm_intervals_[j], y[:, j], y_hat_loo_[j], "RRCM ",
              points_, fig.add_subplot(121), a=a, adaptive=adaptive)
ax = demo_int(gpr_intervals_[j], y[:, j], y_hat_loo_[j], "GPR ",
              points_, fig.add_subplot(122, sharey=ax, sharex=ax), a=a, adaptive=adaptive)
plt.show()
In [ ]:
# Sample path of replicate j (leaks from the cell above) with the
# highlighted points marked.
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)
ax.plot(X, y[:, j], c="b")
# ax.plot(X, y[:, j, np.newaxis] - y_hat_loo_[j, :].T)
colors_ = plt.cm.rainbow(np.linspace(0, 1, num=len(points_)))
for c, x in zip(colors_, X[points_]):
    ax.axvline(x=x, c=c, linestyle="-", marker="o")
ax.set_title("Sample path of the studied function")
ax.set_xlabel("x")
ax.set_ylabel("$y_x$")
In [ ]:
## Compute the running intersection over the theta axis (1)
reverse = False
if not reverse:
    # Running min of the uppers / max of the lowers from the left.
    top_ = np.minimum.accumulate(rrcm_intervals_[..., 1], axis=1)
    bottom_ = np.maximum.accumulate(rrcm_intervals_[..., 0], axis=1)
else:
    # Same, but accumulated from the right end of the grid.
    top_ = np.minimum.accumulate(rrcm_intervals_[:, ::-1, ..., 1], axis=1)
    bottom_ = np.maximum.accumulate(rrcm_intervals_[:, ::-1, ..., 0], axis=1)
width_ = top_ - bottom_
# Sentinel: an empty intersection gets width -1, so argmin latches onto the
# first index where the running intersection becomes empty.
width_[width_ < 0] = -1
## t - b is nonincreasing, that is why the argmin of |t-b| is either +\infty
## or the point of crossing the zero level (again over the theta axis(1)).
theta_index_ = np.argmin(width_, axis=1)
## Find the largest index (theta) below which all intervals of all sample
## points have nonempty intersection.
theta_star = np.min(theta_index_, axis=1)
if reverse:
    ## Since we need the largest k, for which all intervals beyond it overlap
    ## take the `max` (typo "tkae" fixed).
    theta_star = theta_index_.shape[1] - theta_star
In [ ]:
# Running-intersection width at sample point 125 (first draw), shown in the
# same orientation that was used to accumulate it above.
if reverse:
    plt.plot((width_[0, ::-1, 125]))
else:
    plt.plot((width_[0, :, 125]))
In [ ]:
## Just take the index (theta) which minimizes the interval width, summarized
## by the median over sample points (per replicate and level).
theta_star = np.median(np.argmin(rrcm_intervals_[..., 1] - rrcm_intervals_[..., 0], axis=1), axis=1).astype(int)
In [ ]:
from sklearn.gaussian_process import GaussianProcess  # NOTE(review): removed in sklearn >= 0.20; migrate to GaussianProcessRegressor
# Compare the MLE precision of a fitted GP against the conformal width-based
# pick theta_star for every replicate.
for j in range(y.shape[1]):
    gp = GaussianProcess(thetaL=1e-5, thetaU=1e+7, theta0=100.0, nugget=nugget_, normalize=False,
                         regr='constant', beta0=0, corr='squared_exponential').fit(X, y[:, j])
    plt.plot(y[:, j], c="r")
    plt.plot(gp.predict(X), c="b", label="MLE")
    # Conformal pick at the tightest level (last column of theta_star).
    theta_inx_ = theta_star[j, -1]
    plt.plot(y_hat_[j, theta_inx_], c="g", label="Nem")
    # gp_ = GaussianProcess(theta0=theta_, nugget=nugget_, normalize=False,
    #                       regr='constant', beta0=0, corr='squared_exponential').fit(X, y[:, j])
    # plt.plot(gp_.predict(X), c="g")
    plt.legend(loc="best")
    plt.show()
    plt.close()
    # Parenthesized print: identical output under Python 2 and 3 for one argument.
    print("Nemirovsky:\t%2.5g,\nGP:\t\t%2.5g\n"%(theta_grid[theta_inx_], gp.theta_[0]))
In [ ]: