In [ ]:
import os, time
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from utils.mpl_mid_point_norm import MidPointNorm
from utils.state import _load
from scipy.stats import binom_test
from scipy.stats import ttest_1samp
In [ ]:
def mkdirifnot(path):
    """Create the directory if it does not already exist and return its path."""
    if not os.path.exists(path):
        os.mkdir(path)
    return path
In [ ]:
BASE_PATH = "."
DATA_PATH = os.path.join(BASE_PATH, "..", "thesis_exp")
OUTPUT_PATH = mkdirifnot(os.path.join(BASE_PATH, "output_pdf-3"))
PROFILE_PATH = mkdirifnot(os.path.join(OUTPUT_PATH, "profile"))
EXP1D_PATH = mkdirifnot(os.path.join(OUTPUT_PATH, "exp_1d"))
EXP2D_PATH = mkdirifnot(os.path.join(OUTPUT_PATH, "exp_2d"))
In [ ]:
from joblib import Parallel, delayed
In [ ]:
def load_dumps(path, n_jobs=-1, verbose=1, include_target=False):
    """Load the gzipped experiment dumps in `path` and regroup them by experiment key.

    Each dumped record is assumed to start with a key tuple whose last element is
    the training size, followed by the test targets, the predictions, and six
    (width, coverage) array pairs -- one pair per interval-construction method.
    """
    parallel_ = Parallel(n_jobs=n_jobs, verbose=verbose)
    jobs_ = (delayed(_load)(os.path.join(path, fname_))
             for fname_ in os.listdir(path)
             if fname_.endswith(".gz"))
    dumps_ = parallel_(jobs_)
    experiment = [exp_ for dump_ in dumps_ for exp_ in dump_]
    # Group the records by experiment key (everything except the training size),
    # then sort each group by training size.
    temp_ = dict()
    for exp_ in experiment:
        key_ = exp_[0][:-1]
        if key_ not in temp_:
            temp_[key_] = list()
        temp_[key_].append((exp_[0][-1], exp_[1:]))
    temp_ = {key_: sorted(res_, key=lambda x: x[0])
             for key_, res_ in temp_.iteritems()}
    results_ = dict()
    for key_, result_ in temp_.iteritems():
        # RMSE of the prediction relative to the standard deviation of the target.
        ratio_ = np.stack([np.sqrt(np.mean((res_[1][0] - res_[1][1])**2, axis=0, keepdims=True)) /
                           np.std(res_[1][0], axis=0, keepdims=True) for res_ in result_], axis=0)
        sizes_ = np.array([res_[0] for res_ in result_])
        # Interval coverage and width for the six methods, stacked over training sizes.
        coverage_ = np.stack([np.stack([res_[1][3 + 2*j] for res_ in result_], axis=0)
                              for j in xrange(6)], axis=0)
        width_ = np.stack([np.stack([res_[1][2 + 2*j] for res_ in result_], axis=0)
                           for j in xrange(6)], axis=0)
        if include_target:
            target_ = np.stack([res_[1][0] for res_ in result_], axis=0)
            target_hat_ = np.stack([res_[1][1] for res_ in result_], axis=0)
            results_[key_] = ratio_, sizes_, coverage_, width_, target_, target_hat_
        else:
            results_[key_] = ratio_, sizes_, coverage_, width_
    return results_
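For reference, a minimal sketch of the record layout that `load_dumps` indexes into, built from dummy arrays; the shapes and the key below are purely illustrative assumptions, since the real records are written by the experiment scripts.
In [ ]:
# Hypothetical record mirroring the indexing in `load_dumps` (illustrative shapes only).
n_test, n_levels, n_rep = 11, 4, 3
rec_key = ("gaussian", 0.1, "auto", 1e-6, 50)           # experiment key ending in the training size
y_test = np.random.randn(n_test, n_rep)                 # -> res_[1][0]
y_hat = y_test + 0.1 * np.random.randn(n_test, n_rep)   # -> res_[1][1]
width = np.random.rand(n_test, n_levels, n_rep)         # -> res_[1][2 + 2*j]
cover = np.random.rand(n_levels, n_rep)                 # -> res_[1][3 + 2*j]
record = (rec_key, y_test, y_hat) + (width, cover) * 6
len(record)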
In [ ]:
def load_profiles(path, n_jobs=-1, verbose=1):
    """Like `load_dumps`, but for the profile experiments: each record carries
    the test targets, the predictions and six interval-bound arrays."""
    parallel_ = Parallel(n_jobs=n_jobs, verbose=verbose)
    jobs_ = (delayed(_load)(os.path.join(path, fname_))
             for fname_ in os.listdir(path)
             if fname_.endswith(".gz"))
    dumps_ = parallel_(jobs_)
    experiment = [exp_ for dump_ in dumps_ for exp_ in dump_]
    temp_ = dict()
    for exp_ in experiment:
        key_ = exp_[0][:-1]
        if key_ not in temp_:
            temp_[key_] = list()
        temp_[key_].append((exp_[0][-1], exp_[1:]))
    temp_ = {key_: sorted(res_, key=lambda x: x[0])
             for key_, res_ in temp_.iteritems()}
    results_ = dict()
    for key_, result_ in temp_.iteritems():
        ratio_ = np.stack([np.sqrt(np.mean((res_[1][0] - res_[1][1])**2, axis=0, keepdims=True)) /
                           np.std(res_[1][0], axis=0, keepdims=True) for res_ in result_], axis=0)
        sizes_ = np.array([res_[0] for res_ in result_])
        bounds_ = np.stack([np.stack([res_[1][2 + j] for res_ in result_], axis=0)
                            for j in xrange(6)], axis=0)
        target_ = np.stack([res_[1][0] for res_ in result_], axis=0)
        target_hat_ = np.stack([res_[1][1] for res_ in result_], axis=0)
        results_[key_] = ratio_, sizes_, bounds_, target_, target_hat_
    return results_
In [ ]:
def coverage_plot(ax, sizes, cov, levels):
    """Plot the median and interquartile range of the empirical coverage
    against the training size, one curve per confidence level."""
    cov_med_ = np.median(cov, axis=-1)
    cov_lo_, cov_hi_ = np.percentile(cov, [25, 75], axis=-1)
    ax.set_ylim(0.65, 1.025)
    ax.set_xlim(25, 1600)
    ax.locator_params(axis="x", nbins=5)
    ax.set_yticks(1 - levels)
    for i in xrange(cov_med_.shape[1]):
        ax.plot(sizes, cov_med_[:, i], color="bgrm"[i % 4])
        ax.plot(sizes, cov_hi_[:, i], color="bgrm"[i % 4], alpha=0.5)
        ax.plot(sizes, cov_lo_[:, i], color="bgrm"[i % 4], alpha=0.5)
        ax.axhline(y=1 - levels[i], color='black', alpha=0.25)
    return ax

def nomorethan(x, bound=0):
    """Replace values exceeding `bound` with NaN so that matplotlib skips them."""
    x_ = np.array(x, dtype=float)
    x_[x_ > bound] = np.nan
    return x_

def width_plot(ax, sizes, width):
    """Plot the median, 5th percentile and maximum of the average interval width
    against the training size, one curve per confidence level (values above 2 are masked)."""
    avg_width_ = width.mean(axis=-1)
    aw_med_ = np.median(avg_width_, axis=-2)
    aw_q95_ = np.percentile(avg_width_, [95], axis=-2)[0]
    aw_min_ = np.percentile(avg_width_, [5], axis=-2)[0]
    aw_max_ = np.max(avg_width_, axis=-2)
    ax.set_xlim(25, 1600)
    ax.locator_params(axis="x", nbins=5)
    for i in xrange(aw_med_.shape[1]):
        ax.plot(sizes, nomorethan(aw_med_[:, i], 2), color="bgrm"[i % 4])
        # ax.plot(sizes, nomorethan(aw_q95_[:, i], 2), color="bgrm"[i % 4], alpha=0.5, marker='x')
        ax.plot(sizes, nomorethan(aw_max_[:, i], 2), color="bgrm"[i % 4], alpha=0.5, marker='v')
        ax.plot(sizes, nomorethan(aw_min_[:, i], 2), color="bgrm"[i % 4], alpha=0.5, marker='^')
    return ax
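A minimal usage sketch of `coverage_plot` on synthetic data; the array layout (sizes on the first axis, confidence levels on the second, replications on the last) is assumed from the code above, and all `demo_*` names are hypothetical.
In [ ]:
# Synthetic demonstration of `coverage_plot` (illustrative data only).
demo_sizes = np.array([25, 50, 100, 200, 400, 800, 1600])
demo_levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]
rng = np.random.RandomState(0)
demo_cov = (1 - demo_levels[np.newaxis, :, np.newaxis]
            + 0.02 * rng.randn(len(demo_sizes), len(demo_levels), 25))
fig = plt.figure(figsize=(4, 3))
coverage_plot(fig.add_subplot(111), demo_sizes, demo_cov, demo_levels)
plt.show()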
In [ ]:
levels = np.asanyarray([0.01, 0.05, 0.10, 0.25])[::-1]
titles_ = pd.Index(["GPR-p", "GPR-f", "RRCM", "CRR", "RRCM-loo", "CRR-loo"], name="type")
ncms_ = pd.Index(["GPR", "GPR", "RRCM", "CRR", "RRCM", "CRR"], name="type")
levels_ = pd.Index(["%0.2f"%(lvl,) for lvl in levels], name="alpha")
In [ ]:
XX_test = np.linspace(0, 1, num=1001).reshape((-1, 1))
exp_gauss_1d = load_dumps(os.path.join(DATA_PATH, 'exp_gauss_1d_25'), verbose=1, n_jobs=-1)
exp_nongauss_1d = load_dumps(os.path.join(DATA_PATH, 'exp_nongauss_1d_25'), verbose=1, n_jobs=-1)
exp_gauss_1d.update(exp_nongauss_1d)
Make coverage and width figures (1-D experiments)
In [ ]:
from IPython.display import HTML, display
import matplotlib.gridspec as gridspec

for key_ in sorted(exp_gauss_1d.keys(), key=lambda x: (x[0], x[1], x[3], x[2])):
    name_, noise_, theta0_, nugget_ = key_
    ratio_, sizes_, coverage_, width_ = exp_gauss_1d[key_]
    output_path_ = mkdirifnot(os.path.join(EXP1D_PATH, name_))
    output_path_ = mkdirifnot(os.path.join(output_path_, "%g_%g"%(noise_, nugget_,)))
    theta_ = ("%g" if isinstance(theta0_, float) else "$'%s'$")%(theta0_,)
    title_template_ = "%%s ($\\theta=%s, \\lambda=%g, \\gamma=%g$)"%(theta_, nugget_, noise_)
    theta_ = ("%g" if isinstance(theta0_, float) else "%s")%(theta0_,)
    filename_template_ = "%%s%s %g %g %s %%s"%(name_, noise_, nugget_, theta_)
    ## width dynamics
    output_path_current_ = mkdirifnot(os.path.join(output_path_, "width"))
    for j in xrange(6):
        output_path_local_ = mkdirifnot(os.path.join(output_path_current_, titles_[j]))
        fig = plt.figure(figsize=(4, 3))
        ax = fig.add_subplot(111)
        # ax.set_yscale("log")
        width_plot(ax, sizes_, width_[j])
        ax.set_title(title_template_%(titles_[j],))
        filename_ = (filename_template_%("width ", titles_[j],)).replace(" ", "_").replace(".", ",")
        fig_file_name_ = os.path.join(output_path_local_, filename_ + ".pdf")
        fig.savefig(fig_file_name_, dpi=120)
        plt.close()
        # print fig_file_name_
    ## Coverage asymptotics
    output_path_current_ = mkdirifnot(os.path.join(output_path_, "coverage"))
    for j in xrange(6):
        output_path_local_ = mkdirifnot(os.path.join(output_path_current_, titles_[j]))
        fig = plt.figure(figsize=(4, 3))
        ax = fig.add_subplot(111)
        coverage_plot(ax, sizes_, coverage_[j], levels)
        ax.set_title(title_template_%(titles_[j],))
        filename_ = (filename_template_%("coverage ", titles_[j],)).replace(" ", "_").replace(".", ",")
        fig_file_name_ = os.path.join(output_path_local_, filename_ + ".pdf")
        fig.savefig(fig_file_name_, dpi=120)
        plt.close()
        # print fig_file_name_
    ## rmse/var dynamics
    output_path_current_ = output_path_
    fig = plt.figure(figsize=(4, 3))
    ax = fig.add_subplot(111)
    ratio_ = nomorethan(ratio_.mean(axis=-1), 1)
    # ax.set_ylim(bottom = -0.001)
    ax.plot(sizes_, ratio_)
    ax.set_title(title_template_%('RMSE/std',))
    filename_ = (filename_template_%("", "ratio",)).replace(" ", "_").replace(".", ",")
    fig_file_name_ = os.path.join(output_path_, filename_ + ".pdf")
    fig.savefig(fig_file_name_, dpi=120)
    plt.close()
    # print fig_file_name_
    # break
In [ ]:
XX_test = np.linspace(0, 1, num=501).reshape((-1, 1))
prof_gauss = load_profiles(os.path.join(DATA_PATH, 'prof_gauss'), verbose=1, n_jobs=1)
prof_nongauss = load_profiles(os.path.join(DATA_PATH, 'prof_nongauss'), verbose=1, n_jobs=1)
prof_gauss.update(prof_nongauss)
In [ ]:
for key_ in sorted(prof_gauss.keys(), key=lambda x: (x[0], x[1], x[3], x[2])):
    name_, noise_, theta0_, nugget_ = key_
    ratio_, sizes_, bounds_, y_test_, y_hat_ = prof_gauss[key_]
    ## Skip
    # if name_ != "heaviside": continue
    # if theta0_ == "auto": continue
    output_path_ = mkdirifnot(os.path.join(PROFILE_PATH, name_))
    output_path_ = mkdirifnot(os.path.join(output_path_, "%g_%g"%(noise_, nugget_,)))
    theta_ = ("%g" if isinstance(theta0_, float) else "$'%s'$")%(theta0_,)
    title_template_ = "%%s: %%s ($\\theta=%s, \\lambda=%g, \\gamma=%g$)"%(theta_, nugget_, noise_)
    theta_ = ("%g" if isinstance(theta0_, float) else "%s")%(theta0_,)
    filename_template_ = "%%s%s %g %g %s %%s"%(name_, noise_, nugget_, theta_)
    ## Profile
    for s_ in xrange(len(sizes_)):
        # if s_ > 1: continue
        output_path_current_ = mkdirifnot(os.path.join(output_path_, "%d"%(sizes_[s_],)))
        for i_ in xrange(4):
            # max_, min_ = np.percentile(bounds_[:, s_, :, i_], [92.5, 7.5])*2
            max_, min_ = y_test_[s_].max()*1.5, y_test_[s_].min()*1.5
            if name_ == "heaviside": min_, max_ = -0.95, 1.95
            for ncm_ in pd.unique(ncms_):
                fig = plt.figure(figsize=(5, 4))
                ax = fig.add_subplot(111)
                if np.isfinite(min_) and np.isfinite(max_):
                    ax.set_ylim(min_, max_)
                ax.plot(XX_test, y_test_[s_], c="#c0c0c0", lw=2, alpha=.5, label="$y_x$")
                ax.plot(XX_test, y_hat_[s_], c='k', label="$\\hat{y}_x$")
                for j, b in enumerate(np.flatnonzero(ncms_ == ncm_)):
                    ax.plot(XX_test, bounds_[b, s_, :, i_, 0], color="rb"[j], label=titles_[b])
                    ax.plot(XX_test, bounds_[b, s_, :, i_, 1], color="rb"[j])
                ax.set_title(title_template_%("%.1f%%-%s"%(levels[i_]*100, ncm_,), name_,))
                ax.legend(loc="best", ncol=2)
                filename_ = (filename_template_%("profile ", "%dp-%s %d"%(levels[i_]*100, ncm_, sizes_[s_],),))
                fig_file_name_ = os.path.join(output_path_current_,
                                              filename_.replace(" ", "_").replace(".", ",") + ".pdf")
                fig.savefig(fig_file_name_, dpi=120)
                plt.close()
In [ ]:
nd = 2
mesh_ = np.meshgrid(*nd*[np.linspace(0, 1, num=51)])
# 51 x 51 = 2601 test points on the unit square.
XX_test = np.concatenate([ax_.reshape((-1, 1)) for ax_ in mesh_], axis=1)
exp_gauss_2d = load_dumps(os.path.join(DATA_PATH, 'exp_gauss_2d_25'),
                          verbose=1, n_jobs=1, include_target=True)
In [ ]:
exp_nongauss_2d = load_dumps(os.path.join(DATA_PATH, 'exp_nongauss_2d_25'),
                             verbose=1, n_jobs=1, include_target=True)
In [ ]:
exp_2d = dict()
exp_2d.update(exp_nongauss_2d)
exp_2d.update(exp_gauss_2d)
In [ ]:
df_ = pd.concat([pd.DataFrame(exp_2d[key_][0].mean(axis=-1).T,
                              index=pd.Index([key_], names=["name", "noise", "theta", "nugget"]),
                              columns=pd.Index(exp_2d[key_][1], name="size"))
                 for key_ in sorted(exp_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]],
                axis=0).unstack(level=0).swaplevel(0, 1, axis=1).sort_index(axis=1)\
                       .swaplevel(1, 2, axis=0).sort_index(axis=0)
print df_.unstack().T.xs("gaussian", axis=0).to_latex(float_format=lambda f: "%0.3f"%(f,))
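The reshaping chains in these table cells are dense, so here is a small self-contained toy example of the `unstack`/`swaplevel`/`sort_index` pattern they rely on; the index names and values are hypothetical and only meant to show how levels move between rows and columns.
In [ ]:
# Toy illustration of the unstack/swaplevel/sort_index pattern (hypothetical labels).
toy_ = pd.DataFrame(np.arange(8).reshape(4, 2),
                    index=pd.MultiIndex.from_product([["f2", "f5"], [0.1, 1.0]],
                                                     names=["name", "noise"]),
                    columns=pd.Index([64, 128], name="size"))
toy_.unstack(level=0).swaplevel(0, 1, axis=1).sort_index(axis=1)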
In [ ]:
df_ = pd.concat({key_: pd.Panel(1-np.mean(exp_2d[key_][2], axis=-1), items=titles_, minor_axis=levels_,
                                major_axis=pd.Index(exp_2d[key_][1], name="size")).to_frame()
                 for key_ in sorted(exp_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]},
                axis=0, names=["name", "noise", "theta", "nugget"])\
        .xs("gaussian", axis=0)\
        .drop("GPR-p", axis=1).unstack(level=-1)
print \
    df_.xs("GPR-f", axis=1, level=0).drop(10.0, axis=0, level=1).drop(1000.0, axis=0, level=1)\
       .unstack(level=-1).unstack(level=1).T\
       .swaplevel(0, 2, axis=0).swaplevel(1, 0, axis=0).sort_index(axis=0)\
       .to_latex(float_format=lambda f: "%3.1f"%(100*f,))
In [ ]:
df_ = pd.concat({key_: pd.Panel(1-np.mean(exp_2d[key_][2], axis=-1), items=titles_, minor_axis=levels_,
                                major_axis=pd.Index(exp_2d[key_][1], name="size")).to_frame()
                 for key_ in sorted(exp_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]},
                axis=0, names=["name", "noise", "theta", "nugget"])\
        .xs("gaussian", axis=0)\
        .drop("GPR-p", axis=1).unstack(level=-1)
print \
    df_.xs("GPR-f", axis=1, level=0).drop(100.0, axis=0, level=1).drop("auto", axis=0, level=1)\
       .unstack(level=-1).unstack(level=1).T\
       .swaplevel(0, 2, axis=0).swaplevel(1, 0, axis=0).sort_index(axis=0)\
       .to_latex(float_format=lambda f: "%3.1f"%(100*f,))
In [ ]:
df_ = pd.concat({key_: pd.Panel(1-np.mean(exp_2d[key_][2], axis=-1) - levels, items=titles_, minor_axis=levels_,
                                major_axis=pd.Index(exp_2d[key_][1], name="size")).to_frame()
                 for key_ in sorted(exp_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]},
                axis=0, names=["name", "noise", "theta", "nugget"])\
        .xs("gaussian", axis=0)\
        .drop("GPR-p", axis=1).unstack(level=-1)
print \
    np.abs(df_).max(axis=1, level=0).drop("GPR-f", axis=1)\
      .unstack(level=-1).unstack(level=1).T\
      .unstack(level=1).swaplevel(0, -1, axis=1).swaplevel(2, 1, axis=1).sort_index(axis=1)\
      .to_latex(float_format=lambda f: "%2.1f"%(100*f,))
In [ ]:
df_ = pd.concat([pd.DataFrame(exp_2d[key_][0].mean(axis=-1).T,
                              index=pd.Index([key_], names=["name", "noise", "theta", "nugget"]),
                              columns=pd.Index(exp_2d[key_][1], name="size"))
                 for key_ in sorted(exp_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]],
                axis=0).unstack(level=0).swaplevel(0, 1, axis=1).sort_index(axis=1)\
                       .swaplevel(1, 2, axis=0).sort_index(axis=0)
print \
    df_.unstack().T.drop("gaussian", axis=0)\
       .unstack(0).swaplevel(0, 2, axis=1).swaplevel(1, 2, axis=1).sort_index(axis=1)\
       .to_latex(float_format=lambda f: "%0.2f"%(f,))
In [ ]:
df_ = pd.concat({key_: pd.Panel(1-np.mean(exp_2d[key_][2], axis=-1), items=titles_, minor_axis=levels_,
                                major_axis=pd.Index(exp_2d[key_][1], name="size")).to_frame()
                 for key_ in sorted(exp_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]},
                axis=0, names=["name", "noise", "theta", "nugget"])\
        .drop("gaussian", axis=0)\
        .xs("GPR-f", axis=1).unstack(level=-1)
print \
    df_.unstack(2).unstack(3).swaplevel(0, 2, axis=1).sort_index(axis=1).T\
       .xs("f2", axis=1).to_latex(float_format=lambda f: "%3.1f"%(100*f,))
In [ ]:
df_ = pd.concat({key_: pd.Panel(1-np.mean(exp_2d[key_][2], axis=-1), items=titles_, minor_axis=levels_,
                                major_axis=pd.Index(exp_2d[key_][1], name="size")).to_frame()
                 for key_ in sorted(exp_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]},
                axis=0, names=["name", "noise", "theta", "nugget"])\
        .drop("gaussian", axis=0)\
        .xs("GPR-f", axis=1).unstack(level=-1)
print \
    df_.unstack(2).unstack(3).swaplevel(0, 2, axis=1).sort_index(axis=1).T\
       .xs("f5", axis=1).to_latex(float_format=lambda f: "%3.1f"%(100*f,))
In [ ]:
df_ = pd.concat({key_: pd.Panel(1-np.mean(exp_2d[key_][2], axis=-1)-levels, items=titles_, minor_axis=levels_,
                                major_axis=pd.Index(exp_2d[key_][1], name="size")).to_frame()
                 for key_ in sorted(exp_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]},
                axis=0, names=["name", "noise", "theta", "nugget"])\
        .drop("gaussian", axis=0)\
        .drop("GPR-p", axis=1).drop("GPR-f", axis=1).unstack(level=-1)
print \
    np.abs(df_).max(axis=1, level=0).unstack(2).unstack(0)\
      .swaplevel(1, 2, axis=0).swaplevel(1, 0, axis=0).sort_index(axis=0).T\
      .swaplevel(1, 2, axis=0).swaplevel(1, 0, axis=0).sort_index(axis=0)\
      .xs("f2", axis=0).to_latex(float_format=lambda f: "%2.1f"%(100*f,))
In [ ]:
df_ = pd.concat({key_: pd.Panel(1-np.mean(exp_2d[key_][2], axis=-1)-levels, items=titles_, minor_axis=levels_,
                                major_axis=pd.Index(exp_2d[key_][1], name="size")).to_frame()
                 for key_ in sorted(exp_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]},
                axis=0, names=["name", "noise", "theta", "nugget"])\
        .drop("gaussian", axis=0)\
        .drop("GPR-p", axis=1).drop("GPR-f", axis=1).unstack(level=-1)
print \
    np.abs(df_).max(axis=1, level=0).unstack(2).unstack(0)\
      .swaplevel(1, 2, axis=0).swaplevel(1, 0, axis=0).sort_index(axis=0).T\
      .swaplevel(1, 2, axis=0).swaplevel(1, 0, axis=0).sort_index(axis=0)\
      .xs("f5", axis=0).to_latex(float_format=lambda f: "%2.1f"%(100*f,))
In [ ]:
df_ = pd.concat({key_: pd.Panel(1-np.mean(exp_2d[key_][2], axis=-1)-levels, items=titles_, minor_axis=levels_,
                                major_axis=pd.Index(exp_2d[key_][1], name="size")).to_frame()
                 for key_ in sorted(exp_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]},
                axis=0, names=["name", "noise", "theta", "nugget"])\
        .drop("gaussian", axis=0)\
        .xs("RRCM", axis=1).unstack(level=-1)
df_.unstack(0).unstack(1)\
   .swaplevel(0, 1, axis=1).swaplevel(2, 1, axis=1).sort_index(axis=1).T\
   .swaplevel(0, 2, axis=1).swaplevel(1, 2, axis=1).sort_index(axis=1)\
   .style.applymap(lambda x: 'font-weight: bold' if abs(x) > 0.01 else "")
In [ ]:
np.linspace(-1,1, num=51)
In [ ]:
2*(np.arange(51)-25)/50.0
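If the intent of the two cells above is to check that the hand-rolled grid matches `np.linspace(-1, 1, num=51)`, a direct comparison (added for clarity):
In [ ]:
# The two constructions above should produce the same 51-point grid on [-1, 1].
np.allclose(np.linspace(-1, 1, num=51), 2*(np.arange(51) - 25)/50.0)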
In [ ]:
Figures: non-Gaussian sample surfaces
In [ ]:
from IPython.display import display, HTML
# EXP2D_PATH
for key_ in sorted(exp_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]:
    name_, noise_, theta0_, nugget_ = key_
    ratio_, sizes_, coverage_, width_, y_test, y_hat = exp_2d[key_]
    if name_ == "gaussian":
        nd = 2
        mesh_ = np.meshgrid(*nd*[np.linspace(0, 1, num=51)])
    else:
        nd = 2
        mesh_ = np.meshgrid(*nd*[np.linspace(-1, 1, num=51)])
    XX_test = np.concatenate([ax_.reshape((-1, 1)) for ax_ in mesh_], axis=1)
    ##
    output_path_ = mkdirifnot(os.path.join(EXP2D_PATH, name_))
    output_path_ = mkdirifnot(os.path.join(output_path_, "%g_%g"%(noise_, nugget_,)))
    theta_ = ("%g" if isinstance(theta0_, float) else "$'%s'$")%(theta0_,)
    title_template_ = "%%s: %%s ($\\theta=%s, \\lambda=%g, \\gamma=%g$)"%(theta_, nugget_, noise_)
    title_noth_template_ = "%%s: %%s ($\\lambda=%g, \\gamma=%g$)"%(nugget_, noise_)
    theta_ = ("%g" if isinstance(theta0_, float) else "%s")%(theta0_,)
    filename_template_ = "%%s%s %g %g %s %%s"%(name_, noise_, nugget_, theta_)
    for s_ in range(len(sizes_)):
        output_path_current_ = mkdirifnot(os.path.join(output_path_, "%d"%(sizes_[s_],)))
        ## Profile
        output_path_local_ = mkdirifnot(os.path.join(output_path_current_, "profile"))
        y_test_ = y_test[..., -1]  # np.mean(y_test, axis=-1)
        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(mesh_[0], mesh_[1], y_test_[s_].reshape(mesh_[0].shape),
                        cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                        antialiased=False, alpha=0.9)
        ax.view_init(60, -60)
        ax.set_title(title_noth_template_%("Sample", name_,))
        filename_ = filename_template_%("", "test")
        fig_file_name_ = os.path.join(output_path_local_,
                                      filename_.replace(" ", "_").replace(".", ",") + ".pdf")
        fig.savefig(fig_file_name_, dpi=120)
        plt.close()
    # break
In [ ]:
## Excess of the absolute error over half the RRCM interval width (scratch cell;
## relies on `y_test_`, `y_hat_` and `avg_width_` computed in other cells).
plot_ = np.abs(y_test_ - y_hat_) - .5*avg_width_[2, :, -1]
plot_[plot_ < 0] = 0
fig = plt.figure(figsize=(6, 3))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(mesh_[0], mesh_[1], plot_.reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                antialiased=False, alpha=0.9)
ax.view_init(60, -60)
plt.show()
In [ ]:
## Show the true surface
y_test_ = y_hat[0].mean(axis=-1)
y_hat_ = y_hat[1].mean(axis=-1)
fig = plt.figure(figsize=(6, 3))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(mesh_[0], mesh_[1], y_test_.reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                antialiased=False, alpha=0.9)
ax.view_init(60, -60)
plt.show()
## The approximated surface
fig = plt.figure(figsize=(6, 3))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(mesh_[0], mesh_[1], np.abs(y_test_ - y_hat_).reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                antialiased=False, alpha=0.9)
ax.view_init(60, -60)
plt.show()
In [ ]:
from utils.functions_2d import f2
In [ ]:
yy = f2(XX_test)
fig = plt.figure(figsize=(6, 3))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(mesh_[0], mesh_[1], yy.reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                antialiased=False, alpha=0.9)
ax.view_init(60, -60)
plt.show()
In [ ]:
f2
In [ ]:
# EXP2D_PATH
for key_ in sorted(exp_gauss_2d.keys(), key=lambda x: (x[0], x[1], x[3], x[2]))[::-1]:
    name_, noise_, theta0_, nugget_ = key_
    ratio_, sizes_, coverage_, width_, y_test, y_hat = exp_gauss_2d[key_]
    df_coverage_ = pd.Panel(np.mean(coverage_[:, 1:], axis=-1), items=titles_, minor_axis=levels_,
                            major_axis=pd.Index(sizes_[1:], name="size"))
    avg_width_ = np.mean(width_, axis=-1)[:, 1:]
    aw_med_ = np.median(avg_width_, axis=-2)
    aw_q95_ = np.percentile(avg_width_, [95], axis=-2)[0]
    aw_min_ = np.percentile(avg_width_, [5], axis=-2)[0]
    aw_max_ = np.max(avg_width_, axis=-2)
    pn_med_ = pd.Panel(aw_med_, items=titles_, minor_axis=levels_, major_axis=pd.Index(sizes_[1:], name="size"))
    pn_q95_ = pd.Panel(aw_q95_, items=titles_, minor_axis=levels_, major_axis=pd.Index(sizes_[1:], name="size"))
    pv_ = np.stack([ttest_1samp(coverage_[:, 1:, j], (1 - levels[j]), axis=-1)[1] for j in xrange(4)], axis=-1)
    pn_pv_ = pd.Panel(pv_, items=titles_, minor_axis=levels_, major_axis=pd.Index(sizes_[1:], name="size"))
    df_output_ = pd.concat({"width med.": pn_med_.to_frame(),
                            "width 95%": pn_q95_.to_frame(),
                            "coverage": df_coverage_.to_frame(),
                            "t-test": pn_pv_.to_frame()},
                           axis=0, names=["statistic"])\
                   .swaplevel(0, 1, axis=0).sort_index(axis=0)
    print key_
    # display(HTML(df_output_.to_html(float_format=lambda f: "%0.3f"%(f,))))
    # print df_output_.to_latex(float_format=lambda f: "%0.3f"%(f,))
    ## Show the true surface
    y_test_ = y_test[1].mean(axis=-1)
    y_hat_ = y_hat[1].mean(axis=-1)
    fig = plt.figure(figsize=(6, 3))
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(mesh_[0], mesh_[1], y_test_.reshape(mesh_[0].shape),
                    cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                    antialiased=False, alpha=0.9)
    ax.view_init(60, -60)
    plt.show()
    ## The approximated surface
    fig = plt.figure(figsize=(6, 3))
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(mesh_[0], mesh_[1], y_hat_.reshape(mesh_[0].shape),
                    cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                    antialiased=False, alpha=0.9)
    ax.view_init(60, -60)
    plt.show()
    # ## The GPR-p
    # fig = plt.figure(figsize=(6, 3))
    # ax = fig.add_subplot(111, projection='3d')
    # ax.plot_surface(mesh_[0], mesh_[1], avg_width_[0, 0, :, 2].reshape(mesh_[0].shape),
    #                 cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
    #                 antialiased=False, alpha=0.9)
    # ax.view_init(60, -60)
    # ax.set_title(titles_[0])
    # plt.show()
    # ## The RRCM / GPR-p : j=0..3, i=2..5
    # i, j = 3, 2
    # awr_ = avg_width_[i, 0, :, j] / avg_width_[0, 0, :, j] - 1
    # fig = plt.figure(figsize=(6, 3))
    # ax = fig.add_subplot(111, projection='3d')
    # ax.plot_surface(mesh_[0], mesh_[1], awr_.reshape(mesh_[0].shape),
    #                 cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
    #                 antialiased=False, alpha=0.9)
    # ax.view_init(60, -60)
    # ax.set_title(titles_[2])
    # plt.show()
    print ratio_.mean(axis=-1)[1, 0]
    break
In [ ]:
plt.hist(np.log(awr_), bins=100)  # `awr_` comes from the commented-out width-ratio block in the cell above
In [ ]:
pn_pv_.to_frame()
In [ ]:
coverage_ = dict()
for key_, item_ in exp_gauss_1d.iteritems():
    sizes_ = pd.Index(item_[1], name="size")
    noise_, theta0_, nugget_ = key_[1:]
    df_cov_ = pd.Panel(item_[2].mean(axis=-1), items=titles_, major_axis=sizes_, minor_axis=levels_).to_frame()
    pv_ = np.stack([ttest_1samp(item_[2][:, :, j], (1 - levels[j]), axis=-1)[1] for j in xrange(4)], axis=-1)
    df_cov_pv_ = pd.Panel(pv_, items=titles_, major_axis=sizes_, minor_axis=levels_).to_frame()
    coverage_[key_] = df_cov_, df_cov_pv_, item_[2]
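To make the meaning of the p-values above concrete, a small self-contained toy example of the same one-sample t-test on synthetic per-replication coverages (the numbers are illustrative only):
In [ ]:
# Toy one-sample t-test: 25 synthetic coverage values against a nominal level of 0.95.
rng = np.random.RandomState(1)
toy_cov_ = 0.95 + 0.01 * rng.randn(25)
ttest_1samp(toy_cov_, 0.95)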
In [ ]:
from scipy.stats import ttest_1samp
In [ ]:
item_[2].shape
In [ ]:
np.stack([ttest_1samp(item_[2][:,:,j], (1-levels[j]), axis=-1)[1] for j in xrange(4)], axis=-1)
In [ ]:
np.abs(item_[2].mean(axis=-1)-(1-levels)[np.newaxis, np.newaxis]) / item_[2].std(axis=-1)
In [ ]:
hits_ = np.round(item_[2].mean(axis=-1)*2601)
pv_ = np.stack([np.vectorize(lambda x: binom_test(x, n=2601, p=1 - levels[j]))(hits_[..., j])
                for j in range(4)], axis=-1)
pv_[pv_ < 0.001] = 0
In [ ]:
pv_
In [ ]:
item_[2].shape
In [ ]:
item_[2][..., j, :].shape
In [ ]:
df_.T
In [ ]:
print df_.T.to_latex()
In [ ]:
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(mesh_[0], mesh_[1], m_w_[1, 1, :, 0].reshape(mesh_[0].shape),
                cstride=1, rstride=1, cmap=plt.cm.coolwarm, lw=0,
                antialiased=False, alpha=0.9)
ax.view_init(60, -60)
In [ ]: