In [1]:
import os, sys
# Put the directory two levels above the current working directory at the
# front of the module search path, so imports are resolved there first.
sys.path.insert(
    0,
    os.path.abspath(os.path.join(os.pardir, os.pardir)),
)
In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import open_cp.scripted
import open_cp.scripted.analysis as analysis
In [3]:
# Open the saved run "retro_preds.pic.xz". The next cell iterates over
# `loaded` and reads positions 1 and 2 of every item it yields.
# NOTE(review): presumably an xz-compressed pickle of predictions written by
# an open_cp scripted run -- confirm against open_cp.scripted.Loader.
loaded = open_cp.scripted.Loader("retro_preds.pic.xz")
In [4]:
# Every item yielded by `loaded` is read positionally: slot 1 supplies the
# panel title and slot 2 is the object handed to plot_prediction.
times = [entry[1] for entry in loaded]
preds = [entry[2] for entry in loaded]

# Show the predictions at positions 0 and 60 side by side.
fig, axes = plt.subplots(ncols=2, figsize=(16, 7))
for ax, index in zip(axes, (0, 60)):
    analysis.plot_prediction(loaded, preds[index], ax)
    ax.set_title(times[index])
In [5]:
# Read "retro.csv" through analysis.hit_counts_to_beta; the result is what the
# following cells pass to analysis.plot_betas.
# NOTE(review): presumably the CSV holds hit counts that are turned into beta
# distributions per prediction method -- confirm in open_cp.scripted.analysis.
betas = analysis.hit_counts_to_beta("retro.csv")
In [6]:
# One 12x8 figure with a single axes, showing everything held in `betas`.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1)
analysis.plot_betas(betas, ax)
In [7]:
# Same figure layout as the previous cell, but this time range(1, 21) is
# passed as the third argument of plot_betas.
# NOTE(review): presumably this restricts the plot to coverage levels 1..20 --
# confirm against analysis.plot_betas.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1)
analysis.plot_betas(betas, ax, range(1, 21))
Originally, I noticed that these results were not reproducible. This was caused by the conversion from a "continuous" prediction to a grid-based prediction, where we carry out a Monte Carlo integration step (sample the kernel at a smallish number of points in each grid cell, and average). Apparently this introduces enough noise that you get quite different results from one run to the next.
We now check that running twice gives the same result (because we now sample to a sub-grid).
The result is that it doesn't really seem to matter: 50m seems fine.
In [22]:
# Load the two result files in order. NOTE(review): going by the note above,
# these are presumably two runs of the same experiment -- confirm.
all_betas, all_betas1 = (
    analysis.hit_counts_to_beta(csv_name)
    for csv_name in ("retro_opt.csv", "retro_opt1.csv")
)
In [23]:
# Keys to pick out of the loaded results: one per bandwidth 50, 100, ..., 300.
sample = [
    "RetroHotspotCtsProvider(Weight=Quartic(bandwidth={}))".format(bandwidth)
    for bandwidth in range(50, 301, 50)
]
# Restrict both result sets to the keys listed in `sample`.
# Note that this rebinds `betas`, replacing the value loaded from "retro.csv".
betas = {name: all_betas[name] for name in sample}
betas1 = {name: all_betas1[name] for name in sample}
In [25]:
fig, axes = plt.subplots(ncols=2, figsize=(16, 7))
# Same data in both panels; only the right-hand call passes plot_sds=False.
for panel, extra in zip(axes, ({}, {"plot_sds": False})):
    analysis.plot_betas(betas, panel, range(1, 21), **extra)
In [26]:
fig, axes = plt.subplots(ncols=2, figsize=(16, 7))
# Same layout as the previous cell, now for `betas1`: the left panel uses
# plot_betas' default for plot_sds, the right panel passes plot_sds=False.
for panel, extra in zip(axes, ({}, {"plot_sds": False})):
    analysis.plot_betas(betas1, panel, range(1, 21), **extra)
In [ ]:
In [27]:
# Read "retro_grid_opt.csv" through analysis.hit_counts_to_beta.
# Gotcha: this rebinds `all_betas`, discarding the value loaded earlier from
# "retro_opt.csv"; every cell below therefore works on the grid results.
all_betas = analysis.hit_counts_to_beta("retro_grid_opt.csv")
In [28]:
# Keys to pick out of the grid results: one per bandwidth 50, 100, ..., 300.
sample = [
    "RetroHotspotProvider(Weight=Quartic(bandwidth={}))".format(bandwidth)
    for bandwidth in range(50, 301, 50)
]
# Restrict the loaded results to the keys listed in `sample` (rebinds `betas`).
betas = {name: all_betas[name] for name in sample}
In [29]:
fig, axes = plt.subplots(ncols=2, figsize=(16, 7))
# Left panel: plot_betas with its default plot_sds; right panel: plot_sds=False.
for panel, extra in zip(axes, ({}, {"plot_sds": False})):
    analysis.plot_betas(betas, panel, range(1, 21), **extra)
In [30]:
import importlib
# Development aid: re-execute the already imported `analysis` module so that
# edits made to its source are picked up without restarting the kernel.
importlib.reload(analysis)
Out[30]:
In [31]:
fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(1, 1, 1)
# Keep the return value: the cells below iterate over `normed` and index it
# by key to get a sequence of numbers. The assignment produces no cell output.
normed = analysis.plot_betas_means_against_max(all_betas, ax, range(1, 21))
In [32]:
# Keys of `normed` for which every entry is at least 0.95.
{
    name
    for name in normed
    if all(value >= 0.95 for value in normed[name])
}
Out[32]:
In [33]:
# Smallest entry of `normed` for each key.
{name: min(normed[name]) for name in normed}
Out[33]:
In [40]:
import re
import numpy as np
def label(x):
    """Return the integer that follows ``width=`` in the string ``x``.

    For example ``"RetroHotspotProvider(Weight=Quartic(bandwidth=150))"``
    gives ``150``.

    :param x: Text containing ``width=`` immediately followed by one or more
      decimal digits.  The first such occurrence is used.

    :return: Those digits converted to an ``int``.

    :raises ValueError: If ``x`` contains no ``width=<digits>`` pattern.
    """
    # Raw string: "\d" inside an ordinary literal is an invalid escape
    # sequence (DeprecationWarning since 3.6, SyntaxWarning from 3.12).
    m = re.search(r"width=(\d+)", x)
    if m is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("no 'width=<digits>' found in {!r}".format(x))
    return int(m.group(1))
# Mean of each sequence in `normed`, keyed by the integer parsed from its key.
avg = {label(name): np.mean(normed[name]) for name in normed}

fig, ax = plt.subplots(figsize=(8, 6))
# Plot the means in increasing order of that integer.
widths = np.array(sorted(avg))
means = [avg[width] for width in widths]
ax.plot(widths, means)
Out[40]:
In [41]:
# Display the mapping built in the previous cell (parsed width -> mean value).
avg
Out[41]:
In [42]:
# Widths to draw in colour with a legend entry; everything else is black.
highlight = {140, 150}
# NOTE(review): 1..20 presumably mirrors the range(1, 21) used when `normed`
# was computed, so each sequence should have 20 entries -- confirm.
positions = list(range(1, 21))

fig, ax = plt.subplots(figsize=(10, 6))
for name in normed:
    width = label(name)
    if width in highlight:
        ax.plot(positions, normed[name], label=width)
    else:
        # A label starting with "_" is left out of the legend by matplotlib.
        ax.plot(positions, normed[name], color="black", label="_")
# Trailing semicolon keeps the Legend object from being echoed as cell output.
ax.legend();
In [ ]: