In [3]:
from collections import Counter
from itertools import product
import numpy as np
from scipy import stats
from history_diagnostics import targetspace
In [2]:
%pylab
In [4]:
def evaluate_bootstrap_cdfs(metric, gen_func, n_bs_ref=200, n_test=500):
ref_sample = gen_func()
metric_sample = np.fromiter((metric(np.random.choice(ref_sample, len(ref_sample))) for _ in range(n_bs_ref)),
dtype=float, count=n_bs_ref)
cdf = targetspace.MetricCDF(metric_sample)
p_better = np.asarray([cdf(metric(gen_func())) for _ in range(n_test)])
return p_better, cdf
def evaluate_bootstrap_cdfs_2(metric, gen_func, n_bs_ref=200, n_test=500):
p_better = []
for _ in range(n_test):
ref_sample = gen_func()
test_sample = gen_func()
metric_sample = np.fromiter((metric(np.random.choice(ref_sample, len(ref_sample))) for _ in range(n_bs_ref)),
dtype=float, count=n_bs_ref)
cdf = targetspace.MetricCDF(metric_sample)
p_better.append(cdf(metric(test_sample)))
return np.asarray(p_better)
In [7]:
def plot_p_better_cdfs_Normal(n_bs_ref, n_test=1000):
figure()
suptitle("Normal(0, sig, n) with n_bs_ref={}".format(n_bs_ref))
plot([0,1], [0,1], "r--", lw=2)
bins = linspace(0, 1, 101)
def metric(s):
return s.sum() / len(s)
samp_size_max_exp = 5
for sig_exp in range(0, samp_size_max_exp):
sig = 10**(-sig_exp)
for samp_size_exp in range(0, samp_size_max_exp+1):
samp_size = 10**samp_size_exp
print(sig, samp_size)
samp_gen = lambda: np.random.randn(samp_size) * sig
p_better = evaluate_bootstrap_cdfs_2(metric, samp_gen, n_bs_ref, n_test)
hist(p_better, bins, cumulative=True, normed=True, histtype="step",
label="sig=10^-{} n=10^{}".format(sig_exp, samp_size_exp))
legend(loc='best')
In [8]:
plot_p_better_cdfs_Normal(100)