A notebook to process experimental results of ex1_vary_n.py. p(reject) as the sample size is varied.
In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'
import numpy as np
import matplotlib.pyplot as plt
import fsic.data as data
import fsic.ex.exglobal as exglo
import fsic.glo as glo
import fsic.indtest as it
import fsic.kernel as kernel
import fsic.plot as plot
import fsic.util as util
import scipy.stats as stats
In [ ]:
plot.set_default_matplotlib_options()
In [ ]:
def load_plot_vs_n(fname, h1_true, xlabel='Sample size $n$', show_legend=True, xscale='log'):
func_xvalues = lambda agg_results: agg_results['sample_sizes']
ex = 1
def func_title(agg_results):
repeats, _, n_methods = agg_results['job_results'].shape
alpha = agg_results['alpha']
title = '%s. %d trials. $\\alpha$ = %.2g.'%\
( agg_results['prob_label'], repeats, alpha)
return title
#plt.figure(figsize=(10,5))
results = plot.plot_prob_reject(
ex, fname, h1_true, func_xvalues, xlabel=xlabel, func_title=func_title)
plt.title('')
plt.gca().legend(loc='best').set_visible(show_legend)
#plt.grid(True)
if xscale is not None:
plt.xscale(xscale)
return results
def load_runtime_vs_n(fname, h1_true=True, xlabel='Sample size $n$',
show_legend=True, xscale='log', yscale='log'):
func_xvalues = lambda agg_results: agg_results['sample_sizes']
ex = 1
def func_title(agg_results):
repeats, _, n_methods = agg_results['job_results'].shape
alpha = agg_results['alpha']
title = '%s. %d trials. $\\alpha$ = %.2g.'%\
( agg_results['prob_label'], repeats, alpha)
return title
#plt.figure(figsize=(10,6))
results = plot.plot_runtime(ex, fname,
func_xvalues, xlabel=xlabel, func_title=func_title)
plt.title('')
plt.gca().legend(loc='best').set_visible(show_legend)
#plt.grid(True)
if xscale is not None:
plt.xscale(xscale)
if yscale is not None:
plt.yscale(yscale)
return results
In [ ]:
def plot_pow_vs_time(results, h1_true=True, func_title=None, xscale='log', yscale='linear'):
#results = glo.ex_load_result(ex, fname)
repeats, _, n_methods = results['job_results'].shape
func_names = [f.__name__ for f in results['method_job_funcs'] ]
time_accessor = lambda job_results: job_results['time_secs']
rej_accessor = lambda jr: jr['test_result']['h0_rejected']
vf_time = np.vectorize(time_accessor)
vf_ref = np.vectorize(rej_accessor)
# results['job_results'] is a dictionary:
# {'test_result': (dict from running perform_test(te) '...':..., }
rejs = vf_ref(results['job_results'])
#print rejs
times = vf_time(results['job_results'])
# #varying params x #methods
time_avg = np.mean(times, axis=0)
time_std = np.std(times, axis=0)
mean_rejs = np.mean(rejs, axis=0)
#print mean_rejs
# plot
line_styles = exglo.func_plot_fmt_map()
method_labels = exglo.get_func2label_map()
for i in range(n_methods):
fmt = line_styles[func_names[i]]
#plt.errorbar(ns*te_proportion, mean_rejs[:, i], std_pvals[:, i])
method_label = method_labels[func_names[i]]
plt.plot(time_avg[:, i], mean_rejs[:, i], fmt, label=method_label)
ylabel = 'Test power' if h1_true else 'Type-I error'
plt.ylabel(ylabel)
plt.xlabel('Time (s)')
#plt.xlim([np.min(xvalues), np.max(xvalues)])
#plt.xticks( xvalues, xvalues )
#plt.legend(loc='best')
plt.gca().set_xscale(xscale)
plt.gca().set_yscale(yscale)
title = '%s. %d trials. '%( results['prob_label'],
repeats ) if func_title is None else func_title(results)
plt.title(title)
#plt.grid()
return results
In [ ]:
# H0 true. Same Gaussian.
# sg_fname = 'ex1-sg_d250-me7_rs300_nmi100.000_nma100000.000_a0.050_trp0.50.p'
sg_fname = 'ex1-sg_d250-me6_rs200_nmi1000_nma100000_a0.050_trp0.50.p'
sg_results = load_plot_vs_n(
sg_fname, h1_true=False, show_legend=False)
#plt.ylim([0.00, 0.23])
plt.xlim([3000, 10**5])
# plt.ylim([0.03, 0.07])
plt.savefig(sg_fname.replace('.p', '.pdf', 1), bbox_inches='tight')
In [ ]:
load_runtime_vs_n(sg_fname, xscale='log', yscale='log', show_legend=False);
#plt.legend(bbox_to_anchor=(1.7, 1))
plt.ylim([0.08, 2000])
plt.savefig(sg_fname.replace('.p', '', 1) + '_time.pdf', bbox_inches='tight')
In [ ]:
# sin frequency problem
#sin_fname = 'ex1-sin_w4-me5_rs300_nmi1000_nma100000_a0.050_trp0.50.p'
sin_fname = 'ex1-sin_w4-me6_rs300_nmi1000_nma100000_a0.050_trp0.50.p'
# sin_fname = 'ex1-sin_w4-me6_rs100_nmi1000_nma100000_a0.050_trp0.20.p'
sin_results = load_plot_vs_n(sin_fname, h1_true=True, show_legend=False)
plt.savefig(sin_fname.replace('.p', '.pdf', 1), bbox_inches='tight')
In [ ]:
# plot_pow_vs_time(sin_results, xscale='log', yscale='linear');
In [ ]:
# Gaussian sign problem
#gsign_fname = 'ex1-gsign_d4-me7_rs300_nmi100.000_nma100000.000_a0.050_trp0.50.p'
gsign_fname = 'ex1-gsign_d4-me6_rs300_nmi1000_nma100000_a0.050_trp0.50.p'
#gsign_fname = 'ex1-gsign_d4-me6_rs100_nmi1000_nma1000000_a0.050_trp0.50.p'
gsign_results = load_plot_vs_n(gsign_fname, h1_true=True, show_legend=False)
#plt.ylim([0, 1.1])
# plt.legend(bbox_to_anchor=(1.70, 1.05))
plt.savefig(gsign_fname.replace('.p', '.pdf', 1), bbox_inches='tight')
In [ ]:
gsign_results = load_runtime_vs_n(gsign_fname, xscale='log', yscale='log', show_legend=False)
plt.legend(bbox_to_anchor=(1.7, 1))
In [ ]:
# plot_pow_vs_time(gsign_results, xscale='log', yscale='linear');
# plt.legend(bbox_to_anchor=(1.7, 1))
# plt.savefig(gsign_fname.replace('.p', '', 1) + '_timepow.pdf', bbox_inches='tight')
A toy problem where X follows the standard multivariate Gaussian, and Y = sign(product(X))*|Z| where Z ~ N(0, 1).
In [ ]:
# # H0 true. Same Gaussian.
# sg50_fname = 'ex1-sg_d50-me5_rs200_nmi100.000_nma10000.000_a0.050_trp0.50.p'
# sg50_results = load_plot_vs_n(
# sg50_fname, h1_true=False, show_legend=True)
In [ ]:
#fname = 'ex2-sin-me7_n4000_J1_rs200_pmi1.000_pma5.000_a0.010_trp0.50.p'
fname = 'ex2-sg-me6_n4000_J1_rs100_pmi10.000_pma90.000_a0.050_trp0.50.p'
#fname = 'ex2-u2drot-me7_n4000_J1_rs200_pmi0.000_pma10.000_a0.010_trp0.50.p'
fpath = glo.ex_result_file(2, fname)
result = glo.pickle_load(fpath)
In [ ]:
def load_tpm_table(ex, fname, key):
"""
Load a trials x parameters x methods numpy array of results.
The value to load is specified by the key.
"""
ex = 2
results = glo.ex_load_result(ex, fname)
f_val = lambda job_results: job_results['test_result'][key]
vf_val = np.vectorize(f_val)
# results['job_results'] is a dictionary:
# {'test_result': (dict from running perform_test(te) '...':..., }
vals = vf_val(results['job_results'])
#repeats, _, n_methods = results['job_results'].shape
met_job_funcs = results['method_job_funcs']
return vals, met_job_funcs
In [ ]:
sta, met_job_funcs = load_tpm_table(ex=2, fname=fname, key='test_stat')
sta.shape
In [ ]:
met_job_funcs
In [ ]:
nfsicJ10_stats = sta[:, :, 1]
plt.figure(figsize=(12, 5))
plt.imshow(nfsicJ10_stats.T, interpolation='none')
plt.colorbar(orientation='horizontal')
In [ ]:
J = 10
thresh = stats.chi2.isf(0.05, df=J)
np.mean(nfsicJ10_stats > thresh, 0)
In [ ]:
param_stats = nfsicJ10_stats[:, 3]
plt.hist(param_stats, normed=True)
dom = np.linspace(1e-1, np.max(param_stats)+2, 500)
chi2_den = stats.chi2.pdf(dom, df=J)
plt.plot(dom, chi2_den, '-')
In [ ]:
In [ ]:
In [ ]: