A notebook to process experimental results of ex2_prob_params.py. p(reject) as problem parameters are varied.
In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import fsic.data as data
import fsic.glo as glo
import fsic.indtest as it
import fsic.kernel as kernel
import fsic.plot as plot
import fsic.util as util
import scipy.stats as stats
In [ ]:
plot.set_default_matplotlib_options()
"""
# font options
font = {
#'family' : 'normal',
#'weight' : 'bold',
'size' : 18
}
plt.rc('font', **font)
"""
In [ ]:
def load_plot_vs_params(fname, h1_true=True, xlabel='Problem parameter', show_legend=True):
func_xvalues = lambda agg_results: agg_results['prob_params']
ex = 2
def func_title(agg_results):
repeats, _, n_methods = agg_results['job_results'].shape
alpha = agg_results['alpha']
test_size = (1.0 - agg_results['tr_proportion'])*agg_results['sample_size']
title = '%s. %d trials. test size: %d. $\\alpha$ = %.2g.'%\
( agg_results['prob_label'], repeats, test_size, alpha)
return title
#plt.figure(figsize=(10,5))
results = plot.plot_prob_reject(
ex, fname, h1_true, func_xvalues, xlabel=xlabel, func_title=func_title)
plt.title('')
plt.gca().legend(loc='best').set_visible(show_legend)
#plt.grid(True)
return results
def load_runtime_vs_params(fname, h1_true=True, xlabel='Problem parameter',
show_legend=True, xscale='linear', yscale='log'):
func_xvalues = lambda agg_results: agg_results['prob_params']
ex = 2
def func_title(agg_results):
repeats, _, n_methods = agg_results['job_results'].shape
alpha = agg_results['alpha']
title = '%s. %d trials. $\\alpha$ = %.2g.'%\
( agg_results['prob_label'], repeats, alpha)
return title
#plt.figure(figsize=(10,6))
results = plot.plot_runtime(ex, fname,
func_xvalues, xlabel=xlabel, func_title=func_title)
plt.title('')
plt.gca().legend(loc='best').set_visible(show_legend)
#plt.grid(True)
if xscale is not None:
plt.xscale(xscale)
if yscale is not None:
plt.yscale(yscale)
return results
In [ ]:
# H0 true. Same Gaussian.
sg_fname = 'ex2-sg-me6_n4000_J1_rs300_pmi10.000_pma90.000_a0.050_trp0.50.p'
#sg_fname = 'ex2-sg-me5_n4000_J1_rs300_pmi10.000_pma90.000_a0.050_trp0.50.p'
#g_results = load_plot_vs_params(
# sg_fname, h1_true=False, xlabel='$d_x$ and $d_y$', show_legend=True)
#lt.ylim([0.03, 0.1])
#plt.savefig(gmd_fname.replace('.p', '.pdf', 1))
In [ ]:
# H0 true. Same Gaussian. Large dimensions
#bsg_fname = 'ex2-bsg-me7_n4000_J1_rs300_pmi100.000_pma500.000_a0.050_trp0.50.p'
bsg_fname = 'ex2-bsg-me6_n4000_J1_rs300_pmi100.000_pma400.000_a0.050_trp0.50.p'
#bsg_results = load_plot_vs_params(bsg_fname, h1_true=False, xlabel='$d_x$ and $d_y$',
# show_legend=False)
#plt.ylim([0.03, 0.1])
#plt.savefig(bsg_fname.replace('.p', '.pdf', 1), bbox_inches='tight')
In [ ]:
# sin frequency problem
sin_fname = 'ex2-sin-me6_n4000_J1_rs300_pmi1.000_pma6.000_a0.050_trp0.50.p'
# sin_fname = 'ex2-sin-me6_n4000_J1_rs100_pmi1.000_pma6.000_a0.050_trp0.20.p'
#sin_fname = 'ex2-sin-me7_n4000_J1_rs300_pmi1.000_pma6.000_a0.050_trp0.50.p'
sin_results = load_plot_vs_params(
sin_fname, h1_true=True, xlabel=r'$\omega$ in $1+\sin(\omega x)\sin(\omega y)$',
show_legend=False)
plt.savefig(sin_fname.replace('.p', '.pdf', 1), bbox_inches='tight')
In [ ]:
# Gaussian sign problem
gsign_fname = 'ex2-gsign-me6_n4000_J1_rs300_pmi1.000_pma6.000_a0.050_trp0.50.p'
#gsign_fname = 'ex2-gsign-me7_n4000_J1_rs300_pmi1.000_pma6.000_a0.050_trp0.50.p'
#gsign_fname = 'ex2-gsign-me10_n4000_J1_rs100_pmi1.000_pma5.000_a0.050_trp0.50.p'
gsign_results = load_plot_vs_params(gsign_fname, h1_true=True,
xlabel='$d_x$', show_legend=False)
# plt.legend(bbox_to_anchor=(1.1, 1.05))
plt.savefig(gsign_fname.replace('.p', '.pdf', 1), bbox_inches='tight')
A toy problem where X follows the standard multivariate Gaussian, and Y = sign(product(X))*|Z| where Z ~ N(0, 1).
In [ ]:
# H0 true. Same Gaussian. medium-sized dimensions
#msg_fname = 'ex2-msg-me10_n4000_J1_rs100_pmi100.000_pma500.000_a0.050_trp0.50.p'
msg_fname = 'ex2-msg-me6_n4000_J1_rs300_pmi50.000_pma250.000_a0.050_trp0.50.p'
msg_results = load_plot_vs_params(msg_fname, h1_true=False, xlabel='$d_x$ and $d_y$',
show_legend=False)
plt.savefig(msg_fname.replace('.p', '.pdf', 1), bbox_inches='tight')
#plt.ylim([0.03, 0.1])
In [ ]:
load_runtime_vs_params(msg_fname, h1_true=False, show_legend=False,
yscale='log', xlabel='$d_x$ and $d_y$');
plt.savefig(msg_fname.replace('.p', '', 1)+'_time.pdf', bbox_inches='tight')
In [ ]:
# pairwise sign problem
pws_fname = 'ex2-pwsign-me6_n4000_J1_rs200_pmi20.000_pma100.000_a0.050_trp0.50.p'
#pwd_results = load_plot_vs_params(
# pws_fname, h1_true=True, xlabel=r'$d$',
# show_legend=True)
#plt.ylim([0, 1.1])
In [ ]:
# uniform rotate with noise dimensions
urot_noise_fname = 'ex2-urot_noise-me6_n4000_J1_rs200_pmi0.000_pma6.000_a0.050_trp0.50.p'
#urot_noise_results = load_plot_vs_params(
# urot_noise_fname, h1_true=True, xlabel='Noise dimensions for X and Y',
# show_legend=True)
In [ ]:
# Vary the rotation angle
#u2drot_fname = 'ex2-u2drot-me8_n4000_J1_rs200_pmi0.000_pma10.000_a0.010_trp0.50.p'
u2drot_fname = 'ex2-u2drot-me6_n4000_J1_rs200_pmi0.000_pma10.000_a0.050_trp0.50.p'
#u2drot_fname = 'ex2-u2drot-me5_n4000_J1_rs300_pmi0.000_pma10.000_a0.050_trp0.50.p'
#u2drot_results = load_plot_vs_params(
# u2drot_fname, h1_true=True, xlabel='Rotation angle (in degrees)', show_legend=True)
#plt.ylim([0, 0.05])
In [ ]:
#fname = 'sin-job_nfsicJ10_opt-n4000_J1_r220_p5.000_a0.010_trp0.50.p'
#fname = 'sg-job_nfsicJ10_perm_med-n4000_J1_r8_p50.000_a0.050_trp0.50.p'
#fpath = glo.ex_result_file(2, 'sg', fname)
#result = glo.pickle_load(fpath)
In [ ]:
#fname = 'ex2-sin-me7_n4000_J1_rs200_pmi1.000_pma5.000_a0.010_trp0.50.p'
fname = 'ex2-sg-me6_n4000_J1_rs100_pmi10.000_pma90.000_a0.050_trp0.50.p'
#fname = 'ex2-u2drot-me7_n4000_J1_rs200_pmi0.000_pma10.000_a0.010_trp0.50.p'
fpath = glo.ex_result_file(2, fname)
result = glo.pickle_load(fpath)
In [ ]:
def load_tpm_table(ex, fname, key):
"""
Load a trials x parameters x methods numpy array of results.
The value to load is specified by the key.
"""
results = glo.ex_load_result(ex, fname)
f_val = lambda job_results: job_results['test_result'][key]
vf_val = np.vectorize(f_val)
# results['job_results'] is a dictionary:
# {'test_result': (dict from running perform_test(te) '...':..., }
vals = vf_val(results['job_results'])
#repeats, _, n_methods = results['job_results'].shape
met_job_funcs = results['method_job_funcs']
return vals, met_job_funcs
In [ ]:
sta, met_job_funcs = load_tpm_table(ex=2, fname=fname, key='test_stat')
sta.shape
In [ ]:
met_job_funcs
In [ ]:
nfsicJ10_stats = sta[:, :, 1]
plt.figure(figsize=(12, 5))
plt.imshow(nfsicJ10_stats.T, interpolation='none')
plt.colorbar(orientation='horizontal')
In [ ]:
J = 10
thresh = stats.chi2.isf(0.05, df=J)
np.mean(nfsicJ10_stats > thresh, 0)
In [ ]:
param_stats = nfsicJ10_stats[:, 3]
plt.hist(param_stats, normed=True)
dom = np.linspace(1e-1, np.max(param_stats)+2, 500)
chi2_den = stats.chi2.pdf(dom, df=J)
plt.plot(dom, chi2_den, '-')