A notebook to process experimental results of ex2_prob_params.py. p(reject) as problem parameters are varied.


In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import fsic.data as data
import fsic.glo as glo
import fsic.indtest as it
import fsic.kernel as kernel
import fsic.plot as plot
import fsic.util as util

import scipy.stats as stats

In [ ]:
plot.set_default_matplotlib_options()
"""
# font options
font = {
    #'family' : 'normal',
    #'weight' : 'bold',
    'size'   : 18
}
plt.rc('font', **font)
"""

In [ ]:
def load_plot_vs_params(fname, h1_true=True, xlabel='Problem parameter', show_legend=True):
    func_xvalues = lambda agg_results: agg_results['prob_params']
    ex = 2
    def func_title(agg_results):
        repeats, _, n_methods = agg_results['job_results'].shape
        alpha = agg_results['alpha']
        test_size = (1.0 - agg_results['tr_proportion'])*agg_results['sample_size']
        title = '%s. %d trials. test size: %d. $\\alpha$ = %.2g.'%\
            ( agg_results['prob_label'], repeats, test_size, alpha)
        return title
    #plt.figure(figsize=(10,5))
    results = plot.plot_prob_reject(
        ex, fname, h1_true, func_xvalues, xlabel=xlabel, func_title=func_title)
    
    plt.title('')
    plt.gca().legend(loc='best').set_visible(show_legend)
    #plt.grid(True)
        
    return results


def load_runtime_vs_params(fname, h1_true=True, xlabel='Problem parameter', 
                      show_legend=True, xscale='linear', yscale='log'):
    func_xvalues = lambda agg_results: agg_results['prob_params']
    ex = 2
    def func_title(agg_results):
        repeats, _, n_methods = agg_results['job_results'].shape
        alpha = agg_results['alpha']
        title = '%s. %d trials. $\\alpha$ = %.2g.'%\
            ( agg_results['prob_label'], repeats, alpha)
        return title
    
    #plt.figure(figsize=(10,6))
    
    results = plot.plot_runtime(ex, fname,  
                                func_xvalues, xlabel=xlabel, func_title=func_title)
    
    plt.title('')
    plt.gca().legend(loc='best').set_visible(show_legend)
    #plt.grid(True)
    if xscale is not None:
        plt.xscale(xscale)
    if yscale is not None:
        plt.yscale(yscale)
        
    return results

In [ ]:
# H0 true. Same Gaussian.
sg_fname = 'ex2-sg-me6_n4000_J1_rs300_pmi10.000_pma90.000_a0.050_trp0.50.p'
#sg_fname = 'ex2-sg-me5_n4000_J1_rs300_pmi10.000_pma90.000_a0.050_trp0.50.p'
#g_results = load_plot_vs_params(
#   sg_fname, h1_true=False, xlabel='$d_x$ and $d_y$', show_legend=True)
#lt.ylim([0.03, 0.1])
#plt.savefig(gmd_fname.replace('.p', '.pdf', 1))

In [ ]:
# H0 true. Same Gaussian. Large dimensions
#bsg_fname = 'ex2-bsg-me7_n4000_J1_rs300_pmi100.000_pma500.000_a0.050_trp0.50.p'
bsg_fname = 'ex2-bsg-me6_n4000_J1_rs300_pmi100.000_pma400.000_a0.050_trp0.50.p'
#bsg_results = load_plot_vs_params(bsg_fname, h1_true=False, xlabel='$d_x$ and $d_y$', 
#                                 show_legend=False)
#plt.ylim([0.03, 0.1])
#plt.savefig(bsg_fname.replace('.p', '.pdf', 1), bbox_inches='tight')

In [ ]:
# sin frequency problem
sin_fname = 'ex2-sin-me6_n4000_J1_rs300_pmi1.000_pma6.000_a0.050_trp0.50.p'
# sin_fname = 'ex2-sin-me6_n4000_J1_rs100_pmi1.000_pma6.000_a0.050_trp0.20.p'
#sin_fname = 'ex2-sin-me7_n4000_J1_rs300_pmi1.000_pma6.000_a0.050_trp0.50.p'
sin_results = load_plot_vs_params(
    sin_fname, h1_true=True, xlabel=r'$\omega$ in $1+\sin(\omega x)\sin(\omega y)$', 
    show_legend=False)
plt.savefig(sin_fname.replace('.p', '.pdf', 1), bbox_inches='tight')

In [ ]:
# Gaussian sign problem 
gsign_fname = 'ex2-gsign-me6_n4000_J1_rs300_pmi1.000_pma6.000_a0.050_trp0.50.p'
#gsign_fname = 'ex2-gsign-me7_n4000_J1_rs300_pmi1.000_pma6.000_a0.050_trp0.50.p'
#gsign_fname = 'ex2-gsign-me10_n4000_J1_rs100_pmi1.000_pma5.000_a0.050_trp0.50.p'
gsign_results = load_plot_vs_params(gsign_fname, h1_true=True, 
                                   xlabel='$d_x$', show_legend=False)
# plt.legend(bbox_to_anchor=(1.1, 1.05))
plt.savefig(gsign_fname.replace('.p', '.pdf', 1), bbox_inches='tight')

A toy problem where X follows the standard multivariate Gaussian, and Y = sign(product(X))*|Z| where Z ~ N(0, 1).



In [ ]:
# H0 true. Same Gaussian. medium-sized dimensions
#msg_fname = 'ex2-msg-me10_n4000_J1_rs100_pmi100.000_pma500.000_a0.050_trp0.50.p'
msg_fname = 'ex2-msg-me6_n4000_J1_rs300_pmi50.000_pma250.000_a0.050_trp0.50.p'
msg_results = load_plot_vs_params(msg_fname, h1_true=False, xlabel='$d_x$ and $d_y$', 
                                 show_legend=False)
plt.savefig(msg_fname.replace('.p', '.pdf', 1), bbox_inches='tight')
#plt.ylim([0.03, 0.1])

In [ ]:
load_runtime_vs_params(msg_fname, h1_true=False, show_legend=False, 
                       yscale='log', xlabel='$d_x$ and $d_y$');
plt.savefig(msg_fname.replace('.p', '', 1)+'_time.pdf', bbox_inches='tight')

In [ ]:
# pairwise sign problem
pws_fname = 'ex2-pwsign-me6_n4000_J1_rs200_pmi20.000_pma100.000_a0.050_trp0.50.p'
#pwd_results = load_plot_vs_params(
#    pws_fname, h1_true=True, xlabel=r'$d$', 
#    show_legend=True)
#plt.ylim([0, 1.1])

In [ ]:
# uniform rotate with noise dimensions
urot_noise_fname = 'ex2-urot_noise-me6_n4000_J1_rs200_pmi0.000_pma6.000_a0.050_trp0.50.p'
#urot_noise_results = load_plot_vs_params(
#    urot_noise_fname, h1_true=True, xlabel='Noise dimensions for X and Y', 
#    show_legend=True)

In [ ]:
# Vary the rotation angle
#u2drot_fname = 'ex2-u2drot-me8_n4000_J1_rs200_pmi0.000_pma10.000_a0.010_trp0.50.p'
u2drot_fname = 'ex2-u2drot-me6_n4000_J1_rs200_pmi0.000_pma10.000_a0.050_trp0.50.p'
#u2drot_fname = 'ex2-u2drot-me5_n4000_J1_rs300_pmi0.000_pma10.000_a0.050_trp0.50.p'
#u2drot_results = load_plot_vs_params(
#    u2drot_fname, h1_true=True, xlabel='Rotation angle (in degrees)', show_legend=True)
#plt.ylim([0, 0.05])

Examine a trial file


In [ ]:
#fname = 'sin-job_nfsicJ10_opt-n4000_J1_r220_p5.000_a0.010_trp0.50.p'
#fname = 'sg-job_nfsicJ10_perm_med-n4000_J1_r8_p50.000_a0.050_trp0.50.p'
#fpath = glo.ex_result_file(2, 'sg', fname)
#result = glo.pickle_load(fpath)

Examine a result file


In [ ]:
#fname = 'ex2-sin-me7_n4000_J1_rs200_pmi1.000_pma5.000_a0.010_trp0.50.p'
fname = 'ex2-sg-me6_n4000_J1_rs100_pmi10.000_pma90.000_a0.050_trp0.50.p'
#fname = 'ex2-u2drot-me7_n4000_J1_rs200_pmi0.000_pma10.000_a0.010_trp0.50.p'
fpath = glo.ex_result_file(2, fname)
result = glo.pickle_load(fpath)

In [ ]:
def load_tpm_table(ex, fname, key):
    """
    Load a trials x parameters x methods numpy array of results.
    The value to load is specified by the key.
    """
    results = glo.ex_load_result(ex, fname)
    f_val = lambda job_results: job_results['test_result'][key]
    vf_val = np.vectorize(f_val)
    # results['job_results'] is a dictionary: 
    # {'test_result': (dict from running perform_test(te) '...':..., }
    vals = vf_val(results['job_results'])
    #repeats, _, n_methods = results['job_results'].shape
    met_job_funcs = results['method_job_funcs']
    return vals, met_job_funcs

In [ ]:
sta, met_job_funcs = load_tpm_table(ex=2, fname=fname, key='test_stat')
sta.shape

In [ ]:
met_job_funcs

In [ ]:
nfsicJ10_stats = sta[:, :, 1]
plt.figure(figsize=(12, 5))
plt.imshow(nfsicJ10_stats.T, interpolation='none')
plt.colorbar(orientation='horizontal')

In [ ]:
J = 10
thresh = stats.chi2.isf(0.05, df=J)
np.mean(nfsicJ10_stats > thresh, 0)

In [ ]:
param_stats = nfsicJ10_stats[:, 3]
plt.hist(param_stats, normed=True)

dom = np.linspace(1e-1, np.max(param_stats)+2, 500)
chi2_den = stats.chi2.pdf(dom, df=J)
plt.plot(dom, chi2_den, '-')