A notebook to process experimental results of ex1_vary_n.py. p(reject) as the sample size is varied.


In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'
import numpy as np
import matplotlib.pyplot as plt
import fsic.data as data
import fsic.ex.exglobal as exglo
import fsic.glo as glo
import fsic.indtest as it
import fsic.kernel as kernel
import fsic.plot as plot
import fsic.util as util

import scipy.stats as stats

In [ ]:
plot.set_default_matplotlib_options()

In [ ]:
def load_plot_vs_n(fname, h1_true, xlabel='Sample size $n$', show_legend=True, xscale='log'):
    func_xvalues = lambda agg_results: agg_results['sample_sizes']
    ex = 1
    def func_title(agg_results):
        repeats, _, n_methods = agg_results['job_results'].shape
        alpha = agg_results['alpha']
        title = '%s. %d trials. $\\alpha$ = %.2g.'%\
            ( agg_results['prob_label'], repeats, alpha)
        return title
    #plt.figure(figsize=(10,5))
    results = plot.plot_prob_reject(
        ex, fname, h1_true, func_xvalues, xlabel=xlabel, func_title=func_title)
    
    plt.title('')
    plt.gca().legend(loc='best').set_visible(show_legend)
    #plt.grid(True)
    if xscale is not None:
        plt.xscale(xscale)
        
    return results

def load_runtime_vs_n(fname, h1_true=True, xlabel='Sample size $n$', 
                      show_legend=True, xscale='log', yscale='log'):
    func_xvalues = lambda agg_results: agg_results['sample_sizes']
    ex = 1
    def func_title(agg_results):
        repeats, _, n_methods = agg_results['job_results'].shape
        alpha = agg_results['alpha']
        title = '%s. %d trials. $\\alpha$ = %.2g.'%\
            ( agg_results['prob_label'], repeats, alpha)
        return title
    
    #plt.figure(figsize=(10,6))
    
    results = plot.plot_runtime(ex, fname,  
                                func_xvalues, xlabel=xlabel, func_title=func_title)
    
    plt.title('')
    plt.gca().legend(loc='best').set_visible(show_legend)
    #plt.grid(True)
    if xscale is not None:
        plt.xscale(xscale)
    if yscale is not None:
        plt.yscale(yscale)
        
    return results

In [ ]:
def plot_pow_vs_time(results, h1_true=True, func_title=None, xscale='log', yscale='linear'):
    #results = glo.ex_load_result(ex, fname)
    repeats, _, n_methods = results['job_results'].shape
    func_names = [f.__name__ for f in results['method_job_funcs'] ]
    
    time_accessor = lambda job_results: job_results['time_secs']
    rej_accessor = lambda jr: jr['test_result']['h0_rejected']
    
    vf_time = np.vectorize(time_accessor)
    vf_ref = np.vectorize(rej_accessor)
    # results['job_results'] is a dictionary: 
    # {'test_result': (dict from running perform_test(te) '...':..., }
    rejs = vf_ref(results['job_results'])
    #print rejs
    times = vf_time(results['job_results'])
    
    # #varying params x #methods
    time_avg = np.mean(times, axis=0)
    time_std = np.std(times, axis=0)
    mean_rejs = np.mean(rejs, axis=0)
    #print mean_rejs
    # plot
    line_styles = exglo.func_plot_fmt_map()
    method_labels = exglo.get_func2label_map()
    
    for i in range(n_methods):    
        fmt = line_styles[func_names[i]]
        #plt.errorbar(ns*te_proportion, mean_rejs[:, i], std_pvals[:, i])
        method_label = method_labels[func_names[i]]
        plt.plot(time_avg[:, i], mean_rejs[:, i], fmt, label=method_label)
            
    ylabel = 'Test power' if h1_true else 'Type-I error'
    plt.ylabel(ylabel)
    plt.xlabel('Time (s)')
    #plt.xlim([np.min(xvalues), np.max(xvalues)])
    #plt.xticks( xvalues, xvalues )
    #plt.legend(loc='best')
    plt.gca().set_xscale(xscale)
    plt.gca().set_yscale(yscale)
    title = '%s. %d trials. '%( results['prob_label'],
            repeats ) if func_title is None else func_title(results)
    plt.title(title)
    #plt.grid()
    return results

In [ ]:
# H0 true. Same Gaussian.
# sg_fname = 'ex1-sg_d250-me7_rs300_nmi100.000_nma100000.000_a0.050_trp0.50.p'
sg_fname = 'ex1-sg_d250-me6_rs200_nmi1000_nma100000_a0.050_trp0.50.p'
sg_results = load_plot_vs_n(
    sg_fname, h1_true=False, show_legend=False)
#plt.ylim([0.00, 0.23])
plt.xlim([3000, 10**5])
# plt.ylim([0.03, 0.07])
plt.savefig(sg_fname.replace('.p', '.pdf', 1), bbox_inches='tight')

In [ ]:
load_runtime_vs_n(sg_fname, xscale='log', yscale='log', show_legend=False);
#plt.legend(bbox_to_anchor=(1.7, 1))
plt.ylim([0.08, 2000])
plt.savefig(sg_fname.replace('.p', '', 1) + '_time.pdf', bbox_inches='tight')

In [ ]:
# sin frequency problem
#sin_fname = 'ex1-sin_w4-me5_rs300_nmi1000_nma100000_a0.050_trp0.50.p'
sin_fname = 'ex1-sin_w4-me6_rs300_nmi1000_nma100000_a0.050_trp0.50.p'
# sin_fname = 'ex1-sin_w4-me6_rs100_nmi1000_nma100000_a0.050_trp0.20.p'
sin_results = load_plot_vs_n(sin_fname, h1_true=True, show_legend=False)
plt.savefig(sin_fname.replace('.p', '.pdf', 1), bbox_inches='tight')

In [ ]:
# plot_pow_vs_time(sin_results, xscale='log', yscale='linear');

In [ ]:
# Gaussian sign problem 
#gsign_fname = 'ex1-gsign_d4-me7_rs300_nmi100.000_nma100000.000_a0.050_trp0.50.p'
gsign_fname = 'ex1-gsign_d4-me6_rs300_nmi1000_nma100000_a0.050_trp0.50.p'
#gsign_fname = 'ex1-gsign_d4-me6_rs100_nmi1000_nma1000000_a0.050_trp0.50.p'
gsign_results = load_plot_vs_n(gsign_fname, h1_true=True, show_legend=False)
#plt.ylim([0, 1.1])
# plt.legend(bbox_to_anchor=(1.70, 1.05))

plt.savefig(gsign_fname.replace('.p', '.pdf', 1), bbox_inches='tight')

In [ ]:
gsign_results = load_runtime_vs_n(gsign_fname, xscale='log', yscale='log', show_legend=False)
plt.legend(bbox_to_anchor=(1.7, 1))

In [ ]:
# plot_pow_vs_time(gsign_results, xscale='log', yscale='linear');
# plt.legend(bbox_to_anchor=(1.7, 1))
# plt.savefig(gsign_fname.replace('.p', '', 1) + '_timepow.pdf', bbox_inches='tight')

A toy problem where X follows the standard multivariate Gaussian, and Y = sign(product(X))*|Z| where Z ~ N(0, 1).



In [ ]:
# # H0 true. Same Gaussian.
# sg50_fname = 'ex1-sg_d50-me5_rs200_nmi100.000_nma10000.000_a0.050_trp0.50.p'

# sg50_results = load_plot_vs_n(
#     sg50_fname, h1_true=False, show_legend=True)

Examine a result file


In [ ]:
#fname = 'ex2-sin-me7_n4000_J1_rs200_pmi1.000_pma5.000_a0.010_trp0.50.p'
fname = 'ex2-sg-me6_n4000_J1_rs100_pmi10.000_pma90.000_a0.050_trp0.50.p'
#fname = 'ex2-u2drot-me7_n4000_J1_rs200_pmi0.000_pma10.000_a0.010_trp0.50.p'
fpath = glo.ex_result_file(2, fname)
result = glo.pickle_load(fpath)

In [ ]:
def load_tpm_table(ex, fname, key):
    """
    Load a trials x parameters x methods numpy array of results.
    The value to load is specified by the key.
    """
    ex = 2
    results = glo.ex_load_result(ex, fname)
    f_val = lambda job_results: job_results['test_result'][key]
    vf_val = np.vectorize(f_val)
    # results['job_results'] is a dictionary: 
    # {'test_result': (dict from running perform_test(te) '...':..., }
    vals = vf_val(results['job_results'])
    #repeats, _, n_methods = results['job_results'].shape
    met_job_funcs = results['method_job_funcs']
    return vals, met_job_funcs

In [ ]:
sta, met_job_funcs = load_tpm_table(ex=2, fname=fname, key='test_stat')
sta.shape

In [ ]:
met_job_funcs

In [ ]:
nfsicJ10_stats = sta[:, :, 1]
plt.figure(figsize=(12, 5))
plt.imshow(nfsicJ10_stats.T, interpolation='none')
plt.colorbar(orientation='horizontal')

In [ ]:
J = 10
thresh = stats.chi2.isf(0.05, df=J)
np.mean(nfsicJ10_stats > thresh, 0)

In [ ]:
param_stats = nfsicJ10_stats[:, 3]
plt.hist(param_stats, normed=True)

dom = np.linspace(1e-1, np.max(param_stats)+2, 500)
chi2_den = stats.chi2.pdf(dom, df=J)
plt.plot(dom, chi2_den, '-')

In [ ]:


In [ ]:


In [ ]: