A notebook to process experimental results of ex4_real_data.py.


In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'
import numpy as np
import matplotlib.pyplot as plt
import fsic.data as data
import fsic.ex.exglobal as exglo
import fsic.glo as glo
import fsic.indtest as it
import fsic.kernel as kernel
import fsic.plot as plot
import fsic.util as util

import scipy.stats as stats

In [ ]:
# font options
font = {
    #'family' : 'normal',
    #'weight' : 'bold',
    'size'   : 14
}

plt.rc('font', **font)
plt.rc('lines', linewidth=2)

In [ ]:
def methods_powers(results, reps, alpha):
    """Return the powers of all methods"""
    n_methods = len(results['method_job_funcs'])
    met_powers = np.zeros(n_methods)
    results0 = results['job_results'] 
    for mi in range(n_methods):
        method_results = results0[:, mi]
        pvals = np.array([method_results[r]['test_result']['pvalue'] for r in range(reps)] )
        met_powers[mi] = np.mean(pvals < alpha)
    return met_powers


def table_powers(result_fnames, reps, alpha):
    """print a table showing test powers of all methods in all the result files."""
    ex = 4
    met_pows = []
    ns = []
    prob_labels = []
    for fi, fname in enumerate(result_fnames):
        results = glo.ex_load_result(ex, fname)
        #tr_proportion = results['tr_proportion']
        #te_proportion = 1-tr_proportion
        n = results['n']
        
        #nte = int(te_proportion*n)
        ns.append(n)
        met_pows.append(methods_powers(results, reps, alpha))
        prob_labels.append(results['prob_label'])
        
    f2label = exglo.get_func2label_map()
    method_labels = [f2label[f.__name__] for f in results['method_job_funcs']] 
    print(method_labels)
    print('')
    
    for prob_label, mps, n in zip(prob_labels, met_pows, ns):
        mps_str = [('%.3f'%p).lstrip('0') for p in mps]
        str_row = [prob_label] + ['%d'%n] + mps_str
        print(' & '.join(str_row))
        print(' \\\\ \n')

#table_powers(result_fnames, fname_labels, 500)

In [ ]:
fnames = [
    #'ex4-white_wine.n2000-me6_rs300_a0.050_trp0.50.p', 
    #'ex4-white_wine_h0.n2000-me6_rs300_a0.050_trp0.50.p',
    #'ex4-white_wine_std.n3000-me4_rs300_a0.050_trp0.50.p', 
    #'ex4-white_wine_std_h0.n3000-me4_rs300_a0.050_trp0.50.p',
    'ex4-white_wine_std.n2000-me5_rs300_a0.050_trp0.50.p',
    'ex4-white_wine_std_h0.n2000-me5_rs300_a0.050_trp0.50.p',
    'ex4-msd50000_std_h0.n2000-me5_rs300_a0.050_trp0.50.p',
    'ex4-msd50000_std.n2000-me5_rs300_a0.050_trp0.50.p',
    #'ex4-msd50000_std_h0.n6000-me4_rs300_a0.050_trp0.50.p',
    #'ex4-msd50000_std.n6000-me4_rs300_a0.050_trp0.50.p',
]
reps = 300
alpha = 0.01
table_powers(fnames, reps=reps, alpha=alpha)

Examine a result file


In [ ]:
fname = 'ex4-white_wine_std_h0.n3000-me4_rs300_a0.050_trp0.50.p'

fpath = glo.ex_result_file(4, fname)
results = glo.pickle_load(fpath)

n = results['n']
is_h0 = results['is_h0']
ps = results['paired_source']
repeats = results['repeats']
method_job_funcs = results['method_job_funcs']

In [ ]:
def load_tm_table(results, func):
    """
    Load a trials x methods numpy array of results.
    The value to load is specified by the key.
    job_result_func: a function to access the value of a job_result
    """
    vf_val = np.vectorize(func)
    # results['job_results'] is a dictionary: 
    # {'test_result': (dict from running perform_test(te) '...':..., }
    vals = vf_val(results['job_results'])
    #repeats, _, n_methods = results['job_results'].shape
    met_job_funcs = results['method_job_funcs']
    return vals, met_job_funcs

In [ ]:
f_indtest = lambda jr: jr['indtest']
f_stat = lambda jr: jr['test_result']['test_stat']
#f_time = lambda jr: jr['time_secs']
it_table, met_job_funcs = load_tm_table(results, func=f_indtest)
stat_table, _ = load_tm_table(results, func=f_stat)
print it_table.shape
met_job_funcs

In [ ]:
nfsic_ind = 0
nfsics = it_table[:, nfsic_ind]
kwidths = np.array([nf.k.sigma2 for nf in nfsics])
lwidths = np.array([nf.l.sigma2 for nf in nfsics])
nfsic_stats = stat_table[:, nfsic_ind]
nfsic_stats = nfsic_stats[np.isfinite(nfsic_stats)]

J = nfsics[0].V.shape[0]

In [ ]:
thresh = stats.chi2.isf(alpha, df=J)
dom = np.linspace(stats.chi2.isf(0.999, df=J), stats.chi2.isf(0.01, df=J), 300)
chi2den = stats.chi2.pdf(dom, df=J)


plt.figure(figsize=(10, 5))
plt.hist(nfsic_stats, bins=20, normed=True)
plt.plot(dom, chi2den, label=r'$\chi^2(%d)$'%J)

plt.legend()

In [ ]:
plt.hist(lwidths, bins=20)

In [ ]:
pack = glo.load_data_file('wine_quality', 'white_wine.p')

xy_pdata = data.PairedData(pack['X'], pack['Y'])
ps = data.PSStandardize(data.PSNullResample(xy_pdata))

In [ ]:
n = 2000
pdata = ps.sample(n, seed=27)
X, Y = pdata.xy()
medx2 = util.meddistance(X)**2
medy2 = util.meddistance(Y)**2

print 'medx2: %g'%medx2
print 'medy2: %g'%medy2

In [ ]:
from pandas.tools.plotting import scatter_matrix
import pandas as pd
df = pd.DataFrame(X)
scatter_matrix(df, alpha=0.5, figsize=(12, 12), diagonal='kde');

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: