A notebook to process experimental results of ex4_real_data.py.
In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'
import numpy as np
import matplotlib.pyplot as plt
import fsic.data as data
import fsic.ex.exglobal as exglo
import fsic.glo as glo
import fsic.indtest as it
import fsic.kernel as kernel
import fsic.plot as plot
import fsic.util as util
import scipy.stats as stats
In [ ]:
# font options
font = {
#'family' : 'normal',
#'weight' : 'bold',
'size' : 14
}
plt.rc('font', **font)
plt.rc('lines', linewidth=2)
In [ ]:
def methods_powers(results, reps, alpha):
"""Return the powers of all methods"""
n_methods = len(results['method_job_funcs'])
met_powers = np.zeros(n_methods)
results0 = results['job_results']
for mi in range(n_methods):
method_results = results0[:, mi]
pvals = np.array([method_results[r]['test_result']['pvalue'] for r in range(reps)] )
met_powers[mi] = np.mean(pvals < alpha)
return met_powers
def table_powers(result_fnames, reps, alpha):
"""print a table showing test powers of all methods in all the result files."""
ex = 4
met_pows = []
ns = []
prob_labels = []
for fi, fname in enumerate(result_fnames):
results = glo.ex_load_result(ex, fname)
#tr_proportion = results['tr_proportion']
#te_proportion = 1-tr_proportion
n = results['n']
#nte = int(te_proportion*n)
ns.append(n)
met_pows.append(methods_powers(results, reps, alpha))
prob_labels.append(results['prob_label'])
f2label = exglo.get_func2label_map()
method_labels = [f2label[f.__name__] for f in results['method_job_funcs']]
print(method_labels)
print('')
for prob_label, mps, n in zip(prob_labels, met_pows, ns):
mps_str = [('%.3f'%p).lstrip('0') for p in mps]
str_row = [prob_label] + ['%d'%n] + mps_str
print(' & '.join(str_row))
print(' \\\\ \n')
#table_powers(result_fnames, fname_labels, 500)
In [ ]:
fnames = [
#'ex4-white_wine.n2000-me6_rs300_a0.050_trp0.50.p',
#'ex4-white_wine_h0.n2000-me6_rs300_a0.050_trp0.50.p',
#'ex4-white_wine_std.n3000-me4_rs300_a0.050_trp0.50.p',
#'ex4-white_wine_std_h0.n3000-me4_rs300_a0.050_trp0.50.p',
'ex4-white_wine_std.n2000-me5_rs300_a0.050_trp0.50.p',
'ex4-white_wine_std_h0.n2000-me5_rs300_a0.050_trp0.50.p',
'ex4-msd50000_std_h0.n2000-me5_rs300_a0.050_trp0.50.p',
'ex4-msd50000_std.n2000-me5_rs300_a0.050_trp0.50.p',
#'ex4-msd50000_std_h0.n6000-me4_rs300_a0.050_trp0.50.p',
#'ex4-msd50000_std.n6000-me4_rs300_a0.050_trp0.50.p',
]
reps = 300
alpha = 0.01
table_powers(fnames, reps=reps, alpha=alpha)
In [ ]:
fname = 'ex4-white_wine_std_h0.n3000-me4_rs300_a0.050_trp0.50.p'
fpath = glo.ex_result_file(4, fname)
results = glo.pickle_load(fpath)
n = results['n']
is_h0 = results['is_h0']
ps = results['paired_source']
repeats = results['repeats']
method_job_funcs = results['method_job_funcs']
In [ ]:
def load_tm_table(results, func):
"""
Load a trials x methods numpy array of results.
The value to load is specified by the key.
job_result_func: a function to access the value of a job_result
"""
vf_val = np.vectorize(func)
# results['job_results'] is a dictionary:
# {'test_result': (dict from running perform_test(te) '...':..., }
vals = vf_val(results['job_results'])
#repeats, _, n_methods = results['job_results'].shape
met_job_funcs = results['method_job_funcs']
return vals, met_job_funcs
In [ ]:
f_indtest = lambda jr: jr['indtest']
f_stat = lambda jr: jr['test_result']['test_stat']
#f_time = lambda jr: jr['time_secs']
it_table, met_job_funcs = load_tm_table(results, func=f_indtest)
stat_table, _ = load_tm_table(results, func=f_stat)
print it_table.shape
met_job_funcs
In [ ]:
nfsic_ind = 0
nfsics = it_table[:, nfsic_ind]
kwidths = np.array([nf.k.sigma2 for nf in nfsics])
lwidths = np.array([nf.l.sigma2 for nf in nfsics])
nfsic_stats = stat_table[:, nfsic_ind]
nfsic_stats = nfsic_stats[np.isfinite(nfsic_stats)]
J = nfsics[0].V.shape[0]
In [ ]:
thresh = stats.chi2.isf(alpha, df=J)
dom = np.linspace(stats.chi2.isf(0.999, df=J), stats.chi2.isf(0.01, df=J), 300)
chi2den = stats.chi2.pdf(dom, df=J)
plt.figure(figsize=(10, 5))
plt.hist(nfsic_stats, bins=20, normed=True)
plt.plot(dom, chi2den, label=r'$\chi^2(%d)$'%J)
plt.legend()
In [ ]:
plt.hist(lwidths, bins=20)
In [ ]:
pack = glo.load_data_file('wine_quality', 'white_wine.p')
xy_pdata = data.PairedData(pack['X'], pack['Y'])
ps = data.PSStandardize(data.PSNullResample(xy_pdata))
In [ ]:
n = 2000
pdata = ps.sample(n, seed=27)
X, Y = pdata.xy()
medx2 = util.meddistance(X)**2
medy2 = util.meddistance(Y)**2
print 'medx2: %g'%medx2
print 'medy2: %g'%medy2
In [ ]:
from pandas.tools.plotting import scatter_matrix
import pandas as pd
df = pd.DataFrame(X)
scatter_matrix(df, alpha=0.5, figsize=(12, 12), diagonal='kde');
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: