A notebook to process experimental results of ex5. p(reject) as the sample size is varied.


In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'
import numpy as np
import matplotlib.pyplot as plt
import fsic.data as data
import fsic.ex.exglobal as exglo
import fsic.glo as glo
import fsic.indtest as it
import fsic.kernel as kernel
import fsic.plot as plot
import fsic.util as util

import scipy.stats as stats

In [ ]:
plot.set_default_matplotlib_options()

In [ ]:
def load_plot_vs_n(fname, h1_true=True, xlabel='Sample size $n$', show_legend=True, xscale='linear'):
    func_xvalues = lambda agg_results: agg_results['sample_sizes']
    ex = 5
    def func_title(agg_results):
        repeats, _, n_methods = agg_results['job_results'].shape
        alpha = agg_results['alpha']
        title = '%s. %d trials. $\\alpha$ = %.2g.'%\
            ( agg_results['prob_label'], repeats, alpha)
        return title
    #plt.figure(figsize=(10,6))
    results = plot.plot_prob_reject(
        ex, fname, h1_true, func_xvalues, xlabel=xlabel, func_title=func_title)
    
    plt.title('')
    plt.gca().legend(loc='best').set_visible(show_legend)
    #plt.grid(True)
    if xscale is not None:
        plt.xscale(xscale)
        
    return results

In [ ]:
def load_runtime_vs_n(fname, h1_true=True, xlabel='Sample size $n$', 
                      show_legend=True, xscale='linear', yscale='log'):
    func_xvalues = lambda agg_results: agg_results['sample_sizes']
    ex = 5
    def func_title(agg_results):
        repeats, _, n_methods = agg_results['job_results'].shape
        alpha = agg_results['alpha']
        title = '%s. %d trials. $\\alpha$ = %.2g.'%\
            ( agg_results['prob_label'], repeats, alpha)
        return title
    
    #plt.figure(figsize=(10,6))
    
    results = plot.plot_runtime(ex, fname,  
                                func_xvalues, xlabel=xlabel, func_title=func_title)
    
    plt.title('')
    plt.gca().legend(loc='best').set_visible(show_legend)
    #plt.grid(True)
    if xscale is not None:
        plt.xscale(xscale)
    if yscale is not None:
        plt.yscale(yscale)
        
    return results

In [ ]:
# H0 true. 
msd_h0_fname = 'ex5-msd50000_std_h0-me6_rs300_nmi500_nma2000_a0.010_trp0.50.p'
msd_h0_results = load_plot_vs_n(msd_h0_fname, h1_true=False, show_legend=True)
plt.xticks(range(500, 2000+1, 500))
#plt.ylim([0.00, 0.023])
plt.legend(bbox_to_anchor=(1.70, 1.45), ncol=6, prop={'size': 20})
plt.savefig(msd_h0_fname.replace('.p', '.pdf', 1), bbox_inches='tight')

In [ ]:
msd_fname = 'ex5-msd50000_std-me6_rs300_nmi500_nma2000_a0.010_trp0.50.p'
msd_results = load_plot_vs_n(msd_fname, h1_true=True, show_legend=False)
plt.ylim([0.25, 1.02])
plt.xticks(range(500, 2000+1, 500))
plt.savefig(msd_fname.replace('.p', '.pdf', 1), bbox_inches='tight')

In [ ]:
# H0 true
vdo_h0_fname = 'ex5-data_n10000_td1878_vd2000_std_h0-me6_rs300_nmi2000_nma8000_a0.010_trp0.50.p'
load_plot_vs_n(vdo_h0_fname, h1_true=False, show_legend=False);
plt.xticks([2000, 4000, 6000, 8000])
plt.savefig(vdo_h0_fname.replace('.p', '.pdf', 1), bbox_inches='tight')

In [ ]:
vdo_fname = 'ex5-data_n10000_td1878_vd2000_std-me6_rs300_nmi2000_nma8000_a0.010_trp0.50.p'
load_plot_vs_n(vdo_fname, show_legend=False);
plt.xticks([2000, 4000, 6000, 8000])
#plt.legend(bbox_to_anchor=(1.70, 1.35), ncol=5, prop={'size': 20})
plt.savefig(vdo_fname.replace('.p', '.pdf', 1), bbox_inches='tight')

In [ ]:
load_runtime_vs_n(vdo_fname, show_legend=False, 
                 # yscale='linear'
                 );
plt.legend(bbox_to_anchor=(1.7, 1))
# plt.grid()
plt.xticks([2000, 4000, 6000, 8000])

plot the two vdo results together

plt.figure(figsize=(12, 4)) plt.subplot(1, 2, 1) load_plot_vs_n(vdo_h0_fname, h1_true=False, show_legend=False); plt.xticks([2000, 4000, 6000, 8000])

plt.subplot(1, 2, 2) load_plot_vs_n(vdo_fname, show_legend=False);

plt.legend(ncol=5)

plt.xticks([2000, 4000, 6000, 8000]) plt.legend(ncol=5, prop={'size': 16})



In [ ]:
# H0 true
wine_h0_fname = 'ex5-white_wine_std_h0-me6_rs100_nmi100_nma500_a0.010_trp0.50.p'
#wine_h0_fname = 'ex5-white_wine_ndx5_ndy5_std_h0-me5_rs100_nmi200_nma500_a0.010_trp0.50.p'
wine_h0_results = load_plot_vs_n(wine_h0_fname, h1_true=False, show_legend=True)
#plt.ylim([0.00, 0.23])
#plt.savefig(gmd_fname.replace('.p', '.pdf', 1))

In [ ]:
wine_fname = 'ex5-white_wine_std-me6_rs100_nmi100_nma500_a0.010_trp0.50.p'
#wine_fname = 'ex5-white_wine_ndx5_ndy5_std-me5_rs100_nmi200_nma500_a0.010_trp0.50.p'
wine_results = load_plot_vs_n(wine_fname, h1_true=True, show_legend=True)
#plt.ylim([0.00, 0.23])
#plt.savefig(gmd_fname.replace('.p', '.pdf', 1))

In [ ]:
# H0 true
news_h0_fname = 'ex5-news_popularity_std_h0-me6_rs100_nmi500_nma2000_a0.010_trp0.50.p'
#news_h0_fname = 'ex5-news_popularity_ndx5_ndy5_std_h0-me5_rs100_nmi500_nma2000_a0.010_trp0.50.p'
#news_h0_results = load_plot_vs_n(news_h0_fname, h1_true=False, show_legend=True)
#plt.ylim([0.00, 0.23])
#plt.savefig(gmd_fname.replace('.p', '.pdf', 1))

In [ ]:
news_fname = 'ex5-news_popularity_std-me6_rs100_nmi500_nma2000_a0.010_trp0.50.p'
#news_fname = 'ex5-news_popularity_ndx5_ndy5_std-me5_rs100_nmi500_nma2000_a0.010_trp0.50.p'
news_results = load_plot_vs_n(news_fname, h1_true=True, show_legend=True)
#plt.ylim([0.00, 0.23])
#plt.savefig(gmd_fname.replace('.p', '.pdf', 1))

In [ ]:
#craft_h0_fname = 'ex5-skillcraft1_std_h0-me6_rs100_nmi500_nma2000_a0.010_trp0.50.p'
craft_h0_fname = 'ex5-skillcraft1_ndx10_ndy10_std_h0-me5_rs100_nmi200_nma500_a0.010_trp0.50.p'
#craft_h0_results = load_plot_vs_n(craft_h0_fname, h1_true=False, show_legend=True)

In [ ]:
#craft_fname = 'ex5-skillcraft1_std-me6_rs100_nmi500_nma2000_a0.010_trp0.50.p'
craft_fname = 'ex5-skillcraft1_ndx10_ndy10_std-me5_rs100_nmi200_nma500_a0.010_trp0.50.p'
craft_results = load_plot_vs_n(craft_fname, h1_true=True, show_legend=True)

In [ ]:
cmusic_h0_fname = 'ex5-chromatic_music_std_h0-me5_rs100_nmi200_nma500_a0.010_trp0.50.p'
#cmusic_h0_results = load_plot_vs_n(cmusic_h0_fname, h1_true=True, show_legend=True)

In [ ]:
music68_h0_fname = 'ex5-music68_std_h0-me5_rs100_nmi200_nma500_a0.010_trp0.50.p'
#music68_h0_results = load_plot_vs_n(music68_fname, h1_true=False, show_legend=True)

In [ ]:
music68_fname = 'ex5-music68_std-me5_rs100_nmi200_nma500_a0.010_trp0.50.p'
music68_results = load_plot_vs_n(music68_fname, h1_true=True, show_legend=True)

In [ ]:
mrating_fname = 'ex5-movie_rating_std-me5_rs100_nmi200_nma500_a0.010_trp0.50.p'
#mrating_results = load_plot_vs_n(mrating_fname, h1_true=True)

In [ ]:
llt_fname = 'ex5-latlong_temp_y2013_std-me6_rs100_nmi500_nma2000_a0.010_trp0.50.p'
#llt_results = load_plot_vs_n(llt_fname, h1_true=True)

In [ ]:
vgender_fname = 'ex5-voice_gender_c_std-me6_rs100_nmi200_nma500_a0.010_trp0.50.p'
#vdender_results = load_plot_vs_n(vgender_fname, h1_true=True)

In [ ]:
lung_fname = 'ex5-lung_c_std-me5_rs100_nmi50_nma150_a0.010_trp0.50.p'
#lung_results = load_plot_vs_n(lung_fname, h1_true=True)

In [ ]:
carcinom_fname = 'ex5-carcinom_c_std-me5_rs100_nmi50_nma150_a0.010_trp0.50.p'
#carcinom_results = load_plot_vs_n(carcinom_fname)

In [ ]:
CCL_fname = 'ex5-CLL_SUB_111_c_std-me5_rs100_nmi50_nma100_a0.010_trp0.50.p'
#CCL_results = load_plot_vs_n(CCL_fname)

SMK_results = load_plot_vs_n('ex5-SMK_CAN_187_c_std-me5_rs100_nmi50_nma150_a0.010_trp0.50.p')

TOX_results = load_plot_vs_n('ex5-TOX_171_c_std-me5_rs100_nmi50_nma150_a0.010_trp0.50.p')

load_plot_vs_n('ex5-higgs_no_deriv_c_std-me5_rs100_nmi200_nma500_a0.010_trp0.50.p');

Check the learned locations of NFSIC in the Lat-Long data


In [ ]:
def load_tvm_table(results, func):
    """
    Load a trials x varying values x methods numpy array of results.
    The value to load is specified by the key.
    job_result_func: a function to access the value of a job_result
    """
    vf_val = np.vectorize(func)
    # results['job_results'] is a dictionary: 
    # {'test_result': (dict from running perform_test(te) '...':..., }
    vals = vf_val(results['job_results'])
    #repeats, _, n_methods = results['job_results'].shape
    met_job_funcs = results['method_job_funcs']
    return vals, met_job_funcs

In [ ]:
dname = 'latlong_temp_y2013'
fname = 'ex5-%s_std-me6_rs100_nmi500_nma2000_a0.010_trp0.50.p'%dname
fpath = glo.ex_result_file(5, fname)
results = glo.pickle_load(fpath)

In [ ]:
#f_stat_val = lambda job_result: job_result['test_result']['test_stat']
f_indtest = lambda jr: jr['indtest']
#f_time = lambda jr: jr['time_secs']
table, met_job_funcs = load_tvm_table(results, func=f_indtest)
table.shape

In [ ]:
met_job_funcs

In [ ]:
met_ind = 0
n_ind = 3
ZVs = [t.V for t in table[:, n_ind, met_ind]]
# each row of Vs is V learned in one trial. Assume J=1
ZVs = np.vstack(ZVs)
ZWs = [t.W for t in table[:, n_ind, met_ind]]
ZWs = np.vstack(ZWs)

In [ ]:
# load full data
data_path = glo.data_file('earth_temperature', dname+'.p')
pack = glo.pickle_load(data_path)

X, Y = pack['X'], pack['Y']
mx = np.mean(X)
my = np.mean(Y)
stdx = np.std(X, axis=0)
stdy = np.std(Y, axis=0)
Dx = np.diag(stdx)
Dy = np.diag(stdy)

In [ ]:
# reverse the Z-transform
#Vs = ZVs.dot(Dx) + mx
#Ws = ZWs.dot(Dy) + my

X = (X - mx)/stdx
Y = (Y - my)/stdy
Vs = ZVs
Ws = ZWs

In [ ]:
# plot the map and the learned locations
plt.figure(figsize=(10, 6))
find, toind = 20, 50
plt.plot(X[:, 1], X[:, 0], 'k.', alpha=0.3)
plt.plot(Vs[find:toind, 1], Vs[find:toind, 0], 'r*', markersize=15)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.grid()

In [ ]:
plt.figure(figsize=(10, 5))
plt.hist(Y, normed=True, bins=20);
plt.xlabel('Temperature')
plt.grid()

In [ ]:


In [ ]:


In [ ]: