A notebook to test and demonstrate the ME test (METest) of Jitkrittum et al. (NIPS 2016), used here as a goodness-of-fit test.


In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'

import freqopttest.tst as tst
import kgof
import kgof.data as data
import kgof.density as density
import kgof.goftest as gof
import kgof.intertst as tgof
import kgof.kernel as ker
import kgof.util as util
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats

In [ ]:
# Global plotting defaults applied to every figure in this notebook.
# Only the font size is overridden; the 'family' and 'weight' keys are
# intentionally left at matplotlib's defaults.
font = dict(size=18)
plt.rc('font', **font)
plt.rc('lines', linewidth=2)

# fonttype 42 makes the PDF and PS backends embed TrueType fonts
# (matplotlib's default is Type 3).
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

Test with random test locations


In [ ]:
# Base random seed; later cells derive their own seeds from it (seed+1, seed+2, ...).
seed = 20

# Problem size
d = 1      # dimension of the data
n = 800    # number of points to sample

# Test configuration
alpha = 0.05   # significance level
J = 2          # number of test locations to use

# Parameters of the model p: a zero mean vector of length d and a scalar variance.
variance = 1
mean = np.zeros(d)

In [ ]:
# Model under test: an isotropic normal built from the configured mean and variance.
p = density.IsotropicNormal(mean, variance)

# Parameters for the single-Gaussian alternative below; they are only needed
# when that alternative is switched on. q_mean is a copy, so shifting it
# (e.g. q_mean[0] = 1) leaves `mean` untouched.
q_mean = mean.copy()
q_variance = variance
# q_mean[0] = 1

# Alternative data source: one isotropic normal.
# ds = data.DSIsotropicNormal(q_mean, q_variance)

# Data source actually used: an isotropic Gaussian mixture with two components,
# both with zero mean, variances 0.01 and 1, and mixing weights 0.2 and 0.8.
# The means array is sized from `d` (one row per component) so that the data
# keep the same dimension as p when `d` is changed in the configuration cell.
q_means = np.zeros((2, d))
q_variances = np.array([0.01, 1])
ds = data.DSIsoGaussianMixture(q_means, q_variances, pmix=[0.2, 0.8])

In [ ]:
# Draw n points from the data source, using a seed derived from the base seed.
dat = ds.sample(n, seed=seed + 2)

# Split the sample in half: `tr` is used to set up the test, `te` to run it.
# NOTE(review): the split seed is the literal 2, not derived from `seed`.
tr, te = dat.split_tr_te(tr_proportion=0.5, seed=2)

In [ ]:
# Set up the ME test with random test locations, using the training half only.
Xtr = tr.data()

# Squared Gaussian kernel width: the square of util.meddistance on the training data
# (presumably the median pairwise distance, i.e. the median heuristic -- confirm in kgof.util).
sig2 = util.meddistance(Xtr, subsample=1000) ** 2

# J random test locations obtained from util.fit_gaussian_draw
# (presumably drawn from a Gaussian fitted to Xtr -- confirm in kgof.util).
V0 = util.fit_gaussian_draw(Xtr, J, seed=seed + 1)

# Test of the model p with the fixed width and locations chosen above.
me_rand = tgof.GaussMETest(p, sig2, V0, alpha=alpha, seed=seed)

In [ ]:
# Run the random-location test on the held-out half `te`; the training half
# was already used to choose the kernel width and the test locations.
me_rand_result = me_rand.perform_test(te)

# Bare expression so the notebook displays the test result.
me_rand_result

Test with optimized test locations


In [ ]:
# Options for the optimization of the test locations and kernel width.
# They are handed to me_opt.perform_test together with the data.
op = dict(
    n_test_locs=J,
    seed=seed + 5,
    max_iter=200,
    batch_proportion=1.0,
    locs_step_size=1.0,
    gwidth_step_size=0.1,
    tol_fun=1e-4,
)

# ME test whose parameters are optimized; tr_proportion=0.5 is the share of the
# data passed to perform_test that is used as the training set.
me_opt = tgof.GaussMETestOpt(
    p, n_locs=J, tr_proportion=0.5, alpha=alpha, seed=seed + 1
)

In [ ]:
# The optimized test receives the full sample `dat`: it splits the data into
# training and test parts internally, so no manual split is passed here.
me_opt_result = me_opt.perform_test(dat, op)

# Bare expression so the notebook displays the test result.
me_opt_result

In [ ]:


In [ ]:


In [ ]:


In [ ]: