A notebook to test and demonstrate the ME test (METest)
of Jitkrittum et al. (NIPS 2016), used here as a goodness-of-fit test.
In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'
import freqopttest.tst as tst
import kgof
import kgof.data as data
import kgof.density as density
import kgof.goftest as gof
import kgof.intertst as tgof
import kgof.kernel as ker
import kgof.util as util
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
In [ ]:
# Global figure styling for the plots in this notebook.
# Font type 42 is requested for both the PDF and the PS backends.
matplotlib.rcParams['ps.fonttype'] = 42
matplotlib.rcParams['pdf.fonttype'] = 42
# Thicker default line width.
plt.rc('lines', linewidth=2)
# Font group options; only the size is set. Further keys that could be
# enabled here: 'family' (e.g. 'normal') and 'weight' (e.g. 'bold').
font = dict(size=18)
plt.rc('font', **font)
In [ ]:
# Experiment constants shared by the cells below.
# seed: base random seed (later cells offset it, e.g. seed+1, seed+2)
# d:    length of the mean vector of the model p
# n:    number of points drawn from the data source
seed, d, n = 20, 1, 800
# alpha: level handed to the tests; J: number of test locations to use
alpha, J = 0.05, 2
# Parameters of the true model p: zero mean vector of length d, variance 1.
mean, variance = np.zeros(d), 1
In [ ]:
# Model under test: an isotropic normal built from `mean` and `variance`.
p = density.IsotropicNormal(mean, variance)

# Parameters for a single-Gaussian data source. They are only consumed by the
# disabled alternative below, but are kept so the names stay defined.
q_mean = mean.copy()
q_variance = variance
# Alternative data source (disabled): shift the mean and sample one Gaussian.
#   q_mean[0] = 1
#   ds = data.DSIsotropicNormal(q_mean, q_variance)

# Active data source: a two-component isotropic Gaussian mixture. Both
# components are centred at 0, with variances 0.01 and 1; pmix is
# presumably the vector of mixing proportions (it sums to 1).
q_means = np.zeros((2, 1), dtype=int)
q_variances = np.array([0.01, 1])
ds = data.DSIsoGaussianMixture(
    q_means,
    q_variances,
    pmix=[0.2, 0.8],
)
# NOTE(review): another disabled variant used p_means / p_variances, which are
# not defined anywhere in the visible notebook:
#   ds = data.DSIsoGaussianMixture(p_means, p_variances)
In [ ]:
# Draw n points from the data source; the sampling seed is offset from the
# base seed.
dat = ds.sample(n, seed=2 + seed)
# Split the sample into a training part and a test part, half and half.
(tr, te) = dat.split_tr_te(seed=2, tr_proportion=0.5)
In [ ]:
# ME test with randomly drawn (not optimized) test locations.
# Everything here is computed from the training split only.
Xtr = tr.data()

# Squared median distance of the training points, used as the squared
# Gaussian width handed to the test.
sig2 = util.meddistance(Xtr, subsample=1000) ** 2

# J random test locations obtained via util.fit_gaussian_draw on Xtr.
V0 = util.fit_gaussian_draw(Xtr, J, seed=1 + seed)

me_rand = tgof.GaussMETest(
    p,
    sig2,
    V0,
    alpha=alpha,
    seed=seed,
)
In [ ]:
# Run the ME test with the random test locations on the test split `te`.
me_rand_result = me_rand.perform_test(te)
# Bare expression: shows the result as the notebook cell's output.
me_rand_result
# NOTE(review): disabled leftover; `kstein` is not defined in the visible notebook.
#kstein.compute_stat(dat)
In [ ]:
# Options passed to the optimized ME test when it is performed.
op = dict(
    n_test_locs=J,
    seed=seed + 5,
    max_iter=200,
    batch_proportion=1.0,
    locs_step_size=1.0,
    gwidth_step_size=0.1,
    tol_fun=1e-4,
)
# ME test that optimizes its parameters on the training set; tr_proportion
# is the fraction of the data given to training.
me_opt = tgof.GaussMETestOpt(
    p,
    n_locs=J,
    tr_proportion=0.5,
    alpha=alpha,
    seed=seed + 1,
)
In [ ]:
# Give the ME test the full data. Internally the data are divided into tr and te.
# `op` holds the optimization options built in an earlier cell.
me_opt_result = me_opt.perform_test(dat, op)
# Bare expression: shows the result as the notebook cell's output.
me_opt_result
In [ ]:
In [ ]:
In [ ]:
In [ ]: