Test quadratic MMD two-sample test
In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import freqopttest.util as util
import freqopttest.data as data
import freqopttest.kernel as kernel
import freqopttest.tst as tst
import freqopttest.glo as glo
import scipy.stats as stats
import sys
In [ ]:
# sample source
n = 500
dim = 10
alpha = 0.01
seed = 22
ss = data.SSGaussMeanDiff(dim, my=0.5)
#ss = data.SSSameGauss(d=dim)
#ss = data.SSGaussVarDiff(dim)
#ss = data.SSBlobs()
tst_data = ss.sample(n, seed=seed)
tr, te = tst_data.split_tr_te(tr_proportion=0.5, seed=10)
In [ ]:
med = util.meddistance(tr.stack_xy(), 1000)
list_gwidth = np.hstack( ( (med**2) *(2.0**np.linspace(-4, 4, 20) ) ) )
list_gwidth.sort()
list_kernels = [kernel.KGauss(gw2) for gw2 in list_gwidth]
besti, powers = tst.QuadMMDTest.grid_search_kernel(tr, list_kernels, alpha)
In [ ]:
print 'med^2: %.3f'%med**2
print 'best width^2: %.3f'%(list_gwidth[besti])
plt.plot(list_gwidth, powers, 'o-')
plt.xlabel('Gaussian width^2')
plt.ylabel('Approx. test power')
In [ ]:
k = kernel.KGauss(list_gwidth[besti])
mmd_test = tst.QuadMMDTest(k, n_permute=200, alpha=alpha)
test_result = mmd_test.perform_test(te)
test_result
In [ ]:
permuted_mmd2s = test_result['list_permuted_mmd2']
stat = test_result['test_stat']
bins = plt.hist(permuted_mmd2s, 10, normed=True, label='H0 Permuted MMD2');
plt.xlabel('Permuted MMD^2 values')
plt.ylabel('Normalized frequency')
plt.stem([stat], [max(bins[0])/2], 'or-', label='Observed stat.')
plt.legend()
In [ ]:
In [ ]:
In [ ]: