Test quadratic MMD two-sample test


In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import freqopttest.util as util
import freqopttest.data as data
import freqopttest.kernel as kernel
import freqopttest.tst as tst
import freqopttest.glo as glo
import scipy.stats as stats
import sys

In [ ]:
# sample source 
n = 500
dim = 10
alpha = 0.01
seed = 22

ss = data.SSGaussMeanDiff(dim, my=0.5)
#ss = data.SSSameGauss(d=dim)
#ss = data.SSGaussVarDiff(dim)
#ss = data.SSBlobs()
tst_data = ss.sample(n, seed=seed)
tr, te = tst_data.split_tr_te(tr_proportion=0.5, seed=10)

Test Quadratic MMD


In [ ]:
med = util.meddistance(tr.stack_xy(), 1000)
list_gwidth = np.hstack( ( (med**2) *(2.0**np.linspace(-4, 4, 20) ) ) )
list_gwidth.sort()
list_kernels = [kernel.KGauss(gw2) for gw2 in list_gwidth]
besti, powers = tst.QuadMMDTest.grid_search_kernel(tr, list_kernels, alpha)

In [ ]:
print 'med^2: %.3f'%med**2
print 'best width^2: %.3f'%(list_gwidth[besti])
plt.plot(list_gwidth, powers, 'o-')
plt.xlabel('Gaussian width^2')
plt.ylabel('Approx. test power')

In [ ]:
k = kernel.KGauss(list_gwidth[besti])
mmd_test = tst.QuadMMDTest(k, n_permute=200, alpha=alpha)
test_result = mmd_test.perform_test(te)
test_result

In [ ]:
permuted_mmd2s = test_result['list_permuted_mmd2']
stat = test_result['test_stat']
bins = plt.hist(permuted_mmd2s, 10, normed=True, label='H0 Permuted MMD2');
plt.xlabel('Permuted MMD^2 values')
plt.ylabel('Normalized frequency')
plt.stem([stat], [max(bins[0])/2], 'or-', label='Observed stat.')
plt.legend()

In [ ]:


In [ ]:


In [ ]: