In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import freqopttest.util as util
import freqopttest.data as data
import freqopttest.kernel as kernel
import freqopttest.tst as tst
import freqopttest.glo as glo
import sys
In [ ]:
# sample source
m = 2000
dim = 6
alpha = 0.01
seed = 22
n = m
#ss = data.SSGaussMeanDiff(dim, my=0)
ss = data.SSSameGauss(d=dim)
#ss = data.SSGaussVarDiff(dim)
#ss = data.SSBlobs()
tst_data = ss.sample(m, seed=seed)
tr, te = tst_data.split_tr_te(tr_proportion=0.5, seed=10)
In [ ]:
# choose the best kernel that maximizes the test power
med = util.meddistance(tr.stack_xy())
widths = [ (med*f) for f in 2.0**np.linspace(-1, 4, 20)]
list_kernels = [kernel.KGauss( w**2 ) for w in widths]
besti, powers = tst.LinearMMDTest.grid_search_kernel(tr, list_kernels, alpha)
In [ ]:
plt.plot(widths, powers, 'o-')
plt.xlabel('Gaussian width')
plt.ylabel('test power')
plt.title('median distance = %.3g. Best width: %.3g'%(med, widths[besti]) )
In [ ]:
# The actual test
best_ker = list_kernels[besti]
lin_mmd_test = tst.LinearMMDTest(best_ker, alpha)
test_result = lin_mmd_test.perform_test(te)
test_result
In [ ]:
rep = 500
te_size = 5000
alpha = 0.05
lin_mmd_test = tst.LinearMMDTest(best_ker, alpha)
pvals = np.zeros(rep)
for r in range(rep):
te = ss.sample(te_size, seed=r+34000)
test_result = lin_mmd_test.perform_test(te)
pvals[r] = test_result['pvalue']
# plot
type1 = np.sum(pvals < alpha)/float(rep)
print('type1: %.4g'%type1)
In [ ]: