In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import freqopttest.util as util
import freqopttest.data as data
import freqopttest.kernel as kernel
import freqopttest.tst as tst
import freqopttest.glo as glo
import sys

In [ ]:
# Sample source: draw one two-sample dataset and split it into train/test parts.
n = 3000
seed = 17

# `dim` is only read by the commented-out alternatives below; the active source
# reports its own dimension, which overwrites this value a few lines down.
dim = 10

# Alternative sources (to use one, replace the SSBlobs line with it):
#ss = data.SSGaussMeanDiff(dim, my=1)
#ss = data.SSGaussVarDiff(dim)
#ss = data.SSSameGauss(dim)
ss = data.SSBlobs()
dim = ss.dim()

# Draw n samples with a fixed seed, then split with tr_proportion=0.5
# (the split uses its own hard-coded seed, 10).
tst_data = ss.sample(n, seed=seed)
tr, te = tst_data.split_tr_te(tr_proportion=0.5, seed=10)

Grid search for the Gaussian width, using randomly drawn test locations (features).


In [ ]:
# J is used by later cells as the number of test locations
# (passed to init_locs_2randn and as 'n_test_locs' for the optimizer).
J = 5
# alpha is handed to every MeanEmbeddingTest call in this notebook;
# presumably the significance level of the test -- TODO confirm against freqopttest.
alpha = 0.01

In [ ]:
# Grid search over the Gaussian width while the J test locations stay fixed.
# The locations are drawn once by init_locs_2randn from the training split.
T = tst.MeanEmbeddingTest.init_locs_2randn(tr, J, seed=seed+1)
# (alternative kept from the original notebook: T = np.random.randn(J, dim))

# Candidate widths: med**2 scaled by 40 powers of two from 2**-5 to 2**5.
# med**2 is non-negative and the exponents are increasing, so the array is
# already in ascending order; no extra stacking or sorting is required.
med = util.meddistance(tr.stack_xy(), 1000)
list_gwidth = (med**2) * (2.0**np.linspace(-5, 5, 40))
besti, powers = tst.MeanEmbeddingTest.grid_search_gwidth(tr, T, list_gwidth, alpha)

# plot
plt.plot(list_gwidth, powers, 'o-')
# Matplotlib 3.3 renamed the log-scale keyword `basex` to `base`, and 3.5
# removed the old name. Try the current spelling first and fall back to the
# legacy one so the cell runs on both old and new installations.
# NOTE(review): the exception type raised by pre-3.3 releases for an unknown
# keyword is assumed to be TypeError or ValueError -- confirm if old versions matter.
try:
    plt.xscale('log', base=2)
except (TypeError, ValueError):
    plt.xscale('log', basex=2)
plt.xlabel('Gaussian width')
plt.ylabel('Test power')
# The title reports the square root of the selected candidate next to `med`.
plt.title('Median dist: %.3g. Best gwidth2**0.5: %.3g'%(med, list_gwidth[besti]**0.5) )

In [ ]:
# Test with the best Gaussian width: take the grid-search winner
# (list_gwidth[besti]), keep the same test locations T, and run on the `te` split.
best_width = list_gwidth[besti]
met_grid = tst.MeanEmbeddingTest(T, best_width, alpha)
# Trailing expression: the notebook displays whatever perform_test returns.
met_grid.perform_test(te)

Optimize the test locations and the Gaussian width.


In [ ]:
# Keyword options forwarded verbatim to optimize_locs_width.
# NOTE(review): the meaning of each option is defined by freqopttest, not here.
op = dict(
    n_test_locs=J,
    seed=seed+5,
    max_iter=200,
    batch_proportion=1.0,
    locs_step_size=1.0,
    gwidth_step_size=0.1,
    tol_fun=1e-4,
)
# Run the optimization on the training split only; `te` is left untouched here.
test_locs, gwidth, info = tst.MeanEmbeddingTest.optimize_locs_width(tr, alpha, **op)

In [ ]:
# Plot the optimization trace recorded in `info` on a 2x2 grid:
#   top-left     Gaussian width per iteration
#   top-right    objective value per iteration
#   bottom row   coordinates 0 and 1 of the test locations per iteration

fig, axs = plt.subplots(2, 2, figsize=(10, 9))

# trajectories of the Gaussian width
gwidths = info['gwidths']
axs[0, 0].plot(gwidths)
axs[0, 0].set_xlabel('iteration')
axs[0, 0].set_ylabel('Gaussian width')
axs[0, 0].set_title('Gaussian width evolution')

# evolution of objective values
objs = info['obj_values']
axs[0, 1].plot(objs)
# Raw string: `\l` is not a valid escape sequence, so the plain literal
# triggers an invalid-escape warning on Python 3.6+. The raw form yields the
# exact same characters without the warning.
axs[0, 1].set_title(r'Objective $\lambda(T)$')

# trajectories of the test locations
# `locs` is indexed with three axes; per the original notes each slice
# locs[:, :, coord] is (iterations x number of locations).
locs = info['test_locs']
for coord in [0, 1]:
    locs_d0 = locs[:, :, coord]
    # Use a local name for the count so the notebook-level J is not overwritten.
    n_locs = locs_d0.shape[1]
    axs[1, coord].plot(locs_d0)
    axs[1, coord].set_xlabel('iteration')
    axs[1, coord].set_ylabel('index %d of test_locs'%(coord))
    axs[1, coord].set_title('evolution of %d test locations'%n_locs)

print('optimized width: %.3f'%gwidth)

In [ ]:
# Test with the optimized test locations and the optimized Gaussian width,
# both returned by optimize_locs_width; run on the `te` split.
met_opt = tst.MeanEmbeddingTest(test_locs, gwidth, alpha)
# Trailing expression: the notebook displays whatever perform_test returns.
met_opt.perform_test(te)

In [ ]:


In [ ]: