In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import freqopttest.util as util
import freqopttest.data as data
import freqopttest.kernel as kernel
import freqopttest.tst as tst
import freqopttest.glo as glo
import sys

In [ ]:
# Problem setup: draw one two-sample dataset and split it into a part used
# for tuning (tr) and a part used for the actual tests (te).
seed = 11
n = m = 2000   # m is the size passed to ss.sample(); n is kept as an alias of m
dim = 200      # requested dimension; replaced below by what the source reports

# Sample source. Alternatives that can be swapped in:
#   data.SSGaussMeanDiff(dim, my=1.0)
#   data.SSBlobs()
ss = data.SSGaussVarDiff(dim)
dim = ss.dim()

tst_data = ss.sample(m, seed=seed + 1)
# To run the tests on the full draw instead of the held-out part, use
# te = tst_data.
tr, te = tst_data.split_tr_te(tr_proportion=0.5, seed=100)

Smooth characteristic function (SCF) test


In [ ]:
# Settings shared by the cells below.
#   J     -- number of test frequencies (later passed as 'n_test_freqs')
#   alpha -- passed as `alpha` to every test (presumably the significance
#            level -- confirm against freqopttest)
J, alpha = 7, 0.01

# Baseline: let the library build the test through create_randn, then run it
# on te. The last expression is the cell output.
smooth_cf = tst.SmoothCFTest.create_randn(te, J, alpha=alpha, seed=seed)
smooth_cf.perform_test(te)

grid search to choose the best Gaussian width


In [ ]:
def randn(J, d, seed):
    """Draw a J x d matrix of standard normal samples, reproducibly.

    The samples come from a private ``RandomState`` seeded with ``seed``, so
    NumPy's global random state is never modified -- not even when the draw
    itself fails (e.g. a negative dimension). The values are the same ones
    that ``np.random.seed(seed); np.random.randn(J, d)`` would produce.

    Args:
        J: number of rows.
        d: number of columns.
        seed: seed for the private generator.

    Returns:
        ``numpy.ndarray`` of shape ``(J, d)``.

    Raises:
        ValueError: propagated from NumPy for invalid dimensions.
    """
    return np.random.RandomState(seed).randn(J, d)

# Fixed random test frequencies, shared by every candidate width in the grid.
T_randn = randn(J, dim, seed)

# Candidate Gaussian widths: a grid scaled by tr.mean_std() and sqrt(dim),
# merged with a data-independent grid 2^-8 .. 2^8, then sorted so the plot
# below is drawn left to right.
# NOTE(review): mean_std() is formatted with %.3g below, so it is a scalar;
# its exact definition lives in freqopttest -- confirm there.
mean_sd = tr.mean_std()
scales = 2.0**np.linspace(-4, 4, 30)
#list_gwidth = mean_sd*scales*(dim**0.5)
list_gwidth = np.hstack( (mean_sd*scales*(dim**0.5), 2**np.linspace(-8, 8, 20) ))
list_gwidth.sort()

# Grid search on the training split; besti indexes into list_gwidth.
besti, powers = tst.SmoothCFTest.grid_search_gwidth(tr, T_randn, list_gwidth, alpha)

# plot
plt.plot(list_gwidth, powers, 'o-')
# Matplotlib 3.3 renamed the log-scale keyword `basex` to `base`, and 3.5
# removed `basex` (it now raises TypeError). Try the legacy spelling first so
# older installs behave exactly as before, then fall back to the new one.
try:
    plt.xscale('log', basex=2)
except TypeError:
    plt.xscale('log', base=2)
plt.xlabel('Gaussian width')
plt.ylabel('Test power')
plt.title('Mean std: %.3g. Best chosen: %.2g'%(mean_sd, list_gwidth[besti]) )

# Median distance on the stacked training sample, printed for comparison
# with the chosen width.
med = util.meddistance(tr.stack_xy())
print('med distance xy: %.3g'%med)

In [ ]:
# Actual test: rebuild the SCF test with the width picked by the grid search
# and evaluate it on the held-out split. The last expression is the cell output.
best_width = list_gwidth[besti]
scf_grid = tst.SmoothCFTest(
    T_randn,      # same random frequencies that were used during the grid search
    best_width,   # width selected by grid_search_gwidth
    alpha,
)
scf_grid.perform_test(te)

SCF: optimize the test frequencies and the Gaussian width


In [ ]:
# Options forwarded as keyword arguments to optimize_freqs_width.
op = dict(
    n_test_freqs=J,
    seed=seed,
    max_iter=300,
    batch_proportion=1.0,
    freqs_step_size=0.1,
    gwidth_step_size=0.01,
    tol_fun=1e-4,
)

# Tune frequencies and width on the training split only ...
test_freqs, gwidth, info = tst.SmoothCFTest.optimize_freqs_width(tr, alpha, **op)

# ... then build the test from the optimized parameters and run it on te.
scf_opt = tst.SmoothCFTest(test_freqs, gwidth, alpha=alpha)
scf_opt_test = scf_opt.perform_test(te)
scf_opt_test

In [ ]:
# Plot the optimization diagnostics recorded in `info` by
# optimize_freqs_width: width trajectory, objective values, and the first two
# coordinates of every test location over the iterations.

# trajectories of the Gaussian width
gwidths = info['gwidths']
fig, axs = plt.subplots(2, 2, figsize=(10, 9))
axs[0, 0].plot(gwidths)
axs[0, 0].set_xlabel('iteration')
axs[0, 0].set_ylabel('Gaussian width')
axs[0, 0].set_title('Gaussian width evolution')

# evolution of objective values
objs = info['obj_values']
axs[0, 1].plot(objs)
# Raw string: '\l' is not a valid escape sequence, so a plain literal triggers
# an invalid-escape warning on newer Python. The resulting text is unchanged.
axs[0, 1].set_title(r'Objective $\lambda(T)$')

# trajectories of the test locations
# locs is indexed as [iteration, location, coordinate]; only coordinates 0 and
# 1 are shown, one panel each.
locs = info['test_freqs']
# Local count for the titles. Deliberately NOT stored in `J`, so this plotting
# cell does not rebind the notebook-wide setting.
n_locs = locs.shape[1]
for coord in [0, 1]:
    locs_d0 = locs[:, :, coord]
    axs[1, coord].plot(locs_d0)
    axs[1, coord].set_xlabel('iteration')
    axs[1, coord].set_ylabel('index %d of test_locs'%(coord))
    axs[1, coord].set_title('evolution of %d test locations'%n_locs)

print('optimized width: %.3f'%gwidth)

SCF: optimize just the Gaussian width


In [ ]:
# Options forwarded as keyword arguments to optimize_gwidth.
op_gwidth = {'max_iter': 300,'gwidth_step_size': 0.1,  
             'batch_proportion': 1.0, 'tol_fun': 1e-4}

# Fixed random test frequencies for this experiment. Uses the notebook's
# randn() helper instead of repeating the get_state / seed / set_state
# sequence inline; the helper yields the same draw for the same seed.
T0_randn = randn(J, dim, seed)

# optimize on the training set
# The third argument is the squared median distance of the stacked training
# sample (presumably the initial width -- confirm against optimize_gwidth).
# NOTE: this rebinds `gwidth` and `info` from the joint-optimization cell.
med = util.meddistance(tr.stack_xy())
gwidth, info = tst.SmoothCFTest.optimize_gwidth(tr, T0_randn, med**2, **op_gwidth)

In [ ]:
# Diagnostics of the width-only optimization, read from `info` as returned by
# optimize_gwidth: width per iteration (left) and objective value (right).

# trajectories of the Gaussian width
gwidths = info['gwidths']
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
axs[0].plot(gwidths)
axs[0].set_xlabel('iteration')
axs[0].set_ylabel('Gaussian width')
axs[0].set_title('Gaussian width evolution')

# evolution of objective values
objs = info['obj_values']
axs[1].plot(objs)
# Raw string: '\l' is not a valid escape sequence, so a plain literal triggers
# an invalid-escape warning on newer Python. The resulting text is unchanged.
axs[1].set_title(r'Objective $\lambda(T)$')

In [ ]:


In [ ]: