In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import freqopttest.util as util
import scipy.stats as stats
import sys

## Load data


In [ ]:
import freqopttest.data as data

In [ ]:
# Configuration for the toy two-sample problem.
nx = 500  # number of samples drawn per distribution
seed = 22  # RNG seed for reproducibility
# Shift between the means of the two 2D Gaussians. [0, 0] => zero shift,
# presumably P = Q (H0 holds) so the test should not reject — confirm in data module.
mean_shift = [0.0, 0]
D = data.toy_2d_gauss_mean_diff(nx, mean_shift, seed=seed)
#D = data.toy_2d_gauss_variance_diff(nx, 1.35, seed=seed)
# split train/test data 
tr_data, te_data = D.split_tr_te(tr_proportion=0.5)
#data.plot_2d_data(D)

## Optimize the test locations and Gaussian width on the training set


In [ ]:
import freqopttest.tst as tst
alpha = 0.01  # significance level of the two-sample test
# Gradient-ascent options for jointly optimizing the test locations and the
# Gaussian kernel width. Names suggest: J=6 locations, full-batch updates
# (batch_proportion=1.0), separate step sizes for locations and width,
# and tol_fun as the convergence tolerance on the objective — see tst module.
op = {'n_test_locs': 6, 'seed': seed+5, 'max_iter': 300, 
     'batch_proportion': 1.0, 'locs_step_size': 0.5, 
      'gwidth_step_size': 0.01, 'tol_fun': 1e-4}
# optimize on the training set
test_locs, gwidth, info = tst.MeanEmbeddingTest.optimize_locs_width(tr_data, alpha, **op)

In [ ]:
# Visualize both data splits side by side, overlaying the initial (T0) and
# optimized test locations on each panel.
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
T0 = info['test_locs0']  # test locations before optimization
for ax, split, tag, title in [(axs[0], tr_data, 'tr', 'Training set'),
                              (axs[1], te_data, 'te', 'Test set')]:
    xs, ys = split.xy()
    ax.plot(xs[:, 0], xs[:, 1], 'xr', label='X ' + tag)
    ax.plot(ys[:, 0], ys[:, 1], 'xb', label='Y ' + tag)
    ax.plot(T0[:, 0], T0[:, 1], 'vy', markersize=10, label='T0')
    ax.plot(test_locs[:, 0], test_locs[:, 1], '*k', markersize=10, label='test locs')
    ax.legend(loc='best')
    ax.set_title(title)

In [ ]:
# trajectories of the Gaussian width
gwidths = info['gwidths']
fig, axs = plt.subplots(2, 2, figsize=(10, 9))
axs[0, 0].plot(gwidths)
axs[0, 0].set_xlabel('iteration')
axs[0, 0].set_ylabel('Gaussian width')
axs[0, 0].set_title('Gaussian width evolution')

# evolution of objective values
objs = info['obj_values']
axs[0, 1].plot(objs)
axs[0, 1].set_title('Objective $\lambda(T)$')

# trajectories of the test locations
# iters x J. X Coordinates of all test locations
locs = info['test_locs']
for coord in [0, 1]:
    locs_d0 = locs[:, :, coord]
    J = locs_d0.shape[1]
    axs[1, coord].plot(locs_d0)
    axs[1, coord].set_xlabel('iteration')
    axs[1, coord].set_ylabel('index %d of test_locs'%(coord))
    axs[1, coord].set_title('evolution of %d test locations'%J)

X, Y = D.xy()
xy = np.vstack((X, Y))
print('optimized width: %.3f'%gwidth)

## ME test: optimize only the Gaussian width (test locations fixed)


In [ ]:
# Gradient-ascent options for optimizing only the Gaussian width.
op_gwidth = {'max_iter': 200,'gwidth_step_size': 0.1,  
             'batch_proportion': 1.0, 'tol_fun': 1e-4}
# Number of test locations. Derive it explicitly from the optimized locations
# instead of relying on the loop variable J leaking from the plotting cell
# above (hidden-state bug: this cell previously broke if run before that one).
J = test_locs.shape[0]
# optimize on the training set
T0_randn = tst.MeanEmbeddingTest.init_locs_randn(tr_data, J, seed=seed)
med = util.meddistance(tr_data.stack_xy())
# med**2: initialize the squared Gaussian width with the median heuristic.
gwidth, info = tst.MeanEmbeddingTest.optimize_gwidth(tr_data, T0_randn, med**2, **op_gwidth)

In [ ]:
# Diagnostics of the width-only optimization: evolution of the Gaussian
# width and of the objective value over iterations.
gwidths = info['gwidths']
fig, axs = plt.subplots(1, 2, figsize=(10, 4))
axs[0].plot(gwidths)
axs[0].set_xlabel('iteration')
axs[0].set_ylabel('Gaussian width')
axs[0].set_title('Gaussian width evolution')

# evolution of objective values
objs = info['obj_values']
axs[1].plot(objs)
# Raw string: '\l' is an invalid escape sequence in a plain string literal
# (DeprecationWarning now, SyntaxWarning/SyntaxError in newer Python).
axs[1].set_title(r'Objective $\lambda(T)$')

In [ ]:


In [ ]: