In [ ]:

    
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'
import freqopttest.util as util
import freqopttest.data as data
import freqopttest.ex.exglobal as exglo
from freqopttest.ex.ex4_text import load_nips_TSTData
import freqopttest.kernel as kernel
import freqopttest.tst as tst
import freqopttest.glo as glo
import freqopttest.plot as plot
import matplotlib.pyplot as plt
import numpy as np
import os
import scipy.stats as stats

import skimage.io as io
import skimage as ski
import skimage.filters as filters

try:
    import cPickle as pickle
except:
    import pickle



In [ ]:

    
def showim(img):
    plt.imshow(img, cmap=plt.cm.gray, interpolation='Nearest')



In [ ]:

    
# http://www.scipy-lectures.org/advanced/image_processing/
img = io.imread('../freqopttest/data/karolinska/S_48/AF01AFS.JPG')
showim(img)

Generate data



In [ ]:

    
def read_gen_ndarray(dir_path):
    ld = os.listdir(dir_path)
    list_imgs = []
    for fname in sorted(ld):
        fpath = os.path.join(dir_path, fname)
        img = io.imread(fpath)
        flat = img.flatten()
        #if flat.shape[0] != 3008:
        if flat.shape[0] != 34*48:
            print '#pixels: %d. %s'%(flat.shape[0], fpath)
        list_imgs.append(flat)
    ndarray = np.vstack(list_imgs)
    return ndarray



In [ ]:

    
# path to the folder containing images for X
img_height = 48
x_path = glo.data_file('karolinska/crop_%d_group/HA_NE_SU'%img_height)
y_path = glo.data_file('karolinska/crop_%d_group/AF_AN_DI'%img_height)
X = read_gen_ndarray(x_path)
Y = read_gen_ndarray(y_path)



In [ ]:

    
# make the sizes of the two samples the same
n_min = min(X.shape[0], Y.shape[0])
X = X[:n_min]
Y = Y[:n_min]
assert(X.shape[0] == Y.shape[0])



In [ ]:

    
# normalize range to be [0, 1] to avoid overflow
#img_shape = (32, 24)
img_shape = (48, 34)
X_norm = X/256.0
Y_norm = Y/256.0
print X[0].shape
showim(np.reshape(X_norm[0], img_shape ))
plt.colorbar()



In [ ]:

    
# write the data as a TSTData
tst_label = 'crop%d_HANESU_AFANDI'%img_height
tst_data = tst.TSTData(X_norm, Y_norm, tst_label)
data_dest = glo.data_file('karolinska', '%s.p'%tst_label)
with open(data_dest, 'w') as f:
    pickle.dump(tst_data, f)
    
# write another set combining X, Y. H0 holds
null_dest = glo.data_file('karolinska', 'crop%d_h0.p'%img_height)
with open(null_dest, 'w') as f2:
    pickle.dump(tst_data.stack_xy(), f2)

ME two-sample test



In [ ]:

    
# sample source
seed = 20
ss = data.SSResample(tst_data)
tr, te = tst_data.split_tr_te(tr_proportion=0.5, seed=seed+1)



In [ ]:

    
# parameter optimization
alpha = 0.01

op = {'n_test_locs': 2, 'seed': seed+11, 'max_iter': 200, 
     'batch_proportion': 1.0, 'locs_step_size': 5.0, 
      'gwidth_step_size': 0.1, 'tol_fun': 1e-4}
# optimize on the training set
test_locs, gwidth, info = tst.MeanEmbeddingTest.optimize_locs_width(tr, alpha, **op)



In [ ]:

    
# plot evolution of objective values
plt.plot(info['obj_values'])
plt.xlabel('iteration')
plt.ylabel('Objective')



In [ ]:

    
# actual test
met = tst.MeanEmbeddingTest(test_locs, gwidth, alpha)
met.perform_test(te)

Visualize learned test locations



In [ ]:

    
loc = test_locs[1]
loc_img = np.reshape(loc, img_shape)
plt.imshow( loc_img, interpolation='Nearest', cmap='gray')
plt.colorbar()

Test samples under H0



In [ ]:

    
one_sample = glo.load_data_file('crop48_h0.p')
ss = data.SSNullResample(one_sample)
ss.sample(10, seed=4)



In [ ]: