A notebook to test Nystrom HSIC implementation.


In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import fsic.util as util
import fsic.data as data
import fsic.feature as fea
import fsic.kernel as kernel
import fsic.indtest as it
import fsic.glo as glo
import scipy.stats as stats

In [ ]:
# font options
font = {
    #'family' : 'normal',
    #'weight' : 'bold',
    'size'   : 16
}

plt.rc('font', **font)
plt.rc('lines', linewidth=2)
#matplotlib.rc('text', usetex=True)
#matplotlib.rcParams['text.latex.preamble']=[r"\usepackage{amsmath}"]

In [ ]:
def get_quad_psfunc():
    """
    Return a PairedSource to generate y = x^2 + Gaussian noise.
    """
    px = lambda n: np.random.rand(n, 1)*8 - 4
    f = lambda x: 0.2*x**2 + np.random.randn(x.shape[0], 1)
    return data.PSFunc(f, px)

In [ ]:
# paired source 
alpha = 0.05
n = 800
seed = 19

dx = 90 
dy = 50
#ps = data.PSIndSameGauss(dx, dy)
#ps = get_quad_psfunc()
#ps = data.PS2DUnifRotate(angle=np.pi/3)
#ps = data.PSIndUnif(xlb=[0, 3], xub=[1, 10], ylb=[-5, 5], yub=[8, 10])
ps = data.PS2DSinFreq(freq=2)

pdata = ps.sample(n, seed=seed)
#tr, te = pdata.split_tr_te(tr_proportion=0.5, seed=10)

In [ ]:
# get the median distances 
X, Y = pdata.xy()
medx = util.meddistance(X, subsample=1000)
medy = util.meddistance(Y, subsample=1000)
sigmax2 = medx**2
sigmay2 = medy**2
k = kernel.KGauss(sigmax2)
l = kernel.KGauss(sigmay2)

# Randomly choose inducing points.
D = 20
induce_x = X[util.subsample_ind(n, D, seed=seed+1), :]
induce_y = Y[util.subsample_ind(n, D, seed=seed+2), :]

n_simulate = 5000
nyhsic = it.NystromHSIC(k, l, induce_x, induce_y, 
                       n_simulate=n_simulate, alpha=alpha, seed=seed+10)

In [ ]:
nyhsic_result = nyhsic.perform_test(pdata)
nyhsic_result

Null distribution

Check that the distribution simulated from the spectral approach is the same as the one obtained by permutations.


In [ ]:
n_permute = 1000
n_simulate = 1000
fmx = nyhsic.fmx
fmy = nyhsic.fmy

Zx = fmx.gen_features(X)
Zy = fmy.gen_features(Y)
list_perm = it.FiniteFeatureHSIC.list_permute(X, Y, fmx, fmy, n_permute=n_permute, seed=100)
list_spectral, eigx, eigy = it.FiniteFeatureHSIC.list_permute_spectral(Zx, Zy, 
                                                           n_simulate=n_simulate, seed=119)

In [ ]:
freq_p, edge_p = np.histogram(list_perm)
freq_s, edge_s = np.histogram(list_spectral)
nfreq_p = freq_p/float(np.sum(freq_p))
nfreq_s = freq_s/float(np.sum(freq_s))
np.abs(nfreq_p-nfreq_s)

In [ ]:
# histogram
plt.figure(figsize=(10, 5))
#plt.hist(list_perm, color='blue', alpha=0.7, normed=True, bins=20)
plt.hist(list_spectral, color='red', alpha=0.5, normed=True, bins=25, label='Spectral')
plt.hist(list_perm, color='blue', alpha=0.5, normed=True, bins=25, label='Permutation')
plt.legend()

In [ ]:


In [ ]: