This notebook is to test the implementation of QuadHSIC.
In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import fsic.util as util
import fsic.data as data
import fsic.kernel as kernel
import fsic.indtest as it
import fsic.glo as glo
import scipy.stats as stats
In [ ]:
# font options
font = {
#'family' : 'normal',
#'weight' : 'bold',
'size' : 16
}
plt.rc('font', **font)
plt.rc('lines', linewidth=2)
#matplotlib.rc('text', usetex=True)
#matplotlib.rcParams['text.latex.preamble']=[r"\usepackage{amsmath}"]
In [ ]:
def get_quad_psfunc():
"""
Return a PairedSource to generate y = x^2 + Gaussian noise.
"""
px = lambda n: np.random.rand(n, 1)*8 - 4
f = lambda x: 0.2*x**2 + np.random.randn(x.shape[0], 1)
return data.PSFunc(f, px)
In [ ]:
# paired source
alpha = 0.05
n = 800
seed = 15
dx = 10
dy = 5
ps = data.PSIndSameGauss(dx, dy)
#ps = get_quad_psfunc()
#ps = data.PSIndUnif(xlb=[0, 3], xub=[1, 10], ylb=[-5, 5], yub=[8, 10])
pdata = ps.sample(n, seed=seed)
tr, te = pdata.split_tr_te(tr_proportion=0.5, seed=10)
In [ ]:
def kl_median(pdata):
"""
Get two Gaussian kernels constructed with the median heuristic.
"""
xtr, ytr = pdata.xy()
dx = xtr.shape[1]
dy = ytr.shape[1]
medx2 = util.meddistance(xtr, subsample=1000)**2
medy2 = util.meddistance(ytr, subsample=1000)**2
k = kernel.KGauss(medx2)
l = kernel.KGauss(medy2)
return k, l
In [ ]:
# number of test locations
k, l = kl_median(tr)
# perform test
n_permute = 20
qhsic = it.QuadHSIC(k, l, n_permute=n_permute, alpha=alpha)
qhsic.perform_test(te)
In [ ]:
alpha = 0.05
n = 800
n_permute = 100
repeats = 100
# data
ps = data.PSIndSameGauss(dx=2, dy=3)
pdata = ps.sample(n, seed=398)
#ps = get_quad_psfunc()
#pdata = ps.sample(n, seed=938)
tr, te = pdata.split_tr_te(tr_proportion=0.5, seed=11)
k, l = kl_median(tr)
# the test
qhsic = it.QuadHSIC(k, l, n_permute=n_permute, alpha=alpha)
In [ ]:
nte = 400
all_results = []
for r in range(repeats):
if (r+1)%10==0:
print 'starting trial: %d'%(r+1)
te = ps.sample(nte, seed=r+2389)
test_result = qhsic.perform_test(te)
all_results.append(test_result)
In [ ]:
pvalues = np.array([result['pvalue'] for result in all_results])
stats = np.array([result['test_stat'] for result in all_results])
prob_reject = np.mean(pvalues < alpha)
print 'prob(reject H0) = %.4g'%prob_reject
In [ ]:
n_permute = 200
pdata = ps.sample(4000, seed=29)
X, Y = pdata.xy()
#with util.ContextTimer() as t1:
# arr_hsic_naive = it.QuadHSIC._list_permute_generic(X, Y, k, l, n_permute)
#print 'naive permutations took %.3g s'%t1.secs
with util.ContextTimer() as t2:
arr_hsic = it.QuadHSIC.list_permute(X, Y, k, l, n_permute)
print 'permutations with precomputed K,L took %.3g'%t2.secs
In [ ]: