This notebook is to test the implementation of the Randomized Dependence Coefficient test using permutations to compute the null distribution.


In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import fsic.util as util
import fsic.data as data
import fsic.feature as fea
import fsic.kernel as kernel
import fsic.indtest as it
import fsic.glo as glo
import scipy.stats as stats

In [ ]:
# font options
font = {
    #'family' : 'normal',
    #'weight' : 'bold',
    'size'   : 16
}

plt.rc('font', **font)
plt.rc('lines', linewidth=2)
#matplotlib.rc('text', usetex=True)
#matplotlib.rcParams['text.latex.preamble']=[r"\usepackage{amsmath}"]

In [ ]:
def get_quad_psfunc():
    """
    Return a PairedSource to generate y = x^2 + Gaussian noise.
    """
    px = lambda n: np.random.rand(n, 1)*8 - 4
    f = lambda x: 0.2*x**2 + np.random.randn(x.shape[0], 1)
    return data.PSFunc(f, px)

In [ ]:
# paired source 
alpha = 0.05
n = 1000
seed = 17

dx = 10 
dy = 5
#ps = data.PSIndSameGauss(dx, dy)
ps = get_quad_psfunc()
#ps = data.PS2DUnifRotate(angle=np.pi/3)
#ps = data.PSIndUnif(xlb=[0, 3], xub=[1, 10], ylb=[-5, 5], yub=[8, 10])
#ps = data.PS2DSinFreq(freq=2)

pdata = ps.sample(n, seed=seed)
#tr, te = pdata.split_tr_te(tr_proportion=0.5, seed=10)

In [ ]:
# get the median distances 
X, Y = pdata.xy()
# copula transform to both X and Y
cop_map = fea.MarginalCDFMap() 
Xcdf = cop_map.gen_features(X)
Ycdf = cop_map.gen_features(Y)

medx = util.meddistance(Xcdf, subsample=1000)
medy = util.meddistance(Ycdf, subsample=1000)
sigmax2 = medx**2
sigmay2 = medy**2

feature_pairs = 10
fmx = fea.RFFKGauss(sigmax2, n_features=feature_pairs, seed=seed+1)
fmy = fea.RFFKGauss(sigmay2, n_features=feature_pairs, seed=seed+2)
rdc = it.RDCPerm(fmx, fmy, n_permute=300, alpha=alpha, seed=seed+89)
rdc_result = rdc.perform_test(pdata)
rdc_result

Null distribution


In [ ]:
n_permute = 800
with util.ContextTimer() as t1:
    # naive permutation
    naive_rdcs = it.RDCPerm._list_permute_naive(X, Y, fmx, fmy, n_permute=n_permute, seed=seed+1, use_copula=True)
    pass

with util.ContextTimer() as t2:
    # fast permutation 
    fast_rdcs = it.RDCPerm.list_permute(X, Y, fmx, fmy, n_permute=n_permute, seed=seed+1, 
                             use_copula=True, cca_reg=1e-5)
    
print 'naive permutation took: %.3g s'%t1.secs
print 'fast permutation took: %.3g s'%t2.secs

In [ ]:
# histograms 
plt.figure(figsize=(10, 5))
plt.hist(naive_rdcs, alpha=0.5, label='Naive', bins=15, normed=True)
plt.hist(fast_rdcs, alpha=0.5, label='Fast', bins=15, normed=True)
plt.legend()

In [ ]:


In [ ]:


In [ ]:


In [ ]: