A first notebook for trying out the FSSD goodness-of-fit test idea: isotropic Gaussian problems, optimized test locations, a Gaussian-Bernoulli RBM problem, and an IMQ vs. Gaussian kernel comparison.


In [ ]:
# Reload edited modules automatically so changes to the kgof package are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'

# kgof: kernel goodness-of-fit testing package used throughout this notebook.
import kgof
import kgof.data as data
import kgof.density as density
import kgof.goftest as gof
import kgof.kernel as kernel
import kgof.util as util
import matplotlib
import matplotlib.pyplot as plt
# autograd-wrapped numpy so gradients of density code can be taken
import autograd.numpy as np
import scipy.stats as stats

In [ ]:
# Global plotting configuration: larger fonts and thicker lines for
# readable figures.
plt.rc('font', size=18)
plt.rc('lines', linewidth=2)
# Embed fonts as TrueType (fonttype 42) so exported PDF/PS figures keep
# editable, correctly-rendered text.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

Isotropic normal distribution


In [ ]:
# True model p: a d-dimensional isotropic normal N(0, variance * I).
seed = 22
d = 40
mean = np.zeros(d)
variance = 1

In [ ]:
# sample size
n = 700

# only one dimension of the mean is shifted
#draw_mean = mean + np.hstack((1, np.zeros(d-1)))

# Model p = N(0, I); data come from q = N(0, qvariance * I) with a larger
# variance, so H0: q = p is false and the test should reject given power.
p = density.IsotropicNormal(mean, variance)
qvariance = 2.5
ds = data.DSIsotropicNormal(mean+0, qvariance)

# Alternative 1-d Gaussian mixture problem, kept for experimentation:
# # Gaussian mixture
# p_means = np.array([ [0], [3.0]])
# p_variances = np.array([1, 0.01])
# # p = density.IsoGaussianMixture(p_means, p_variances)
# p = density.IsotropicNormal(np.zeros(1), 1)

# q_means = np.array([ [0], [0]])
# q_variances = np.array([0.01, 1])
# ds = data.DSIsoGaussianMixture(q_means, q_variances, pmix=[0.2, 0.8])
# # ds = data.DSIsoGaussianMixture(p_means, p_variances)
dat = ds.sample(n, seed=seed+1)

X = dat.data()
# 20% of the sample for parameter tuning (tr), 80% held out for testing (te)
tr, te = dat.split_tr_te(tr_proportion=0.2, seed=seed+1)

In [ ]:
# Plot the model density p against a histogram of the sampled data
# (only meaningful when the problem is one-dimensional).
if p.dim()==1:
    sd = np.std(X)
    # Evaluation grid extending one standard deviation beyond the data range.
    dom = np.linspace(np.min(X)-sd, np.max(X)+sd, 500)
    unden = np.exp(p.log_normalized_den(dom[:, np.newaxis]))
    plt.figure(figsize=(10, 5))
    # density=True normalizes the histogram to integrate to 1; the old
    # 'normed' keyword was removed in Matplotlib 3.1.
    plt.hist(X, bins=40, density=True, label='Data', color='r')
    plt.plot(dom, unden, 'b-', label='p')
    plt.legend(loc='best')

In [ ]:
# FSSD test parameters
J = 5          # number of test locations
alpha = 0.01   # significance level

# Use the dat.data() accessor, consistent with the sampling cell above.
X = dat.data()
# Gaussian kernel width: squared median pairwise distance heuristic.
gwidth0 = util.meddistance(X, subsample=1000)**2
# random test locations drawn from a Gaussian fitted to the data
V0 = util.fit_gaussian_draw(X, J, seed=seed+1)
print('Gaussian width^2: {0}'.format(gwidth0))

In [ ]:
# Gaussian kernel with the median-heuristic bandwidth
k0 = kernel.KGauss(gwidth0)
# Simulate the null distribution from the covariance of observed features.
null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=10)
# null_sim = gof.FSSDH0SimCovDraw(n_simulate=2000, seed=10)

# FSSD test with the random (unoptimized) locations V0, run on the test split.
fssd = gof.FSSD(p, k0, V0, null_sim=null_sim, alpha=alpha)
fssd.perform_test(te)

In [ ]:
# Mean and variance of the test statistic under H1 (relevant to test power).
fssd.get_H1_mean_variance(te)

Optimized Gaussian FSSD


In [ ]:
# Options for gradient-based optimization of the test locations V and the
# Gaussian kernel width on the training split.
opts = {
    'reg': 1e-3,      # regularization in the optimization objective
    'max_iter': 30,
    'tol_fun': 1e-9,  # stop when the objective improves less than this
    # 'disp': True,   # uncomment for optimizer progress output
}

V_opt, gw_opt, opt_result = gof.GaussFSSD.optimize_locs_widths(p, tr, gwidth0, V0, **opts)
# Drop the bulky gradient/solution arrays before displaying the summary.
# pop() with a default avoids a KeyError if a key is absent.
opt_result.pop('jac', None)
opt_result.pop('x', None)
opt_result

In [ ]:
# Optimized squared Gaussian kernel width
gw_opt

In [ ]:
# construct a test using the optimized kernel width and test locations
k_opt = kernel.KGauss(gw_opt)
null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=10)
# null_sim = gof.FSSDH0SimCovDraw(n_simulate=2000, seed=10)
fssd_opt = gof.FSSD(p, k_opt, V_opt, null_sim=null_sim, alpha=alpha)
# return_simulated_stats=True keeps the simulated null statistics for plotting below
fssd_opt_result = fssd_opt.perform_test(te, return_simulated_stats=True)
fssd_opt_result

In [ ]:
# get the mean and variance under H1 of the test statistic
# (useful for approximating the power of the optimized test)
fssd_opt.get_H1_mean_variance(te)

In [ ]:
# Histogram of the simulated null statistics vs. the observed test statistic.
sim_stats = fssd_opt_result['sim_stats']
# density=True normalizes the histogram; the old 'normed' keyword was
# removed in Matplotlib 3.1.
plt.hist(sim_stats, bins=20, density=True);
plt.stem([fssd_opt_result['test_stat']], [0.03], 'r-o', label='Stat')
plt.legend()

In [ ]:
# Convenience routine: automatically initialize, then optimize the J test
# locations and the kernel width on the training split.
gof.GaussFSSD.optimize_auto_init(p, tr, J, **opts)

Analyze Gaussian-Bernoulli RBM


In [ ]:
def gbrbm_perturb(var_perturb_B, dx=50, dh=10):
    """
    Construct a Gaussian-Bernoulli RBM goodness-of-fit problem in which the
    data-generating distribution differs from the model p only through a
    perturbation of the first entry of the weight matrix B (the matrix
    linking the latent and the observation).

    - var_perturb_B: variance of the Gaussian noise added to B[0, 0].
    - dx: observed dimension
    - dh: latent dimension

    Return p (density), data source
    """
    with util.NumpySeedContext(seed=10):
        # Weights in {-1, 1}; biases from a standard normal.
        B = np.random.randint(0, 2, (dx, dh))*2 - 1.0
        b = np.random.randn(dx)
        c = np.random.randn(dh)
        p = density.GaussBernRBM(B, b, c)

        # The data source shares all parameters except B[0, 0], to which
        # Gaussian noise of variance var_perturb_B is added.
        B_perturb = np.copy(B)
        noise = np.random.randn(1)*np.sqrt(var_perturb_B)
        B_perturb[0, 0] += noise
        ds = data.DSGaussBernRBM(B_perturb, b, c, burnin=50)

    return p, ds

In [ ]:
# Build a perturbed-RBM problem in two observed dimensions (so the samples
# can be visualized), then draw data from both the model and the perturbation.
p, ds_per = gbrbm_perturb(1e-1, dx=2, dh=8)
ds = p.get_datasource()
dat = ds.sample(n=200, seed=5)
dat_per = ds_per.sample(n=200, seed=4)

In [ ]:
# Scatter plot comparing samples from the model p (blue) and from the
# perturbed RBM (red). Labels/legend added so the figure stands alone.
X = dat.data()
X_per = dat_per.data()
plt.plot(X[:, 0], X[:, 1], 'bx', label='p sample')
plt.plot(X_per[:, 0], X_per[:, 1], 'rx', label='perturbed sample')
plt.legend()

Visually compare IMQ and Gaussian kernels


In [ ]:
# IMQ kernel exponent (negative here; see the KIMQ definition in kgof.kernel)
b = -0.5
k_imq = kernel.KIMQ(b=b, c=1)
k_g = kernel.KGauss(sigma2=1.0)

# Evaluate both kernels k(x, v) at a fixed location v = 0 over a 1-d grid
# to compare their tail behavior.
dom = np.linspace(-8, 8, 100)[:, np.newaxis]
v = 0
plt.plot(dom, k_imq.eval(dom, np.array([[v]])), 'b-', label='IMQ kernel')
plt.plot(dom, k_g.eval(dom, np.array([[v]])), 'r-', label='Gaussian kernel')

plt.legend()

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: