The first notebook to test the FSSD goodness-of-fit idea: testing a model density p against a sample drawn from a (possibly different) distribution q.
In [ ]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#%config InlineBackend.figure_format = 'svg'
#%config InlineBackend.figure_format = 'pdf'
import kgof
import kgof.data as data
import kgof.density as density
import kgof.goftest as gof
import kgof.kernel as kernel
import kgof.util as util
import matplotlib
import matplotlib.pyplot as plt
import autograd.numpy as np
import scipy.stats as stats
In [ ]:
# Global plot styling: larger text, thicker lines, and fonttype 42
# (TrueType) so text in saved PDF/PS figures stays editable.
font_cfg = {
    'size': 18,
}
plt.rc('font', **font_cfg)
plt.rc('lines', linewidth=2)
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
In [ ]:
# Parameters of the null model p: a d-dimensional isotropic normal
# with zero mean and unit variance.
seed = 22
d = 40
mean = np.full(d, 0.0)
variance = 1
In [ ]:
# Draw a sample of size n from q = N(0, qvariance * I). The null model p
# is N(mean, variance) = N(0, I); p and q differ only in variance here,
# so the null hypothesis does not hold for this data.
n = 700
p = density.IsotropicNormal(mean, variance)
qvariance = 2.5
# 'mean + 0' creates a fresh copy of the mean array for the data source.
ds = data.DSIsotropicNormal(mean + 0, qvariance)
dat = ds.sample(n, seed=seed + 1)
X = dat.data()
# 20% of the sample for parameter tuning (tr); the rest held out for the
# actual test (te).
tr, te = dat.split_tr_te(tr_proportion=0.2, seed=seed + 1)
In [ ]:
# Plot the model density p together with a histogram of the generated
# data (only meaningful for 1-dimensional problems).
if p.dim()==1:
    sd = np.std(X)
    # Evaluation grid extending one standard deviation beyond the data range.
    dom = np.linspace(np.min(X)-sd, np.max(X)+sd, 500)
    unden = np.exp(p.log_normalized_den(dom[:, np.newaxis]))
    plt.figure(figsize=(10, 5))
    # 'normed' was removed in matplotlib 3.1; 'density=True' is the
    # replacement (normalizes the histogram to a probability density).
    plt.hist(X, bins=40, density=True, label='Data', color='r')
    plt.plot(dom, unden, 'b-', label='p')
    plt.legend(loc='best')
In [ ]:
# Test
# FSSD test configuration.
J = 5  # number of test locations
alpha = 0.01  # significance level
X = dat.X
# Squared distance heuristic for the initial Gaussian kernel width
# (presumably the median pairwise distance; see util.meddistance).
gwidth0 = util.meddistance(X, subsample=1000)**2
# random test locations
# Drawn from a Gaussian fitted to the data (util.fit_gaussian_draw).
V0 = util.fit_gaussian_draw(X, J, seed=seed+1)
# V0[0, 0] = 3
# print V0
print('Gaussian width^2: {0}'.format(gwidth0))
In [ ]:
# Run the FSSD test using the random (unoptimized) locations V0 and the
# median-heuristic kernel width.
k_rand = kernel.KGauss(gwidth0)
null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=10)
fssd = gof.FSSD(p, k_rand, V0, null_sim=null_sim, alpha=alpha)
fssd.perform_test(te)
In [ ]:
# Mean and variance of the test statistic under H1 on the test set te
# (presumably a proxy for test power — verify against gof.FSSD docs).
fssd.get_H1_mean_variance(te)
In [ ]:
# Options for the test-location/width optimization.
opts = {
    'reg': 1e-3,      # regularization parameter
    'max_iter': 30,   # maximum number of optimization iterations
    'tol_fun': 1e-9,  # function-value tolerance for termination
    # 'disp': True
}
# Optimize the test locations and the Gaussian width on the training split.
V_opt, gw_opt, opt_result = gof.GaussFSSD.optimize_locs_widths(p, tr, gwidth0, V0, **opts)
# del is a statement, not a function — no parentheses needed. Drop the
# array-valued fields so the displayed summary stays readable.
del opt_result['jac'], opt_result['x']
opt_result
In [ ]:
# Display the optimized Gaussian width^2 returned by optimize_locs_widths.
gw_opt
In [ ]:
# construct a test
# Build and run the FSSD test using the optimized kernel width and
# test locations from the previous cell.
k_opt = kernel.KGauss(gw_opt)
null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=10)
# null_sim = gof.FSSDH0SimCovDraw(n_simulate=2000, seed=10)
fssd_opt = gof.FSSD(p, k_opt, V_opt, null_sim=null_sim, alpha=alpha)
# return_simulated_stats=True keeps the simulated null statistics in the
# result dict ('sim_stats'), used for the histogram below.
fssd_opt_result = fssd_opt.perform_test(te, return_simulated_stats=True)
fssd_opt_result
In [ ]:
# get the mean and variance under H1 of the test statistic
# (evaluated on the held-out test portion te)
fssd_opt.get_H1_mean_variance(te)
In [ ]:
# Histogram of the simulated null statistics against the observed
# test statistic.
sim_stats = fssd_opt_result['sim_stats']
# 'normed' was removed in matplotlib 3.1; 'density' is the replacement.
plt.hist(sim_stats, bins=20, density=True);
plt.stem([fssd_opt_result['test_stat']], [0.03], 'r-o', label='Stat')
plt.legend()
In [ ]:
# One-call variant: presumably optimizes the test with automatically
# initialized locations (no manual V0/gwidth0) — see
# gof.GaussFSSD.optimize_auto_init. Reuses the same opts as above.
gof.GaussFSSD.optimize_auto_init(p, tr, J, **opts)
In [ ]:
def gbrbm_perturb(var_perturb_B, dx=50, dh=10):
    """
    Build a Gaussian-Bernoulli RBM goodness-of-fit problem in which the
    (0, 0) entry of the B matrix (the matrix linking the latent and the
    observed variables) of the sampling distribution is perturbed.

    - var_perturb_B: variance of the Gaussian noise added to B[0, 0].
    - dx: observed dimension
    - dh: latent dimension

    Return (p, ds): the unperturbed density p and a data source ds that
    samples from the perturbed model.
    """
    # Fix the seed so repeated calls construct the exact same problem.
    # NOTE: the order of the np.random calls below matters for
    # reproducibility — do not reorder them.
    with util.NumpySeedContext(seed=10):
        B = np.random.randint(0, 2, (dx, dh)) * 2 - 1.0
        b = np.random.randn(dx)
        c = np.random.randn(dh)
        p = density.GaussBernRBM(B, b, c)

        B_perturb = np.copy(B)
        noise = np.random.randn(1) * np.sqrt(var_perturb_B)
        B_perturb[0, 0] = B_perturb[0, 0] + noise
        ds = data.DSGaussBernRBM(B_perturb, b, c, burnin=50)

    return p, ds
In [ ]:
# A small RBM problem (dx=2 observed dimensions) so samples can be
# visualized with a 2D scatter plot.
p, ds_per = gbrbm_perturb(1e-1, dx=2, dh=8)
# Data source for the unperturbed model p itself.
ds = p.get_datasource()
dat = ds.sample(n=200, seed=5)
dat_per = ds_per.sample(n=200, seed=4)
In [ ]:
# Overlay samples from the unperturbed model (blue x's) and the
# perturbed model (red x's).
X = dat.data()
X_per = dat_per.data()
for pts, marker in ((X, 'bx'), (X_per, 'rx')):
    plt.plot(pts[:, 0], pts[:, 1], marker)
In [ ]:
# Compare the shapes of the IMQ kernel and the Gaussian kernel, each
# evaluated between a 1D grid of points and the single location v = 0.
b = -0.5
k_imq = kernel.KIMQ(b=b, c=1)
k_g = kernel.KGauss(sigma2=1.0)
dom = np.linspace(-8, 8, 100)[:, np.newaxis]
v = 0
loc = np.array([[v]])
plt.plot(dom, k_imq.eval(dom, loc), 'b-', label='IMQ kernel')
plt.plot(dom, k_g.eval(dom, loc), 'r-', label='Gaussian kernel')
plt.legend()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: