1D gaussian toy model



In [1]:

    
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib



In [2]:

    
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("talk")
rc('axes', labelsize=20, titlesize=20)



In [3]:

    
import numpy as np
import triangle
from clerk.stats.distances import mahalanobis
import abcpmc 

#np.random.seed(987654321)

Data



In [4]:

    
mean = 1
sigma = 1
n = 10000
y = np.random.normal(mean, sigma, n)



In [5]:

    
sns.distplot(y)









    Out[5]:





<matplotlib.axes._subplots.AxesSubplot at 0x1175fccd0>

Setup



In [6]:

    
prior = abcpmc.TophatPrior([-5], [5])



In [7]:

    
def mean_dist(x, y):
    return np.abs(np.mean(x, axis=0) - np.mean(y, axis=0))

dist = mean_dist



In [8]:

    
def create_new_sample(theta):
    return np.random.normal(theta, sigma, n)
postfn = create_new_sample

Verification



In [9]:

    
theta = prior()
print theta
x = postfn([mean])
d = dist(x, y)
print d









    



[-0.4025917]
0.000582605098998



In [10]:

    
distances = [dist(y, postfn(mean)) for _ in range(1000)]
sns.distplot(distances)









    Out[10]:





<matplotlib.axes._subplots.AxesSubplot at 0x11780e8d0>

ABC sampling with PMC



In [11]:

    
def sample(T, eps_val, eps_min):
    abcpmc_sampler = abcpmc.Sampler(N=2000, Y=y, postfn=postfn, dist=dist, threads=8)
    eps = abcpmc.ConstEps(T, eps_val)
    pools = []
    for pool in abcpmc_sampler.sample(prior, eps):
        print("T: {0}, eps: {1:>.4f}, ratio: {2:>.4f}".format(pool.t, eps(pool.t), pool.ratio))
        for i, (mean, std) in enumerate(zip(*abcpmc.weighted_avg_and_std(pool.thetas, pool.ws, axis=0))):
            print(u"    theta[{0}]: {1:>.4f} \u00B1 {2:>.4f}".format(i, mean,std))

        eps.eps = np.percentile(pool.dists, 90)
        if eps.eps < eps_min:
            eps.eps = eps_min
            
        pools.append(pool)
        
    abcpmc_sampler.close()
    
    return pools



In [12]:

    
T=38
eps=0.5
pools = sample(T, eps, sigma/sqrt(n))









    



T: 0, eps: 0.5000, ratio: 0.1000
    theta[0]: 0.9792 ± 0.2894
T: 1, eps: 0.4495, ratio: 0.6137
    theta[0]: 1.0011 ± 0.2582
T: 2, eps: 0.3917, ratio: 0.6086
    theta[0]: 0.9932 ± 0.2256
T: 3, eps: 0.3414, ratio: 0.6001
    theta[0]: 0.9860 ± 0.1989
T: 4, eps: 0.2992, ratio: 0.6127
    theta[0]: 0.9945 ± 0.1734
T: 5, eps: 0.2653, ratio: 0.6077
    theta[0]: 0.9961 ± 0.1531
T: 6, eps: 0.2335, ratio: 0.6008
    theta[0]: 0.9967 ± 0.1362
T: 7, eps: 0.2053, ratio: 0.6335
    theta[0]: 0.9904 ± 0.1180
T: 8, eps: 0.1806, ratio: 0.6219
    theta[0]: 0.9940 ± 0.1040
T: 9, eps: 0.1577, ratio: 0.6099
    theta[0]: 0.9918 ± 0.0905
T: 10, eps: 0.1387, ratio: 0.6207
    theta[0]: 0.9923 ± 0.0811
T: 11, eps: 0.1228, ratio: 0.6177
    theta[0]: 0.9919 ± 0.0717
T: 12, eps: 0.1081, ratio: 0.6057
    theta[0]: 0.9914 ± 0.0640
T: 13, eps: 0.0959, ratio: 0.6107
    theta[0]: 0.9927 ± 0.0557
T: 14, eps: 0.0835, ratio: 0.6232
    theta[0]: 0.9920 ± 0.0496
T: 15, eps: 0.0729, ratio: 0.5928
    theta[0]: 0.9909 ± 0.0426
T: 16, eps: 0.0624, ratio: 0.5891
    theta[0]: 0.9905 ± 0.0373
T: 17, eps: 0.0554, ratio: 0.5872
    theta[0]: 0.9932 ± 0.0340
T: 18, eps: 0.0498, ratio: 0.5938
    theta[0]: 0.9933 ± 0.0294
T: 19, eps: 0.0436, ratio: 0.6011
    theta[0]: 0.9920 ± 0.0271
T: 20, eps: 0.0385, ratio: 0.5685
    theta[0]: 0.9920 ± 0.0244
T: 21, eps: 0.0337, ratio: 0.5599
    theta[0]: 0.9926 ± 0.0222
T: 22, eps: 0.0299, ratio: 0.5498
    theta[0]: 0.9925 ± 0.0198
T: 23, eps: 0.0261, ratio: 0.5549
    theta[0]: 0.9924 ± 0.0180
T: 24, eps: 0.0232, ratio: 0.5258
    theta[0]: 0.9924 ± 0.0167
T: 25, eps: 0.0205, ratio: 0.5056
    theta[0]: 0.9926 ± 0.0155
T: 26, eps: 0.0181, ratio: 0.4697
    theta[0]: 0.9929 ± 0.0148
T: 27, eps: 0.0159, ratio: 0.4240
    theta[0]: 0.9922 ± 0.0138
T: 28, eps: 0.0141, ratio: 0.4017
    theta[0]: 0.9927 ± 0.0134
T: 29, eps: 0.0125, ratio: 0.3784
    theta[0]: 0.9924 ± 0.0129
T: 30, eps: 0.0112, ratio: 0.3383
    theta[0]: 0.9929 ± 0.0118
T: 31, eps: 0.0101, ratio: 0.3437
    theta[0]: 0.9923 ± 0.0111
T: 32, eps: 0.0100, ratio: 0.3518
    theta[0]: 0.9922 ± 0.0115
T: 33, eps: 0.0100, ratio: 0.3458
    theta[0]: 0.9924 ± 0.0114
T: 34, eps: 0.0100, ratio: 0.3600
    theta[0]: 0.9930 ± 0.0114
T: 35, eps: 0.0100, ratio: 0.3515
    theta[0]: 0.9931 ± 0.0111
T: 36, eps: 0.0100, ratio: 0.3637
    theta[0]: 0.9921 ± 0.0112
T: 37, eps: 0.0100, ratio: 0.3531
    theta[0]: 0.9926 ± 0.0113

Postprocessing



In [13]:

    
def plot_pool(prob, samples):
    prob = np.array(prob).flatten()
    samples = np.vstack(samples)
    
    sns.distplot(prob)
    show()
    sns.distplot(samples, axlabel=r'$\theta$')
    #savefig("1d_gauss_posterior.pdf")
    show()
    sample_mean = np.mean(samples, axis=0)
    print(u"mean: {0:>.4f} \u00B1 {1:>.4f}".format(float(np.mean(samples, axis=0)), float(np.std(samples, axis=0))))



In [14]:

    
offset = 33
samples = np.array([pool.thetas for pool in pools])
distances = np.array([pool.dists for pool in pools])

plot_pool(distances[offset:], samples[offset:])









    












    












    



mean: 0.9926 ± 0.0098



In [15]:

    
var_vals = np.var((samples), axis=1)
eps_values = np.array([pool.eps for pool in pools])



In [16]:

    
max_eps = 38
f, ax = subplots(1,1)

ax.plot(eps_values, var_vals[:max_eps], "-" ,label="ABC PMC")

ax.plot(eps_values, (float(sigma)**2/n + eps_values[:max_eps]**2/3), "--", label="ABC analytic")

ax.axhline(1e-4, linestyle=":", linewidth=4, color=sns.xkcd_rgb["pale red"], label="Bayesian")


#ticks = xticks()[0][:-1]
#xticks(ticks, ["{0:>4.3f}".format(eps) for eps in eps_values[ticks.tolist()]], rotation=70)
#ax.semilogy()
ax.set_ylabel(r"var($\theta$)")
ax.set_xlabel(r"$\epsilon$")
ax.legend(loc="best")
ax.invert_xaxis()
ylim([-.01, None])
#savefig("1d_gauss_variance.pdf")









    Out[16]:





(-0.01, 0.089999999999999997)



In [17]:

    
acc_ratios = np.array([pool.ratio for pool in pools])
plot(acc_ratios, label="ABC PMC")
plot(2 * np.array(eps_values), label="analytic")
xticks(np.arange(len(eps_values)), ["{0:>4.3f}".format(eps) for eps in eps_values], rotation=70)
#semilogy()
ylabel("acceptance ratio")
xlabel(r"$\epsilon$")
legend(loc="best")









    Out[17]:





<matplotlib.legend.Legend at 0x118367390>



In [18]:

    
from scipy.special import erf
def p_theta_eta(theta, eta):
    phi = lambda t: (1 + erf(t / np.sqrt(2))) / 2
    ybar = np.mean(y, axis=0)
    return 1. / (2 * eta) *(phi((ybar - theta + eta) / (sigma / np.sqrt(n))) - phi((ybar - theta - eta) / (sigma / np.sqrt(n))) )



In [19]:

    
def get_ticks(ticks, num=7):
    return [float("{0:<.2f}".format(tick)) for tick in np.linspace(ticks[0], ticks[-1], 7)]



In [20]:

    
from scipy.stats import norm



In [21]:

    
nbins = 6
rc('axes', labelsize=40, titlesize=30)
#rc("ticks", size=20)
sns.set(style="ticks")
with mpl.rc_context(rc={"figure.figsize": [15, 20]}):
    for i in range(36/2):
        subplot(5, 4, i+1)
        title(r"Iteration: {0}, $\epsilon$: {1:>4.3f}".format(i*2, eps_values[i*2]), size=15)
        ax = sns.distplot(samples[i*2], hist=False, axlabel=r'$\theta$', hist_kws={"rotation":.3, "fontsize":30})
        ax.set_xlabel(r'$\theta$', size=15)
        ax.locator_params(axis = 'x', nbins = nbins)
        ax.locator_params(axis = 'y', nbins = 5)
        min_val, max_val = ax.get_xlim()
        x_grid = np.linspace(min_val, max_val, 1000)
        plot(x_grid, p_theta_eta(x_grid, eps_values[i*2]), "--")
        if i*2>=22:
            xlim([0.96, 1.04])
        
        if i*2<=14:
            ylim([None, 9])
        
        rv = norm(loc=np.mean(y), scale=1/np.sqrt(n))
        plot(x_grid, rv.pdf(x_grid), ":", color=sns.xkcd_rgb["pale red"])
        fill(x_grid, rv.pdf(x_grid), color=sns.xkcd_rgb["pale red"], alpha=0.2)
        tick_params(axis='both', which='major', labelsize=15)
    #i = 37
    #subplot(5, 4, 20)
    #title(r"Iteration: {0}, $\epsilon$: {1:>4.3f}".format(i, eps_values[i]))
    #ax = sns.distplot(samples[i], hist=False, axlabel=r'$\theta$')
    #ax.locator_params(axis = 'x', nbins = nbins)
    #min_val, max_val = ax.get_xlim()
    #x_grid = np.linspace(min_val, max_val, 1000)
    #plot(x_grid, p_theta_eta(x_grid, eps_values[i]))
    
    ax = subplot(5, 4, i+2)
    plot(1, label="ABC PMC")
    plot(1, "--", label="ABC analytic")
    plot(1, ":", label="Bayesian")
    legend(loc=2)
    ax.set_axis_off()
    subplots_adjust(hspace = 0.40)
    
    #savefig("1d_gauss_posterior_evolution.pdf")



In [22]:

    
from scipy.stats import gaussian_kde



In [23]:

    
x_grid = np.linspace(0.4, 1.6, 1000)
with mpl.rc_context(rc={"figure.figsize": [15, 20]}):
    for i in range(len(samples)/2):
        subplot(5, 4, i+1)
        title("Iteration: {0}, eps: {1:>4.3f}".format(i*2, eps_values[i*2]))
        
        kde = gaussian_kde(np.array(samples[i*2])[:, 0])
        plot(x_grid, kde(x_grid))
        plot(x_grid, p_theta_eta(x_grid, eps_values[i*2]))



In [ ]: