Single Integral Comparisons 2

David Thomas 2017/03/27

After fixing a bug that surfaced in the previous single integral comparisons, we retest our method.

Setup


In [197]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib import rc
from bigmali.grid import Grid
from bigmali.likelihood import BiasedLikelihood
from bigmali.prior import TinkerPrior
from bigmali.hyperparameter import get
from scipy.stats import lognorm
from time import time
rc('text', usetex=True)

data = pd.read_csv('/Users/user/Code/PanglossNotebooks/MassLuminosityProject/mock_data.csv')
data.ix[0]

true_mass = 1.005246e+11
true_z = 2.077090e+00
true_lum = 1.377704e+04
true_lum_obs = 1.374879e+04
true_lum_obs_collection = data.lum_obs

prior = TinkerPrior(Grid())

def p1(lobs, lum, sigma):
    # P(L_obs | L, sigma_obs): observed luminosity given true luminosity
    return fast_lognormal(lum, sigma, lobs)

def p2(lum, mass, a1, a2, a3, a4, S, z):
    # P(L | M, alpha, S, z): true luminosity given mass (mass-luminosity relation)
    mu_lum = np.exp(a1) * ((mass / a3) ** a2) * ((1 + z) ** a4)
    return fast_lognormal(mu_lum, S, lum)

def p3(mass, z):
    # P(M | z): mass prior at redshift z
    return prior.fetch(z).pdf(mass)

def q1(lum, lobs, sigma):
    # Q(L | L_obs, sigma_obs): proposal for the true luminosity, centered on the observation
    return fast_lognormal(lobs, sigma, lum)

def q2(mass, lum, a1, a2, a3, a4, S, z):
    # Q(M | L, alpha, S, z): proposal for mass given luminosity (inverted mass-luminosity relation)
    mu_mass = a3 * (lum / (np.exp(a1) * (1 + z) ** a4)) ** (1 / a2)
    return fast_lognormal(mu_mass, S, mass)

def midpoints(arr):
    # midpoints of consecutive grid points
    n = len(arr)-1
    ret = np.zeros(n)
    for i in xrange(n):
        ret[i] = (arr[i+1] + arr[i]) / 2.
    return ret

def fast_lognormal(mu, sigma, x):
    # lognormal pdf with median mu and log-space width sigma (matches lognorm(sigma, scale=mu).pdf(x))
    return (1/(x * sigma * np.sqrt(2 * np.pi))) * np.exp(- 0.5 * (np.log(x) - np.log(mu)) ** 2 / sigma ** 2)

a1,a2,a3,a4,S = get()
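
A note on the parameterization: fast_lognormal(mu, sigma, x) is intended to match scipy's lognorm(sigma, scale=mu).pdf(x), so mu plays the role of the median (scale), not the mean. A minimal standalone check (the test values here are arbitrary):

    import numpy as np
    from scipy.stats import lognorm

    def fast_lognormal(mu, sigma, x):
        # same expression as in the setup cell above
        return (1 / (x * sigma * np.sqrt(2 * np.pi))) * \
            np.exp(-0.5 * (np.log(x) - np.log(mu)) ** 2 / sigma ** 2)

    x = np.linspace(0.5, 5, 10)
    # agrees with scipy's lognormal with shape sigma and scale (median) mu
    assert np.allclose(fast_lognormal(2.0, 1.0, x), lognorm(1.0, scale=2.0).pdf(x))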

In [198]:
p2(space, true_mass, a1, a2, a3, a4, S, true_z)


10275.5266333
Out[198]:
array([ 0.,  0.,  0., ...,  0.,  0.,  0.])

In [100]:
fast_lognormal(1,1,1)


Out[100]:
0.3989422804014327

In [101]:
fast_lognormal(1,2,1)


Out[101]:
0.19947114020071635

In [102]:
fast_lognormal(1,np.array([1,2]),1)


Out[102]:
array([ 0.39894228,  0.19947114])

Sanity check that the distributions we use all integrate to 1.


In [196]:
from scipy.integrate import trapz

space = np.linspace(true_lum_obs_collection.min(), true_lum_obs_collection.max(), 10**5)
pp1 = p1(space,true_lum, 0.05)
print trapz(pp1, space)

space = np.linspace(true_lum_obs_collection.min(), true_lum_obs_collection.max(), 10**5)
pp2 = p2(space, true_mass, a1, a2, a3, a4, S, true_z)
print trapz(pp2, space)

space = np.linspace(data.mass.min(), data.mass.max(), 10**4)
pp3 = p3(space, true_z)
print trapz(pp3, space)

space = np.linspace(true_lum_obs_collection.min(), true_lum_obs_collection.max(), 10**5)
qq1 = q1(true_lum, space, 0.05)
print trapz(qq1, space)

space = np.linspace(data.mass.min(), data.mass.max(), 10**4)
qq2 = q2(space, true_lum, a1, a2, a3, a4, S, true_z)
print trapz(qq2, space)


1.0
10275.5266333
1.0
1.07420251604
1.00125078158
1.0

Check that lognorm and fast_lognormal behave as expected.


In [121]:
r = lognorm(1,scale=2).rvs(size=10**5)
plt.hist(r, normed=True, bins=1000, alpha=0.5)
space = np.linspace(0.1,5,100)
pspace = lognorm(1,scale=2).pdf(space)
plt.plot(space, pspace)
fspace = fast_lognormal(2,1,space)
plt.plot(space, fspace)


Out[121]:
[<matplotlib.lines.Line2D at 0x10fcfc910>]

In [122]:
space = np.linspace(0.1,5,100)
pspace = lognorm(1,scale=2).pdf(space)
plt.plot(space, pspace)
fspace = fast_lognormal(2,1,space)
plt.plot(space, fspace)


Out[122]:
[<matplotlib.lines.Line2D at 0x10f8938d0>]

In [59]:
print pp1


[  0.00000000e+000   4.01861087e-004   1.78221822e-036   1.55154287e-091
   2.04711608e-147   1.65835619e-200   2.98359648e-250   8.54311716e-297
   0.00000000e+000   0.00000000e+000]

In [58]:
pp1


Out[58]:
array([  0.00000000e+000,   4.01861087e-004,   1.78221822e-036,
         1.55154287e-091,   2.04711608e-147,   1.65835619e-200,
         2.98359648e-250,   8.54311716e-297,   0.00000000e+000,
         0.00000000e+000])

In [47]:
plt.plot(space, pp1)


Out[47]:
[<matplotlib.lines.Line2D at 0x109f04450>]

Numerical Integration

\begin{align*} \mathcal{L}(L_{obs}|\alpha, S, \sigma_{obs}, z) &= \iint dL\, dM\ P(L_{obs}|L, \sigma_{obs})\, P(L|M, \alpha, S, z)\, P(M|z)\\ &\approx \sum_{M=\min(MP)}^{\max(MP)}\ \sum_{L=\min(L_{obs})}^{\max(L_{obs})}\ \Delta_M \Delta_L\ P(L_{obs}|L, \sigma_{obs})\, P(L|M, \alpha, S, z)\, P(M|z) \end{align*}
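
To illustrate the midpoint-rule double sum, here is a minimal sketch on a toy separable integrand with a known answer (the integrand and grid sizes are arbitrary, for illustration only):

    import numpy as np

    def midpoints(arr):
        return 0.5 * (arr[1:] + arr[:-1])

    # Toy integrand f(x, y) = exp(-x) * exp(-y) over [0, 5] x [0, 5];
    # the exact double integral is (1 - exp(-5))**2 ~= 0.9866.
    x_edges = np.linspace(0, 5, 200)
    y_edges = np.linspace(0, 5, 200)
    xm, dx = midpoints(x_edges), np.diff(x_edges)
    ym, dy = midpoints(y_edges), np.diff(y_edges)

    integral = 0.0
    for i, x in enumerate(xm):
        # accumulate Delta_x * Delta_y * f(x, y), vectorized over y
        integral += np.sum(dx[i] * dy * np.exp(-x) * np.exp(-ym))
    print(integral)  # ~0.9866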

In [155]:
def numerical_integration_grid(a1, a2, a3, a4, S):
    # Tabulate the (luminosity, mass, weight) grid used by the numerical integration,
    # so the integrand can be visualized. log10 is the helper defined later in the notebook.
    nsamples = 100
    masses = midpoints(prior.fetch(true_z).mass[1:-1])
    delta_masses = np.diff(prior.fetch(true_z).mass[1:-1])
    lums_tmp = np.logspace(log10(np.min(data.lum_obs)), log10(np.max(data.lum_obs)), nsamples)
    lums = midpoints(lums_tmp)
    delta_lums = np.diff(lums_tmp)
    sigma = 0.05
    ans = np.zeros((len(lums)*len(masses),3))
    count = 0
    for i, lum in enumerate(lums):
        for j, mass in enumerate(masses):
            ans[count,0] = lum
            ans[count,1] = mass
            # weight = Delta_M * Delta_L * P(L_obs|L) * P(L|M) * P(M|z)
            ans[count,2] = delta_masses[j] * delta_lums[i] * p1(true_lum_obs, lum, sigma) * \
                p2(lum, mass, a1, a2, a3, a4, S, true_z) * p3(mass, true_z)
            count += 1
    return ans

In [156]:
grid = numerical_integration_grid(a1,a2,a3,a4,S)

In [159]:
import matplotlib.colors as colors
step = 1
epsilon = 1e-20
gridv = np.maximum(grid[::step,2], epsilon)
x = log10(grid[::step,0])
y = log10(grid[::step,1])
scat = plt.scatter(y, x, marker='s', s=40, alpha=0.8, c=gridv, norm=colors.LogNorm(), cmap='coolwarm', lw=0);
plt.gcf().set_size_inches((6,6))
plt.ylim([x.min(),x.max()])
plt.xlim([y.min(),y.max()])
plt.scatter(log10(true_mass), log10(true_lum_obs), color='black', s=50, label='Halo')
# plt.scatter(x[22723], y[22723], color='gold', s=50, label='MaximumWeight')
plt.legend()
plt.gcf().colorbar(scat)
plt.title('Numerical Integration Weights')
plt.xlabel('Mass $(\log_{10}(M))$')
plt.ylabel('Luminosity $(\log_{10}(L))$')


Out[159]:
<matplotlib.text.Text at 0x117c70a50>

In [469]:
nsamples = np.logspace(1, 5, 10)
times = []
res = []

for ns in nsamples:
    start = time()
    res.append(numerical_integration(a1,a2,a3,a4,S,nsamples=ns))
    end = time()
    times.append(end-start)

In [480]:
plt.subplot(211)
plt.title('Result')
plt.plot(nsamples, res)
plt.ylabel('Integral Value')
plt.xlabel('Luminosity Samples')
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')

plt.subplot(212)
plt.title('Runtime')
plt.plot(nsamples, times)
plt.ylabel('Runtime (seconds)')
plt.xlabel('Luminosity Samples')
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')

plt.tight_layout()
plt.gcf().set_size_inches((8,4))


Note: the numerical integration remains stable even with different hyperparameter values.


In [510]:
%time numerical_integration(a1,a2,a3,a4,S_test,nsamples=100)


CPU times: user 15.9 ms, sys: 1.31 ms, total: 17.2 ms
Wall time: 16.4 ms
Out[510]:
1.6392239694116272e-05

In [38]:
S_space = S * np.linspace(1,10, 10)
vals = []
for S_test in S_space:
    vals.append(numerical_integration(a1,a2,a3,a4,S_test,nsamples=100))
    
plt.plot(S_space, vals)


Out[38]:
[<matplotlib.lines.Line2D at 0x109b62b10>]

In [512]:
a1_space = a1 * np.linspace(0.5,2, 10)
vals = []
for a1_test in a1_space:
    vals.append(numerical_integration(a1_test,a2,a3,a4,S,nsamples=100))
plt.plot(a1_space, vals)


Out[512]:
[<matplotlib.lines.Line2D at 0x119376790>]

In [517]:
a2_space = a2 * np.linspace(0.5,2, 10)
vals = []
for a2_test in a2_space:
    vals.append(numerical_integration(a1,a2_test,a3,a4,S,nsamples=100))
plt.plot(a2_space, vals)


Out[517]:
[<matplotlib.lines.Line2D at 0x11a99ad90>]

In [515]:
a4_space = a4 * np.linspace(0.5,2, 10)
vals = []
for a4_test in a4_space:
    vals.append(numerical_integration(a1,a2,a3,a4_test,S,nsamples=100))
plt.plot(a4_space, vals)


Out[515]:
[<matplotlib.lines.Line2D at 0x11adf8250>]

In [37]:
def numerical_integration(a1, a2, a3, a4, S, nsamples=10**3):
    # Midpoint-rule integration over the mass-prior grid and a log-spaced luminosity grid.
    masses = midpoints(prior.fetch(true_z).mass[1:])
    delta_masses = np.diff(prior.fetch(true_z).mass[1:])
    lums_tmp = np.logspace(log10(np.min(data.lum_obs)), log10(np.max(data.lum_obs)), nsamples)
    lums = midpoints(lums_tmp)
    delta_lums = np.diff(lums_tmp)
    sigma = 0.05
    integral = 0
    for i,lum in enumerate(lums):
        # vectorized over masses for each luminosity midpoint
        integral += np.sum(delta_masses * delta_lums[i] * p1(true_lum_obs, lum, sigma) * \
            p2(lum, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z))
    return integral

In [536]:
from scipy.optimize import minimize

nsamples = 100
masses = midpoints(prior.fetch(true_z).mass[1:])
delta_masses = np.diff(prior.fetch(true_z).mass[1:])
lums_tmp = np.logspace(log10(np.min(data.lum_obs)), log10(np.max(data.lum_obs)), nsamples)
lums = midpoints(lums_tmp)
delta_lums = np.diff(lums_tmp)
sigma = 0.05

def func(params):
    # negative of the numerically integrated likelihood, as a function of (a1, a2, a4, S)
    a1,a2,a4,S = params
    integral = 0
    for i,lum in enumerate(lums):
        integral += np.sum(delta_masses * delta_lums[i] * p1(true_lum_obs, lum, sigma) * \
            p2(lum, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z))
    return -integral

x0 = [a1,a2,a4,S*5]
obj = minimize(func, x0, method='BFGS')

In [546]:
obj['x']


Out[546]:
array([  1.07368855e+01,   2.89190866e-01,   1.14645141e+00,
         6.23726598e-05])

In [430]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**3)


1.16445252492e-05
CPU times: user 156 ms, sys: 4.52 ms, total: 161 ms
Wall time: 203 ms

In [427]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**2)


1.16423505533e-05
CPU times: user 15.8 ms, sys: 1.21 ms, total: 17 ms
Wall time: 15.7 ms

In [428]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**4)


1.16445466074e-05
CPU times: user 1.29 s, sys: 12.2 ms, total: 1.3 s
Wall time: 1.31 s

In [156]:
def numerical_integration(a1, a2, a3, a4, S, nsamples=10**4):
    # Variant with a linearly spaced luminosity grid (note the debug prints below).
    masses = midpoints(prior.fetch(true_z).mass)
    delta_masses = np.diff(prior.fetch(true_z).mass)
    lums = np.linspace(np.min(data.lum_obs), np.max(data.lum_obs), nsamples)
    print nsamples
    delta_lum = lums[1] - lums[0]
    print delta_lum
    sigma = 0.05
    integral = 0
    for lum in lums:
        integral += np.sum(delta_masses * delta_lum * p1(true_lum_obs, lum, sigma) * \
            p2(lum, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z))
    return integral

In [157]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**5)


100000
1.09545583084
1.16445468227e-05
CPU times: user 12.5 s, sys: 45.8 ms, total: 12.5 s
Wall time: 12.6 s

In [158]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**4)


10000
10.9555443173
1.16445468227e-05
CPU times: user 1.29 s, sys: 6.5 ms, total: 1.3 s
Wall time: 1.3 s

In [155]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**3)


1.16445468227e-05
CPU times: user 146 ms, sys: 1.94 ms, total: 148 ms
Wall time: 148 ms

In [151]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**6)


0.00112456109124
CPU times: user 2min 5s, sys: 431 ms, total: 2min 5s
Wall time: 2min 6s

In [143]:
n = 10
vals = np.zeros(n)
timevals = np.zeros(n)
logspace = np.logspace(2, 5, n)
for i, nsamples in enumerate(logspace):
    start = time.time()
    vals[i] = numerical_integration(a1,a2,a3,a4,S, nsamples=nsamples)
    end = time.time()
    timevals[i] = end - start

In [144]:
plt.subplot(211)
plt.plot(logspace, vals)
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Integral vs Samples')
plt.xlabel('Samples')
plt.ylabel('Integral')

plt.subplot(212)
plt.plot(logspace, timevals)
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Time vs Samples')
plt.xlabel('Samples')
plt.ylabel('Seconds')
plt.gcf().set_size_inches(10,8)
plt.tight_layout()


Simple Monte Carlo

\begin{align*} \mathcal{L}(L_{obs}|\alpha, S, \sigma_{obs}) &= \iint dL\, dM\ P(L_{obs}|L, \sigma_{obs})\, P(L|M, \alpha, S, z)\, P(M|z)\\ &\approx \frac{1}{N_s}\sum_{M \sim P(M|z)}\ \sum_{L \sim P(L|M, \alpha, S, z)} P(L_{obs}|L, \sigma_{obs}) \end{align*}
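
The same estimator structure on a toy hierarchical model with a known answer, as a minimal sketch (the Gaussian densities stand in for the mass prior, the mass-luminosity relation, and the observation likelihood):

    import numpy as np
    from scipy.stats import norm

    np.random.seed(0)
    nsamples = 10 ** 5
    y_obs = 0.0

    # Toy model: x ~ N(0, 1), y | x ~ N(x, 1), y_obs | y ~ N(y, 1).
    # Marginally y_obs ~ N(0, sqrt(3)), so the exact answer is norm(0, np.sqrt(3)).pdf(0) ~= 0.2303.
    x = norm(0, 1).rvs(nsamples)          # analogue of M ~ P(M|z)
    y = norm(x, 1).rvs()                  # analogue of L ~ P(L|M, alpha, S, z)
    estimate = np.mean(norm(y, 1).pdf(y_obs))   # average of P(L_obs|L, sigma_obs)
    print(estimate)  # ~0.2303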

In [3]:
def simple_monte_carlo_integration(a1, a2, a3, a4, S, nsamples=10**6):
    sigma = 0.05
    # sample masses from the prior, then luminosities from the mass-luminosity relation
    masses = prior.fetch(true_z).rvs(nsamples)
    mu_lum = np.exp(a1) * ((masses / a3) ** a2) * ((1 + true_z) ** a4)
    lums = lognorm(S, scale=mu_lum).rvs()
    # average the observation likelihood over the samples
    return np.sum(p1(true_lum_obs, lums, sigma)) / nsamples

In [555]:
%time print simple_monte_carlo_integration(a1,a2,a3,a4,S, nsamples=10**4)


1.69121809651e-06
CPU times: user 4.52 ms, sys: 1.36 ms, total: 5.88 ms
Wall time: 4.69 ms

In [556]:
%time print simple_monte_carlo_integration(a1,a2,a3,a4,S, nsamples=10**6)


1.70034874254e-06
CPU times: user 220 ms, sys: 46.5 ms, total: 266 ms
Wall time: 273 ms

In [557]:
%time print simple_monte_carlo_integration(a1,a2,a3,a4,S, nsamples=10**7)


1.69751972983e-06
CPU times: user 2.05 s, sys: 373 ms, total: 2.42 s
Wall time: 2.48 s

In [17]:
import time

n = 20
vals = np.zeros(n)
logspace = np.logspace(4, 7, n)
for i, nsamples in enumerate(logspace):
    vals[i] = simple_monte_carlo_integration(a1,a2,a3,a4,S, nsamples=nsamples)
    
timevals = np.zeros(n)
logspace = np.logspace(4, 7, n)
for i, nsamples in enumerate(logspace):
    start = time.time()
    simple_monte_carlo_integration(a1,a2,a3,a4,S, nsamples=nsamples)
    end = time.time()
    timevals[i] = end - start

In [20]:
plt.subplot(211)
plt.plot(logspace, vals)
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Integral vs Samples')
plt.xlabel('Samples')
plt.ylabel('Integral')

plt.subplot(212)
plt.plot(logspace, timevals)
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Time vs Samples')
plt.xlabel('Samples')
plt.ylabel('Seconds')
plt.gcf().set_size_inches(10,8)
plt.tight_layout()



In [4]:
def simple_monte_carlo_integration_samples(a1, a2, a3, a4, S, nsamples=10**6):
    # Same sampling as above, but return the samples and their weights for plotting.
    sigma = 0.05
    masses = prior.fetch(true_z).rvs(nsamples)
    mu_lum = np.exp(a1) * ((masses / a3) ** a2) * ((1 + true_z) ** a4)
    lums = lognorm(S, scale=mu_lum).rvs()
    weights = p1(true_lum_obs, lums, sigma)
    return masses, lums, weights

In [145]:
m, l, w = simple_monte_carlo_integration_samples(a1, a2, a3, a4, S, nsamples=10**6)

In [160]:
import matplotlib.colors as colors

scat = plt.scatter(log10(m[:2000]), log10(l[:2000]), c=w[:2000], norm=colors.LogNorm(), cmap='coolwarm', lw=0)
plt.gcf().colorbar(scat)
plt.scatter(log10(true_mass), log10(true_lum_obs), color='black', s=50, label='Halo')
plt.legend()
plt.title('2e3 out of 1e6 Simple Monte Carlo Weights')
plt.xlabel('Mass ($\log_{10}(M)$)')
plt.ylabel('Luminosity ($\log_{10}(L)$)')


Out[160]:
<matplotlib.text.Text at 0x113eab3d0>

Importance Sampling Integration


In [133]:
import time

n = 20
vals = np.zeros(n)
logspace = np.logspace(3, 6, n)
for i, nsamples in enumerate(logspace):
    vals[i] = importance_sampling_integration(a1,a2,a3,a4,S, nsamples=int(nsamples))
    
timevals = np.zeros(n)
logspace = np.logspace(3, 6, n)
for i, nsamples in enumerate(logspace):
    start = time.time()
    importance_sampling_integration(a1,a2,a3,a4,S, nsamples=int(nsamples))
    end = time.time()
    timevals[i] = end - start

In [134]:
plt.subplot(211)
plt.plot(logspace, vals)
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Integral vs Samples')
plt.xlabel('Samples')
plt.ylabel('Integral')

plt.subplot(212)
plt.plot(logspace, timevals)
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Time vs Samples')
plt.xlabel('Samples')
plt.ylabel('Seconds')
plt.gcf().set_size_inches(10,8)
plt.tight_layout()


\begin{align*} \mathcal{L}(L_{obs}|\alpha_1, \alpha_2, \alpha_3, \alpha_4, S, \sigma_{obs}) &= \iint dL\, dM\ \frac{P(L_{obs}|L, \sigma_{obs})\, P(L|M, \alpha, S, z)\, P(M|z)}{Q(M,L)}\, Q(M,L)\\ &\approx \frac{1}{N_s}\sum_{(M,L) \sim Q}\frac{P(L_{obs}|L, \sigma_{obs})\, P(L|M, \alpha, S, z)\, P(M|z)}{Q(L|L_{obs}, \sigma_{obs})\, Q(M|L,\alpha, S, z, c)} \end{align*}
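
For reference, the same importance-weighting scheme on a one-dimensional toy integral with a known answer, as a minimal sketch (the proposal here is an arbitrary choice):

    import numpy as np
    from scipy.stats import norm

    np.random.seed(0)
    nsamples = 10 ** 5

    # Target (unnormalized): p_tilde(x) = exp(-x**2 / 2); its integral is sqrt(2*pi) ~= 2.5066.
    def p_tilde(x):
        return np.exp(-0.5 * x ** 2)

    # Proposal Q: a wider normal that covers the target's support.
    q = norm(0, 2)
    x = q.rvs(nsamples)

    # (1/N_s) * sum of target / proposal, evaluated at the proposal draws
    estimate = np.mean(p_tilde(x) / q.pdf(x))
    print(estimate)  # ~2.5066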

In [132]:
def importance_sampling_integration(a1, a2, a3, a4, S, nsamples=10**6):
    nsamples = min(nsamples, len(true_lum_obs_collection)-1)
    sigma = 0.05
    # width of the reverse (mass-given-luminosity) proposal q2
    rev_S = 5.6578015811698101 * S
    # draw proposal samples: L ~ Q(L|L_obs), then M ~ Q(M|L)
    permuted_lum_obs = np.random.permutation(true_lum_obs_collection)[:nsamples]
    lums = lognorm(sigma, scale=permuted_lum_obs).rvs()
    mu_mass = a3 * (lums / (np.exp(a1) * (1 + true_z) ** a4)) ** (1 / a2)
    masses = lognorm(rev_S, scale=mu_mass).rvs()
    # average of p1*p2*p3 / (q1*q2) over the proposal samples
    integral = np.sum((p1(permuted_lum_obs, lums, sigma) * \
            p2(lums, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z)) / \
                (q1(lums, permuted_lum_obs, sigma) * q2(masses, lums, a1, a2, a3, a4, rev_S, true_z))) /\
            len(lums)
    return integral

In [560]:
%time print importance_sampling_integration(a1,a2,a3,a4,S, nsamples=10**2)


0.000131549976458
CPU times: user 6.51 ms, sys: 1.22 ms, total: 7.73 ms
Wall time: 6.62 ms

In [559]:
%time print importance_sampling_integration(a1,a2,a3,a4,S, nsamples=10**3)


0.000136712311576
CPU times: user 7.64 ms, sys: 2.45 ms, total: 10.1 ms
Wall time: 8.45 ms

In [121]:
%time print importance_sampling_integration(a1,a2,a3,a4,S, nsamples=10**4)


0.000137747326398
CPU times: user 12.3 ms, sys: 1.82 ms, total: 14.1 ms
Wall time: 12.7 ms

In [122]:
%time print importance_sampling_integration(a1,a2,a3,a4,S, nsamples=10**7)


0.000133224486982
CPU times: user 80.2 ms, sys: 7.46 ms, total: 87.6 ms
Wall time: 86.1 ms

In [123]:
%time print importance_sampling_integration(a1,a2,a3,a4,S, nsamples=10**8)


0.000133246748158
CPU times: user 89.5 ms, sys: 5.96 ms, total: 95.4 ms
Wall time: 95.7 ms

In [13]:
def importance_sampling_integration_samples(a1, a2, a3, a4, S, nsamples=10**6):
    # Same proposal as importance_sampling_integration, but return the samples and their weights.
    nsamples = min(nsamples, len(true_lum_obs_collection)-1)
    sigma = 0.05
    rev_S = 5.6578015811698101 * S
    permuted_lum_obs = np.random.permutation(true_lum_obs_collection)[:nsamples]
    lums = lognorm(sigma, scale=permuted_lum_obs).rvs()
    mu_mass = a3 * (lums / (np.exp(a1) * (1 + true_z) ** a4)) ** (1 / a2)
    masses = lognorm(rev_S, scale=mu_mass).rvs()
    weights = (p1(permuted_lum_obs, lums, sigma) * \
            p2(lums, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z)) / \
                (q1(lums, permuted_lum_obs, sigma) * q2(masses, lums, a1, a2, a3, a4, rev_S, true_z))
    return masses, lums, weights

In [161]:
m, l, w = importance_sampling_integration_samples(a1, a2, a3, a4, S, nsamples=10**6)

In [163]:
import matplotlib.colors as colors
scat = plt.scatter(log10(m[:1000]), log10(l[:1000]), c=w[:1000], norm=colors.LogNorm(), cmap='coolwarm', lw=0)
plt.gcf().colorbar(scat)
plt.scatter(log10(true_mass), log10(true_lum_obs), color='black', s=50, label='Halo')
plt.legend()
plt.title('1e3 out of 2e5 Importance Sampling Weights')
plt.xlabel('Mass ($\log_{10}(M)$)')
plt.ylabel('Luminosity ($\log_{10}(L)$)')


Out[163]:
<matplotlib.text.Text at 0x1135f0cd0>

In [18]:
import matplotlib.colors as colors
ind = log10(m) > 10.2
scat = plt.scatter(log10(m[ind][:500]), log10(l[ind][:500]), c=w[ind][:500], norm=colors.LogNorm(), cmap='coolwarm', lw=0)
plt.gcf().colorbar(scat)
plt.scatter(log10(true_mass), log10(true_lum_obs), color='black', s=50, label='True Observed')


Out[18]:
<matplotlib.collections.PathCollection at 0x1092696d0>

In [20]:
prior.fetch(2.0).pdf(10**10.2)


Out[20]:
5.01830539050561e-11

In [22]:
prior.fetch(2.0).pdf(10**12)


Out[22]:
9.3433202778941481e-15

In [23]:
def importance_sampling_integration_samples2(a1, a2, a3, a4, S, nsamples=10**6):
    # As above, but return the p2 (numerator) and q2 (denominator) factors separately for diagnostics.
    nsamples = min(nsamples, len(true_lum_obs_collection)-1)
    sigma = 0.05
    rev_S = 5.6578015811698101 * S
    permuted_lum_obs = np.random.permutation(true_lum_obs_collection)[:nsamples]
    lums = lognorm(sigma, scale=permuted_lum_obs).rvs()
    mu_mass = a3 * (lums / (np.exp(a1) * (1 + true_z) ** a4)) ** (1 / a2)
    masses = lognorm(rev_S, scale=mu_mass).rvs()
    return masses, lums, p2(lums, masses, a1, a2, a3, a4, S, true_z), q2(masses, lums, a1, a2, a3, a4, rev_S, true_z)

In [110]:
m, l, wn, wd = importance_sampling_integration_samples2(a1, a2, a3, a4, S, nsamples=10**6)

In [111]:
print np.sum(log10(m) < 10.2)
print np.sum(log10(m) > 10.2)


28753
87165

In [25]:
import matplotlib.colors as colors
ind = log10(m) > 10.2
scat = plt.scatter(log10(m[ind][:500]), log10(l[ind][:500]), c=wn[ind][:500], norm=colors.LogNorm(), cmap='coolwarm', lw=0)
plt.gcf().colorbar(scat)
plt.scatter(log10(true_mass), log10(true_lum_obs), color='black', s=50, label='True Observed')


Out[25]:
<matplotlib.collections.PathCollection at 0x10a5fab50>

In [26]:
ind = log10(m) > 10.2
scat = plt.scatter(log10(m[ind][:500]), log10(l[ind][:500]), c=wd[ind][:500], norm=colors.LogNorm(), cmap='coolwarm', lw=0)
plt.gcf().colorbar(scat)
plt.scatter(log10(true_mass), log10(true_lum_obs), color='black', s=50, label='True Observed')


Out[26]:
<matplotlib.collections.PathCollection at 0x10a7c78d0>

In [ ]:


In [29]:
from scipy.optimize import minimize

def func(prms):
    # negative importance-sampling estimate as a function of (a1, a2, a4, S), with a3 held fixed
    a1, a2, a4, S = prms
    nsamples = len(true_lum_obs_collection)-1
    sigma = 0.05
    rev_S = 5.6578015811698101 * S
    permuted_lum_obs = np.random.permutation(true_lum_obs_collection)[:nsamples]
    lums = lognorm(sigma, scale=permuted_lum_obs).rvs()
    mu_mass = a3 * (lums / (np.exp(a1) * (1 + true_z) ** a4)) ** (1 / a2)
    masses = lognorm(rev_S, scale=mu_mass).rvs()
    integral = np.sum((p1(permuted_lum_obs, lums, sigma) * \
            p2(lums, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z)) / \
                (q1(lums, permuted_lum_obs, sigma) * q2(masses, lums, a1, a2, a3, a4, rev_S, true_z))) /\
            len(lums)
    return -integral

x0 = [a1,a2,a4,S]

obj = minimize(func, x0, method='BFGS')


/usr/local/lib/python2.7/site-packages/scipy/optimize/optimize.py:964: RuntimeWarning: divide by zero encountered in double_scalars
  rhok = 1.0 / (numpy.dot(yk, sk))

In [30]:
obj


Out[30]:
      fun: -7.4760615596372037e-05
 hess_inv: array([[  3.48308698e-07,  -2.07882698e-07,  -7.73768703e-08,
         -2.54135667e-07],
       [ -2.07882697e-07,   1.24347463e-07,   4.59867788e-08,
          1.50790041e-07],
       [ -7.73768700e-08,   4.59867783e-08,   1.73287045e-08,
          5.70702805e-08],
       [ -2.54135669e-07,   1.50790043e-07,   5.70702803e-08,
          1.88342460e-07]])
      jac: array([-28.30590744, -24.68470101,  41.00275193, -11.44276151])
  message: 'Desired error not necessarily achieved due to precision loss.'
     nfev: 342
      nit: 6
     njev: 52
   status: 2
  success: False
        x: array([ 10.74781344,   0.36260134,   1.1587271 ,   0.15702342])

In [40]:
S_space = S * np.linspace(0.1,2, 10)
vals = []
for S_test in S_space:
    vals.append(importance_sampling_integration(a1,a2,a3,a4,S_test,nsamples=10**5))
plt.plot(S_space, vals)


Out[40]:
[<matplotlib.lines.Line2D at 0x109ecb690>]

In [129]:
# keep only the weights; the function above returns (masses, lums, weights)
_, _, samples = importance_sampling_integration_samples(a1,a2,a3,a4,S, nsamples=10**5)
plt.title('Distribution of Sample Weights')
plt.ylabel('Density')
plt.xlabel('$\log_{10}$ (Weight)')
plt.hist(np.log(samples)/np.log(10), alpha=0.6, bins=100, normed=True);



In [132]:
samples.max()


Out[132]:
0.0013192080292059318

In [140]:
nsamples = min(nsamples, len(true_lum_obs_collection)-1)
sigma = 0.05
rev_S = 5.6578015811698101 * S
permuted_lum_obs = np.random.permutation(true_lum_obs_collection)[:nsamples]
lums = lognorm(sigma, scale=permuted_lum_obs).rvs()
mu_mass = a3 * (lums / (np.exp(a1) * (1 + true_z) ** a4)) ** (1 / a2)
masses = lognorm(rev_S, scale=mu_mass).rvs()
tp1 = p1(permuted_lum_obs, lums, sigma)
tp2 = p2(lums, masses, a1, a2, a3, a4, S, true_z)
tp3 = p3(masses, true_z)
tq1 = q1(lums, permuted_lum_obs, sigma)
tq2 = q2(masses, lums, a1, a2, a3, a4, rev_S, true_z)

In [145]:
plt.hist(log10((tp2 * tp3) / tq2), bins=20, alpha=0.5);



In [107]:
np.argmax(tp2 * tp3 / tq2)


Out[107]:
36661

In [110]:
print tp2[36661]
print tp3[36661]
print tq2[36661]


9.02349891243e-06
4.59164464371e-11
3.57647859837e-25

In [7]:
def log10(a):
    # base-10 logarithm helper used throughout the notebook (equivalent to np.log10)
    return np.log(a) / np.log(10)

In [119]:
log10(mu_mass[36661])


Out[119]:
9.6630631262198872

In [117]:
log10(lums[36661])


Out[117]:
3.5262049285569601

In [113]:
log10(masses[36661])


Out[113]:
10.222765044465945

In [38]:
plt.hist(np.log(masses)/np.log(10), bins=20)


Out[38]:
(array([  3.30000000e+01,   2.62000000e+02,   8.21000000e+02,
          2.16700000e+03,   4.30800000e+03,   7.13600000e+03,
          1.00680000e+04,   1.29990000e+04,   1.57350000e+04,
          1.74310000e+04,   1.70490000e+04,   1.30900000e+04,
          7.97200000e+03,   4.01100000e+03,   1.81200000e+03,
          7.08000000e+02,   2.25000000e+02,   6.20000000e+01,
          2.30000000e+01,   6.00000000e+00]),
 array([  8.13542481,   8.42363065,   8.71183649,   9.00004233,
          9.28824817,   9.57645401,   9.86465985,  10.15286569,
         10.44107153,  10.72927737,  11.01748321,  11.30568905,
         11.59389489,  11.88210074,  12.17030658,  12.45851242,
         12.74671826,  13.0349241 ,  13.32312994,  13.61133578,  13.89954162]),
 <a list of 20 Patch objects>)

In [58]:
plt.scatter(10.50, 8.49, color='red')
plt.scatter(10.50, 8.49, color='green')
plt.scatter(np.log(masses[:200]) / np.log(10), np.log(lums[:200]) / np.log(10), alpha=0.3)


Out[58]:
<matplotlib.collections.PathCollection at 0x10ba43890>

In [50]:
np.log(masses[6]) / np.log(10)


Out[50]:
10.50309270355808

In [51]:
np.log(lums[6])


Out[51]:
8.4971594894965126

In [59]:
permuted_lum_obs[6]


Out[59]:
5138.5754516699999

In [65]:
permuted_lum_obs = np.random.permutation(true_lum_obs_collection)[:nsamples]
lums = lognorm(sigma, scale=permuted_lum_obs).rvs()

In [66]:
plt.hist(np.log(lums) / np.log(10), bins=20)


Out[66]:
(array([  1.40000000e+01,   2.25000000e+02,   8.28000000e+02,
          2.06000000e+03,   4.20400000e+03,   7.03500000e+03,
          9.88100000e+03,   1.28850000e+04,   1.56210000e+04,
          1.78170000e+04,   1.72810000e+04,   1.33680000e+04,
          8.02000000e+03,   3.96600000e+03,   1.72300000e+03,
          6.86000000e+02,   2.23000000e+02,   5.60000000e+01,
          2.00000000e+01,   5.00000000e+00]),
 array([ 2.97311334,  3.07722739,  3.18134144,  3.28545549,  3.38956954,
         3.49368359,  3.59779764,  3.70191169,  3.80602574,  3.91013979,
         4.01425383,  4.11836788,  4.22248193,  4.32659598,  4.43071003,
         4.53482408,  4.63893813,  4.74305218,  4.84716623,  4.95128028,
         5.05539432]),
 <a list of 20 Patch objects>)

In [68]:
lums.max()


Out[68]:
113604.18340356463

In [69]:
np.log(lums.max()) / np.log(10)


Out[69]:
5.0553943242942907

Optimization
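
The cell below follows a standard pattern: optimize multiplicative scale factors on the fiducial hyperparameters (so every coordinate starts near 1 and is comparably scaled) and negate the integral estimate so that a minimizer maximizes the likelihood. A minimal sketch of that pattern with an illustrative stand-in objective whose optimum sits at scale factors of exactly 1:

    import numpy as np
    from scipy.optimize import minimize

    def neg_objective(scale_factors):
        # stand-in for -importance_sampling_integration(*(scale_factors * ans));
        # smallest (i.e. likelihood largest) when every scale factor equals 1
        return np.sum((np.asarray(scale_factors) - 1.0) ** 2)

    x0 = np.array([1.0, 0.97, 1.01, 1.03, 1.2])
    result = minimize(neg_objective, x0, method='Nelder-Mead')
    print(result.x)  # ~[1, 1, 1, 1, 1]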


In [198]:
from scipy.optimize import minimize

ans = np.array([a1,a2,a3,a4,S])
def func(arr):
    return - importance_sampling_integration(*(arr * ans))

x0 = np.array([1,0.97,1.01,1.03,1.2])
minimize(func, x0, method='Nelder-Mead')


Out[198]:
      fun: -0.00013042002983643667
 hess_inv: array([[ 0.68012823, -0.20927487, -0.13289837, -0.26835587, -0.29565279],
       [-0.20927487,  0.87743721, -0.07796555, -0.17923977, -0.18053777],
       [-0.13289837, -0.07796555,  0.95040535, -0.11379082, -0.11476855],
       [-0.26835587, -0.17923977, -0.11379082,  0.77580109, -0.25133255],
       [-0.29565279, -0.18053777, -0.11476855, -0.25133255,  0.73831085]])
      jac: array([-43.99549979, -16.55729911, -10.6278869 , -40.03504741, -29.68355597])
  message: 'Desired error not necessarily achieved due to precision loss.'
     nfev: 201
      nit: 1
     njev: 27
   status: 2
  success: False
        x: array([ 1.  ,  0.97,  1.01,  1.03,  1.2 ])

In [183]:
[a1,a2,a3,a4,S]


Out[183]:
[10.747809151611289,
 0.36260141487530501,
 235000000000000.0,
 1.1587242790463443,
 0.1570168038792813]

Plots


In [178]:
np.append([2,5], np.logspace(1, 5, n))


Out[178]:
array([  2.00000000e+00,   5.00000000e+00,   1.00000000e+01,
         1.62377674e+01,   2.63665090e+01,   4.28133240e+01,
         6.95192796e+01,   1.12883789e+02,   1.83298071e+02,
         2.97635144e+02,   4.83293024e+02,   7.84759970e+02,
         1.27427499e+03,   2.06913808e+03,   3.35981829e+03,
         5.45559478e+03,   8.85866790e+03,   1.43844989e+04,
         2.33572147e+04,   3.79269019e+04,   6.15848211e+04,
         1.00000000e+05])

In [183]:
n = 15
vals = np.zeros((n+1, 3))
timevals = np.zeros((n+1, 3))
logspace = np.append([5], np.logspace(1, 5, n))
for i, nsamples in enumerate(logspace):
    nsamples = int(nsamples)
    start = time.time()
    vals[i][0] = numerical_integration(a1,a2,a3,a4,S, nsamples=nsamples)
    end = time.time()
    timevals[i][0] = end - start
    
    start = time.time()
    vals[i][1] = simple_monte_carlo_integration(a1,a2,a3,a4,S, nsamples=nsamples)
    end = time.time()
    timevals[i][1] = end - start
    
    start = time.time()
    vals[i][2] = importance_sampling_integration(a1,a2,a3,a4,S, nsamples=nsamples)
    end = time.time()
    timevals[i][2] = end - start

In [185]:
plt.subplot(211)
plt.plot(logspace, vals[:,0], label='num')
plt.plot(logspace, vals[:,1], label='smc')
plt.plot(logspace, vals[:,2], label='is')
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Integral vs Samples')
plt.xlabel('Samples')
plt.ylabel('Integral')
plt.xlim([5,10**5])
plt.legend(loc=4)

plt.subplot(212)
plt.plot(logspace, timevals[:,0], label='num')
plt.plot(logspace, timevals[:,1], label='smc')
plt.plot(logspace, timevals[:,2], label='is')
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Time vs Samples')
plt.xlabel('Samples')
plt.ylabel('Seconds')
plt.legend(loc=2)
plt.xlim([5,10**5])

plt.gcf().set_size_inches(10,8)
plt.tight_layout();



In [186]:
def numerical_integration_linear(a1, a2, a3, a4, S, nsamples=10**3):
    # Same as numerical_integration, but with a linearly spaced luminosity grid, for comparison.
    masses = midpoints(prior.fetch(true_z).mass[1:])
    delta_masses = np.diff(prior.fetch(true_z).mass[1:])
    lums_tmp = np.linspace(np.min(data.lum_obs), np.max(data.lum_obs), nsamples)
    lums = midpoints(lums_tmp)
    delta_lums = np.diff(lums_tmp)
    sigma = 0.05
    integral = 0
    for i,lum in enumerate(lums):
        integral += np.sum(delta_masses * delta_lums[i] * p1(true_lum_obs, lum, sigma) * \
            p2(lum, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z))
    return integral

In [188]:
n = 8
vals = np.zeros((n, 2))
timevals = np.zeros((n, 2))
logspace = np.logspace(1, 5, n)
for i, nsamples in enumerate(logspace):
    nsamples = int(nsamples)
    start = time.time()
    vals[i][0] = numerical_integration(a1,a2,a3,a4,S, nsamples=nsamples)
    end = time.time()
    timevals[i][0] = end - start
    start = time.time()
    vals[i][1] = numerical_integration_linear(a1,a2,a3,a4,S, nsamples=nsamples)
    end = time.time()
    timevals[i][1] = end - start

In [191]:
plt.plot(logspace, vals[:,0], label='log')
plt.plot(logspace, vals[:,1], label='lin')
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Numerical Integration Integral vs Samples')
plt.xlabel('Samples')
plt.ylabel('Integral')
plt.legend(loc=4)


Out[191]:
<matplotlib.legend.Legend at 0x11367c710>