After fixing a bug that surfaced in the previous single integral comparisons we seek to retest our method.
In [197]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from matplotlib import rc
from bigmali.grid import Grid
from bigmali.likelihood import BiasedLikelihood
from bigmali.prior import TinkerPrior
from bigmali.hyperparameter import get
from scipy.stats import lognorm
from time import time
rc('text', usetex=True)
data = pd.read_csv('/Users/user/Code/PanglossNotebooks/MassLuminosityProject/mock_data.csv')
data.ix[0]
true_mass = 1.005246e+11
true_z = 2.077090e+00
true_lum = 1.377704e+04
true_lum_obs = 1.374879e+04
true_lum_obs_collection = data.lum_obs
prior = TinkerPrior(Grid())
def p1(lobs, lum, sigma):
return fast_lognormal(lum, sigma, lobs)
def p2(lum, mass, a1, a2, a3, a4, S, z):
mu_lum = np.exp(a1) * ((mass / a3) ** a2) * ((1 + z) ** (a4))
return fast_lognormal(mu_lum, S, lum)
def p3(mass, z):
return prior.fetch(z).pdf(mass)
def q1(lum, lobs, sigma):
return fast_lognormal(lobs, sigma, lum)
def q2(mass, lum, a1, a2, a3, a4, S, z):
mu_mass = a3 * (lum / (np.exp(a1) * (1 + z) ** a4)) ** (1 / a2)
return fast_lognormal(mu_mass, S, mass)
def midpoints(arr):
n = len(arr)-1
ret = np.zeros(n)
for i in xrange(n):
ret[i] = (arr[i+1] + arr[i]) / 2.
return ret
def fast_lognormal(mu, sigma, x):
return (1/(x * sigma * np.sqrt(2 * np.pi))) * np.exp(- 0.5 * (np.log(x) - np.log(mu)) ** 2 / sigma ** 2)
a1,a2,a3,a4,S = get()
In [198]:
p2(space, true_mass, a1, a2, a3, a4, S, true_z)
Out[198]:
In [100]:
fast_lognormal(1,1,1)
Out[100]:
In [101]:
fast_lognormal(1,2,1)
Out[101]:
In [102]:
fast_lognormal(1,np.array([1,2]),1)
Out[102]:
Sanity check that distributions we use all integrate to 1.
In [196]:
from scipy.integrate import trapz
space = np.linspace(true_lum_obs_collection.min(), true_lum_obs_collection.max(), 10**5)
pp1 = p1(space,true_lum, 0.05)
print trapz(pp1, space)
space = np.linspace(true_lum_obs_collection.min(), true_lum_obs_collection.max(), 10**5)
pp2 = p2(space, true_mass, a1, a2, a3, a4, S, true_z)
print trapz(pp2, space)
space = np.linspace(data.mass.min(), data.mass.max(), 10**4)
pp3 = p3(space, true_z)
print trapz(pp3, space)
space = np.linspace(true_lum_obs_collection.min(), true_lum_obs_collection.max(), 10**5)
qq1 = q1(true_lum, space, 0.05)
print trapz(qq1, space)
space = np.linspace(data.mass.min(), data.mass.max(), 10**4)
qq2 = q2(space, true_lum, a1, a2, a3, a4, S, true_z)
print trapz(qq2, space)
Check lognorm and fast_lognormal behaves as expected
In [121]:
r = lognorm(1,scale=2).rvs(size=10**5)
plt.hist(r, normed=True, bins=1000, alpha=0.5)
space = np.linspace(0.1,5,100)
pspace = lognorm(1,scale=2).pdf(space)
plt.plot(space, pspace)
fspace = fast_lognormal(2,1,space)
plt.plot(space, fspace)
Out[121]:
In [122]:
space = np.linspace(0.1,5,100)
pspace = lognorm(1,scale=2).pdf(space)
plt.plot(space, pspace)
fspace = fast_lognormal(2,1,space)
plt.plot(space, fspace)
Out[122]:
In [59]:
print pp1
In [58]:
pp1
Out[58]:
In [47]:
plt.plot(space, pp1)
Out[47]:
In [155]:
def numerical_integration_grid(a1, a2, a3, a4, S):
nsamples = 100
masses = midpoints(prior.fetch(true_z).mass[1:-1])
delta_masses = np.diff(prior.fetch(true_z).mass[1:-1])
lums_tmp = np.logspace(log10(np.min(data.lum_obs)), log10(np.max(data.lum_obs)), nsamples)
lums = midpoints(lums_tmp)
delta_lums = np.diff(lums_tmp)
sigma = 0.05
integral = 0
ans = np.zeros((len(lums)*len(masses),3))
count = 0
for i, lum in enumerate(lums):
for j, mass in enumerate(masses):
ans[count,0] = lum
ans[count,1] = mass
ans[count,2] = np.sum(delta_masses[j] * delta_lums[i] * p1(true_lum_obs, lum, sigma) * \
p2(lum, mass, a1, a2, a3, a4, S, true_z) * p3(mass, true_z))
count += 1
return ans
In [156]:
grid = numerical_integration_grid(a1,a2,a3,a4,S)
In [159]:
import matplotlib.colors as colors
step = 1
epsilon = 1e-20
gridv = np.maximum(grid[::step,2], epsilon)
x = log10(grid[::step,0])
y = log10(grid[::step,1])
scat = plt.scatter(y, x, marker='s', s=40, alpha=0.8, c=gridv, norm=colors.LogNorm(), cmap='coolwarm', lw=0);
plt.gcf().set_size_inches((6,6))
plt.ylim([x.min(),x.max()])
plt.xlim([y.min(),y.max()])
plt.scatter(log10(true_mass), log10(true_lum_obs), color='black', s=50, label='Halo')
# plt.scatter(x[22723], y[22723], color='gold', s=50, label='MaximumWeight')
plt.legend()
plt.gcf().colorbar(scat)
plt.title('Numerical Integration Weights')
plt.xlabel('Mass $(\log_{10}(M))$')
plt.ylabel('Luminosity $(\log_{10}(L))$')
Out[159]:
In [469]:
nsamples = np.logspace(1, 5, 10)
times = []
res = []
for ns in nsamples:
start = time()
res.append(numerical_integration(a1,a2,a3,a4,S,nsamples=ns))
end = time()
times.append(end-start)
In [480]:
plt.subplot(211)
plt.title('Result')
plt.plot(nsamples, res)
plt.ylabel('Integral Value')
plt.xlabel('Luminosity Samples')
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.subplot(212)
plt.title('Runtime')
plt.plot(nsamples, times)
plt.ylabel('Runtime (seconds)')
plt.xlabel('Luminosity Samples')
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.tight_layout()
plt.gcf().set_size_inches((8,4))
Note: stability even with different hyperparameters.
In [510]:
%time numerical_integration(a1,a2,a3,a4,S_test,nsamples=100)
Out[510]:
In [38]:
S_space = S * np.linspace(1,10, 10)
vals = []
for S_test in S_space:
vals.append(numerical_integration(a1,a2,a3,a4,S_test,nsamples=100))
plt.plot(S_space, vals)
Out[38]:
In [512]:
a1_space = a1 * np.linspace(0.5,2, 10)
vals = []
for a1_test in a1_space:
vals.append(numerical_integration(a1_test,a2,a3,a4,S,nsamples=100))
plt.plot(a1_space, vals)
Out[512]:
In [517]:
a2_space = a2 * np.linspace(0.5,2, 10)
vals = []
for a2_test in a2_space:
vals.append(numerical_integration(a1,a2_test,a3,a4,S,nsamples=100))
plt.plot(a2_space, vals)
Out[517]:
In [515]:
a4_space = a4 * np.linspace(0.5,2, 10)
vals = []
for a4_test in a4_space:
vals.append(numerical_integration(a1,a2,a3,a4_test,S,nsamples=100))
plt.plot(a4_space, vals)
Out[515]:
In [ ]:
a1_space = a1 * np.linspace(0.5,2, 10)
vals = []
for a1_test in a1_space:
vals.append(numerical_integration(a1_test,a2,a3,a4,S,nsamples=100))
plt.plot(a1_space, vals)
In [37]:
def numerical_integration(a1, a2, a3, a4, S, nsamples=10**3):
masses = midpoints(prior.fetch(true_z).mass[1:])
delta_masses = np.diff(prior.fetch(true_z).mass[1:])
lums_tmp = np.logspace(log10(np.min(data.lum_obs)), log10(np.max(data.lum_obs)), nsamples)
lums = midpoints(lums_tmp)
delta_lums = np.diff(lums_tmp)
sigma = 0.05
integral = 0
for i,lum in enumerate(lums):
integral += np.sum(delta_masses * delta_lums[i] * p1(true_lum_obs, lum, sigma) * \
p2(lum, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z))
return integral
In [536]:
nsamples = 100
masses = midpoints(prior.fetch(true_z).mass[1:])
delta_masses = np.diff(prior.fetch(true_z).mass[1:])
lums_tmp = np.logspace(log10(np.min(data.lum_obs)), log10(np.max(data.lum_obs)), nsamples)
lums = midpoints(lums_tmp)
delta_lums = np.diff(lums_tmp)
sigma = 0.05
def func(params):
a1,a2,a4,S = params
integral = 0
for i,lum in enumerate(lums):
integral += np.sum(delta_masses * delta_lums[i] * p1(true_lum_obs, lum, sigma) * \
p2(lum, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z))
return -integral
x0 = [a1,a2,a4,S*5]
obj = minimize(func, x0, method='BFGS')
In [546]:
obj['x']
Out[546]:
In [430]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**3)
In [427]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**2)
In [428]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**4)
In [156]:
def numerical_integration(a1, a2, a3, a4, S, nsamples=10**4):
masses = midpoints(prior.fetch(true_z).mass)
delta_masses = np.diff(prior.fetch(true_z).mass)
lums = np.linspace(np.min(data.lum_obs), np.max(data.lum_obs), nsamples)
print nsamples
delta_lum = lums[1] - lums[0]
print delta_lum
sigma = 0.05
integral = 0
for lum in lums:
integral += np.sum(delta_masses * delta_lum * p1(true_lum_obs, lum, sigma) * \
p2(lum, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z))
return integral
In [157]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**5)
In [158]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**4)
In [155]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**3)
In [151]:
%time print numerical_integration(a1,a2,a3,a4,S, nsamples=10**6)
In [143]:
n = 10
vals = np.zeros(n)
timevals = np.zeros(n)
logspace = np.logspace(2, 5, n)
for i, nsamples in enumerate(logspace):
start = time.time()
vals[i] = numerical_integration(a1,a2,a3,a4,S, nsamples=nsamples)
end = time.time()
timevals[i] = end - start
In [144]:
plt.subplot(211)
plt.plot(logspace, vals)
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Integral vs Samples')
plt.xlabel('Samples')
plt.ylabel('Integral')
plt.subplot(212)
plt.plot(logspace, timevals)
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Time vs Samples')
plt.xlabel('Samples')
plt.ylabel('Seconds')
plt.gcf().set_size_inches(10,8)
plt.tight_layout()
In [3]:
def simple_monte_carlo_integration(a1, a2, a3, a4, S, nsamples=10**6):
sigma = 0.05
masses = prior.fetch(true_z).rvs(nsamples)
mu_lum = np.exp(a1) * ((masses / a3) ** a2) * ((1 + true_z) ** (a4))
lums = lognorm(S, scale=mu_lum).rvs()
return np.sum(p1(true_lum_obs, lums, sigma)) / (nsamples)
In [555]:
%time print simple_monte_carlo_integration(a1,a2,a3,a4,S, nsamples=10**4)
In [556]:
%time print simple_monte_carlo_integration(a1,a2,a3,a4,S, nsamples=10**6)
In [557]:
%time print simple_monte_carlo_integration(a1,a2,a3,a4,S, nsamples=10**7)
In [17]:
import time
n = 20
vals = np.zeros(n)
logspace = np.logspace(4, 7, n)
for i, nsamples in enumerate(logspace):
vals[i] = simple_monte_carlo_integration(a1,a2,a3,a4,S, nsamples=nsamples)
timevals = np.zeros(n)
logspace = np.logspace(4, 7, n)
for i, nsamples in enumerate(logspace):
start = time.time()
simple_monte_carlo_integration(a1,a2,a3,a4,S, nsamples=nsamples)
end = time.time()
timevals[i] = end - start
In [20]:
plt.subplot(211)
plt.plot(logspace, vals)
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Integral vs Samples')
plt.xlabel('Samples')
plt.ylabel('Integral')
plt.subplot(212)
plt.plot(logspace, timevals)
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Time vs Samples')
plt.xlabel('Samples')
plt.ylabel('Seconds')
plt.gcf().set_size_inches(10,8)
plt.tight_layout()
In [4]:
def simple_monte_carlo_integration_samples(a1, a2, a3, a4, S, nsamples=10**6):
sigma = 0.05
masses = prior.fetch(true_z).rvs(nsamples)
mu_lum = np.exp(a1) * ((masses / a3) ** a2) * ((1 + true_z) ** (a4))
lums = lognorm(S, scale=mu_lum).rvs()
weights = p1(true_lum_obs, lums, sigma)
return masses, lums, weights
In [145]:
m, l, w = simple_monte_carlo_integration_samples(a1, a2, a3, a4, S, nsamples=10**6)
In [160]:
import matplotlib.colors as colors
scat = plt.scatter(log10(m[:2000]), log10(l[:2000]), c=w[:2000], norm=colors.LogNorm(), cmap='coolwarm', lw=0)
plt.gcf().colorbar(scat)
plt.scatter(log10(true_mass), log10(true_lum_obs), color='black', s=50, label='Halo')
plt.legend()
plt.title('1e3 out of 2e5 Simple Monte Carlo Weights')
plt.xlabel('Mass ($\log_{10}(M)$)')
plt.ylabel('Luminosity ($\log_{10}(L)$)')
# step = 1
# epsilon = 1e-20
# gridv = np.maximum(grid[::step,2], epsilon)
# x = log10(grid[::step,0])
# y = log10(grid[::step,1])
# scat = plt.scatter(x, y, marker='s', s=40, alpha=0.8, c=gridv, norm=colors.LogNorm(), cmap='coolwarm', lw=0);
# plt.gcf().set_size_inches((6,6))
# plt.xlim([x.min(),x.max()])
# plt.ylim([y.min(),y.max()])
# plt.scatter(log10(true_lum_obs), log10(true_mass), color='black', s=50, label='True Observed')
# plt.scatter(x[22723], y[22723], color='gold', s=50, label='MaximumWeight')
# plt.legend()
# plt.gcf().colorbar(scat)
# plt.title('Numerical Integration Weights')
# plt.ylabel('Mass $(\log_{10}(M))$')
# plt.xlabel('Luminosity $(\log_{10}(L))$')
Out[160]:
In [133]:
import time
n = 20
vals = np.zeros(n)
logspace = np.logspace(3, 6, n)
for i, nsamples in enumerate(logspace):
vals[i] = importance_sampling_integration(a1,a2,a3,a4,S, nsamples=int(nsamples))
timevals = np.zeros(n)
logspace = np.logspace(3, 6, n)
for i, nsamples in enumerate(logspace):
start = time.time()
importance_sampling_integration(a1,a2,a3,a4,S, nsamples=int(nsamples))
end = time.time()
timevals[i] = end - start
In [134]:
plt.subplot(211)
plt.plot(logspace, vals)
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Integral vs Samples')
plt.xlabel('Samples')
plt.ylabel('Integral')
plt.subplot(212)
plt.plot(logspace, timevals)
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Time vs Samples')
plt.xlabel('Samples')
plt.ylabel('Seconds')
plt.gcf().set_size_inches(10,8)
plt.tight_layout()
In [132]:
def importance_sampling_integration(a1, a2, a3, a4, S, nsamples=10**6):
nsamples = min(nsamples, len(true_lum_obs_collection)-1)
sigma = 0.05
rev_S = 5.6578015811698101 * S
permuted_lum_obs = np.random.permutation(true_lum_obs_collection)[:nsamples]
lums = lognorm(sigma, scale=permuted_lum_obs).rvs()
mu_mass = a3 * (lums / (np.exp(a1) * (1 + true_z) ** a4)) ** (1 / a2)
masses = lognorm(rev_S, scale=mu_mass).rvs()
integral = np.sum((p1(permuted_lum_obs, lums, sigma) * \
p2(lums, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z)) / \
(q1(lums, permuted_lum_obs, sigma) * q2(masses, lums, a1, a2, a3, a4, rev_S, true_z))) /\
len(lums)
return integral
In [560]:
%time print importance_sampling_integration(a1,a2,a3,a4,S, nsamples=10**2)
In [559]:
%time print importance_sampling_integration(a1,a2,a3,a4,S, nsamples=10**3)
In [121]:
%time print importance_sampling_integration(a1,a2,a3,a4,S, nsamples=10**4)
In [122]:
%time print importance_sampling_integration(a1,a2,a3,a4,S, nsamples=10**7)
In [123]:
%time print importance_sampling_integration(a1,a2,a3,a4,S, nsamples=10**8)
In [13]:
def importance_sampling_integration_samples(a1, a2, a3, a4, S, nsamples=10**6):
nsamples = min(nsamples, len(true_lum_obs_collection)-1)
sigma = 0.05
rev_S = 5.6578015811698101 * S
permuted_lum_obs = np.random.permutation(true_lum_obs_collection)[:nsamples]
lums = lognorm(sigma, scale=permuted_lum_obs).rvs()
mu_mass = a3 * (lums / (np.exp(a1) * (1 + true_z) ** a4)) ** (1 / a2)
masses = lognorm(rev_S, scale=mu_mass).rvs()
weights = (p1(permuted_lum_obs, lums, sigma) * \
p2(lums, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z)) / \
(q1(lums, permuted_lum_obs, sigma) * q2(masses, lums, a1, a2, a3, a4, rev_S, true_z))
return masses, lums, weights
In [161]:
m, l, w = importance_sampling_integration_samples(a1, a2, a3, a4, S, nsamples=10**6)
In [163]:
import matplotlib.colors as colors
scat = plt.scatter(log10(m[:1000]), log10(l[:1000]), c=w[:1000], norm=colors.LogNorm(), cmap='coolwarm', lw=0)
plt.gcf().colorbar(scat)
plt.scatter(log10(true_mass), log10(true_lum_obs), color='black', s=50, label='Halo')
plt.legend()
plt.title('1e3 out of 2e5 Importance Sampling Weights')
plt.xlabel('Mass ($\log_{10}(M)$)')
plt.ylabel('Luminosity ($\log_{10}(L)$)')
Out[163]:
In [18]:
import matplotlib.colors as colors
ind = log10(m) > 10.2
scat = plt.scatter(log10(m[ind][:500]), log10(l[ind][:500]), c=w[ind][:500], norm=colors.LogNorm(), cmap='coolwarm', lw=0)
plt.gcf().colorbar(scat)
plt.scatter(log10(true_mass), log10(true_lum_obs), color='black', s=50, label='True Observed')
# step = 1
# epsilon = 1e-20
# gridv = np.maximum(grid[::step,2], epsilon)
# x = log10(grid[::step,0])
# y = log10(grid[::step,1])
# scat = plt.scatter(x, y, marker='s', s=40, alpha=0.8, c=gridv, norm=colors.LogNorm(), cmap='coolwarm', lw=0);
# plt.gcf().set_size_inches((6,6))
# plt.xlim([x.min(),x.max()])
# plt.ylim([y.min(),y.max()])
# plt.scatter(log10(true_lum_obs), log10(true_mass), color='black', s=50, label='True Observed')
# plt.scatter(x[22723], y[22723], color='gold', s=50, label='MaximumWeight')
# plt.legend()
# plt.gcf().colorbar(scat)
# plt.title('Numerical Integration Weights')
# plt.ylabel('Mass $(\log_{10}(M))$')
# plt.xlabel('Luminosity $(\log_{10}(L))$')
Out[18]:
In [20]:
prior.fetch(2.0).pdf(10**10.2)
Out[20]:
In [22]:
prior.fetch(2.0).pdf(10**12)
Out[22]:
In [23]:
def importance_sampling_integration_samples2(a1, a2, a3, a4, S, nsamples=10**6):
nsamples = min(nsamples, len(true_lum_obs_collection)-1)
sigma = 0.05
rev_S = 5.6578015811698101 * S
permuted_lum_obs = np.random.permutation(true_lum_obs_collection)[:nsamples]
lums = lognorm(sigma, scale=permuted_lum_obs).rvs()
mu_mass = a3 * (lums / (np.exp(a1) * (1 + true_z) ** a4)) ** (1 / a2)
masses = lognorm(rev_S, scale=mu_mass).rvs()
return masses, lums, p2(lums, masses, a1, a2, a3, a4, S, true_z), q2(masses, lums, a1, a2, a3, a4, rev_S, true_z)
In [110]:
m, l, wn, wd = importance_sampling_integration_samples2(a1, a2, a3, a4, S, nsamples=10**6)
In [111]:
print np.sum(log10(m) < 10.2)
print np.sum(log10(m) > 10.2)
In [25]:
import matplotlib.colors as colors
ind = log10(m) > 10.2
scat = plt.scatter(log10(m[ind][:500]), log10(l[ind][:500]), c=wn[ind][:500], norm=colors.LogNorm(), cmap='coolwarm', lw=0)
plt.gcf().colorbar(scat)
plt.scatter(log10(true_mass), log10(true_lum_obs), color='black', s=50, label='True Observed')
Out[25]:
In [26]:
ind = log10(m) > 10.2
scat = plt.scatter(log10(m[ind][:500]), log10(l[ind][:500]), c=wd[ind][:500], norm=colors.LogNorm(), cmap='coolwarm', lw=0)
plt.gcf().colorbar(scat)
plt.scatter(log10(true_mass), log10(true_lum_obs), color='black', s=50, label='True Observed')
Out[26]:
In [ ]:
In [29]:
from scipy.optimize import minimize
def func(prms):
a1, a2, a4, S = prms
nsamples = len(true_lum_obs_collection)-1
sigma = 0.05
rev_S = 5.6578015811698101 * S
permuted_lum_obs = np.random.permutation(true_lum_obs_collection)[:nsamples]
lums = lognorm(sigma, scale=permuted_lum_obs).rvs()
mu_mass = a3 * (lums / (np.exp(a1) * (1 + true_z) ** a4)) ** (1 / a2)
masses = lognorm(rev_S, scale=mu_mass).rvs()
integral = np.sum((p1(permuted_lum_obs, lums, sigma) * \
p2(lums, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z)) / \
(q1(lums, permuted_lum_obs, sigma) * q2(masses, lums, a1, a2, a3, a4, rev_S, true_z))) /\
len(lums)
return -integral
x0 = [a1,a2,a4,S]
obj = minimize(func, x0, method='BFGS')
In [30]:
obj
Out[30]:
In [40]:
S_space = S * np.linspace(0.1,2, 10)
vals = []
for S_test in S_space:
vals.append(importance_sampling_integration(a1,a2,a3,a4,S_test,nsamples=10**5))
plt.plot(S_space, vals)
Out[40]:
In [129]:
samples = importance_sampling_integration_samples(a1,a2,a3,a4,S, nsamples=10**5)
plt.title('Distribution of Sample Weights')
plt.ylabel('Density')
plt.xlabel('$log_{10}$\ (Weight)')
plt.hist(np.log(samples)/np.log(10), alpha=0.6, bins=100, normed=True);
In [132]:
samples.max()
Out[132]:
In [140]:
nsamples = min(nsamples, len(true_lum_obs_collection)-1)
sigma = 0.05
rev_S = 5.6578015811698101 * S
permuted_lum_obs = np.random.permutation(true_lum_obs_collection)[:nsamples]
lums = lognorm(sigma, scale=permuted_lum_obs).rvs()
mu_mass = a3 * (lums / (np.exp(a1) * (1 + true_z) ** a4)) ** (1 / a2)
masses = lognorm(rev_S, scale=mu_mass).rvs()
tp1 = p1(permuted_lum_obs, lums, sigma)
tp2 = p2(lums, masses, a1, a2, a3, a4, S, true_z)
tp3 = p3(masses, true_z)
tq1 = q1(lums, permuted_lum_obs, sigma)
tq2 = q2(masses, lums, a1, a2, a3, a4, rev_S, true_z)
In [145]:
plt.hist(log10((tp2 * tp3) / tq2), bins=20, alpha=0.5);
In [107]:
np.argmax(tp2 * tp3 / tq2)
Out[107]:
In [110]:
print tp2[36661]
print tp3[36661]
print tq2[36661]
In [7]:
def log10(a):
return np.log(a) / np.log(10)
In [119]:
log10(mu_mass[36661])
Out[119]:
In [117]:
log10(lums[36661])
Out[117]:
In [113]:
log10(masses[36661])
Out[113]:
In [38]:
plt.hist(np.log(masses)/np.log(10), bins=20)
Out[38]:
In [58]:
plt.scatter(10.50, 8.49, color='red')
plt.scatter(10.50, 8.49, color='green')
plt.scatter(np.log(masses[:200]) / np.log(10), np.log(lums[:200]) / np.log(10), alpha=0.3)
Out[58]:
In [50]:
np.log(masses[6]) / np.log(10)
Out[50]:
In [51]:
np.log(lums[6])
Out[51]:
In [59]:
permuted_lum_obs[6]
Out[59]:
In [65]:
permuted_lum_obs = np.random.permutation(true_lum_obs_collection)[:nsamples]
lums = lognorm(sigma, scale=permuted_lum_obs).rvs()
In [66]:
plt.hist(np.log(lums) / np.log(10), bins=20)
Out[66]:
In [68]:
lums.max()
Out[68]:
In [69]:
np.log(lums.max()) / np.log(10)
Out[69]:
In [198]:
from scipy.optimize import minimize
ans = np.array([a1,a2,a3,a4,S])
def func(arr):
return - importance_sampling_integration(*(arr * ans))
x0 = np.array([1,0.97,1.01,1.03,1.2])
minimize(func, x0, method='Nelder-Mead')
Out[198]:
In [183]:
[a1,a2,a3,a4,S]
Out[183]:
In [178]:
np.append([2,5], np.logspace(1, 5, n))
Out[178]:
In [183]:
n = 15
vals = np.zeros((n+1, 3))
timevals = np.zeros((n+1, 3))
logspace = np.append([5], np.logspace(1, 5, n))
for i, nsamples in enumerate(logspace):
nsamples = int(nsamples)
start = time.time()
vals[i][0] = numerical_integration(a1,a2,a3,a4,S, nsamples=nsamples)
end = time.time()
timevals[i][0] = end - start
start = time.time()
vals[i][1] = simple_monte_carlo_integration(a1,a2,a3,a4,S, nsamples=nsamples)
end = time.time()
timevals[i][1] = end - start
start = time.time()
vals[i][2] = importance_sampling_integration(a1,a2,a3,a4,S, nsamples=nsamples)
end = time.time()
timevals[i][2] = end - start
In [185]:
plt.subplot(211)
plt.plot(logspace, vals[:,0], label='num')
plt.plot(logspace, vals[:,1], label='smc')
plt.plot(logspace, vals[:,2], label='is')
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Integral vs Samples')
plt.xlabel('Samples')
plt.ylabel('Integral')
plt.xlim([5,10**5])
plt.legend(loc=4)
plt.subplot(212)
plt.plot(logspace, timevals[:,0], label='num')
plt.plot(logspace, timevals[:,1], label='smc')
plt.plot(logspace, timevals[:,2], label='is')
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Time vs Samples')
plt.xlabel('Samples')
plt.ylabel('Seconds')
plt.legend(loc=2)
plt.xlim([5,10**5])
plt.gcf().set_size_inches(10,8)
plt.tight_layout();
In [186]:
def numerical_integration_linear(a1, a2, a3, a4, S, nsamples=10**3):
masses = midpoints(prior.fetch(true_z).mass[1:])
delta_masses = np.diff(prior.fetch(true_z).mass[1:])
lums_tmp = np.linspace(np.min(data.lum_obs), np.max(data.lum_obs), nsamples)
lums = midpoints(lums_tmp)
delta_lums = np.diff(lums_tmp)
sigma = 0.05
integral = 0
for i,lum in enumerate(lums):
integral += np.sum(delta_masses * delta_lums[i] * p1(true_lum_obs, lum, sigma) * \
p2(lum, masses, a1, a2, a3, a4, S, true_z) * p3(masses, true_z))
return integral
In [188]:
n = 8
vals = np.zeros((n, 2))
timevals = np.zeros((n, 2))
logspace = np.logspace(1, 5, n)
for i, nsamples in enumerate(logspace):
nsamples = int(nsamples)
start = time.time()
vals[i][0] = numerical_integration(a1,a2,a3,a4,S, nsamples=nsamples)
end = time.time()
timevals[i][0] = end - start
start = time.time()
vals[i][1] = numerical_integration_linear(a1,a2,a3,a4,S, nsamples=nsamples)
end = time.time()
timevals[i][1] = end - start
In [191]:
plt.plot(logspace, vals[:,0], label='log')
plt.plot(logspace, vals[:,1], label='lin')
plt.gca().set_xscale('log')
plt.gca().set_yscale('log')
plt.title('Numerical Integration Integral vs Samples')
plt.xlabel('Samples')
plt.ylabel('Integral')
plt.legend(loc=4)
Out[191]: