In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import rc
from scipy.stats import norm, lognorm
import pandas as pd
import numpy as np
import seaborn as sns
import itertools
rc('text', usetex=True)
sns.set_style("whitegrid")
# we will work with datasets of 2000 samples
n = 2000
tmp = pd.read_csv('mock_data.csv')
mass = tmp.mass[0 : n].as_matrix()
z = tmp.z[0 : n].as_matrix()
datasets = []
for seed in xrange(9):
alpha1 = norm(10.709, 0.022).rvs()
alpha2 = norm(0.359, 0.009).rvs()
alpha3 = 2.35e14
alpha4 = norm(1.10, 0.06).rvs()
S = norm(0.155, 0.0009).rvs()
sigma_L = 0.05
mu_li = np.exp(alpha1) * ((mass / alpha3) ** (alpha2))* ((1+z) ** (alpha4))
li = lognorm(S, scale=mu_li).rvs()
observed = lognorm(sigma_L, scale=li).rvs()
d = {
'seed': seed,
'alpha1': alpha1,
'alpha2': alpha2,
'alpha3': alpha3,
'alpha4': alpha4,
'S': S,
'sigma_L': sigma_L,
'lum_mu': mu_li,
'lum': li,
'lum_obs': observed,
'mass': mass,
'z': z,
}
datasets.append(d)
In [2]:
def log10(arr):
return np.log(arr) / np.log(10)
f, axes = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
d = datasets[i]
sns.kdeplot(log10(d['mass']), log10(d['lum']), shade=True, cmap='Blues', ax=ax)
ax.set_xlabel('Mass ($\log_{10} M_{\odot}$)')
ax.set_ylabel('Luminosity ($\log_{10} L_{\odot} / h^2$)')
ax.set_title('Seed %d' % i)
f.tight_layout()
In [4]:
mus = []
stds = []
for i in xrange(9):
d = datasets[i]
mus.append(np.mean(log10(d['lum'])))
stds.append(np.std(log10(d['lum'])))
plt.scatter(mus, stds)
plt.xlabel('Mean Luminosity ($\log_{10} L_{\odot} / h^2$)')
plt.ylabel('Stdev Luminosity ($\log_{10} L_{\odot} / h^2$)')
plt.title('Comparing Different Luminosity Distributions');
In [5]:
for dataset in datasets:
dataset['inv_lum'] = lognorm(dataset['sigma_L'], scale=dataset['lum_obs']).rvs()
In [6]:
f, axes = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
d = datasets[i]
ax.scatter(log10(d['mass']), log10(d['lum']), color='blue', alpha=0.2, label='true')
ax.scatter(log10(d['mass']), log10(d['inv_lum']), color='red', alpha=0.2, label='inverted')
ax.set_xlabel('Mass ($\log_{10} M_{\odot}$)')
ax.set_ylabel('Luminosity ($\log_{10} L_{\odot} / h^2$)')
ax.set_title('Seed %d' % i)
ax.legend()
f.tight_layout()
In [7]:
for d in datasets:
d['inv_mass_mu'] = d['alpha3'] * (d['inv_lum'] / (np.exp(d['alpha1']) * (1 + d['z']) ** d['alpha4'])) ** (1 / d['alpha2'])
d['inv_mass'] = lognorm(d['S'], scale=d['inv_mass_mu']).rvs()
In [8]:
f, axes = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
d = datasets[i]
ax.scatter(log10(d['mass']), log10(d['mass']), color='blue', alpha=0.2)
ax.scatter(log10(d['mass']), log10(d['inv_mass']), color='red', alpha=0.2)
ax.set_xlabel('Mass ($\log_{10} M_{\odot}$)')
ax.set_ylabel('Mass ($\log_{10} M_{\odot}$)')
ax.set_title('Seed %d' % i)
f.tight_layout()
Now we fix seed0 and compare against to see how sensitive our inversions are to the drawn hyperparameters.
In [10]:
for d in datasets:
fixed_seed = 0
alpha1 = datasets[fixed_seed]['alpha1']
alpha2 = datasets[fixed_seed]['alpha2']
alpha3 = datasets[fixed_seed]['alpha3']
alpha4 = datasets[fixed_seed]['alpha4']
S = datasets[fixed_seed]['S']
d['seed_inv_mass_mu'] = alpha3 * (d['inv_lum'] / (np.exp(alpha1) * (1 + d['z']) ** alpha4)) ** (1 / alpha2)
d['seed_inv_mass'] = lognorm(5.6578015811698101 * S, scale=d['seed_inv_mass_mu']).rvs()
f, axes = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
d = datasets[i]
ax.scatter(log10(d['mass']), log10(d['mass']), color='blue', alpha=0.2)
ax.scatter(log10(d['mass']), log10(d['inv_mass']), color='red', alpha=0.2)
ax.scatter(log10(d['mass']), log10(d['seed_inv_mass']), color='gold', alpha=0.2)
ax.set_xlabel('Mass ($\log_{10} M_{\odot}$)')
ax.set_ylabel('Inverted Mass ($\log_{10} M_{\odot}$)')
ax.set_title('Seed %d' % i)
f.tight_layout()
In [11]:
f, axes = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
d = datasets[i]
ax.scatter(log10(d['mass']), log10(d['lum']), color='blue', alpha=0.2)
ax.scatter(log10(d['seed_inv_mass']), log10(d['inv_lum']), color='red', alpha=0.2)
ax.set_xlabel('Mass ($\log_{10} M_{\odot}$)')
ax.set_ylabel('Luminosity ($\log_{10} L_{\odot} / h^2$)')
ax.set_title('Seed %d' % i)
f.tight_layout()