Generate 9 Small Luminosity Datasets


In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
from matplotlib import rc
from scipy.stats import norm, lognorm
import pandas as pd
import numpy as np
import seaborn as sns
import itertools

rc('text', usetex=True)
sns.set_style("whitegrid")

# we will work with datasets of 2000 samples
n = 2000
tmp = pd.read_csv('mock_data.csv')
mass = tmp.mass[0 : n].as_matrix()
z = tmp.z[0 : n].as_matrix()

datasets = []
for seed in xrange(9):
    alpha1 = norm(10.709, 0.022).rvs()
    alpha2 = norm(0.359, 0.009).rvs()
    alpha3 = 2.35e14
    alpha4 = norm(1.10, 0.06).rvs()
    S = norm(0.155, 0.0009).rvs()
    sigma_L = 0.05
    mu_li = np.exp(alpha1) * ((mass / alpha3) ** (alpha2))* ((1+z) ** (alpha4))
    li = lognorm(S, scale=mu_li).rvs()
    observed = lognorm(sigma_L, scale=li).rvs()
    d = {
        'seed': seed,
        'alpha1': alpha1,
        'alpha2': alpha2,
        'alpha3': alpha3,
        'alpha4': alpha4,
        'S': S,
        'sigma_L': sigma_L,
        'lum_mu': mu_li,
        'lum': li,
        'lum_obs': observed,
        'mass': mass,
        'z': z,
    }
    datasets.append(d)

In [2]:
def log10(arr):
    return np.log(arr) / np.log(10)

f, axes = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
    d = datasets[i]
    sns.kdeplot(log10(d['mass']), log10(d['lum']), shade=True, cmap='Blues', ax=ax)
    ax.set_xlabel('Mass ($\log_{10} M_{\odot}$)')
    ax.set_ylabel('Luminosity ($\log_{10} L_{\odot} / h^2$)')
    ax.set_title('Seed %d' % i)

f.tight_layout()



In [4]:
mus = []
stds = []
for i in xrange(9):
    d = datasets[i]
    mus.append(np.mean(log10(d['lum'])))
    stds.append(np.std(log10(d['lum'])))
plt.scatter(mus, stds)
plt.xlabel('Mean Luminosity ($\log_{10} L_{\odot} / h^2$)')
plt.ylabel('Stdev Luminosity ($\log_{10} L_{\odot} / h^2$)')
plt.title('Comparing Different Luminosity Distributions');


Invert Observed Luminosities


In [5]:
for dataset in datasets:
    dataset['inv_lum'] = lognorm(dataset['sigma_L'], scale=dataset['lum_obs']).rvs()

In [6]:
f, axes = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
    d = datasets[i]
    ax.scatter(log10(d['mass']), log10(d['lum']), color='blue', alpha=0.2, label='true')
    ax.scatter(log10(d['mass']), log10(d['inv_lum']), color='red', alpha=0.2, label='inverted')
    ax.set_xlabel('Mass ($\log_{10} M_{\odot}$)')
    ax.set_ylabel('Luminosity ($\log_{10} L_{\odot} / h^2$)')
    ax.set_title('Seed %d' % i)
    ax.legend()

f.tight_layout()


Invert Mass


In [7]:
for d in datasets:
    d['inv_mass_mu'] = d['alpha3'] * (d['inv_lum'] / (np.exp(d['alpha1']) * (1 + d['z']) ** d['alpha4'])) ** (1 / d['alpha2'])
    d['inv_mass'] = lognorm(d['S'], scale=d['inv_mass_mu']).rvs()

In [8]:
f, axes = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
    d = datasets[i]
    ax.scatter(log10(d['mass']), log10(d['mass']), color='blue', alpha=0.2)
    ax.scatter(log10(d['mass']), log10(d['inv_mass']), color='red', alpha=0.2)
    ax.set_xlabel('Mass ($\log_{10} M_{\odot}$)')
    ax.set_ylabel('Mass ($\log_{10} M_{\odot}$)')
    ax.set_title('Seed %d' % i)

f.tight_layout()


Now we fix seed0 and compare against to see how sensitive our inversions are to the drawn hyperparameters.


In [10]:
for d in datasets:
    fixed_seed = 0
    alpha1 = datasets[fixed_seed]['alpha1']
    alpha2 = datasets[fixed_seed]['alpha2']
    alpha3 = datasets[fixed_seed]['alpha3']
    alpha4 = datasets[fixed_seed]['alpha4']
    S = datasets[fixed_seed]['S']
    d['seed_inv_mass_mu'] = alpha3 * (d['inv_lum'] / (np.exp(alpha1) * (1 + d['z']) ** alpha4)) ** (1 / alpha2)
    d['seed_inv_mass'] = lognorm(5.6578015811698101 * S, scale=d['seed_inv_mass_mu']).rvs()

f, axes = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
    d = datasets[i]
    ax.scatter(log10(d['mass']), log10(d['mass']), color='blue', alpha=0.2)
    ax.scatter(log10(d['mass']), log10(d['inv_mass']), color='red', alpha=0.2)
    ax.scatter(log10(d['mass']), log10(d['seed_inv_mass']), color='gold', alpha=0.2)
    ax.set_xlabel('Mass ($\log_{10} M_{\odot}$)')
    ax.set_ylabel('Inverted Mass ($\log_{10} M_{\odot}$)')
    ax.set_title('Seed %d' % i)

f.tight_layout()



In [11]:
f, axes = plt.subplots(3, 3, figsize=(9, 9), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
    d = datasets[i]
    ax.scatter(log10(d['mass']), log10(d['lum']), color='blue', alpha=0.2)
    ax.scatter(log10(d['seed_inv_mass']), log10(d['inv_lum']), color='red', alpha=0.2)
    ax.set_xlabel('Mass ($\log_{10} M_{\odot}$)')
    ax.set_ylabel('Luminosity ($\log_{10} L_{\odot} / h^2$)')
    ax.set_title('Seed %d' % i)

f.tight_layout()