The following analysis was executed using commit e4f54288
of the exosyspop module.
In [1]:
%matplotlib inline
from __future__ import print_function, division
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Set up plot stuff the DFM way:
%run ../peerless/plot_setup.py
# Retrieve the target catalog
!../scripts/peerless-targets
from peerless.catalogs import TargetCatalog, EBCatalog, KICatalog
targets = TargetCatalog().df
targets.index = targets.kepid
# Some targets have missing masses. Let's artificially add those in, with M=R
bad = targets.mass.isnull()
targets.loc[bad, 'mass'] = targets.loc[bad, 'radius']
# Calculate galactic latitude of target stars, for later use.
from astropy.coordinates import SkyCoord
c = SkyCoord(targets.ra, targets.dec, unit='deg')
targets['b'] = c.galactic.b.deg
# Download (or load) BG star simulation of the middle of the Kepler field, using TRILEGAL, via vespa
try:
bgstars = pd.read_hdf('bgstars.h5', 'df')
except:
from vespa.stars.trilegal import get_trilegal
get_trilegal('bgstars.h5', 290.6654895, 44.48410232)
bgstars = pd.read_hdf('bgstars.h5', 'df')
In [2]:
from exosyspop.populations import KeplerPowerLawBinaryPopulation
# Initialize population object
ebpop = KeplerPowerLawBinaryPopulation(targets)
# These parameters have been found to work well
min_period = 20
max_period = 25*365.25
fB = 0.25
beta = -1.0
ebpop.set_params(beta=beta, period_min=min_period, period_max=max_period, fB=fB)
# Query to emulate Kepler EB selection: less than 1000 days;
# both primary and secondary eclipse found, or greater than two of either;
# duration less than 2.5 days.
q = 'period > {} and period < 1000 and '.format(min_period) +\
'((n_pri > 0 and n_sec > 0) or ' +\
'(n_pri > 2 or n_sec > 2)) and (T14_pri < 2.5 or T14_sec < 2.5)'
kep_ebs = EBCatalog().df.query('{} < period < 1000'.format(min_period))
n_exp = len(kep_ebs) * (len(targets)/len(KICatalog().df))
print(n_exp)
plt.hist(np.log10(kep_ebs.period),
histtype='step', lw=3, normed=True, label='Observed');
new_obs = ebpop.observe(new_orbits=True).query(q)
plt.hist(np.log10(new_obs.period), label='Simulated',
histtype='step', lw=3, normed=True);
plt.legend()
print(len(new_obs))
In [3]:
ebpop.params
Out[3]:
OK, the above parameters qualitatively match the period distribution well, what about the total number of detections? Let's do a bunch of these observation simulations and confirm it looks good:
In [4]:
n_obs = [len(ebpop.observe(new_orbits=True).query(q)) for i in range(100)]
plt.hist(n_obs, histtype='step', lw=3, bins=10, color='k');
plt.axvline(n_exp, color='r', ls='--', lw=3);
In [5]:
# Define a BEB population
from exosyspop.populations import TRILEGAL_BGPowerLawBinaryPopulation
ebpop = KeplerPowerLawBinaryPopulation(targets)
bgpop = TRILEGAL_BGPowerLawBinaryPopulation(targets, bgstars, use_ic=True)
In [6]:
# Set params to match above
pars = dict(period_min=min_period, period_max=max_period, beta=beta, fB=fB)
ebpop.set_params(**pars)
bgpop.set_params(**pars)
## Just
#ebpop._generate_binaries()
#bgpop._generate_binaries()
In [7]:
# Train the regressions; calling explicitly allows to do this with N=1000 rather than N=10000
ebpop._train_trap(N=1000);
bgpop._train_trap(N=1000);
In [8]:
n_sim = 100
dfm_q = 'n_pri < 3 and n_sec < 3 and ' +\
'(((n_pri==0 or snr_pri < 15) and snr_sec > 15) or ' +\
'((n_sec==0 or snr_sec < 15) and snr_pri > 15))'
dfm_q += ' and ((T14_pri < 2.5 and n_pri > 0) or (T14_sec < 2.5 and n_sec > 0))'
nobs_eb = [len(ebpop.observe(new_orbits=True, regr_trap=True).query(dfm_q)) for i in range(n_sim)]
nobs_bg = [len(bgpop.observe(new_orbits=True, regr_trap=True).query(dfm_q)) for i in range(n_sim)]
In [26]:
plt.figure(figsize=(8,6))
plt.hist(nobs_eb, histtype='step', lw=3, label='EB', bins=10)
plt.hist(nobs_bg, histtype='step', lw=3, label='BEB', bins=3)
plt.legend();
plt.xlim(xmin=-0.3)
plt.yticks([])
plt.ylabel('Normalized frequency')
plt.xlabel('Number of observed single/double eclipses');
plt.savefig('N_EBs.pdf', bbox_inches='tight')
(np.mean(nobs_eb), np.std(nobs_eb), (np.mean(nobs_bg), np.std(nobs_bg)))
Out[26]:
In [10]:
# Here, defining population to match what was fit for in the paper
from exosyspop.populations import PlanetPopulation
import astropy.constants as const
RJUP = const.R_jup.cgs
RSUN = const.R_sun.cgs
REARTH = const.R_earth.cgs
class LongPeriodPlanetPopulation(PlanetPopulation):
"""
Poisson Population in two bins: 0.1-0.4 RJup and 0.4-1.0 RJup
From 2 to 25 years, with beta eccentricity distribution
"""
min_period = 2*365.25
max_period = 25*365.25
vol1 = (np.log(0.4) - np.log(0.1))*(np.log(max_period) - np.log(min_period))
vol2 = (np.log(1) - np.log(0.4))*(np.log(max_period) - np.log(min_period))
param_names = ('Gam1', 'Gam2', 'beta_a', 'beta_b')
default_params = {'Gam1':0.4, 'Gam2':0.18,
'beta_a':1.12, 'beta_b':3.09}
def _sample_period(self, N):
lo = np.log(self.min_period)
hi = np.log(self.max_period)
logp = np.random.random(N)*(hi - lo) + lo
return np.exp(logp)
def _sample_Np(self, N):
Gam1, Gam2 = self._get_params(['Gam1', 'Gam2'])
N_pl = Gam1*self.vol1 + Gam2*self.vol2 # Sum of both bins is total rate
return np.random.poisson(N_pl, size=N)
def _sample_Rp(self, N):
Gam1, Gam2 = self._get_params(['Gam1', 'Gam2'])
Nexp1 = Gam1*self.vol1
Nexp2 = Gam2*self.vol2
# Assign radii uniformly in each bin, with appropriate ratio given to each bin
f1 = Nexp1 / (Nexp1 + Nexp2)
u = np.random.random(N)
in_1 = u < f1
in_2 = u >= f1
N1 = in_1.sum()
N2 = in_2.sum()
Rp = np.ones(N)
lo1, hi1 = np.log(0.1), np.log(0.4)
lo2, hi2 = np.log(0.4), np.log(1)
Rp[in_1] = np.exp(np.random.random(N1)*(hi1 - lo1) + lo1)
Rp[in_2] = np.exp(np.random.random(N2)*(hi2 - lo2) + lo2)
return Rp * RJUP/RSUN
In [11]:
plpop = LongPeriodPlanetPopulation(targets)
plpop._train_trap(N=1000);
#_ = plpop.observe() # There was a point where this was necessary to avoid a bug...
OK, now let's load up the candidates, so we'll have something to compare to.
In [12]:
candidates = pd.read_csv('../results/fits.csv', index_col=0)
# add a few columns of interest
candidates['s2n'] = candidates.depth*1000/plpop.get_noise(candidates.index, candidates.duration) * np.sqrt(candidates.n_transits)
candidates['rstar'] = targets.ix[candidates.index, 'radius']
# convert depth from ppt to relative
for p in ['depth', 'depth_uncert_minus', 'depth_uncert_plus']:
candidates[p] *= 1e-3
# convert from hours to days
for p in ['duration', 'duration_uncert_minus', 'duration_uncert_plus']:
candidates[p] /= 24.
candidates[['depth','duration','period','radius','rstar','impact','n_transits','s2n']]
Out[12]:
In [13]:
# Check out the period and radius distributions of the planets in this population
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,4))
ax1.hist(plpop.period, histtype='step', lw=3);
ax1.set_yticks([])
ax2.hist(plpop.radius_B*RSUN/RJUP, histtype='step', lw=3);
ax2.set_yticks([]);
In [14]:
# Emulate detection criteria: fewer than three transits, SNR > 15, duration < 2.5d
dfm_q_pl = 'n_pri < 3 and snr_pri > 15 and T14_pri < 2.5'
dfm_q_pl_nosnr = 'n_pri < 3 and T14_pri < 2.5 and snr_pri > 0'
nobs_pl = [len(plpop.observe(new_orbits=True, regr_trap=True).query(dfm_q_pl)) for i in range(100)]
nobs_pl2 = [len(plpop.observe(new_orbits=True, regr_trap=True).query(dfm_q_pl_nosnr)) for i in range(100)]
plt.hist(nobs_pl, histtype='step', lw=3, label='SNR $>$ 15');
plt.hist(nobs_pl2, histtype='step', lw=3, color='k', ls=':', label='all')
plt.legend(loc='upper right');
In [15]:
# This matches up pretty well with what was actually detected
np.mean(nobs_pl), len(candidates.query('radius < 1.0'))
Out[15]:
In [16]:
# Of course, this isn't an exact match because the detection/veto criteria
# are much more complicated than SNR > 15...
# Simulate a large population of observed single/double transits
N = 1000
pl_obs = plpop.get_N_observed(query=dfm_q_pl, N=N)
plt.hist(np.log10(pl_obs.snr_pri), normed=True);
plt.hist(np.log10(candidates.s2n.values), histtype='step', color='r', lw=3, normed=True);
Let's put all the number-of-detections simulations together on the same plot:
In [17]:
plt.figure(figsize=(8,6))
plt.hist(nobs_eb, histtype='step', lw=3, label='EBs', bins=10)#, bins=np.arange(16));
plt.hist(nobs_bg, histtype='step', lw=3, label='BEBs', bins=3)#, bins=np.arange(16));
plt.hist(nobs_pl, histtype='step', lw=3, label='Planets')
plt.legend();
plt.xlim(xmin=-0.3)
plt.yticks([])
plt.ylabel('Normalized frequency')
plt.xlabel('Number of observed single/double eclipses');
plt.savefig('N_predicted.pdf', bbox_inches='tight')
In [18]:
def plot_kde(kde, ax=None, dur_range=(0,2.), depth_range=(-3.5,-0.5),
cands=candidates, **kwargs):
if ax is None:
fig, ax = plt.subplots(1,1)
dur_grid = np.arange(dur_range[0], dur_range[1], 0.025)
depth_grid = np.arange(depth_range[0], depth_range[1], 0.025)
xx, yy = np.meshgrid(dur_grid, depth_grid)
f = kde(np.vstack([xx.ravel(),yy.ravel()])).reshape(xx.shape)
ax.imshow(f, origin='lower', extent=[dur_range[0], dur_range[1],
depth_range[0], depth_range[1]],
aspect='auto', cmap='binary', interpolation='nearest',
**kwargs)
ax.set_xlabel('Duration [days]')
ax.set_ylabel('log$_{10}$(depth)')
if cands is not None:
xval = cands.duration
xerr = [cands.duration_uncert_minus, cands.duration_uncert_plus]
yval = np.log10(cands.depth)
yerr = [yval - np.log10(cands.depth - cands.depth_uncert_minus),
np.log10(cands.depth + cands.depth_uncert_plus) - yval]
ax.plot(xval, yval, 'w.', ms=14)
ax.plot(xval, yval, 'g.', ms=10)
#ax.errorbar(xval, yval, xerr=xerr, yerr=yerr,
# color='g', marker='.', ms=10, ls='none', capsize=0)
In [19]:
# First, simulate large populations of EBs, BEBs
eb_obs = ebpop.get_N_observed(query=dfm_q, N=2000, new_orbits=True)
bg_obs = bgpop.get_N_observed(query=dfm_q, N=200, use_ic=True, new_orbits=True) #takes longer, so use fewer
In [20]:
from scipy.stats import gaussian_kde
eb_kde = gaussian_kde(np.array([eb_obs.observed.dur_pri, eb_obs.observed.logd_pri]))
bg_kde = gaussian_kde(np.array([bg_obs.observed.dur_pri, bg_obs.observed.logd_pri]))
pl_kde = gaussian_kde(np.array([pl_obs.observed.dur_pri, pl_obs.observed.logd_pri]))
In [21]:
fig, axes = plt.subplots(1,3, figsize=(12,4), sharey=True)
plot_kde(eb_kde, ax=axes[0])
plot_kde(bg_kde, ax=axes[1])
plot_kde(pl_kde, ax=axes[2])
axes[1].set_ylabel('')
axes[2].set_ylabel('')
plt.subplots_adjust(wspace=0)
for ax,title in zip(axes, ['EB','BEB','Planet']):
ax.set_yticks(np.arange(-3,-0.5,0.5))
ax.annotate(title, xy=(0.95,0.1), xycoords='axes fraction', fontsize=20, ha='right')
plt.savefig('shape_kdes.pdf', bbox_inches='tight')
In [44]:
pts = [candidates.duration, np.log10(candidates.depth)]
L_eb = eb_kde(pts)
L_bg = bg_kde(pts)
L_pl = pl_kde(pts)
pi_eb = 7.2
pi_bg = 0.41
pi_pl = 12 # for example.
norm = L_eb*pi_eb + L_bg*pi_bg + L_pl*pi_pl
candidates['pr_eb'] = L_eb*pi_eb / norm
candidates['pr_bg'] = L_bg*pi_bg / norm
candidates['pr_pl'] = L_pl*pi_pl / norm
In [49]:
candidates[['pr_eb', 'pr_bg', 'pr_pl']].sum()
Out[49]:
In [46]:
candidates[['radius','depth', 'duration', 'pr_eb', 'pr_bg', 'pr_pl']]
Out[46]:
In [47]:
candidates[['pr_eb', 'pr_bg', 'pr_pl']].to_csv('../results/fpp.csv')
In [43]:
pi_eb = 2.21# 6.5
pi_bg = 0.22# 0.33
pi_pl = 13.57# 12 # for example.
norm = L_eb*pi_eb + L_bg*pi_bg + L_pl*pi_pl
candidates['pr_eb'] = L_eb*pi_eb / norm
candidates['pr_bg'] = L_bg*pi_bg / norm
candidates['pr_pl'] = L_pl*pi_pl / norm
candidates[['pr_eb', 'pr_bg', 'pr_pl']].sum()
Out[43]:
In [ ]: