In [1]:
%matplotlib inline
By: Dan Foreman-Mackey
This is a version of a blog post I wrote updated for the most recent Kepler data release. The main change from Q1–Q16 is that the completeness model has changed. The main changes are:
First, a helper function for downloading data from the Exoplanet Archive:
In [179]:
import os
import requests
import numpy as np
import pandas as pd
from io import BytesIO # Python 3 only!
import matplotlib.pyplot as pl
def get_catalog(name, basepath="data"):
"""
Download a catalog from the Exoplanet Archive by name and save it as a
Pandas HDF5 file.
:param name: the table name
:param basepath: the directory where the downloaded files should be saved
(default: ``data`` in the current working directory)
"""
fn = os.path.join(basepath, "{0}.h5".format(name))
if os.path.exists(fn):
return pd.read_hdf(fn, name)
if not os.path.exists(basepath):
os.makedirs(basepath)
print("Downloading {0}...".format(name))
url = ("http://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/"
"nph-nstedAPI?table={0}&select=*").format(name)
r = requests.get(url)
if r.status_code != requests.codes.ok:
r.raise_for_status()
fh = BytesIO(r.content)
df = pd.read_csv(fh)
df.to_hdf(fn, name, format="t")
return df
Here's the completeness model to apply to Q1—Q17 catalog:
In [180]:
def get_duration(period, aor, e):
"""
Equation (1) from Burke et al. This estimates the transit
duration in the same units as the input period. There is a
typo in the paper (24/4 = 6 != 4).
:param period: the period in any units of your choosing
:param aor: the dimensionless semi-major axis (scaled
by the stellar radius)
:param e: the eccentricity of the orbit
"""
return 0.25 * period * np.sqrt(1 - e**2) / aor
def get_a(period, mstar, Go4pi=2945.4625385377644/(4*np.pi*np.pi)):
"""
Compute the semi-major axis of an orbit in Solar radii.
:param period: the period in days
:param mstar: the stellar mass in Solar masses
"""
return (Go4pi*period*period*mstar) ** (1./3)
def get_delta(k, c=1.0874, s=1.0187, mean=False):
"""
Estimate the approximate expected transit depth as a function
of radius ratio. There might be a typo here. In the paper it
uses c + s*k but in the public code, it is c - s*k:
https://github.com/christopherburke/KeplerPORTs
:param k: the dimensionless radius ratio between the planet and
the star
"""
delta_max = k*k * (c + s*k)
if mean:
return 0.84 * delta_max
return delta_max
cdpp_cols = [k for k in get_catalog("q1_q17_dr24_stellar").keys() if k.startswith("rrmscdpp")]
cdpp_vals = np.array([k[-4:].replace("p", ".") for k in cdpp_cols], dtype=float)
def get_mes(star, period, rp, tau, re=0.009171, mean=False):
"""
Estimate the multiple event statistic value for a transit.
:param star: a pandas row giving the stellar properties
:param period: the period in days
:param rp: the planet radius in Earth radii
:param tau: the transit duration in hours
"""
# Interpolate to the correct CDPP for the duration.
cdpp = np.array(star[cdpp_cols], dtype=float)
sigma = np.interp(tau, cdpp_vals, cdpp)
# Compute the radius ratio and estimate the S/N.
k = rp * re / star.radius
snr = get_delta(k, mean=mean) * 1e6 / sigma
# Scale by the estimated number of transits.
ntrn = star.dataspan * star.dutycycle / period
return snr * np.sqrt(ntrn)
# Pre-compute and freeze the gamma function from Equation (5) in
# Burke et al.
mesthresh = 15
def get_pdet(star, aor, period, rp, e, comp_p, mean=False):
"""
Equation (5) from Burke et al. Estimate the detection efficiency
for a transit.
:param star: a pandas row giving the stellar properties
:param aor: the dimensionless semi-major axis (scaled
by the stellar radius)
:param period: the period in days
:param rp: the planet radius in Earth radii
:param e: the orbital eccentricity
"""
tau = get_duration(period, aor, e) * 24.
mes = get_mes(star, period, rp, tau, mean=mean)
y = np.polyval(comp_p, mes) / (1 + np.exp(-2.0*(mes-mesthresh)))
return y * (y <= 1.0) + 1.0 * (y > 1.0)
def get_pwin(star, period):
"""
Equation (6) from Burke et al. Estimates the window function
using a binomial distribution.
:param star: a pandas row giving the stellar properties
:param period: the period in days
"""
M = star.dataspan / period
f = star.dutycycle
omf = 1.0 - f
pw = 1 - omf**M - M*f*omf**(M-1) - 0.5*M*(M-1)*f*f*omf**(M-2)
msk = (pw >= 0.0) * (M >= 2.0)
return pw * msk
def get_pgeom(aor, e):
"""
The geometric transit probability.
See e.g. Kipping (2014) for the eccentricity factor
http://arxiv.org/abs/1408.1393
:param aor: the dimensionless semi-major axis (scaled
by the stellar radius)
:param e: the orbital eccentricity
"""
return 1. / (aor * (1 - e*e)) * (aor > 1.0)
def get_completeness(star, period, rp, e, comp_p, with_geom=True, mean=False):
"""
A helper function to combine all the completeness effects.
:param star: a pandas row giving the stellar properties
:param period: the period in days
:param rp: the planet radius in Earth radii
:param e: the orbital eccentricity
:param with_geom: include the geometric transit probability?
"""
aor = get_a(period, star.mass) / star.radius
pdet = get_pdet(star, aor, period, rp, e, comp_p, mean=mean)
pwin = get_pwin(star, period)
if not with_geom:
return pdet * pwin
pgeom = get_pgeom(aor, e)
return pdet * pwin * pgeom
And a function for estimating the occurrence rate (assumed constant) in a bin in $T_\mathrm{eff}$ and period:
In [181]:
def run_analysis(trng, period_rng):
stlr = get_catalog("q1_q17_dr24_stellar")
# Select the stars.
m = np.isfinite(stlr.teff) & (trng[0] <= stlr.teff) & (stlr.teff < trng[1])
m &= np.isfinite(stlr.logg) & (4.0 <= stlr.logg)
# Only include stars with sufficient data coverage.
m &= (stlr.dutycycle * stlr.dataspan) > 2*365.25
m &= stlr.dutycycle > 0.33
# Only select stars with mass estimates.
m &= np.isfinite(stlr.mass)
stlr = pd.DataFrame(stlr[m])
print("Selected {0} targets after cuts".format(len(stlr)))
# KOI catalog.
kois = get_catalog("q1_q17_dr24_koi")
# Select candidates.
rp_rng = (1.5, 2.3)
# Join on the stellar list.
kois = pd.merge(kois, stlr[["kepid", "teff", "radius"]], on="kepid", how="inner")
# Only select the KOIs in the relevant part of parameter space.
m = kois.koi_pdisposition == "CANDIDATE"
base_kois = pd.DataFrame(kois[m])
m &= (period_rng[0] <= kois.koi_period) & (kois.koi_period < period_rng[1])
m &= np.isfinite(kois.koi_prad) & (rp_rng[0] <= kois.koi_prad) & (kois.koi_prad < rp_rng[1])
m &= np.isfinite(kois.koi_max_mult_ev) & (kois.koi_max_mult_ev > 15.0)
kois = pd.DataFrame(kois[m])
print("Selected {0} KOIs after cuts".format(len(kois)))
# Calibrate the completeness.
inj = pd.read_csv("data/DR24-Pipeline-Detection-Efficiency-Table.txt", delim_whitespace=True,
skiprows=4, header=None, names=[
"kepid", "sky", "period", "epoch", "t_depth", "t_dur", "t_b", "t_ror", "t_aor",
"offset_from_source", "offset_distance", "expect_mes", "recovered", "meas_mes",
"r_period", "r_epoch", "r_depth", "r_dur", "r_b", "r_ror", "r_aor"
], na_values="null")
# Join on the stellar list.
inj = pd.merge(inj, stlr[["kepid"]], on="kepid", how="inner")
# Estimate the linear trend above 15 MES.
bins = np.linspace(mesthresh, 80, 20)
n_tot, _ = np.histogram(inj.expect_mes, bins)
m = inj.meas_mes > mesthresh
# m = inj.expect_mes > mesthresh
m &= inj.recovered
n_rec, _ = np.histogram(inj.expect_mes[m], bins)
x = 0.5 * (bins[:-1] + bins[1:])
y = n_rec / n_tot
m = np.isfinite(y)
x, y = x[m], y[m]
pl.figure()
comp_p = np.polyfit(x, y, 1)
pl.plot(x, y)
x0 = np.linspace(0, 80, 500)
pl.plot(x0, np.polyval(comp_p, x0) / (1 + np.exp(-2*(x0-mesthresh))))
pl.xlabel("expected MES");
# Compute the mean completeness.
print("Computing mean completeness...")
p = np.exp(np.random.uniform(np.log(period_rng[0]), np.log(period_rng[1]), 5000))
r = np.exp(np.random.uniform(np.log(rp_rng[0]), np.log(rp_rng[1]), len(p)))
c = np.zeros(len(p))
for _, star in stlr.iterrows():
c += get_completeness(star, p, r, 0.0, comp_p, with_geom=True)
# Compute occurrence rate.
Q = np.mean(c)
N = len(kois)
occ = N / Q
sig = occ / np.sqrt(N)
print("{0:.3} ± {1:.3}".format(occ, sig))
return occ, sig, N, Q, comp_p
G-dwarfs:
In [182]:
run_analysis((5300.0, 6000.0), (40, 80))
Out[182]:
M-dwarfs:
In [185]:
run_analysis((2400.0, 3900.0), (20, 40))
Out[185]:
In [ ]: