In [1]:
from __future__ import division
import pandas as pd
from exosyspop.populations import BinaryPopulation
from exosyspop.populations import TRILEGAL_BGBinaryPopulation
from exosyspop.populations import KeplerBinaryPopulation, PoissonPlanetPopulation
from exosyspop.populations import KeplerPowerLawBinaryPopulation
from exosyspop.populations import PopulationMixture
targets = pd.read_hdf('targets.h5')
bgstars = pd.read_hdf('bgstars.h5')
# Sanitize dtypes of targets DataFrame
for c in targets.columns:
if targets[c].dtype == object:
targets.loc[:,c] = targets.loc[:,c].astype(str)
import logging
rootLogger = logging.getLogger()
In [2]:
#pop = KeplerPowerLawBinaryPopulation(targets)
#pop._train_trap(N=1000)
#pop.save('plaw_pop', overwrite=True)
pop = KeplerPowerLawBinaryPopulation.load('plaw_pop')
pop.set_params(period_min=20, period_max=1200, beta=-0.95, fB=0.14)
catalog = pop.observe(new=True, regr_trap=True)
In [3]:
from exosyspop.survey import DetectionThreshold, DetectionRamp
eff = DetectionRamp(6,16)
In [4]:
pop.params
Out[4]:
In [5]:
import sys
sys.path.append('..')
from simpleabc.simple_abc import Model, basic_abc, pmc_abc
from scipy.stats import gaussian_kde, entropy, anderson_ksamp, uniform
import numpy as np
class PopulationModel(Model):
"""
Test model for stellar binary population where parameters are fB, beta
"""
def __init__(self, poplist, eff=None):
self.poplist = poplist
self.eff = eff
self.period_min = poplist.params['period_min']
self.period_max = poplist.params['period_max']
bounds = [(0,1), (-1.5,0)]
prior = [uniform(0,1), uniform(-1.5, 1.5)]
def draw_theta(self):
""" Draw parameters from prior
"""
return [p.rvs() for p in self.prior]
def generate_data(self, theta):
"""Generates synthetic catalog
"""
fB, beta = theta
self.poplist.set_params(fB=fB, beta=beta)
try:
return self.poplist.observe(new=True,
regr_trap=True).observe(self.eff)
except:
print('Error! fB={}, beta={}'.format(fB,beta))
def summary_stats(self, data):
"""Computes summary statistics from data
"""
if data is None:
return [np.nan]*3
N = len(data)
try:
Pmin, Pmax = np.log(self.period_min), np.log(self.period_max)
Pgrid = np.linspace(Pmin, Pmax, 1000)
if N > 1:
k = gaussian_kde(np.log(data.period.values))
p = k(Pgrid)
else:
p = np.ones(len(Pgrid))*1./(Pmax - Pmin)
except ValueError:
print(data.period.values)
raise
phase_sec = data.phase_sec.dropna().values
return p, N, phase_sec
def d_period(self, summary_stats, summary_stats_synth):
p1, _, _ = summary_stats
p2, _, _ = summary_stats_synth
kl_period = entropy(p1, p2)
return kl_period
def Ndist(self, N1, N2):
if N1==0. or N2==0.:
dist = 1
else:
dist = max(1 - 1.*N1/N2, 1-1*N2/N1)
return dist
def d_N(self, summary_stats, summary_stats_synth):
_, N1, _ = summary_stats
_, N2, _ = summary_stats_synth
return self.Ndist(N1, N2)
def d_fsec(self, summary_stats, summary_stats_synth):
_, N1, phase_sec1 = summary_stats
_, N2, phase_sec2 = summary_stats_synth
f_sec1 = len(phase_sec1)/float(N1)
f_sec2 = len(phase_sec2)/float(N2)
return np.absolute(f_sec1 - f_sec2)
def d_phase(self, summary_stats, summary_stats_synth, nbins=11):
_, _, phase_sec1 = summary_stats
_, _, phase_sec2 = summary_stats_synth
k1 = gaussian_kde(phase_sec1)
k2 = gaussian_kde(phase_sec2)
phs = np.linspace(0,1,100)
pdf1 = k1(phs)
pdf2 = k2(phs)
return entropy(pdf1, pdf2)
def distance_function(self, summary_stats, summary_stats_synth):
"""Computes distance
"""
d1 = self.d_period(summary_stats, summary_stats_synth)
d2 = self.d_N(summary_stats, summary_stats_synth)
#d3 = self.d_fsec(summary_stats, summary_stats_synth)
#d4 = self.d_phase(summary_stats, summary_stats_synth)
return d1 + d2 * (0.015/0.072) #renormalized based on null test
#return (d1 + d2 + d3 + d4)/4.
In [6]:
model = PopulationModel(PopulationMixture([pop]))
theta_0 = 0.14, -0.95
data = model.generate_data(theta_0)
model.set_data(data)
In [7]:
posterior = basic_abc(model, data, min_samples=10, epsilon=0.6,
verbose=True)
In [33]:
%matplotlib inline
import matplotlib.pyplot as plt
fig, axes = plt.subplots(1,2, figsize=(10,4))
hist_kwargs = dict(histtype='step', lw=3, color='k')
axes[0].hist(posterior[0][0], **hist_kwargs);
axes[1].hist(posterior[0][1], **hist_kwargs);
In [8]:
pmc_posterior = pmc_abc(model, data, epsilon_0=0.2, min_samples=100, steps=10, verbose=True)
In [9]:
pmc_posterior.shape
Out[9]:
In [10]:
pmc_posterior[-1]['epsilon']
Out[10]:
In [11]:
%matplotlib inline
import matplotlib.pyplot as plt
for i in range(pmc_posterior.shape[0]):
plt.figure()
plt.plot(pmc_posterior[i][0][0,:], pmc_posterior[i][0][1,:], '.');
plt.title('epsilon = {}'.format(pmc_posterior[i]['epsilon']))
plt.plot(0.14, -0.95, 'rx', ms=20)
plt.xlim(0,1)
plt.ylim(-1.2, 0)
In [12]:
model.data.columns
Out[12]:
In [13]:
model.data.to_hdf('test_catalog.h5','df')
OK, let's try an MCMC to calculate the actual posterior here, which we can do.
$$\theta = (f_B, \beta)$$$$\Gamma_\theta = f_B *
In [15]:
model.period_min, model.period_max
Out[15]:
In [16]:
model.bounds
Out[16]:
In [18]:
targets[['radius', 'mass']].describe()
Out[18]:
In [77]:
from exosyspop.utils import semimajor, RSUN, AU
Rmean = targets.radius.mean()
Mmean = targets.mass.mean()
def pr_eclipse(period):
"""Approximate eclipse probability for period in days
1.5*Rmean / amean(assuming mass=1.5*Mmena)
"""
a = semimajor(period, Mmean*1.5)
return 1.5*Rmean*RSUN/(a*AU)
period_grid = np.linspace(period_rng[0], period_rng[1], 1000)
pr_ecl_grid = pr_eclipse(period_grid)
def pwin(period, dataspan, dutycycle):
M = dataspan/ period
f = dutycycle
omf = 1.0 - f
pw = 1 - omf**M - M*f*omf**(M-1) - 0.5*M*(M-1)*f*f*omf**(M-2)
msk = (pw >= 0.) * (M >= 2.0)
return pw*msk
pwin_grid = np.array([pwin(p, targets.dataspan, targets.dutycycle).mean() for p in period_grid])
In [78]:
plt.plot(period_grid, pwin_grid)
Out[78]:
In [22]:
pr_eclipse(200)
Out[22]:
In [66]:
data = pd.read_hdf('test_catalog.h5')
period_rng = (20, 1200)
def population_model(theta, period):
fB, beta = theta
v = fB * np.ones_like(period)
beta1 = beta+1
v *= period**beta*beta1 / (period_rng[1]**beta1 - period_rng[0]**beta1)
return v
pr_ecl_data = pr_eclipse(data.period)
def lnlike(theta):
pop = population_model(theta, period_grid) * pr_ecl_grid
norm = np.trapz(pop, period_grid)
#print(norm)
ll = np.sum(np.log(population_model(theta, data.period) * pr_ecl_data)) - norm
return ll if np.isfinite(ll) else -np.inf
bounds = [(0,1), (-1.5,0)]
def lnprob(theta):
for t, rng in zip(theta, bounds):
if not rng[0] < t < rng[1]:
return -np.inf
return lnlike(theta)
In [64]:
def lhood_norm(theta):
pop = population_model(theta, period_grid) * pr_ecl_grid
return np.trapz(pop, period_grid)
fB_grid = np.linspace(0,1,100)
beta_grid = np.linspace(-1.5,0,101)
norms = [lhood_norm([fB, -0.95]) for fB in fB_grid]
#norms = [lhood_norm([0.2, b]) for b in beta_grid]
plt.plot(fB_grid, norms)
#plt.plot(beta_grid, norms)
Out[64]:
In [67]:
for fB, beta in zip(np.linspace(0,1,20), np.ones(20)*-0.95):
print(fB, beta, lnprob([fB, beta]))
In [ ]: