In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
The goal here is to simultaneously model the stellar binary population and the planet population. We can make the assumption that the same stellar binary population creates both the detected EBs and false positives.
I want to parametrize the stellar companion period distribution. Stellar companions as a whole are well modeled by a lognormal distribution, but there's this bump of short-period companions that are caused by dynamical evolution in triple systems (e.g., Figure 12 of the latest Kepler EB paper). For the purposes of single-transit exploration, it's fine to ignore that short-period bump. And since the maximum of the solar-type star companion period distribution is at a period of about 250 years (Duchene & Kraus), the period range we care about should be able to be modeled as a power law.
$\Gamma_P \propto P^\alpha$
For vespa
as designed for standard Kepler candidates, I synthesized new populations for each individual candidate. Part of the reason for this is that the integration time smears out the transit shape such that the shape is not simply scalable with period. However, for long periods it might become negligible. I should test this.
In [9]:
# OK, first make a model of a star
# Give it sun-like spectroscopic properties and a Jmag of 10
from isochrones import BinaryStarModel
from isochrones.dartmouth import Dartmouth_Isochrone
#mod = BinaryStarModel(Dartmouth_Isochrone, Teff=(5700,100), logg=(4.5,0.1), feh=(0.0,0.1), J=(10,0.05))
#mod.fit()
In [10]:
#mod.save_hdf('test_binpop.h5')
mod = BinaryStarModel.load_hdf('test_binpop.h5')
In [5]:
%matplotlib inline
mod.triangle();
In [90]:
# OK, now make an EBPopluation
from vespa import EBPopulation
try:
rootLogger.setLevel(logging.INFO)
except:
pass
N = 20000
P = np.logspace(0,3,N)
np.random.shuffle(P)
#ebpop = EBPopulation(period=2, starmodel=mod, n=N)
#ebpop.fit_trapezoids()
#ebpop.save_hdf('ebpop.h5', overwrite=True);
ebpop = EBPopulation.load_hdf('ebpop.h5')
In [12]:
ebpop.stars.columns
Out[12]:
Let's list out the most likely relevant predictor features:
In [27]:
from vespa import PopulationSet
popset = PopulationSet.load_hdf('K06563.01/popset.h5')
In [28]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
pop = popset['pl']
#pop = ebpop
ok = (pop.stars.depth > 0).values
stars = pop.stars[ok] #.query('depth > 0')
texp = 1626/86400.
# Define features
sec = stars.secondary
P = stars.P
T14 = sec*stars.T14_sec + ~sec*stars.T14_pri
T23 = sec*stars.T23_sec + ~sec*stars.T23_pri
T14 += texp
T23 = np.clip(T23-texp, 0, np.inf)
tau = (T14 - T23)/2.
#tau[tau < texp] = texp
#slope_anal = T14/tau
#slope_anal[slope_anal]
k = sec*(stars.radius_A/stars.radius_B) + ~sec*(stars.radius_B/stars.radius_A)
b = sec*(stars.b_sec/k) + ~sec*stars.b_pri
logd = np.log10(sec*stars.dsec + ~sec*stars.dpri)
u1 = sec*stars.u1_2 + ~sec*stars.u1_1
u2 = sec*stars.u2_2 + ~sec*stars.u2_1
#fluxfrac = sec*stars.fluxfrac_2 + ~sec*stars.fluxfrac_1
dilution = pop.dilution_factor[ok]
#X = np.array([P,T14,tau,k,b,logd,u1,u2,dilution,sec]).T
X = pop.eclipse_features
#X = np.concatenate([pop.eclipse_features for pop in popset.poplist])
inds = np.arange(len(X))
itest = inds % 5 == 0
itrain = ~itest
Xtest = X[itest, :]
Xtrain = X[itrain, :]
# Define targets
duration = stars.duration
logdepth = np.log10(stars.depth)
slope = stars.slope
Yslope = np.array(slope)
Ylogd = np.array(logdepth)
Ydur = np.array(duration)
#Ydur = np.concatenate([pop.eclipse_targets[0] for pop in popset.poplist])
#Ylogd = np.concatenate([pop.eclipse_targets[1] for pop in popset.poplist])
#Yslope = np.concatenate([pop.eclipse_targets[2] for pop in popset.poplist])
#Ydur, Ylogd, Yslope = pop.eclipse_targets
Yp = []
Yt = []
scores = []
for Y in [Yslope, Ylogd, Ydur]:
Ytest = Y[itest]
Ytrain = Y[itrain]
#regr = LinearRegression()
regr = RandomForestRegressor(n_estimators=10)
#regr = SVR(kernel='rbf', C=1e3, gamma=0.1)
pipeline = Pipeline([('scale', StandardScaler()), ('regression', regr)])
#pipeline = Pipeline([('regression',regr)])
pipeline.fit(Xtrain, Ytrain)
Yp.append(pipeline.predict(Xtest))
Yt.append(Ytest)
scores.append(pipeline.score(Xtest, Ytest))
In [35]:
%matplotlib inline
import matplotlib.pyplot as plt
fig, axes = plt.subplots(1, 3, figsize=(12,4))
slope_diff = (np.abs(Yt[0]-Yp[0]))/Yt[0]
bad = slope_diff > 0.2
bad = Yt[2] > 1.5
for ax,(yt,yp),score in zip(axes, zip(Yt,Yp), scores):
ax.plot(yt, yp, '.')
ax.plot(yt[bad], yp[bad], 'r.')
ax.plot(yt, yt, 'k-')
ax.annotate(score, xy=(0.1,0.9), xycoords='axes fraction')
maxslope = 10
axes[0].set_xlim(xmax=maxslope)
axes[0].set_ylim(ymax=maxslope);
#print(np.where(bad))
Out[35]:
In [39]:
cols = ['radius_A', 'radius_B', 'T14_pri', 'T14_sec', 'secondary',
'depth','duration','slope', 'ecc','w', 'b_pri', 'b_sec', 'dsec', 'dpri',
'fluxfrac_1', 'fluxfrac_2']
print(Yt[2][bad])
print(Yp[2][bad])
stars.loc[bad, cols]
stars.query('duration > 1.5')[cols]
Out[39]:
In [82]:
from vespa_transitutils import traptransit
i = 223
sec = False
pars = pop.eclipse_pars(i, secondary=sec)
ts,fs = pop.eclipse(i, secondary=sec, tol=1e-4)
plt.plot(ts, fs)
print(pars)
trap = np.array(pop.eclipse_trapfit(i, secondary=sec))
plt.plot(ts, traptransit(ts, trap))
print(trap)
In [42]:
from vespa.populations import PopulationSet
#popset = PopulationSet.load_hdf('K06705.01/popset.h5')
popset = PopulationSet.load_hdf('K00087.01/popset.h5')
pop = popset['beb_px2']
In [44]:
i = 45
print(pop.stars.ix[i])
sec = False
pars = pop.eclipse_pars(i, secondary=sec)
ts,fs = pop.eclipse(i, secondary=sec, tol=1e-4)
plt.plot(ts, fs)
print(pars)
trap = np.array(pop.eclipse_trapfit(i, secondary=sec))
plt.plot(ts, traptransit(ts, trap))
print(trap)
In [26]:
!scp -r tmorton@della:/tigress/tmorton/kepler/fpp/K06563.01 .
In [83]:
import sys
sys.path.append('/u/tdm/repositories/peerless/prediction')
sys.path.append('/u/tdm/repositories/peerless')
from sims import BinaryPopulation
from targets import targets
pop = BinaryPopulation(targets)
In [7]:
%prun obs = pop.observe('period > 5', new=True)
In [4]:
obs[['period','trap_dur_pri','T14_pri','trap_dur_sec','T14_sec',
'trap_depth_pri','d_pri','trap_depth_sec','d_sec','flux_ratio']]
Out[4]:
In [10]:
def get_features(df, secondary=False, texp=1626./86400):
if secondary:
T14 = df.T14_sec
T23 = df.T23_sec
k = 1./df.k
b = b_sec/df.k
logd = np.log10(df.d_sec)
sec = np.ones(len(df))
else:
T14 = df.T14_pri
T23 = df.T23_pri
k = df.k
b = df.b_pri
logd = np.log10(df.d_pri)
sec = np.zeros(len(df))
u1 = 0.394*np.ones(len(df))
u2 = 0.296*np.ones(len(df))
T14 += texp
T23 = np.clip(T23 - texp, 0, T14)
tau = (T14 - T23)/2.
return np.array([T14, tau, k, b, logd, u1, u2, sec]).T
In [15]:
obs = pop.observe('period > 5')
X = get_features(obs)
In [18]:
obs.columns
Out[18]:
Most expensive parts of population simulations are exact isochrone interpolations of radius and magnitudes. If we can make those analytic we're in good shape for fast population simulations.
In [22]:
from isochrones.dartmouth import Dartmouth_Isochrone
dar = Dartmouth_Isochrone()
In [25]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
N = 1000
M1 = np.random.random(N) * 0.2 + 0.9
age = 9.4
feh = 0.0
q = np.random.random(N) * 0.8 + 0.2
M2 = q*M1
qrad = dar.radius(M2, age, feh)/dar.radius(M1, age, feh)
dmag = dar.mag['Kepler'](M2, age, feh) - dar.mag['Kepler'](M1, age, feh)
plt.hist(qrad, histtype='step', normed=True);
plt.hist(q, histtype='step', normed=True);
plt.figure()
plt.hist(dmag, histtype='step', normed=True);
plt.figure()
plt.plot(qrad, dmag, '.');
In [41]:
dar.tri.points.shape
Out[41]:
In [66]:
%timeit pipeline.predict(Xtest)
In [61]:
len(Xtest)
Out[61]:
In [1]:
import sys
sys.path.append('/u/tdm/repositories/peerless/prediction')
sys.path.append('/u/tdm/repositories/peerless')
from exosyspop.populations import KeplerBinaryPopulation
from targets import targets
pop = KeplerBinaryPopulation(targets, fB=0.4)
In [2]:
%matplotlib inline
pop._train_pipelines(plot=False, n_estimators=10)
In [3]:
pop._train_trap('period > 1', N=10000, plot=True)
In [4]:
%timeit obs = pop.observe('period > 1', new=True)
In [5]:
obs = pop.observe('period > 1', new=True)
obs.columns
Out[5]:
In [9]:
cols = [u'trap_dur_pri', u'trap_depth_pri',
u'trap_slope_pri', u'trap_dur_sec', u'trap_depth_sec',
u'trap_slope_sec', u'trap_dur_pri_regr', u'trap_depth_pri_regr',
u'trap_slope_pri_regr', u'trap_dur_sec_regr', u'trap_depth_sec_regr',
u'trap_slope_sec_regr']
obs[cols].head(10)
Out[9]:
In [5]:
pop._get_trap_features(obs)
Out[5]:
In [6]:
obs[['period','d_pri','trap_depth_pri','T14_pri','trap_dur_pri','T14_sec','trap_dur_sec','d_sec','trap_depth_sec','k']].head(10)
Out[6]:
In [6]:
%prun obs = pop.observe('period > 5', fit_trap=True)
In [4]:
len(obs)
Out[4]:
In [4]:
%timeit pop.get_necl('period > 5', new=True)
In [11]:
pop.get_necl('period > 5', new=True)
Out[11]:
In [1]:
import pandas as pd
store = pd.HDFStore('mod13_bg.h5')
bgstars = store['df']
store.close()
In [15]:
len(bgstars)/(3600.**2)
Out[15]:
In [2]:
from exosyspop.populations import TRILEGAL_BinaryPopulation
TRILEGAL_BinaryPopulation.binary_features# += ('logL', 'logTe', 'logg')
Out[2]:
In [3]:
bgpop = TRILEGAL_BinaryPopulation(bgstars)
In [4]:
%matplotlib inline
Xtest, dmag, qR = bgpop._train_pipelines(plot=True, n_estimators=10, n_jobs=8)
In [5]:
bgpop._train_trap(N=1000)
In [13]:
len(bgpop.stars)/3600.**2
Out[13]:
In [8]:
obs = bgpop.observe(regr_trap=True, dataspan=0.5, dutycycle=1.)
len(obs)
Out[8]:
In [9]:
query = 'period < 2 and (tra | occ)'
len(bgpop.stars.query(query))
Out[9]:
In [10]:
obs[['period', 'n_pri', 'n_sec', 'phase_sec','T14_pri','T14_sec','ecc','d_pri','d_sec']]
Out[10]:
In [6]:
import matplotlib.pyplot as plt
bgpop.params = bgpop.default_params
bgpop.ecc_empirical = False
bgpop._generate_orbits()
plt.hist(bgpop.ecc);
In [51]:
import matplotlib.pyplot as plt
import numpy as np
N = 10000
p = bgpop._sample_period(N)
ecc = bgpop._sample_ecc(N, p)
plt.plot(np.log10(p),ecc,'o', ms=0.5, alpha=0.3)
#plt.xlim(xmax=1)
plt.figure()
from scipy.stats import beta, rayleigh
x = np.arange(0,1,0.01)
a, b = 2.12, 3.09
a, b = 0.8, 2.0
sigma = 0.05
plt.plot(x, beta(a, b).pdf(x))
plt.plot(x, rayleigh(0,sigma).pdf(x))
plt.hist(ecc[p>100], histtype='step', normed=True);
In [53]:
plt.hist(np.random.rayleigh(0.1, 1000));
In [52]:
from vespa.stars.utils import rochelobe
rochelobe(1)
Out[52]:
In [42]:
beta.fit(ecc[p>100])
Out[42]:
In [9]:
%timeit bgpop._generate_binaries()
In [7]:
%timeit bgpop._generate_binaries(use_ic=True)
In [10]:
%prun obs = bgpop.observe(dataspan=4*365, dutycycle=1., new_orbits=True)
In [21]:
%prun bgpop._generate_orbits()
In [23]:
len(obs.query('period > 30 and n_pri > 1'))
Out[23]:
In [1]:
import pyximport; pyximport.install();
In [2]:
from exosyspop.distributions import sample_powerlaw
from exosyspop.utils import draw_powerlaw, draw_powerlaw2
In [3]:
%timeit x1 = sample_powerlaw(-2, 1., 10., 100000)
%timeit x2 = draw_powerlaw2(-2, 1., 10., 100000)
%timeit x3 = draw_powerlaw(-2, (1,10), 100000)
In [4]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.hist(x1, bins=50, histtype='step');
plt.hist(x2, bins=50, histtype='step');
In [17]:
import numpy as np
In [18]:
np.random.poisson?
In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from exosyspop.populations import TRILEGAL_BGBinaryPopulation
pop = TRILEGAL_BGBinaryPopulation()
x = np.arange(5, 20, 0.01)
plt.plot(x, pop.rho_bg(x))
plt.ylim(ymin=0);
print(pop.rho_bg(5))
print(pop.rho_bg(20))
In [2]:
ls
In [3]:
mv mod13_bg.h5 bgstars.h5
In [ ]:
In [74]:
B = 15/np.log(10)
print B
In [75]:
0.05/np.exp(-5/B)
Out[75]:
In [1]:
from exosyspop.utils import Pbg_kepler
In [37]:
print(Pbg_kepler(17,20,r=1))
print(Pbg_kepler(17, 5, r=1))
In [38]:
0.02*0.5**2
Out[38]:
In [12]:
ls /scr/depot0/tdm/kepler/fpp/starfields/
In [13]:
In [18]:
len(stars2)/3600.**2
Out[18]:
In [20]:
len(stars24)/3600**2.
Out[20]:
In [22]:
%%file kepchiplocs.txt
2 289.518116 50.83687
3 285.892454 49.202271
4 282.515934 47.450492
6 295.693679 49.873734
7 291.928416 48.484211
8 288.404053 46.860029
9 285.043609 45.201626
10 281.899401 43.425678
11 297.24426 45.823265
12 294.122350 45.996224
13 290.665452 44.484019
14 287.380312 42.871357
15 284.284433 41.188504
16 299.637740 44.853876
17 296.113142 43.521260
18 292.755219 42.079530
19 289.546528 40.531098
20 286.492399 38.912346
22 297.983923 40.968464
23 294.714081 39.625267
24 291.568009 38.149180
In [23]:
i, ra, dec = np.loadtxt('kepchiplocs.txt', unpack=True)
In [24]:
from astropy.coordinates import SkyCoord
b = [SkyCoord(r,d,unit='deg').galactic.b.deg for r,d in zip(ra,dec)]
In [27]:
for x,y in zip(i,b):
print(x,y)
In [28]:
stars4 = pd.read_hdf('/scr/depot0/tdm/kepler/fpp/starfields/kepler_starfield_4.0.h5')
stars22 = pd.read_hdf('/scr/depot0/tdm/kepler/fpp/starfields/kepler_starfield_22.0.h5')
In [29]:
len(stars4)/3600.**2
Out[29]:
In [30]:
len(stars22)/3600.**2
Out[30]:
In [ ]: