In [ ]:
import os
from os import path
# Third-party
from astropy.io import fits
from astropy.stats import median_absolute_deviation
from astropy.table import QTable, Table, join
from astropy.time import Time
import astropy.units as u
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import numpy as np
%matplotlib inline
import h5py
import pandas as pd
from sqlalchemy import func
import tqdm
from sklearn.cluster import KMeans
from thejoker import JokerSamples
from twoface.config import TWOFACE_CACHE_PATH
from twoface.samples_analysis import unimodal_P, MAP_sample
from twoface.db import (db_connect, AllStar, AllVisit, AllVisitToAllStar, NessRG,
StarResult, Status, JokerRun)
from twoface.plot import plot_two_panel, plot_phase_fold, _RV_LBL
from twoface.mass import get_m2_min, mf
In [ ]:
Session, _ = db_connect(path.join(TWOFACE_CACHE_PATH, 'apogee.sqlite'))
session = Session()
plot_path = '../../paper/1-catalog/figures/'
table_path = '../../paper/1-catalog/tables/'
samples_file = path.join(TWOFACE_CACHE_PATH, 'apogee-jitter.hdf5')
mcmc_samples_file = path.join(TWOFACE_CACHE_PATH, 'apogee-jitter-mcmc.hdf5')
In [ ]:
run = session.query(JokerRun).limit(1).one()
joker_pars = run.get_joker_params()
In [ ]:
high_K_stars = session.query(AllStar).join(StarResult).filter(StarResult.status_id.in_([1, 4]))\
.filter(StarResult.high_K).all()
print(len(high_K_stars))
print(session.query(AllStar).join(StarResult).filter(StarResult.high_K).count())
Remove ones that are already identified as unimodal:
In [ ]:
unimodal = QTable.read(path.join(table_path, 'highK-unimodal.fits'))
mask = np.logical_not(np.isin(np.array([s.apogee_id for s in high_K_stars], dtype='U20'),
unimodal['APOGEE_ID']))
high_K_stars = np.array(high_K_stars)[mask]
In [ ]:
def is_n_modal(data, samples, n_clusters=2):
clf = KMeans(n_clusters=n_clusters)
ecc = samples['e'].value
lnP = np.log(samples['P'].value).reshape(-1, 1)
y = clf.fit_predict(lnP)
data = star.apogeervdata()
unimodals = []
means = []
for j in np.unique(y):
sub_samples = samples[y==j]
if len(sub_samples) == 1:
unimodals.append(True)
means.append(sub_samples)
else:
unimodals.append(unimodal_P(sub_samples, data))
means.append(MAP_sample(data, sub_samples, joker_pars))
return all(unimodals), means
In [ ]:
bimodal = []
MAP_samples = []
nsamples = []
n = 0
with h5py.File(samples_file, 'r') as f:
for star in tqdm.tqdm(high_K_stars):
samples = JokerSamples.from_hdf5(f[star.apogee_id])
data = star.apogeervdata()
if len(samples) > 1:
is_bimodal, MAP = is_n_modal(data, samples, n_clusters=2)
bimodal.append(is_bimodal)
MAP_samples.append(MAP)
else:
bimodal.append(False)
nsamples.append(len(samples))
nsamples = np.array(nsamples)
bimodal = np.array(bimodal)
MAP_samples = np.array(MAP_samples)
bimodal.sum()
In [ ]:
bimodal.sum()
Most of these only have a few samples:
In [ ]:
plt.hist(nsamples[bimodal], bins='auto');
In [ ]:
bi_stars = np.array(high_K_stars)[bimodal]
bi_MAP_samples = np.array(MAP_samples)[bimodal]
assert len(bi_MAP_samples) == len(bi_stars)
In [ ]:
catalog = {'APOGEE_ID':[], 'P':[], 'e':[], 'K':[]}
for samples, star in zip(bi_MAP_samples, bi_stars):
for s in samples:
catalog['APOGEE_ID'].append(star.apogee_id)
catalog['P'].append(s['P'])
catalog['e'].append(s['e'])
catalog['K'].append(s['K'])
catalog['P'] = u.Quantity(catalog['P'])
catalog['K'] = u.Quantity(catalog['K'])
catalog = Table(catalog)
In [ ]:
ness_tbl = Table.read('../../data/NessRG.fits')
ness_tbl.rename_column('2MASS', 'APOGEE_ID')
ness_tbl = ness_tbl[np.isin(ness_tbl['APOGEE_ID'], catalog['APOGEE_ID'])]
# trim the duplicates...
_, unq_idx = np.unique(ness_tbl['APOGEE_ID'], return_index=True)
ness_tbl = ness_tbl[unq_idx]
tbl_with_ness = join(catalog, ness_tbl, keys='APOGEE_ID', join_type='outer')
assert len(tbl_with_ness) == len(catalog)
In [ ]:
np.isfinite(tbl_with_ness['lnM']).sum()
In [ ]:
rnd = np.random.RandomState(seed=42)
N = rnd.normal
m1 = np.full(len(tbl_with_ness), np.nan) * u.Msun
m2_min = np.full(len(tbl_with_ness), np.nan) * u.Msun
for i, row in tqdm.tqdm(enumerate(tbl_with_ness)):
if tbl_with_ness['lnM'].mask[i]:
continue
m1_ = np.exp(row['lnM']) * u.Msun
mass_func = mf(P=row['P'] * catalog['P'].unit,
K=row['K'] * catalog['K'].unit,
e=row['e'])
m1[i] = m1_
m2_min[i] = get_m2_min(m1_, mass_func)
tbl_with_ness['M1'] = m1
tbl_with_ness['M2_min'] = m2_min
tbl_with_ness['M1'].mask = np.isnan(tbl_with_ness['M1'])
tbl_with_ness['M2_min'].mask = np.isnan(tbl_with_ness['M1'])
In [ ]:
allstar_tbl = fits.getdata('/Users/adrian/data/APOGEE_DR14/allStar-l31c.2.fits')
allstar_tbl = allstar_tbl[np.isin(allstar_tbl['APOGEE_ID'], tbl_with_ness['APOGEE_ID'])]
# trim the duplicates...
_, unq_idx = np.unique(allstar_tbl['APOGEE_ID'], return_index=True)
allstar_tbl = allstar_tbl[unq_idx]
assert len(allstar_tbl) == len(tbl_with_ness)//2
allstar_tbl = Table(allstar_tbl)
allstar_tbl.rename_column('K', 'KS')
allstar_tbl.rename_column('K_ERR', 'KS_ERR')
In [ ]:
output_catalog = join(tbl_with_ness[catalog.colnames + ['M1', 'M2_min']], allstar_tbl, keys='APOGEE_ID')
output_catalog[:1]
In [ ]:
# with h5py.File(samples_file, 'r') as f:
# for star in tqdm.tqdm(np.array(high_K_stars)[bimodal]):
# samples = JokerSamples.from_hdf5(f[star.apogee_id])
# data = star.apogeervdata()
# fig = plot_two_panel(data, samples,
# plot_data_orbits_kw=dict(highlight_P_extrema=False))
# fig.savefig('../../plots/bimodal/{0}.png'.format(star.apogee_id), dpi=200)
# plt.close(fig)
In [ ]:
# bimodal:
suspect = ['2M09490802+3649393', '2M09494588+3711256',
'2M10244885+1336456', '2M13412997+2836185',
'2M15125534+6748381']
check = ['2M18525864-0031500']
clean_flag = np.zeros(len(catalog), dtype=int)
clean_flag[np.isin(output_catalog['APOGEE_ID'], check)] = 1
clean_flag[np.isin(output_catalog['APOGEE_ID'], suspect)] = 2
output_catalog['clean_flag'] = clean_flag
In [ ]:
np.isin(output_catalog['APOGEE_ID'], suspect).sum()
In [ ]:
output_catalog.write(path.join(table_path, 'highK-bimodal.fits'), overwrite=True)
# output_catalog[:4].write('../../paper/1-catalog/tables/bimodal-top.tex', overwrite=True)
In [ ]:
catalog = Table.read(path.join(table_path, 'highK-bimodal.fits'))
len(catalog)
In [ ]:
np.random.seed(111)
rc = {
'axes.labelsize': 18,
'xtick.labelsize': 14,
'ytick.labelsize': 14
}
# rand_subset = np.random.choice(catalog['APOGEE_ID'].astype('U20'),
# size=4,
# replace=False)
rand_subset = ['2M18041328-2958182',
'2M19114515-0725486',
'2M20184780+2023122',
'2M22030551+6844336']
with mpl.rc_context(rc):
gs = GridSpec(4, 3)
fig = plt.figure(figsize=(8., 9.5))
for j, apogee_id in enumerate(rand_subset):
ax1 = fig.add_subplot(gs[j, :2])
ax2 = fig.add_subplot(gs[j, 2])
if j == 0:
ax1.set_title('High-$K$, bimodal', fontsize=20)
star = AllStar.get_apogee_id(session, apogee_id)
data = star.apogeervdata()
with h5py.File(samples_file, 'r') as f:
samples = JokerSamples.from_hdf5(f[star.apogee_id])
fig = plot_two_panel(data, samples, axes=[ax1, ax2], tight=False,
plot_data_orbits_kw=dict(n_times=16384,
n_orbits=128,
highlight_P_extrema=False,
xlim_choice='data',
relative_to_t0=True,
plot_kwargs=dict(linewidth=0.2,
rasterized=True)))
xlim = ax1.get_xlim()
ylim = ax1.get_ylim()
ax1.text(xlim[0] + (xlim[1]-xlim[0])/20,
ylim[1] - (ylim[1]-ylim[0])/20,
star.apogee_id,
fontsize=15, va='top', ha='left')
ax1.set_xlabel('')
ax2.set_xlabel('')
logP = np.log10(samples['P'].to(u.day).value)
span = np.ptp(logP)
# ax2.set_xlim(10**(logP.min()-0.75),
# 10**(logP.max()+0.75))
# ax2.set_xlim(10**(logP.min()-0.5*span),
# 10**(logP.max()+0.5*span))
ax1.set_xlabel(r'${\rm BMJD} - t_0$ [day]')
ax2.set_xlabel('period, $P$ [day]')
fig.tight_layout()
fig.subplots_adjust(left=0.125, right=0.95, hspace=0.2, wspace=0.4)
fig.savefig(path.join(plot_path, 'highK-bimodal.pdf'), dpi=250)
In [ ]:
stats = []
with h5py.File(samples_file, 'r') as f:
for star in tqdm.tqdm(high_K_stars):
logP = np.log10(f[star.apogee_id]['P'][:])
stats.append([np.ptp(logP), np.std(logP)])
stats = np.array(stats)
In [ ]:
fig, ax = plt.subplots(1, 1, figsize=(6,6))
ax.scatter(stats[:, 0], 3*stats[:, 1], alpha=0.25, linewidth=0)
ax.set_xlim(-0.02, 5)
ax.set_ylim(-0.02, 5)
In [ ]:
((stats[:, 0] < 1) | bimodal).sum()
In [ ]:
star = np.array(high_K_stars)[(stats[:, 0] < 1) & np.logical_not(bimodal)][11]
data = star.apogeervdata()
with h5py.File(samples_file, 'r') as f:
samples = JokerSamples.from_hdf5(f[star.apogee_id])
_ = plot_two_panel(data, samples, plot_data_orbits_kw=dict(highlight_P_extrema=False))
In [ ]:
In [ ]: