In [ ]:
import os
from os import path

# Third-party
from astropy.io import fits
from astropy.stats import median_absolute_deviation
from astropy.table import QTable, Table, join
from astropy.time import Time
import astropy.units as u
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import numpy as np
%matplotlib inline
import h5py
import pandas as pd
from sqlalchemy import func
import tqdm
from sklearn.cluster import KMeans

from thejoker import JokerSamples

from twoface.config import TWOFACE_CACHE_PATH
from twoface.samples_analysis import unimodal_P, MAP_sample
from twoface.db import (db_connect, AllStar, AllVisit, AllVisitToAllStar, NessRG,
                        StarResult, Status, JokerRun)
from twoface.plot import plot_two_panel, plot_phase_fold, _RV_LBL
from twoface.mass import get_m2_min, mf

In [ ]:
Session, _ = db_connect(path.join(TWOFACE_CACHE_PATH, 'apogee.sqlite'))
session = Session()

plot_path = '../../paper/1-catalog/figures/'
table_path = '../../paper/1-catalog/tables/'

samples_file = path.join(TWOFACE_CACHE_PATH, 'apogee-jitter.hdf5')
mcmc_samples_file = path.join(TWOFACE_CACHE_PATH, 'apogee-jitter-mcmc.hdf5')

In [ ]:
run = session.query(JokerRun).limit(1).one()
joker_pars = run.get_joker_params()

In [ ]:
high_K_stars = session.query(AllStar).join(StarResult).filter(StarResult.status_id.in_([1, 4]))\
                      .filter(StarResult.high_K).all()
print(len(high_K_stars))
print(session.query(AllStar).join(StarResult).filter(StarResult.high_K).count())

Remove ones that are already identified as unimodal:


In [ ]:
unimodal = QTable.read(path.join(table_path, 'highK-unimodal.fits'))
mask = np.logical_not(np.isin(np.array([s.apogee_id for s in high_K_stars], dtype='U20'), 
                              unimodal['APOGEE_ID']))
high_K_stars = np.array(high_K_stars)[mask]

Make the catalog:


In [ ]:
def is_n_modal(data, samples, n_clusters=2):
    clf = KMeans(n_clusters=n_clusters)
    
    ecc = samples['e'].value
    lnP = np.log(samples['P'].value).reshape(-1, 1)
    y = clf.fit_predict(lnP)

    data = star.apogeervdata()
    
    unimodals = []
    means = []
    for j in np.unique(y):
        sub_samples = samples[y==j]
        if len(sub_samples) == 1:
            unimodals.append(True)
            means.append(sub_samples)
        else:
            unimodals.append(unimodal_P(sub_samples, data))
            means.append(MAP_sample(data, sub_samples, joker_pars))
        
    return all(unimodals), means

In [ ]:
bimodal = []
MAP_samples = []
nsamples = []

n = 0
with h5py.File(samples_file, 'r') as f:
    for star in tqdm.tqdm(high_K_stars):
        samples = JokerSamples.from_hdf5(f[star.apogee_id])
        data = star.apogeervdata()
        
        if len(samples) > 1:
            is_bimodal, MAP = is_n_modal(data, samples, n_clusters=2)
            bimodal.append(is_bimodal)
            MAP_samples.append(MAP)
            
        else:
            bimodal.append(False)
        
        nsamples.append(len(samples))

nsamples = np.array(nsamples)
bimodal = np.array(bimodal)
MAP_samples = np.array(MAP_samples)
bimodal.sum()

In [ ]:
bimodal.sum()

Most of these only have a few samples:


In [ ]:
plt.hist(nsamples[bimodal], bins='auto');

In [ ]:
bi_stars = np.array(high_K_stars)[bimodal]
bi_MAP_samples = np.array(MAP_samples)[bimodal]
assert len(bi_MAP_samples) == len(bi_stars)

In [ ]:
catalog = {'APOGEE_ID':[], 'P':[], 'e':[], 'K':[]}

for samples, star in zip(bi_MAP_samples, bi_stars):
    for s in samples:
        catalog['APOGEE_ID'].append(star.apogee_id)
        catalog['P'].append(s['P'])
        catalog['e'].append(s['e'])
        catalog['K'].append(s['K'])
    
catalog['P'] = u.Quantity(catalog['P'])
catalog['K'] = u.Quantity(catalog['K'])
catalog = Table(catalog)

Add Ness masses:


In [ ]:
ness_tbl = Table.read('../../data/NessRG.fits')
ness_tbl.rename_column('2MASS', 'APOGEE_ID')
ness_tbl = ness_tbl[np.isin(ness_tbl['APOGEE_ID'], catalog['APOGEE_ID'])]

# trim the duplicates...
_, unq_idx = np.unique(ness_tbl['APOGEE_ID'], return_index=True)
ness_tbl = ness_tbl[unq_idx]

tbl_with_ness = join(catalog, ness_tbl, keys='APOGEE_ID', join_type='outer')
assert len(tbl_with_ness) == len(catalog)

In [ ]:
np.isfinite(tbl_with_ness['lnM']).sum()

In [ ]:
rnd = np.random.RandomState(seed=42)
N = rnd.normal

m1 = np.full(len(tbl_with_ness), np.nan) * u.Msun
m2_min = np.full(len(tbl_with_ness), np.nan) * u.Msun

for i, row in tqdm.tqdm(enumerate(tbl_with_ness)):
    if tbl_with_ness['lnM'].mask[i]:
        continue
        
    m1_ = np.exp(row['lnM']) * u.Msun
    mass_func = mf(P=row['P'] * catalog['P'].unit, 
                   K=row['K'] * catalog['K'].unit, 
                   e=row['e'])
    
    m1[i] = m1_
    m2_min[i] = get_m2_min(m1_, mass_func)
    
tbl_with_ness['M1'] = m1
tbl_with_ness['M2_min'] = m2_min

tbl_with_ness['M1'].mask = np.isnan(tbl_with_ness['M1'])
tbl_with_ness['M2_min'].mask = np.isnan(tbl_with_ness['M1'])

Add APOGEE DR14 info:


In [ ]:
allstar_tbl = fits.getdata('/Users/adrian/data/APOGEE_DR14/allStar-l31c.2.fits')
allstar_tbl = allstar_tbl[np.isin(allstar_tbl['APOGEE_ID'], tbl_with_ness['APOGEE_ID'])]

# trim the duplicates...
_, unq_idx = np.unique(allstar_tbl['APOGEE_ID'], return_index=True)
allstar_tbl = allstar_tbl[unq_idx]
assert len(allstar_tbl) == len(tbl_with_ness)//2

allstar_tbl = Table(allstar_tbl)
allstar_tbl.rename_column('K', 'KS')
allstar_tbl.rename_column('K_ERR', 'KS_ERR')

In [ ]:
output_catalog = join(tbl_with_ness[catalog.colnames + ['M1', 'M2_min']], allstar_tbl, keys='APOGEE_ID')
output_catalog[:1]

By-eye vetting:

Plot all of the stars, see what orbits look like bad (2) or questionable (1) fits:


In [ ]:
# with h5py.File(samples_file, 'r') as f:
#     for star in tqdm.tqdm(np.array(high_K_stars)[bimodal]):
#         samples = JokerSamples.from_hdf5(f[star.apogee_id])
#         data = star.apogeervdata()
        
#         fig = plot_two_panel(data, samples, 
#                              plot_data_orbits_kw=dict(highlight_P_extrema=False))
#         fig.savefig('../../plots/bimodal/{0}.png'.format(star.apogee_id), dpi=200)
#         plt.close(fig)

In [ ]:
# bimodal:
suspect = ['2M09490802+3649393', '2M09494588+3711256', 
           '2M10244885+1336456', '2M13412997+2836185',
           '2M15125534+6748381']

check = ['2M18525864-0031500']


clean_flag = np.zeros(len(catalog), dtype=int)
clean_flag[np.isin(output_catalog['APOGEE_ID'], check)] = 1
clean_flag[np.isin(output_catalog['APOGEE_ID'], suspect)] = 2
output_catalog['clean_flag'] = clean_flag

In [ ]:
np.isin(output_catalog['APOGEE_ID'], suspect).sum()

In [ ]:
output_catalog.write(path.join(table_path, 'highK-bimodal.fits'), overwrite=True)
# output_catalog[:4].write('../../paper/1-catalog/tables/bimodal-top.tex', overwrite=True)

Make paper figure:


In [ ]:
catalog = Table.read(path.join(table_path, 'highK-bimodal.fits'))
len(catalog)

In [ ]:
np.random.seed(111)

rc = {
    'axes.labelsize': 18,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14
}
    
# rand_subset = np.random.choice(catalog['APOGEE_ID'].astype('U20'), 
#                                size=4, 
#                                replace=False)
rand_subset = ['2M18041328-2958182',
               '2M19114515-0725486',
               '2M20184780+2023122',
               '2M22030551+6844336']
    
with mpl.rc_context(rc):
    gs = GridSpec(4, 3)
    fig = plt.figure(figsize=(8., 9.5))
    for j, apogee_id in enumerate(rand_subset):
        ax1 = fig.add_subplot(gs[j, :2])
        ax2 = fig.add_subplot(gs[j, 2])

        if j == 0:
            ax1.set_title('High-$K$, bimodal', fontsize=20)
        
        star = AllStar.get_apogee_id(session, apogee_id)
        data = star.apogeervdata()

        with h5py.File(samples_file, 'r') as f:
            samples = JokerSamples.from_hdf5(f[star.apogee_id])

        fig = plot_two_panel(data, samples, axes=[ax1, ax2], tight=False,
                             plot_data_orbits_kw=dict(n_times=16384, 
                                                      n_orbits=128,
                                                      highlight_P_extrema=False,
                                                      xlim_choice='data',
                                                      relative_to_t0=True,
                                                      plot_kwargs=dict(linewidth=0.2,
                                                                       rasterized=True)))

        xlim = ax1.get_xlim()
        ylim = ax1.get_ylim()

        ax1.text(xlim[0] + (xlim[1]-xlim[0])/20,
                 ylim[1] - (ylim[1]-ylim[0])/20,
                 star.apogee_id, 
                 fontsize=15, va='top', ha='left')

        ax1.set_xlabel('')
        ax2.set_xlabel('')
        
        logP = np.log10(samples['P'].to(u.day).value)
        span = np.ptp(logP)
#         ax2.set_xlim(10**(logP.min()-0.75),
#                      10**(logP.max()+0.75))
#         ax2.set_xlim(10**(logP.min()-0.5*span),
#                      10**(logP.max()+0.5*span))

    ax1.set_xlabel(r'${\rm BMJD} - t_0$ [day]')
    ax2.set_xlabel('period, $P$ [day]')

    fig.tight_layout()
    fig.subplots_adjust(left=0.125, right=0.95, hspace=0.2, wspace=0.4)
    
    fig.savefig(path.join(plot_path, 'highK-bimodal.pdf'), dpi=250)

(old idea)

Stars with samples that have small dispersion, or PTP lnP:


In [ ]:
stats = []
with h5py.File(samples_file, 'r') as f:
    for star in tqdm.tqdm(high_K_stars):
        logP = np.log10(f[star.apogee_id]['P'][:])
        stats.append([np.ptp(logP), np.std(logP)])
stats = np.array(stats)

In [ ]:
fig, ax = plt.subplots(1, 1, figsize=(6,6))
ax.scatter(stats[:, 0], 3*stats[:, 1], alpha=0.25, linewidth=0)
ax.set_xlim(-0.02, 5)
ax.set_ylim(-0.02, 5)

In [ ]:
((stats[:, 0] < 1) | bimodal).sum()

In [ ]:
star = np.array(high_K_stars)[(stats[:, 0] < 1) & np.logical_not(bimodal)][11]

data = star.apogeervdata()
with h5py.File(samples_file, 'r') as f:
    samples = JokerSamples.from_hdf5(f[star.apogee_id])
    
_ = plot_two_panel(data, samples, plot_data_orbits_kw=dict(highlight_P_extrema=False))

In [ ]:


In [ ]: