In [ ]:
import os
from os import path
from astropy.time import Time
from astropy.io import fits, ascii
import astropy.units as u
from astropy.table import Table
from astropy.constants import G
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import h5py
from sqlalchemy import func
from scipy.optimize import root
from scipy.stats import norm
import tqdm
from thejoker import JokerSamples
from thejoker.sampler import JokerParams, TheJoker
from thejoker.plot import plot_rv_curves
from twoface.config import TWOFACE_CACHE_PATH
from twoface.db import (db_connect, AllStar, AllVisit, AllVisitToAllStar,
StarResult, Status, JokerRun, initialize_db)
from twoface.data import APOGEERVData
from twoface.plot import plot_data_orbits
In [ ]:
TWOFACE_CACHE_PATH = path.abspath('../cache/')
samples_file = path.join(TWOFACE_CACHE_PATH, 'apogee-jitter.hdf5')
residuals_file = path.join(TWOFACE_CACHE_PATH, 'residuals.hdf5')
figures_path = '../../paper/1-catalog/figures/'
In [ ]:
Session, _ = db_connect(path.join(TWOFACE_CACHE_PATH, 'apogee.sqlite'))
session = Session()
In [ ]:
# stars = session.query(AllStar).join(StarResult, JokerRun, Status)\
# .filter(JokerRun.name == 'apogee-jitter')\
# .filter(Status.id == 2).all()
# logg cut removes large jitter stars
stars = session.query(AllStar).join(StarResult, Status, AllVisitToAllStar, AllVisit)\
.filter(Status.id > 0)\
.filter(AllStar.logg > 2)\
.group_by(AllStar.apstar_id)\
.having(func.count(AllVisit.id) >= 10)\
.all()
len(stars)
In [ ]:
if not path.exists(residuals_file):
rows = None
with h5py.File(samples_file, 'r') as f:
for star in tqdm.tqdm(stars):
data = star.apogeervdata()
samples = JokerSamples.from_hdf5(f[star.apogee_id])
chi2s = []
for j in range(len(samples)):
orbit = samples.get_orbit(j)
chi2 = np.sum( ((data.rv - orbit.radial_velocity(data.t)) / data.stddev)**2 ).value
chi2s.append(chi2)
best_sample = samples[np.argmin(chi2s)]
orbit = best_sample.get_orbit(0)
resid = (data.rv - orbit.radial_velocity(data.t)).to(u.km/u.s).value
norm_resid = ((data.rv - orbit.radial_velocity(data.t)) / data.stddev).decompose().value
norm_resid_jitter = ((data.rv - orbit.radial_velocity(data.t)) / np.sqrt(data.stddev**2 + best_sample['jitter']**2)).decompose().value
plate = [int(v.plate) for v in star.visits]
fiber = [int(v.fiberid) for v in star.visits]
mjd = [int(v.mjd) for v in star.visits]
if rows is None:
rows = list(zip(plate, fiber, mjd, resid, norm_resid, norm_resid_jitter))
else:
these_rows = list(zip(plate, fiber, mjd, resid, norm_resid, norm_resid_jitter))
rows = rows + these_rows
tbl = np.array(rows, dtype=[('plate', int), ('fiber', int), ('mjd', int),
('resid', float), ('norm_resid', float),
('norm_jitter', float)])
tbl = Table(tbl)
tbl.write(residuals_file, path='/residuals')
tbl = Table.read(residuals_file, path='/residuals')
In [ ]:
from scipy.stats import binned_statistic
In [ ]:
fig, axes = plt.subplots(3, 1, figsize=(12, 12), sharey=True)
style = dict(marker='.', linestyle='none', alpha=0.25, ms=3, color='k')
axes[0].plot(tbl['plate'], tbl['resid'], **style)
axes[0].set_xlabel('PLATEID')
axes[1].plot(tbl['fiber'], tbl['resid'], **style)
axes[1].set_xlabel('FIBERID')
axes[1].set_ylabel('Residual [{0:latex_inline}]'.format(u.km/u.s))
axes[2].plot(tbl['mjd'], tbl['resid'], **style)
axes[2].set_xlabel('MJD')
percentiles = [1, 5, 15, 85, 95, 99]
_colors = ['#edf8b1', '#7fcdbb', '#2c7fb8']
colors = _colors + _colors[::-1]
for i, name in enumerate(['plate', 'fiber', 'mjd']):
for j, p in enumerate(percentiles):
bins = np.linspace(tbl[name].min(), tbl[name].max(), 64)
bin_c = (bins[:-1]+bins[1:]) / 2.
stats = binned_statistic(tbl[name], tbl['resid'],
statistic=lambda x: np.percentile(x, p),
bins=bins)
axes[i].plot(bin_c, stats.statistic, drawstyle='steps-mid',
marker='', color=colors[j], alpha=0.8)
axes[0].set_yscale('symlog', linthreshy=1E-1)
axes[0].set_ylim(-100, 100)
for ax in axes:
ax.axhline(1., zorder=-10, alpha=0.5, color='tab:orange', linestyle='--', linewidth=1.)
ax.axhline(-1., zorder=-10, alpha=0.5, color='tab:orange', linestyle='--', linewidth=1.)
ax.axhspan(-0.1, 0.1, zorder=-10, color='#eeeeee')
fig.tight_layout()
In [ ]:
fig, ax = plt.subplots(1, 1, figsize=(12, 4.5), sharey=True)
style = dict(marker='.', linestyle='none', alpha=0.5, ms=3)
ax.plot(tbl['fiber'], np.abs(tbl['resid']), **style)
ax.set_xlabel('FIBERID')
ax.set_ylabel('abs(residual) [{0:latex_inline}]'.format(u.km/u.s))
ax.set_yscale('log')
ax.set_ylim(0.1, 100)
fig.tight_layout()
In [ ]:
plt.hist(tbl['resid'].ravel(), bins=np.linspace(-10, 10, 256), normed=True);
r_grid = np.linspace(-10, 10, 1024)
plt.plot(r_grid, norm.pdf(r_grid, 0, 0.15), marker='')
plt.yscale('log')
plt.ylim(1E-4, 1E1)
plt.xlabel('residual [{0:latex_inline}]'.format(u.km/u.s))
In [ ]:
plt.hist(tbl['norm_resid'].ravel(), bins=np.linspace(-10, 10, 256), normed=True);
r_grid = np.linspace(-10, 10, 1024)
plt.plot(r_grid, norm.pdf(r_grid, 0, 1), marker='')
plt.yscale('log')
plt.ylim(1E-4, 1E1)
plt.xlabel('residual [{0:latex_inline}]'.format(u.km/u.s))
In [ ]:
fig, ax = plt.subplots(1, 1, figsize=(6, 5))
ax.hist(tbl['norm_jitter'].ravel(), bins=np.linspace(-10, 10, 128),
normed=True, rasterized=True)
r_grid = np.linspace(-10, 10, 1024)
ax.plot(r_grid, norm.pdf(r_grid, 0, 1), marker='',
linewidth=3, linestyle='--')
ax.set_yscale('log')
ax.set_ylim(1E-4, 1E0)
ax.set_xlabel('normalized visit residuals, $R_{nk}$')
ax.set_ylabel('density')
fig.tight_layout()
fig.savefig(path.join(figures_path, 'residuals.pdf'), dpi=250)
In [ ]: