In [ ]:
import os
from os import path
from astropy.time import Time
from astropy.io import fits, ascii
import astropy.units as u
from astropy.table import Table
from astropy.constants import G

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import h5py
from sqlalchemy import func
from scipy.optimize import root
from scipy.stats import norm
import tqdm

from thejoker import JokerSamples
from thejoker.sampler import JokerParams, TheJoker
from thejoker.plot import plot_rv_curves

from twoface.config import TWOFACE_CACHE_PATH
from twoface.db import (db_connect, AllStar, AllVisit, AllVisitToAllStar, 
                        StarResult, Status, JokerRun, initialize_db)
from twoface.data import APOGEERVData
from twoface.plot import plot_data_orbits

In [ ]:
TWOFACE_CACHE_PATH = path.abspath('../cache/')
samples_file = path.join(TWOFACE_CACHE_PATH, 'apogee-jitter.hdf5')
residuals_file = path.join(TWOFACE_CACHE_PATH, 'residuals.hdf5')
figures_path = '../../paper/1-catalog/figures/'

In [ ]:
Session, _ = db_connect(path.join(TWOFACE_CACHE_PATH, 'apogee.sqlite'))
session = Session()

In [ ]:
# stars = session.query(AllStar).join(StarResult, JokerRun, Status)\
#                .filter(JokerRun.name == 'apogee-jitter')\
#                .filter(Status.id == 2).all()

# logg cut removes large jitter stars
stars = session.query(AllStar).join(StarResult, Status, AllVisitToAllStar, AllVisit)\
                              .filter(Status.id > 0)\
                              .filter(AllStar.logg > 2)\
                              .group_by(AllStar.apstar_id)\
                              .having(func.count(AllVisit.id) >= 10)\
                              .all()
len(stars)

In [ ]:
if not path.exists(residuals_file):
    rows = None
    with h5py.File(samples_file, 'r') as f:
        for star in tqdm.tqdm(stars):
            data = star.apogeervdata()
            samples = JokerSamples.from_hdf5(f[star.apogee_id])

            chi2s = []
            for j in range(len(samples)):
                orbit = samples.get_orbit(j)
                chi2 = np.sum( ((data.rv - orbit.radial_velocity(data.t)) / data.stddev)**2 ).value
                chi2s.append(chi2)

            best_sample = samples[np.argmin(chi2s)]
            orbit = best_sample.get_orbit(0)

            resid = (data.rv - orbit.radial_velocity(data.t)).to(u.km/u.s).value
            norm_resid = ((data.rv - orbit.radial_velocity(data.t)) / data.stddev).decompose().value
            norm_resid_jitter = ((data.rv - orbit.radial_velocity(data.t)) / np.sqrt(data.stddev**2 + best_sample['jitter']**2)).decompose().value

            plate = [int(v.plate) for v in star.visits]
            fiber = [int(v.fiberid) for v in star.visits]
            mjd = [int(v.mjd) for v in star.visits]

            if rows is None:
                rows = list(zip(plate, fiber, mjd, resid, norm_resid, norm_resid_jitter))

            else:
                these_rows = list(zip(plate, fiber, mjd, resid, norm_resid, norm_resid_jitter))
                rows = rows + these_rows

    tbl = np.array(rows, dtype=[('plate', int), ('fiber', int), ('mjd', int), 
                                ('resid', float), ('norm_resid', float), 
                                ('norm_jitter', float)])
    tbl = Table(tbl)
    tbl.write(residuals_file, path='/residuals')
    
tbl = Table.read(residuals_file, path='/residuals')

In [ ]:
from scipy.stats import binned_statistic

In [ ]:
fig, axes = plt.subplots(3, 1, figsize=(12, 12), sharey=True)

style = dict(marker='.', linestyle='none', alpha=0.25, ms=3, color='k')
axes[0].plot(tbl['plate'], tbl['resid'], **style)
axes[0].set_xlabel('PLATEID')

axes[1].plot(tbl['fiber'], tbl['resid'], **style)
axes[1].set_xlabel('FIBERID')
axes[1].set_ylabel('Residual [{0:latex_inline}]'.format(u.km/u.s))

axes[2].plot(tbl['mjd'], tbl['resid'], **style)
axes[2].set_xlabel('MJD')

percentiles = [1, 5, 15, 85, 95, 99]
_colors = ['#edf8b1', '#7fcdbb', '#2c7fb8']
colors = _colors + _colors[::-1]
for i, name in enumerate(['plate', 'fiber', 'mjd']):
    for j, p in enumerate(percentiles):
        bins = np.linspace(tbl[name].min(), tbl[name].max(), 64)
        bin_c = (bins[:-1]+bins[1:]) / 2.
        stats = binned_statistic(tbl[name], tbl['resid'], 
                                 statistic=lambda x: np.percentile(x, p), 
                                 bins=bins)
        
        axes[i].plot(bin_c, stats.statistic, drawstyle='steps-mid', 
                     marker='', color=colors[j], alpha=0.8)

axes[0].set_yscale('symlog', linthreshy=1E-1)
axes[0].set_ylim(-100, 100)

for ax in axes:
    ax.axhline(1., zorder=-10, alpha=0.5, color='tab:orange', linestyle='--', linewidth=1.)
    ax.axhline(-1., zorder=-10, alpha=0.5, color='tab:orange', linestyle='--', linewidth=1.)
    ax.axhspan(-0.1, 0.1, zorder=-10, color='#eeeeee')

fig.tight_layout()

In [ ]:
fig, ax = plt.subplots(1, 1, figsize=(12, 4.5), sharey=True)

style = dict(marker='.', linestyle='none', alpha=0.5, ms=3)
ax.plot(tbl['fiber'], np.abs(tbl['resid']), **style)
ax.set_xlabel('FIBERID')
ax.set_ylabel('abs(residual) [{0:latex_inline}]'.format(u.km/u.s))
ax.set_yscale('log')
ax.set_ylim(0.1, 100)

fig.tight_layout()

In [ ]:
plt.hist(tbl['resid'].ravel(), bins=np.linspace(-10, 10, 256), normed=True);

r_grid = np.linspace(-10, 10, 1024)
plt.plot(r_grid, norm.pdf(r_grid, 0, 0.15), marker='')

plt.yscale('log')
plt.ylim(1E-4, 1E1)
plt.xlabel('residual [{0:latex_inline}]'.format(u.km/u.s))

In [ ]:
plt.hist(tbl['norm_resid'].ravel(), bins=np.linspace(-10, 10, 256), normed=True);

r_grid = np.linspace(-10, 10, 1024)
plt.plot(r_grid, norm.pdf(r_grid, 0, 1), marker='')

plt.yscale('log')
plt.ylim(1E-4, 1E1)
plt.xlabel('residual [{0:latex_inline}]'.format(u.km/u.s))

In [ ]:
fig, ax = plt.subplots(1, 1, figsize=(6, 5))

ax.hist(tbl['norm_jitter'].ravel(), bins=np.linspace(-10, 10, 128), 
        normed=True, rasterized=True)

r_grid = np.linspace(-10, 10, 1024)
ax.plot(r_grid, norm.pdf(r_grid, 0, 1), marker='', 
        linewidth=3, linestyle='--')

ax.set_yscale('log')
ax.set_ylim(1E-4, 1E0)
ax.set_xlabel('normalized visit residuals, $R_{nk}$')
ax.set_ylabel('density')

fig.tight_layout()
fig.savefig(path.join(figures_path, 'residuals.pdf'), dpi=250)

In [ ]: