Create a stats table for analysis on each target


In [62]:
%matplotlib inline
%config InlineBackend.figure_format = "retina"

from salter import (LightCurve, subtract_add_divide, concatenate_transit_light_curves, 
                    Residuals, lc_archive, planet_props)
import matplotlib.pyplot as plt
import numpy as np
import warnings
from astropy.stats import mad_std
from itertools import product
from astropy.utils.console import ProgressBar

In [2]:
extra_oot_time = 1.5 # [durations]; Extra transit durations to keep before ingress/after egress

We will take every combination of the statistical tests available on the Residuals object, with every interesting combination of data samples. The combinations of data samples include:

  • in-transit vs. out-of-transit
  • out-of-transit before midtransit vs. out-of-transit after midtransit
  • in-transit before midtransit vs. in-transit after midtransit

In [19]:
stats_tests = ['ks', 'anderson', 'ttest']

comparisons = [(['out_of_transit', 'before_midtransit'], ['out_of_transit', 'after_midtransit']),
               (['in_transit', 'before_midtransit'], ['in_transit', 'after_midtransit']),
               ('in_transit', 'out_of_transit')]

def test_conditions_to_key(test, conditions):
    """
    Convert a given test and conditions into a key for the table
    """
    unpacked_conditions = [(condition if type(condition) is not list else 
                       '&'.join(map("{0}".format, condition))) 
                       for condition in conditions]
    key = "{0}:{1}".format(test, '-vs-'.join(map("{0}".format, unpacked_conditions)))
    return key

Compute statistics on each available light curve


In [65]:
results = dict()

results['kepid'] = []

for test, conditions in product(stats_tests, comparisons):
    key = test_conditions_to_key(test, conditions)
    results[key] = []

with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=np.RankWarning)
    
    with ProgressBar(len(list(lc_archive.file.keys())), ipython_widget=True) as bar:
        for kic_number in list(lc_archive.file.keys()):
            bar.update()

            if int(kic_number) in planet_props.table['kepid'].data.data:
                whole_lc = LightCurve.from_hdf5(int(kic_number))

                if whole_lc.params.per > 1:
                    # Mask out-of-transit portions of light curves, chop into individual transits
                    mask_oot = whole_lc.mask_out_of_transit(oot_duration_fraction=extra_oot_time)
                    near_transit = LightCurve(**mask_oot)
                    transits = near_transit.get_transit_light_curves()

                    # Normalize all transits with the subtract-add-divide method, 
                    # using a second order polynomial
                    subtract_add_divide(whole_lc, transits)

                    normed_transits = concatenate_transit_light_curves(transits)

                    normed_transits.fit_lc_3param()

                    r = Residuals(normed_transits, normed_transits.params, 
                                  buffer_duration=0.3)
                    #r.plot()

                    #mad = mad_std(r.residuals)

                    #plt.ylim([-5*mad, 5*mad])

                    #ks_pvalue = r.ks(['out_of_transit', 'before_midtransit'], 
                    #                 ['out_of_transit', 'after_midtransit'])
                    #print(ks_pvalue)
                    #plt.title("$p = {0}$".format(ks_pvalue))
                    #plt.show()

                    # Skip any transits with only a few data points in or out-of-transit
                    if np.count_nonzero(r.in_transit) > 10 and np.count_nonzero(r.out_of_transit) > 10:
                        results["kepid"].append(int(kic_number))

                        # Test every combination of the statistical tests and the 
                        # different samples of data

                        for test, conditions in product(stats_tests, comparisons):
                            try: 
                                result = getattr(r, test)(*conditions)
                            except ValueError: 
                                result = np.nan
                                
                            key = test_conditions_to_key(test, conditions)

                            results[key].append(result)


/Users/bmmorris/anaconda/lib/python3.5/site-packages/astropy/time/core.py:880: VisibleDeprecationWarning: converting an array with ndim > 0 to an index will result in an error in the future
  jd1 = apply_method(jd1)
/Users/bmmorris/anaconda/lib/python3.5/site-packages/astropy/time/core.py:881: VisibleDeprecationWarning: converting an array with ndim > 0 to an index will result in an error in the future
  jd2 = apply_method(jd2)
/Users/bmmorris/git/salter/salter/lightcurve.py:347: VisibleDeprecationWarning: converting an array with ndim > 0 to an index will result in an error in the future
  fluxes=self.fluxes[start_ind:end_ind],
/Users/bmmorris/git/salter/salter/lightcurve.py:348: VisibleDeprecationWarning: converting an array with ndim > 0 to an index will result in an error in the future
  errors=self.errors[start_ind:end_ind],
/Users/bmmorris/git/salter/salter/lightcurve.py:349: VisibleDeprecationWarning: converting an array with ndim > 0 to an index will result in an error in the future
  quarters=self.quarters[start_ind:end_ind],
/Users/bmmorris/anaconda/lib/python3.5/site-packages/scipy/stats/morestats.py:1694: UserWarning: approximate p-value will be computed by extrapolation
  warnings.warn("approximate p-value will be computed by extrapolation")

Put the statistical results into a Table object


In [66]:
from astropy.table import join, Table

results_table = Table(results)

Open the planet properties table (with columns from NASA Exoplanet Archive and Exoplanet Orbit Database)


In [68]:
from salter import get_planets_table

planets_table = get_planets_table()

Join the NEA+EOD table with the stats results table


In [72]:
stats_table = join(results_table, planets_table, keys='kepid')

Write the table to disk:


In [75]:
from astropy.io import ascii

ascii.write(stats_table, 'data/stats_table.csv', format='csv', delimiter=',')


WARNING: AstropyDeprecationWarning: data/stats_table.csv already exists. Automatically overwriting ASCII files is deprecated. Use the argument 'overwrite=True' in the future. [astropy.io.ascii.ui]

In [ ]: