In [118]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from astropy.io import fits
import pandas as pd

band = ['_g','_r','_i','_z','_y']

12311 objects in our 1 deg^2 field. That's 3.4 objects/arcmin^2

Column names are:

  • ra
  • dec

  • mean #PSF vectors, 5 columns grizy

  • err #err is error on the mean. I believe stdev^2-err^2*(nmag_ok-1) gives you an estimate of astrophysical variablity
  • stdev

  • mean_ap #flexible apertures

  • err_ap
  • stdev_ap

  • mean_kron # KRON mags

  • err_kron
  • stdev_kron

  • nmag_ok # Number of good detection in each bands (5-vector) used in the above stats

  • lc_mag # PSF light curves. 5x20 matrices. 0 padded.

  • lc_err
  • lc_mjd

  • lc_mag_ap # Same for aperture

  • lc_err_ap
  • lc_mjd_ap

  • lc_mag_kron # Same for KRON

  • lc_err_kron
  • lc_mjd_kron

  • w1_nanomaggies # WISE (unWISE improved version). I believe the conversion to magnitudes is 22.5-2.5 log10(w1234_nanomaggies)

  • w1_nanomaggies_ivar
  • w2_nanomaggies
  • w2_nanomaggies_ivar
  • w3_nanomaggies
  • w3_nanomaggies_ivar
  • w4_nanomaggies
  • w4_nanomaggies_ivar

  • sdss_flux # unWISE also provides SDSS fluxes. I figure why not (these are 5 vectors)

  • sdss_flux_ivar

Let's work in pandas: import all the non-light curve data and split up bands so all features are 1D


In [142]:
def wrangle(fname):
    data = fits.getdata(fname)
    df = pd.DataFrame()
    #loop over variables in fits table, reshaping if necessary
    for col in data.columns.names:
        if data[col].shape == data['ra'].shape:
            df[col] = data[col]
        elif data[col].shape == data['mean'].shape:
            for i in xrange(5):
                newcol = col + band[i]
                df[newcol] = data[col][:,i]
    print df.shape
    return df

duds = wrangle('../../onedeg.fits')
lenses = wrangle('../../lenses.fits')


(12311, 70)
(77, 70)

Data quality exploration/cuts

  • Should we demand that SDSS photometry agrees with PS1? -- no!
  • Change WISE magnitudes to relative colors
  • cut out objects with any photometric outliers? -- demand 5 band detections in coadds, but not light curves (bc variability/photo errrors could cause us to dip below detection threshold.
    • err on the side of throwing things out

Tentative feature list:

  • mag_ap_i
  • r-i
  • g-r
  • i-z
  • z-W1
  • W1-W2 (skip W3 and W4 for now because of Adri)
  • i_size = (i_PSF - i_aper)
  • g_size, r_size, z_size. No point in y_size beacuse we expect the blue emission to be extended.
  • i_var = stddev_i^2 - (err_i^2 x nmag_ok)
  • g_var, r_var, z_var

Extract <10 features to use in XD

i-band mag, 4 relative colors, spatial extent, variability measure...


In [30]:
def wiseMag(wiseFlux):
    return 22.5-2.5*np.log10(wiseFlux)

def extractFeaturesFrom(ds):
    out = pd.DataFrame()
    ri = ds['mean_ap_r'] - ds['mean_ap_i']
    out['ri'] = ri
    gr = ds['mean_ap_g'] - ds['mean_ap_r']
    out['gr'] = gr
    iz = ds['mean_ap_i'] - ds['mean_ap_z']
    out['iz'] = iz
    zw1 = ds['mean_ap_z'] - wiseMag(duds['w1_nanomaggies'])
    out['zw1'] = zw1
    w1w2 = wiseMag(duds['w1_nanomaggies']) - wiseMag(duds['w2_nanomaggies'])
    out['w1w2'] = w1w2
    size = ds['mean_i'] - ds['mean_ap_i']
    out['size'] = size
    variability = ds['stdev_ap_i']**2 - (ds['err_ap_i']**2)*ds['nmag_ok_i']
    out['variability'] = variability
    return out

Let's demand magnitudes be reasonable and Plot Eric's data


In [160]:
def applyCuts(data):
    brightest = 10
    dimmest = 30
    data = data[np.logical_and(data['mean_ap_g'] < dimmest, data['mean_ap_g'] > brightest)]
    data = data[np.logical_and(data['mean_ap_r'] < dimmest, data['mean_ap_r'] > brightest)]
    data = data[np.logical_and(data['mean_ap_i'] < dimmest, data['mean_ap_i'] > brightest)]
    data = data[np.logical_and(data['mean_ap_z'] < dimmest, data['mean_ap_z'] > brightest)]
    data = data[np.logical_and(data['mean_ap_y'] < dimmest, data['mean_ap_y'] > brightest)]
    data = data[np.logical_and(wiseMag(data['w1_nanomaggies']) < dimmest, wiseMag(data['w1_nanomaggies']) > brightest)]
    data = data[np.logical_and(wiseMag(data['w2_nanomaggies']) < dimmest, wiseMag(data['w2_nanomaggies']) > brightest)]
    return data

data = pd.concat([lenses,duds],ignore_index=True)
truth = np.concatenate([np.ones(lenses.shape[0]),np.zeros(duds.shape[0])],axis=0)
data['truth'] = truth
#data = data.groupby('truth')
data = applyCuts(data)
features = extractFeaturesFrom(data)
features = features.dropna(how='any')
print features.shape


(4564, 7)

In [174]:
import PS1QLS
import triangle
reload(PS1QLS)
text = ['r-i','g-r', 'i-z', 'z-W1', 'W1-W2', 'i_size', 'i_var' ]
ds = PS1QLS.Dataset(features,data['truth'],text=text)
#ds.plot()
fig1 = triangle.corner(features[data['truth']==0],labels=text)
_ = triangle.corner(features[data['truth']==1],color='b',marker='x',fig=fig1)



In [137]:
wiseMag(lenses['w2_nanomaggies'])


Out[137]:
0     13.077560
1     13.900681
2     14.992960
3     13.851151
4     14.101301
5     11.441101
6     13.833104
7     14.713346
8     12.177123
9     13.176417
10    13.160835
11    14.460720
12    14.507549
13    15.442705
14    15.346421
...
62    14.314931
63    13.287121
64    14.066351
65    13.962947
66    13.331821
67    14.119545
68    15.652144
69    14.107385
70    15.652144
71    14.107385
72    12.814551
73    12.814551
74    14.724026
75    14.724026
76    13.650781
Name: w2_nanomaggies, Length: 77, dtype: float32

In [ ]: