In [ ]:
%reload_ext autoreload
%load_ext autoreload
%autoreload 2
In [ ]:
%matplotlib inline
from astroML.plotting import hist
from astropy.io import fits
from astropy.modeling import models, fitting
from datetime import datetime
from image_registration import cross_correlation_shifts
from glob import glob
from functools import partial
from matplotlib.ticker import MaxNLocator
from matplotlib import style
from os import listdir
# from least_asymmetry.asym import actr, moments, fitgaussian
from multiprocessing import cpu_count, Pool
from numpy import min as npmin, max as npmax, zeros, arange, sum, float, isnan, hstack
from numpy import int32 as npint, round as npround, nansum as sum, nanstd as std
from os import environ, path, mkdir
from pandas import DataFrame, read_csv, read_pickle, scatter_matrix
from photutils import CircularAperture, CircularAnnulus, aperture_photometry, findstars
from pylab import ion, gcf, sort, linspace, indices, median, mean, std, empty, figure, transpose, ceil
from pylab import concatenate, pi, sqrt, ones, diag, inf, rcParams, isnan, isfinite, array, nanmax
from pylab import figure, plot, imshow, scatter, legend
from seaborn import *
from scipy.special import erf
from scipy import stats
from sklearn.cluster import DBSCAN
from sklearn.externals import joblib
from sklearn.preprocessing import StandardScaler
from socket import gethostname
from statsmodels.robust import scale
from statsmodels.nonparametric import kde
from sys import exit
from time import time, localtime
from tqdm import tqdm_notebook
from numpy import zeros, nanmedian as median, nanmean as mean, nan
from sys import exit
from sklearn.externals import joblib
import numpy as np
In [ ]:
startFull = time()
Master Class for Exoplanet Time Series Observation Photometry
In [ ]:
from wanderer import wanderer
In [ ]:
def clipOutlier2D(arr2D, nSig=10):
arr2D = arr2D.copy()
medArr2D = median(arr2D,axis=0)
sclArr2D = np.sqrt(((scale.mad(arr2D)**2.).sum()))
outliers = abs(arr2D - medArr2D) > nSig*sclArr2D
inliers = abs(arr2D - medArr2D) <= nSig*sclArr2D
arr2D[outliers] = median(arr2D[inliers],axis=0)
return arr2D
In [ ]:
rcParams['figure.dpi'] = 150
rcParams['image.interpolation'] = 'None'
rcParams['image.cmap'] = 'Blues_r'
rcParams['axes.grid'] = False
As an example, Spitzer data is expected to be store in the directory structure:
$HOME/PLANET_DIRECTORY/PLANETNAME/data/raw/AORDIR/CHANNEL/bcd/
EXAMPLE:
tempuser,Research/PlanetsHappy-5bThe loadfitsdir should read as: /home/tempuser/Research/Planets/HATP26/data/raw/r11235813/ch2/bcd/
In [ ]:
from os import environ
planetName = 'HATP26'
planetDirectory = '/Research/Planets/'
channel = 'ch2'
# channel = 'ch2/'
dataSub = 'bcd/'
dataDir = environ['HOME'] + planetDirectory + planetName + '/data/raw/' + channel + '/big/'
AORs = []
for dirNow in glob(dataDir + '/*'):
AORs.append(dirNow.split('/')[-1])
fileExt = '*bcd.fits'
uncsExt = '*bunc.fits'
print(dataDir)
In [ ]:
len(AORs)
In [ ]:
iAOR = 0 # Change this to 0,1,2,3,4,5,6,7 to "cycle" over the 8 AORs in the base directory /home/tempuser/Research/Planets/HATP26/data/raw/
AORNow = AORs[iAOR] # This will flag an error if no AORs were found in the directory `dataDir`
loadfitsdir = dataDir + AORNow + '/' + channel + '/' + dataSub
print(loadfitsdir)
In [ ]:
nCores = cpu_count()
In [ ]:
fitsFilenames = glob(loadfitsdir + fileExt);print(len(fitsFilenames))
uncsFilenames = glob(loadfitsdir + uncsExt);print(len(uncsFilenames))
In [ ]:
header_test = fits.getheader(fitsFilenames[0])
print('AORLABEL:\t{}\nNum Fits Files:\t{}\nNum Unc Files:\t{}'.format\
(header_test['AORLABEL'], len(fitsFilenames), len(uncsFilenames)))
In [ ]:
ppm = 1e6
y,x = 0,1
yguess, xguess = 15., 15. # Specific to Spitzer circa 2010 and beyond
filetype = 'bcd.fits' # Specific to Spitzer Basic Calibrated Data
In [ ]:
method = 'median'
print('Initialize an instance of `wanderer` as `example_wanderer_median`\n')
example_wanderer_median = wanderer(fitsFileDir=loadfitsdir, filetype=filetype, telescope='Spitzer',
yguess=yguess, xguess=xguess, method=method, nCores=nCores)
example_wanderer_median.AOR = AORNow
example_wanderer_median.planetName = planetName
example_wanderer_median.channel = channel
In [ ]:
print('Load Data From Fits Files in ' + loadfitsdir + '\n')
example_wanderer_median.spitzer_load_fits_file(outputUnits='electrons')#(outputUnits='muJ_per_Pixel')
Double check for NaNs
In [ ]:
example_wanderer_median.imageCube[np.where(isnan(example_wanderer_median.imageCube))] = \
np.nanmedian(example_wanderer_median.imageCube)
Identifier Strong Outliers
In [ ]:
print('Find, flag, and NaN the "Bad Pixels" Outliers' + '\n')
example_wanderer_median.find_bad_pixels()
Flux Weighted Centroiding -- just to say we tried it
In [ ]:
print('Fit for All Centers: Flux Weighted, Gaussian Fitting, Gaussian Moments, Least Asymmetry' + '\n')
example_wanderer_median.fit_flux_weighted_centering()
Gaussian centroid fitting -- the most widely used version
In [ ]:
start = time()
example_wanderer_median.mp_lmfit_gaussian_centering(subArraySize=6, recheckMethod=None, median_crop=False)
print('Operation took {} seconds with {} cores'.format(time()-start, example_wanderer_median.nCores))
Compute the sigma-clipping outliers for plotting purpose
In [ ]:
nSig = 10.1
medY = median(example_wanderer_median.centering_GaussianFit.T[y])
medX = median(example_wanderer_median.centering_GaussianFit.T[x])
stdY = std(example_wanderer_median.centering_GaussianFit.T[y])
stdX = std(example_wanderer_median.centering_GaussianFit.T[x])
ySig = 4
xSig = 4
outliers = (((example_wanderer_median.centering_GaussianFit.T[y] - medY)/(ySig*stdY))**2 + \
((example_wanderer_median.centering_GaussianFit.T[x] - medX)/(xSig*stdX))**2) > 1
Plot the inliers (blue) vs outliers (not blue)
In [ ]:
ax = figure().add_subplot(111)
cx, cy = example_wanderer_median.centering_GaussianFit.T[x],example_wanderer_median.centering_GaussianFit.T[y]
ax.plot(cx,cy,'.',ms=1)
ax.plot(cx[outliers],cy[outliers],'.',ms=1)
# ax.plot(median(cx), median(cy),'ro',ms=1)
ax.set_xlim(medX-nSig*stdX,medX+nSig*stdX)
ax.set_ylim(medY-nSig*stdY,medY+nSig*stdY)
Use advanced clustering algorithms (DBSCAN) to determine the inliers vs outliers
In [ ]:
dbs = DBSCAN(n_jobs=-1, eps=0.2, leaf_size=10)
dbsPred = dbs.fit_predict(example_wanderer_median.centering_GaussianFit)
In [ ]:
dbs_options = [k for k in range(-1,100) if (dbsPred==k).sum()]
Plot the full extent of the data to show that DBSCAN was able to identify the inliers correctly
In [ ]:
fig = figure(figsize=(6,6))
ax = fig.add_subplot(111)
medGaussCenters = median(example_wanderer_median.centering_GaussianFit,axis=0)
sclGaussCenters = scale.mad(example_wanderer_median.centering_GaussianFit)
sclGaussCenterAvg = np.sqrt(((sclGaussCenters**2.).sum()))
yctrs = example_wanderer_median.centering_GaussianFit.T[y]
xctrs = example_wanderer_median.centering_GaussianFit.T[x]
nSigmas = 5
for nSig in linspace(1,10,10):
CircularAperture(medGaussCenters[::-1],nSig*sclGaussCenterAvg).plot(ax=ax)
for dbsOpt in dbs_options:
ax.plot(xctrs[dbsPred==dbsOpt], yctrs[dbsPred==dbsOpt],'.',zorder=0, ms=1)
Make sure that there are only a handful (<< 1%) of outliers
In [ ]:
npix = 3
stillOutliers = np.where(abs(example_wanderer_median.centering_GaussianFit - medGaussCenters) > 4*sclGaussCenterAvg)[0]
print(len(stillOutliers))
Select the "class" dbsClean == 0 for the inliers
In [ ]:
dbsClean = 0
dbsKeep = (dbsPred == dbsClean)
In [ ]:
nCores = example_wanderer_median.nCores
start = time()
example_wanderer_median.mp_measure_background_annular_mask()
print('AnnularBG took {} seconds with {} cores'.format(time() - start, nCores))
Plot the background to make sure that the (to be subtracted) flux is stable overtime
In [ ]:
fig = figure(figsize=(20,10))
ax = fig.add_subplot(111)
ax.plot(example_wanderer_median.timeCube, example_wanderer_median.background_CircleMask,'.',alpha=0.2)
ax.plot(example_wanderer_median.timeCube, example_wanderer_median.background_Annulus,'.',alpha=0.2)
ax.plot(example_wanderer_median.timeCube, example_wanderer_median.background_MedianMask,'.',alpha=0.2)
ax.plot(example_wanderer_median.timeCube, example_wanderer_median.background_KDEUniv,'.',alpha=0.2)
ax.axvline(example_wanderer_median.timeCube.min()-.01+0.02)
ax.set_ylim(-25,100)
Compute the effective widths of each image to use later as the "beta pixels" and "optimal apertures"
In [ ]:
example_wanderer_median.measure_effective_width()
print(example_wanderer_median.effective_widths.mean(), sqrt(example_wanderer_median.effective_widths).mean())
In [ ]:
print('Pipeline took {} seconds thus far'.format(time() - startFull))
Compute the time series with static aperture radii only
In [ ]:
print('Iterating over Background Techniques, Centering Techniques, Aperture Radii' + '\n')
centering_choices = ['Gaussian_Fit']#, 'Gaussian_Mom', 'FluxWeighted']#, 'LeastAsymmetry']
background_choices = ['AnnularMask']#example_wanderer_median.background_df.columns
staticRads = np.arange(1, 6,0.5)#[1.0 ]# aperRads = np.arange(1, 6,0.5)
for staticRad in tqdm_notebook(staticRads, total=len(staticRads), desc='Static'):
example_wanderer_median.mp_compute_flux_over_time_varRad(staticRad, varRad=None, centering_choices[0], background_choices[0], useTheForce=True)
Create Beta Variable Radius
In [ ]:
example_wanderer_median.mp_compute_flux_over_time_betaRad()
In [ ]:
print('Entire Pipeline took {} seconds'.format(time() - startFull))
Use Advanced clustering algorithms DBSCAN to compute the outliers of the flux distribution. This is sensitive the structure in the data (i.e. transit vs outlier), which is not always true with sigma-clipping.
In [ ]:
example_wanderer_median.mp_DBScan_Flux_All()
Check that the majority of data is an inlier
In [ ]:
inlier_master = array(list(example_wanderer_median.inliers_Phots.values())).mean(axis=0) == 1.0
In [ ]:
((~inlier_master).sum() / inlier_master.size)*100
Compute the PLD components -- normalized and store the PLD vectors
In [ ]:
example_wanderer_median.extract_PLD_components()
Use Advanced clustering algorithms DBSCAN to compute the outliers of the PLD distributions.
In [ ]:
example_wanderer_median.mp_DBScan_PLD_All()
In [ ]:
print('Saving `example_wanderer_median` to a set of pickles for various Image Cubes and the Storage Dictionary')
savefiledir = environ['HOME']+'/Research/Planets/'+planetName+'/ExtracedData/' + channel
saveFileNameHeader = planetName+'_'+ AORNow +'_Median'
saveFileType = '.joblib.save'
if not path.exists(environ['HOME']+'/Research/Planets/'+planetName+'/ExtracedData/'):
mkdir(environ['HOME']+'/Research/Planets/'+planetName+'/ExtracedData/')
if not path.exists(savefiledir):
print('Creating ' + savefiledir)
mkdir(savefiledir)
print()
print('Saving to ' + savefiledir + saveFileNameHeader + saveFileType)
print()
example_wanderer_median.save_data_to_save_files(savefiledir=savefiledir, \
saveFileNameHeader=saveFileNameHeader, \
saveFileType=saveFileType)
Compute the RMS in the raw data as a function of the apeture radius
In [ ]:
color_cycle = rcParams['axes.prop_cycle'].by_key()['color']
ax = figure().add_subplot(111)
for key in example_wanderer_median.flux_TSO_df.keys():
aperRad = float(key.split('_')[-2])
ax.scatter(aperRad, scale.mad(np.diff(example_wanderer_median.flux_TSO_df[key])), color=color_cycle[0])
ax.set_xlabel('Aperture Radius')
ax.set_ylabel('MAD( Diff ( Flux ) )')
In [ ]:
print('Entire Pipeline took {} seconds'.format(time() - startFull))
Keys for skywalker input
In [ ]:
flux_key = 'phots'
time_key = 'times'
flux_err_key = 'noise'
eff_width_key = 'npix'
pld_coeff_key = 'pld'
ycenter_key = 'ycenters'
xcenter_key = 'xcenters'
ywidth_key = 'ywidths'
xwidth_key = 'xwidths'
Things that DON'T change with respect to aperture radii
In [ ]:
timeCube = example_wanderer_median.timeCube
phots_array = example_wanderer_median.flux_TSO_df.values
PLDFeatures = example_wanderer_median.PLD_components.T
try:
inliers_Phots = example_wanderer_median.inliers_Phots.values()
except:
inliers_Phots = np.ones(photsLocal.shape)
try:
inliers_PLD = example_wanderer_median.inliers_PLD.values()
except:
inliers_PLD = np.ones(PLDFeatureLocal.shape)
inliersMaster = array(list(inliers_Phots)).all(axis=0) # Need to Switch `axis=0` for Qatar-2
inliersMaster = inliersMaster * inliers_PLD.all(axis=1)
nSig = 6 # vary this as desired for 3D sigma clipping double check
ypos, xpos = clipOutlier2D(transpose([example_wanderer_median.centering_GaussianFit.T[y][inliersMaster], \
example_wanderer_median.centering_GaussianFit.T[x][inliersMaster]])).T
npix = sqrt(example_wanderer_median.effective_widths[inliersMaster])
time_c = timeCube[inliersMaster]
ywidths_c, xwidths_c = example_wanderer_median.widths_GaussianFit[inliersMaster].T
pld_comp_c = example_wanderer_median.PLD_components.T # this is new to Carlos's notebook instance
pld_output_c = np.array(list([time_c]) + list(pld_comp_c))
Things that DO change with respect to aperture radii
In [ ]:
for phot_select, key_flux_now in tqdm(enumerate(example_wanderer_median.flux_TSO_df.keys())):
if key_flux_now[-3:] == 0.0: # only do static radii
flux_c = example_wanderer_median.flux_TSO_df[key_flux_now].values[inliersMaster]
noise_c = example_wanderer_median.noise_TSO_df[key_flux_now].values[inliersMaster]
output_dict = {time_key: time_c,
flux_key: flux_c,
flux_err_key: noise_c,
eff_width_key: npix_c,
xcenter_key: xpos_c,
ycenter_key: ypos_c,
xwidth_key: xwidth_c,
ywidth_key: ywidth_c,
pld_coeff_key: pld_comp_c}
# This creates 1 joblib output file for one static aperture radius -- need to be cycled from above: change `staticRad = '2.5'` to new radius
joblib.dump(output_dict, '{}_full_output_for_skywalker_pipeline_{}_{}_{}.joblib.save'.format(planet_dir_name, channel, staticRad, varRad))
In [ ]:
timeCube = example_wanderer_median.timeCube
phots_array = example_wanderer_median.flux_TSO_df.values
PLDFeatures = example_wanderer_median.PLD_components.T
try:
inliers_Phots = example_wanderer_median.inliers_Phots.values()
except:
inliers_Phots = np.ones(photsLocal.shape)
try:
inliers_PLD = example_wanderer_median.inliers_PLD.values()
except:
inliers_PLD = np.ones(PLDFeatureLocal.shape)
In [26]:
# Gaussian_Fit_AnnularMask_rad_2.5_0.0
staticRad = '2.5' # Need to cycle over all possible values here: [1.0, 1.5, 2.0, ..., 5.5]
varRad = '0.0'
key_flux_now = 'Gaussian_Fit_AnnularMask_rad_'+staticRad+'_'+varRad
phot_select = np.where(example_wanderer_median.flux_TSO_df.keys() == key_flux_now)[0][0]
In [27]:
inliersMaster = array(list(inliers_Phots)).all(axis=0) # Need to Switch `axis=0` for Qatar-2
inliersMaster = inliersMaster * inliers_PLD.all(axis=1)
In [29]:
nSig = 6 # vary this as desired
if inliersMaster.all():
# If inliersMaster keeps ALL values, then double check with 3D inlier flagging
print('Working on AOR {}'.format(AORNow))
cy_now, cx_now = example_wanderer_median.centering_GaussianFit.T
phots_now = phots_array[:,phot_select]
phots_clipped = clipOutlier2D(phots_now, nSig=nSig)
cy_clipped, cx_clipped= clipOutlier2D(transpose([cy_now, cx_now]),nSig=nSig).T
arr2D_clipped = transpose([phots_clipped, cy_clipped, cx_clipped])
# 3D inlier selection
inliersMaster = (phots_clipped == phots_now)*(cy_clipped==cy_now)*(cx_clipped==cx_now)
else:
print("this box is just to double check -- keeping all inlier flags from above"
In [113]:
ypos, xpos = clipOutlier2D(transpose([example_wanderer_median.centering_GaussianFit.T[y][inliersMaster], \
example_wanderer_median.centering_GaussianFit.T[x][inliersMaster]])).T
npix = sqrt(example_wanderer_median.effective_widths[inliersMaster])
In [116]:
flux_c = phots_array[:, phot_select][inliersMaster]
# noise_c = np.sqrt(flux_c) # Photon limit
noise_c = example_wanderer_median.noise_TSO_df[key_flux_now].values[inliersMaster]
time_c = timeCube[inliersMaster]
ywidths_c, xwidths_c = example_wanderer_median.widths_GaussianFit[inliersMaster].T
In [118]:
# I am guessing that this will work.
# I'm keeping the commented line because that's what I used before
# pld_comp_c = wanderer.extract_PLD_components(example_wanderer_median.imageCube, order=1)
pld_comp_c = example_wanderer_median.PLD_components.T # this is new to Carlos's notebook instance
pld_output_c = np.array(list([time_c]) + list(pld_comp_c))
In [120]:
flux_key = 'phots'
time_key = 'times'
flux_err_key = 'noise'
eff_width_key = 'npix'
pld_coeff_key = 'pld'
ycenter_key = 'ycenters'
xcenter_key = 'xcenters'
ywidth_key = 'ywidths'
xwidth_key = 'xwidths'
output_dict = {time_key: time_c,
flux_key: flux_c,
flux_err_key: noise_c,
eff_width_key: npix_c,
xcenter_key: xpos_c,
ycenter_key: ypos_c,
xwidth_key: xwidth_c,
ywidth_key: ywidth_c,
pld_coeff_key: pld_comp_c}
# This creates 1 joblib output file for one static aperture radius -- need to be cycled from above: change `staticRad = '2.5'` to new radius
joblib.dump(output_dict, '{}_full_output_for_skywalker_pipeline_{}_{}_{}.joblib.save'.format(planet_dir_name, channel, staticRad, varRad))
Out[120]: