These notebooks use grizli to retrieve and pre-process raw CLEAR G102/F105W and 3D-HST G141/F140W observations for a single CLEAR pointing (GS1).
This series of notebooks draws heavily from Gabe Brammer's existing grizli notebooks, available at https://github.com/gbrammer/grizli/tree/master/examples, but with examples specific to the CLEAR survey.
In [ ]:
import grizli
try:
    from mastquery import query, overlaps
    use_mquery = True
except ImportError:
    from hsaquery import query, overlaps
    use_mquery = False

import os
import numpy as np
from glob import glob
from IPython.display import Image
from astropy.io import fits
from grizli.pipeline import auto_script
from grizli.prep import process_direct_grism_visit
[HOME_PATH] is where the raw data, reduced data, and grizli outputs will be stored. [PATH_TO_CATS] is where the catalogs are stored and must include the following:

- reference mosaic image (e.g., goodss-F105W-astrodrizzle-v4.3_drz_sci.fits)
- segmentation map (e.g., Goods_S_plus_seg.fits)
- source catalog (e.g., goodss-F105W-astrodrizzle-v4.3_drz_sub_plus.cat)
- radec catalog (e.g., goodsS_radec.cat)
- 3D-HST EAZY catalogs (e.g., goodss_3dhst.v4.1.cats/*)

The [PATH_TO_CATS] files are available on the team archive: https://archive.stsci.edu/pub/clear_team/INCOMING/for_hackday/
In [ ]:
field = 'GS1'
ref_filter = 'F105W'
HOME_PATH = '/Users/rsimons/Desktop/clear/for_hackday/%s'%field
PATH_TO_CATS = '/Users/rsimons/Desktop/clear/Catalogs'
# Create [HOME_PATH] and [HOME_PATH]/query_results directories if they do not already exist
os.makedirs(HOME_PATH, exist_ok=True)
os.makedirs(HOME_PATH + '/query_results', exist_ok=True)
# Move to the [HOME_PATH] directory
os.chdir(HOME_PATH)
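As a quick sanity check, one can confirm that the required catalog files are actually present under [PATH_TO_CATS] before continuing. This cell is a minimal sketch; the filenames simply follow the examples listed above and may differ in your copy of the team archive.
In [ ]:
# Sanity check: verify the [PATH_TO_CATS] files listed above are in place
# (filenames follow the examples above; adjust to match your download)
required_files = ['goodss-F105W-astrodrizzle-v4.3_drz_sci.fits',      # reference mosaic
                  'Goods_S_plus_seg.fits',                            # segmentation map
                  'goodss-F105W-astrodrizzle-v4.3_drz_sub_plus.cat',  # source catalog
                  'goodsS_radec.cat']                                 # radec catalog
for f in required_files:
    full_path = os.path.join(PATH_TO_CATS, f)
    print('%-8s %s'%('ok' if os.path.exists(full_path) else 'MISSING', full_path))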
In [ ]:
# proposal_id = [14227] is CLEAR
parent = query.run_query(box=None, proposal_id=[14227],
                         instruments=['WFC3/IR', 'ACS/WFC'],
                         filters=['G102'], target_name='GS1')
In [ ]:
# Find all G102 and G141 observations overlapping the parent query in the archive
tabs = overlaps.find_overlaps(parent, buffer_arcmin=0.01,
filters=['G102', 'G141'],
instruments=['WFC3/IR','WFC3/UVIS','ACS/WFC'], close=False)
footprint_fits_file = glob('*footprint.fits')[0]
# Note: str.strip() removes characters, not a suffix, so use replace() instead
jtargname = footprint_fits_file.replace('_footprint.fits', '')
# The set of target names in the footprint table
fp_fits = fits.open(footprint_fits_file)
overlapping_target_names = set(fp_fits[1].data['target'])
# Copy the footprint files to $HOME_PATH/query_results/ so that they are not overwritten
os.system('cp %s/%s_footprint.fits %s/query_results/%s_footprint_%s.fits'%(HOME_PATH, jtargname, HOME_PATH, jtargname, 'all_G102_G141'))
os.system('cp %s/%s_footprint.npy %s/query_results/%s_footprint_%s.npy'%(HOME_PATH, jtargname, HOME_PATH, jtargname, 'all_G102_G141'))
os.system('cp %s/%s_footprint.pdf %s/query_results/%s_footprint_%s.pdf'%(HOME_PATH, jtargname, HOME_PATH, jtargname, 'all_G102_G141'))
os.system('cp %s/%s_info.dat %s/query_results/%s_info_%s.dat'%(HOME_PATH, jtargname, HOME_PATH, jtargname, 'all_G102_G141'))
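It can also be helpful to inspect which target names the overlap search returned before looping over them below:
In [ ]:
# Print the target names that overlap the parent GS1 query
print(sorted(overlapping_target_names))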
In [ ]:
# Table summary of query
tabs[0]
**For GS1, the retrieval step takes about 30 minutes to run and requires 1.9 GB of space.**
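Since the download needs roughly 1.9 GB, a quick free-space check before retrieving can save a failed run. This is an optional aside using the standard-library shutil module:
In [ ]:
# Check available disk space at HOME_PATH before retrieving the data
import shutil
free_gb = shutil.disk_usage(HOME_PATH).free / 1e9
print('%.1f GB free at %s'%(free_gb, HOME_PATH))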
In [ ]:
# Loop over the overlapping targets, one target name at a time
retrieve_data = True  # set to False to skip the (slow) MAST download
for t, targ_name in enumerate(overlapping_target_names):
    if use_mquery:
        extra = {'target_name': targ_name}
    else:
        extra = query.DEFAULT_EXTRA.copy()
        extra += ["TARGET.TARGET_NAME LIKE '%s'"%targ_name]
    # Search the MAST archive again, this time looking for
    # all grism and imaging observations with the given target name
    tabs = overlaps.find_overlaps(parent, buffer_arcmin=0.01,
                                  filters=['G102', 'G141', 'F098M', 'F105W', 'F125W', 'F140W'],
                                  instruments=['WFC3/IR', 'WFC3/UVIS', 'ACS/WFC'],
                                  extra=extra, close=False)
    if retrieve_data:
        # Retrieve the raw data from MAST (or the public AWS S3 mirror, if accessible)
        s3_status = os.system('aws s3 ls s3://stpubdata --request-payer requester')
        auto_script.fetch_files(field_root=jtargname, HOME_PATH=HOME_PATH, remove_bad=True,
                                reprocess_parallel=True, s3_sync=(s3_status == 0))
    # Move the footprint files to $HOME_PATH/query_results/ so that they are not overwritten
    os.system('mv %s/%s_footprint.fits %s/query_results/%s_footprint_%s.fits'%(HOME_PATH, jtargname, HOME_PATH, jtargname, targ_name))
    os.system('mv %s/%s_footprint.npy %s/query_results/%s_footprint_%s.npy'%(HOME_PATH, jtargname, HOME_PATH, jtargname, targ_name))
    os.system('mv %s/%s_footprint.pdf %s/query_results/%s_footprint_%s.pdf'%(HOME_PATH, jtargname, HOME_PATH, jtargname, targ_name))
    os.system('mv %s/%s_info.dat %s/query_results/%s_info_%s.dat'%(HOME_PATH, jtargname, HOME_PATH, jtargname, targ_name))
    os.chdir(HOME_PATH)
The retrieval step creates the following directory structure:

[HOME_PATH]/j0333m2742
[HOME_PATH]/j0333m2742/RAW
[HOME_PATH]/j0333m2742/Prep
[HOME_PATH]/j0333m2742/Extractions
[HOME_PATH]/j0333m2742/Persistence

RAW/ is where the downloaded raw and pre-processed data are stored.
Prep/ is the general working directory for processing and analyses.
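To confirm the directory tree is in place, one can glob for the expected subdirectories (a small convenience check, using the directory names listed above):
In [ ]:
# Verify the directory structure created by the retrieval step
for subdir in ['RAW', 'Prep', 'Extractions', 'Persistence']:
    paths = glob(HOME_PATH + '/*/%s'%subdir)
    print('%-12s %s'%(subdir, paths if paths else 'not found'))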
In [ ]:
PATH_TO_RAW = glob(HOME_PATH + '/*/RAW')[0]
PATH_TO_PREP = glob(HOME_PATH + '/*/Prep')[0]
# Move to the Prep directory
os.chdir(PATH_TO_PREP)
Extract exposure information from the downloaded flt files.
In [ ]:
# Find all pre-processed flt files in the RAW directory
files = glob('%s/*flt.fits'%PATH_TO_RAW)
# Generate a table from the headers of the flt fits files
info = grizli.utils.get_flt_info(files)
The info table includes relevant exposure details, e.g., filter, instrument, target name, PA, RA, and DEC.
Print the first three rows of the table.
In [ ]:
info[0:3]
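The table can also be filtered directly; for example, to list only the G102 grism exposures (this assumes the uppercase FILTER column name written by get_flt_info):
In [ ]:
# Select only the G102 grism exposures from the info table
info[info['FILTER'] == 'G102']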
Next, we use grizli to parse the headers of the downloaded flt files in RAW/ and sort them into "visits". Each visit represents a specific pointing + orientation + filter combination and contains the list of its associated exposure files.
In [ ]:
# Parse the table and group exposures into associated "visits"
visits, filters = grizli.utils.parse_flt_files(info=info, uniquename=True)
# An F140W imaging visit
print('\n\n visits[0]\n\t product: ', visits[0]['product'], '\n\t files: ', visits[0]['files'])
# A G141 grism visit
print('\n\n visits[1]\n\t product: ', visits[1]['product'], '\n\t files: ', visits[1]['files'])
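For a compact overview of every visit at once, a short summary loop (added here for convenience) prints each product name and its exposure count:
In [ ]:
# Summarize all visits: product name and number of exposures
for visit in visits:
    print('%-45s %d exposures'%(visit['product'], len(visit['files'])))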
We are now ready to pre-process the raw data we downloaded from MAST. This step produces:

- Aligned, background-subtracted FLTs
- Drizzled mosaics of direct and grism images
In [ ]:
if 'N' in field.upper(): radec_catalog = PATH_TO_CATS + '/goodsN_radec.cat'
if 'S' in field.upper(): radec_catalog = PATH_TO_CATS + '/goodsS_radec.cat'

product_names = np.array([visit['product'] for visit in visits])
filter_names = np.array([visit['product'].split('-')[-1] for visit in visits])
basenames = np.array([visit['product'].split('.')[0]+'.0' for visit in visits])

# First process the G102/F105W visits, then G141/F140W
for ref_grism, ref_filter in [('G102', 'F105W'), ('G141', 'F140W')]:
    print('Processing %s + %s visits'%(ref_grism, ref_filter))
    for v, visit in enumerate(visits):
        basename = basenames[v]
        filt1 = filter_names[v]
        if ref_filter.lower() == filt1.lower():
            # Found a direct image; now search for its grism counterpart
            match = np.where((basenames == basename) & (filter_names == ref_grism.lower()))[0]
            if len(match) == 0:
                # No grism exposures at this pointing + orientation; skip
                continue
            grism_index = match[0]
            # Run the pre-processing script: align, background-subtract, and drizzle
            status = process_direct_grism_visit(direct=visit,
                                                grism=visits[grism_index],
                                                radec=radec_catalog,
                                                align_mag_limits=[14, 23])
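Once the loop finishes, the Prep/ directory should contain the drizzled products; a quick listing confirms they were written (the exact filenames depend on the visits processed):
In [ ]:
# List the drizzled science mosaics produced by the pre-processing step
print('\n'.join(sorted(glob('*drz_sci.fits'))))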
Check the astrometric alignment of one of the F105W direct-image visits by examining its WCS log and diagnostic figure.
In [ ]:
os.chdir(PATH_TO_PREP)
!cat gs1-cxt-09-227.0-f105w_wcs.log
Image(filename=PATH_TO_PREP + '/gs1-cxt-09-227.0-f105w_wcs.png', width=600, height=600)
Inspect the column-average figure for the corresponding G102 grism visit, which shows the residual background after sky subtraction.
In [ ]:
os.chdir(PATH_TO_PREP)
Image(filename=PATH_TO_PREP + '/gs1-cxt-09-227.0-g102_column.png', width=600, height=600)