In [ ]:
import os
from glob import glob
import pandas as pd
import numpy as np
from astropy.table import Table
from astropy.io import fits
from IPython.display import Image
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
In [ ]:
cleardir = os.environ['HOME']+'/Data/CLEAR' # sets path to be $HOME/Data/CLEAR
# sets path to be $HOME/Data/CLEAR
# cleardir should include (available from the website):
# RELEASE_v1.0.0/
# RELEASE_v1.0.0/CATALOGS/
# available from: https://archive.stsci.edu/pub/clear_team/
threeddir = os.environ['HOME']+'/Data/3DHST/photometry'
# should include
# goodsn_3dhst.v4.1.cats/
#. Catalog/
#. Eazy/
# Fast/
#. RF_colors/
# goodss_3dhst.v4.1.cats/
#. Catalog/
#. Eazy/
# Fast/
#. RF_colors/
# available from: https://3dhst.research.yale.edu/Data.php
catdir = os.path.join(cleardir,'CATALOGS')
# sets path for $HOME/Data/CLEAR/CATALOGS
# should include all files from CATALOGS/ directory at:
# https://archive.stsci.edu/pub/clear_team/CATALOGS/
#
linedir = os.path.join(cleardir,'RELEASE_v1.0.0/CATALOGS')
# should include all files from RELEASE_v1.0.0/CATALOGS/ directory at:
#. https://archive.stsci.edu/pub/clear_team/RELEASE_v1.0.0/CATALOGS/
combineddir = os.path.join(cleardir,'RELEASE_v1.0.0/COMBINED')
# shoudl include all files from https://archive.stsci.edu/pub/clear_team/RELEASE_v1.0.0/
# download the tar.gz files and untar them - I sent a .sh script that has suggested ways to download everything.
If you need to do this, for look for AAA_wget.sh as an example of how you can auto download everything from the CLEAR site. It looks like this:
$ more AAA_wget.sh
# this was a useful wget command to get all files
# Run these from a ~/Data/CLEAR/ directory (that's what Casey did): (you might need to "mkdir INCOMING" and the other directories first)
cd INCOMING && wget -r -nH --cut-dirs=4 --user='papovich@tamu.edu' --ask-password ftp://archive.stsci.edu//pub/clear_team/INCOMING/GoodsN_plus && cd ..
cd INCOMING && wget -r -nH --cut-dirs=4 --user='papovich@tamu.edu' --ask-password ftp://archive.stsci.edu//pub/clear_team/INCOMING/GoodsS_plus && cd ..
# FLT -- this will get everything... all 3.3 Gb worth or something
cd FLTS && wget -r -nH --cut-dirs=4 --user='papovich@tamu.edu' --ask-password ftp://archive.stsci.edu//pub/clear_team/FLTS/*.fits && cd ..
# (Might need to "mkdir RELEASE_v1.0.0" first... )
cd RELEASE_v1.0.0 && wget -r -nH --cut-dirs=4 --user='papovich@tamu.edu' --ask-password ftp://archive.stsci.edu//pub/clear_team/RELEASE_v1.0.0/* && cd ..
In [ ]:
### Check that all the directories we just set up do exist:
for d in [cleardir, catdir, linedir, threeddir,combineddir] :
if os.path.exists(d) : print ("EXISTS: %s" % d)
else : print("DOES NOT EXIST: %s" % d)
We check if all GOODS-North catalogs are in place:
In [ ]:
gndfile = os.path.join(catdir,'goodsn_3dhst.v4.3.cat')
gndzoutfile = os.path.join(catdir,'goodsn_v4.3.zout')
gndlinefile = os.path.join(linedir,'GN_CLEAR.linefit.concat.v1.0.0.fits')
gndzfitfile = os.path.join(linedir,'GN_CLEAR.zfit.concat.v1.0.0.fits')
gndfoutfile = os.path.join(catdir,'goodsn_v4.3.fout')
#gndfoutfile = os.path.join(threeddir,'goodsn_3dhst.v4.1.cats','Fast','goodsn_3dhst.v4.1.fout')
# all need to exist:
for file in [gndfile, gndzoutfile, gndlinefile, gndzfitfile,gndfoutfile] :
if os.path.exists(file) : print ("EXISTS: %s" % file)
else: print("DOES NOT EXIST: %s" % d)
We check if all GOODS-South catalogs are in place:
In [ ]:
gsdfile = os.path.join(catdir,'goodss_3dhst.v4.3.cat')
gsdzoutfile = os.path.join(catdir,'goodss_v4.3.zout')
gsdlinefile = os.path.join(linedir,'GS_CLEAR.linefit.concat.v1.0.0.fits')
gsdzfitfile = os.path.join(linedir,'GS_CLEAR.zfit.concat.v1.0.0.fits')
gsdfoutfile = os.path.join(catdir,'goodss_v4.3.fout')
#gsdfoutfile = os.path.join(threeddir,'goodss_3dhst.v4.1.cats/','Fast','goodss_3dhst.v4.1.fout')
for file in [gndfile, gndzoutfile, gndlinefile, gndzfitfile,gndfoutfile] :
if os.path.exists(file) : print ("EXISTS: %s" % file)
else: print("DOES NOT EXIST: %s" % d)
In [ ]:
# The COMBINED Directory is expected to look like this:
#for d in os.listdir(combineddir) :
for dls in os.listdir(combineddir) :
if dls != ".DS_Store" :
for d in os.listdir(os.path.join(combineddir,dls)) :
if d != ".DS_Store" :
print(os.path.join(dls,d))
In [ ]:
# define a routine to read in all the catalogs:
def loadclear( catfile, zoutfile, foutfile, zfitfile, linefile, doprint=False) :
cat = Table.read(catfile, format='ascii').to_pandas()
zcat = Table.read(zoutfile, format='ascii').to_pandas()
fcat = Table.read(foutfile, format='ascii').to_pandas()
zfitcat = Table.read(zfitfile).to_pandas()
linecat = Table.read(linefile).to_pandas()
return(cat, zcat, fcat, zfitcat, linecat)
Open all the catalogs. This may take a few seconds, we are opening all 10 files at once.
In [ ]:
gnd, gndz, gndf, gndzfit, gndline = loadclear(gndfile, gndzoutfile, gndfoutfile,
gndzfitfile, gndlinefile)
gsd, gsdz, gsdf, gsdzfit, gsdline = loadclear(gsdfile, gsdzoutfile, gsdfoutfile,
gsdzfitfile, gsdlinefile)
In the above, we now have
gnd: CLEAR photometric catalog for GOODS-N Deep. This is ID matched to 3DHST, but includes the HST WFC3 Y-band photometry.
gndz: CLEAR EAZY file (z-phot) based on the broad-band photometry.
gndf: CLEAR FOUT file (from EAZY) - has mass information, but something is odd with it...
gndzfit: CLEAR G102 grism redshift fits from Iva, NOT LINEMATCHED
gndlinefit: CLEAR G102 grism emission line identification using the zfit's, NOT LINEMATCHED
Similar files for gsd for GOODS-S Deep.
Below you can display some of these:
In [ ]:
# limit display to 10 rows:
pd.options.display.max_rows=10
# select only objects with "use==1" and display them to see some of their properties:
# scroll down to see all tables and to the right to see all columns
# Note: they all have the same number of rows!
ok = (gnd['use']==1)
display(gnd[ok],gndz[ok],gndf[ok])
In [ ]:
# show the concatenated z_grism and emission line tables:
# Note: these are much shorter
display(gndzfit,gndline)
In [ ]:
#tok = (np.where((gndline['Hb_EQW'] > 10) & (gndline['OIII_EQW'] > 40)))[0]
tok = (gndline['Hb_EQW'] > 10) & (gndline['OIII_EQW'] > 40) & (gndline['Hb_EQW']/gndline['Hb_EQW_ERR'] > 3) & (gndline['OIII_EQW']/gndline['OIII_EQW_ERR'] > 3)
print("There are %d objects which fit these criteria" % (np.sum(tok)))
pd.options.display.max_rows=9999
display(gndline.loc[tok])
pd.options.display.max_rows=10
From above, take the fourth entry above (line 39 in the full catalog), PHOT_ID=33115 in GND, print it's information and display the files associated with it:
If you want to see a bad emission line, change these to the object is line 31 (PHOT_ID = 32632).
In [ ]:
print(gndzfit['phot_id'][39])
In [ ]:
SearchID = 33115
ok = (np.where(gndzfit['phot_id'] == SearchID))
pd.options.display.max_rows=9999
display(gndzfit.loc[ok], gndline.loc[ok])
Let's find what files are available for this object in the directories.
In [ ]:
# List all the subdirectories available under the 'combineddir'
os.listdir(combineddir)
There are a total of 14 files for the combined spectrum of this object and out of the :
In [ ]:
### I will search all GN* directories for files associated with this object:
files_all = glob('%s/*/*/*%s*' % (combineddir, SearchID))
print('There are a total of %d files for the combined spectrum of this object.' % (len(files_all)))
### Uncommend the lines below if you want to list the files
#for file in files:
# print(file)
Looks like this object is only found in the GN3 field.
There are several PNG files which are meant to be for diagnostic purposes.
In [ ]:
files_png = glob('%s/*/*/*%s*.png' % (combineddir, SearchID))
print('Found %d PNG files.' % len(files_png))
for file in files_png:
print(file)
The first file in this list, the GN3-G102_32632_stack.png file, shows all the individual spectra that went into the stack. Let's look at it.
In [ ]:
Image(files_png[0])
The plot shows a row for each spectrum in the final stack with the bottom-most spectrum showing the final co-add. The left column shows the observed spectra, the middle one is the calculated contamination, the right one if flux minus contamination. The top two spectra (from pointings GDN18 and GDN22) are from the Barro GO program. The lines clearly show in all PAs which indicates that they are real lines.
In [ ]:
### Change 4 to 1,2,3,5 to see the other diagnostic plots
Image(files_png[4])
The plot above shows the (1) grism spectrum counts/s vs. lambda (2) same converted to flux vs lambda (3) p(z) vs z and (4) best fit SED, photometric points, spectrum.
The lines Hb and OIII are clearly visible. The p(z) matches the ground-based spec_z really well.
Now let's look at some of the FITS files and see where the data for these plots came from.
In [ ]:
files_fits = glob('%s/*/*/*%s*.fits' % (combineddir, SearchID))
print('Found %d FITS files.' % len(files_fits))
for file in files_fits:
file = file.split('/')[-1]
print(file)
In [ ]:
### This is the content of the 2D file, this is the file which contains the stack:
print(files_fits[1])
twod_spec = fits.open(files_fits[1])
twod_spec.info()
twod_spec[0].header
The 0th extension is header whith some information about the spectrum. View is with foo[0].header
. All the spectra which went into the stack are in the TWOD
keywords in this header.
Extensions 1,2,3 and 4 are postage stams from the science direct image, interlaced image, weight map and segmentation map respectively (hint: they are sqare so clearly not spectra, see the dimentions in the 4th column).
Extensions 5,6,7 and 8 are spectral 2D arrays containing the spectrum (INCLUDING CONTAMINATION), the error array, the model (of the spectrum itself) and the contramination model (of everything else) respectively.
Extension 9, 10 and 11 are 1D arrays containing the wavelength solution, the sensitivity and the trace (in y) at each pixel.
In [ ]:
plt.figure(figsize=(20,10))
plt.subplot(1, 4, 1)
plt.imshow(twod_spec[1].data)
plt.subplot(1, 4, 2)
plt.imshow(twod_spec[2].data)
plt.subplot(1, 4, 3)
plt.imshow(twod_spec[3].data)
plt.subplot(1, 4, 4)
plt.imshow(twod_spec[4].data)
In [ ]:
plt.figure(figsize=(20,10))
plt.subplot(5, 1, 1)
plt.imshow(twod_spec[5].data, vmin=0.0, vmax=0.01)
plt.subplot(5, 1, 2)
plt.imshow(twod_spec[5].data-twod_spec[8].data, vmin=0.0, vmax=0.01)
plt.subplot(5, 1, 3)
plt.imshow(twod_spec[6].data, vmin=0.0, vmax=0.01)
plt.subplot(5, 1, 4)
plt.imshow(twod_spec[7].data, vmin=0.0, vmax=0.01)
plt.subplot(5, 1, 5)
plt.imshow(twod_spec[8].data, vmin=0.0, vmax=0.01)
Here we make a pandas data frame, matching by ID numbers only those objects in the CLEAR ZFIT and LINE catalogs:
We will use the pandas (pd) merge routine (pd.merge()). (You can also use pd.concat, but that matches line-by-line.). There are other routines to match by RA and Dec.
When the routine below is done, we have one dataframe with all the information from the other dataframes in it, but only for objects that appear in the CLEAR zfit and linefit catalogs.
In [ ]:
# could use pd.concat, but this matches line-by-line
# gsddf = pd.concat([gsd,gsdz],axis=1)
#gsddf = pd.concat([gsddf,gsdf],axis=1)
# default is join='outer', which pads cells with NaNs (that's what you want)
# join='inner' only keeps rows that are in both catalogs.
# the axis=1 means to match by ID number
#
# The only caveat is if columns in the different dataframes have the same column name - not sure what happens then.
#display(gnddf)
# INSTEAD: here we use PANDAS MERGE routine to match based on keys (on="key") that are the name of the columns.
def domerge(gnd, gndz, gndf, gndzfit, gndlinefit) :
# add 'id' to zfit and linefit so we can key off those.
# could also use .merge(left, right, left_on='phot_id', right_on='id')....
gndzfit['id'] = gndzfit['phot_id']
gndlinefit['id'] = gndlinefit['phot_id']
#
gnddf = gndzfit
print("Initial number of objects with grism zfits: %i" % len(gnddf))
for f in [gnd, gndz, gndf, gndlinefit] :
gnddf = pd.merge(gnddf,f,on='id',how='inner')
print("Final number (includes objects with different line identifications: %i" % len(gnddf))
print()
return(gnddf)
print("The reason there are more objects in the final than initial is that about ~100 objects fall in regions of overlap between CLEAR fields\n ")
gnddf = domerge(gnd,gndz,gndf, gndzfit, gndline)
gsddf = domerge(gsd,gsdz,gsdf, gsdzfit, gsdline)
In [ ]:
# Let's look at the Duplicates -
# this only finds objects that have the same "phot_id" and lists them:
ok = gnddf['id'].duplicated()
#pd.options.display.max_columns = 10
display(gnddf[ok])
In [ ]:
# Let's look at object 34077
ok=np.where((gnddf['phot_id_x']==34077))[0]
display(gnddf.iloc[ok])
#ok
#display(gndline[ok])
In [ ]:
sname = [gnddf['grism_id_x'][ok[i]].decode('ASCII') for i in range(len(ok))]
sname
In [ ]:
fig, axes = plt.subplots(2,1, figsize=(50,25))
#for s, i, j in zip(sname,[0,1,0,1], [0,0,1,1]) :
for s, i in zip(sname[1:3], np.arange(0,2)) :
img=mpimg.imread('%s/2D/PNG/%s_stack.png' % (combineddir,s))
#img=mpimg.imread('%s/2D/PNG/%s_stack.png' % (combineddir, s))
axes[i].imshow(img)
In [ ]:
pd.options.display.max_rows=2
pd.options.display.max_columns=9999
display(gnddf)
pd.options.display.max_rows=10
In [ ]:
# select "usable" objects:
okn = ((gnddf.use == 1) & (gnddf.z_peak_grism > 0))
oks = ((gsddf.use == 1) & (gsddf.z_peak_grism > 0))
print(len(okn),len(oks))
In [ ]:
# Plot z(phot) against z(grism)
#mpl.rc('xtick', labelsize=20)
#mpl.rc('ytick', labelsize=20)
plt.rcParams.update({'font.size': 22})
fig, axes = plt.subplots(2,1, figsize=(15,15))
axes[0].scatter( gnddf['z_peak_phot'][okn], gnddf['z_peak_grism'][okn])
axes[1].scatter( gsddf['z_peak_phot'][oks], gsddf['z_peak_grism'][oks])
for ax, label in zip(axes,['GND','GSD']) :
ax.set_xlabel('z(phot)')
ax.set_ylabel('z(grism)')
ax.set_xlim([0,5])
ax.set_ylim([0,5])
ax.plot([0,5],[0,5],'-r')
ax.text(0.5,4.5,label)
In [ ]:
gndfastfile = os.path.join(threeddir,
'goodsn_3dhst.v4.1.cats','Fast',
'goodsn_3dhst.v4.1.fout')
gsdfastfile = os.path.join(threeddir,
'goodss_3dhst.v4.1.cats','Fast',
'goodss_3dhst.v4.1.fout')
def readfast(fastfile,doprint=True) :
#fcolnames = getcol(fastfile, doprint=doprint)
fcat = Table.read(fastfile, format='ascii').to_pandas()
#fcat = pd.read_table(fastfile,comment='#',
# delim_whitespace=True, names=fcolnames)
return(fcat)
gndfast = readfast(gndfastfile)
gsdfast = readfast(gsdfastfile)
In [ ]:
# match the CLEAR grism line catalogs with the 3DHST FAST catalogs based on ID number
gndEW = pd.merge(gndline,gndfast,on='id',how='inner')
gsdEW = pd.merge(gsdline,gndfast, on='id', how='inner')
pd.options.display.max_rows=2
display(gndEW)
pd.options.display.max_rows=10
In [ ]:
zmin = (8500./6563. - 1)
zmax = (12500./6563. - 1)
okn = ((gndEW.Ha_EQW > 1) & (gndEW.z_max_grism > zmin) &
(gndEW.z_max_grism < zmax))
oks = ((gsdEW.Ha_EQW > 1) & (gsdEW.z_max_grism > zmin) &
(gsdEW.z_max_grism < zmax))
print("# of GND galaxies with Ha and %4.2f < z < %4.2f = %i" %
(zmin, zmax, len((np.where(okn==True))[0])))
print("# of GSD galaxies with Ha and %4.2f < z < %4.2f = %i" %
(zmin, zmax, len((np.where(oks==True))[0])))
In [ ]:
plt.rcParams.update({'font.size': 22})
fig, axes = plt.subplots(2,1, figsize=(15,15))
axes[0].scatter( gndEW['lmass'][okn], gndEW['Ha_EQW'][okn])
axes[1].scatter( gsdEW['lmass'][oks], gsdEW['Ha_EQW'][oks])
#axes[1].scatter( gsddf['z_peak_phot'][oks], gsddf['z_peak_grism'][oks])
for ax, label in zip(axes,['GND','GSD']) :
ax.set_xlabel('log Mass')
ax.set_ylabel(r'EW(H$\alpha$)')
ax.set_ylim([1,1000])
ax.set_yscale("log", nonposy='clip')
ax.set_xlim([8,11.5])
# #ax.plot([0,5],[0,5],'-r')
ax.text(11,500,label)
In [ ]:
In [ ]: