ToDo: Update to DR5
The goal of this notebook is to use observations of a large number of luminous red galaxies (LRGs) with DECaLS/DR3 grzW1W2 photometry and ancillary spectroscopic redshifts to generate a set of spectroscopic templates suitable for spectroscopic simulations, targeting investigations, and other DESI-related projects.
The parent sample consists of 2,511,248 LRGs selected from the official DECaLS/DR3.1 target catalog targets-dr3.1-0.14.0.fits
using the selection criterion
DESI_TARGET & desi_mask.LRG != 0
Subsequently, Rongpu Zhou (Pittsburgh) assembled spectroscopic redshifts for the sample from SDSS/Main, SDSS/BOSS, AGES, DEEP2, and VIPERS, supplemented with COSMOS medium-band photometric redshifts, yielding precise redshifts for 62,177 objects (2.4% of the parent sample). Using these data, Zhou used a random forest technique (described elsewhere) to compute photometric redshifts for the complete sample, producing the catalog dr3.1-0.14.0-lrg-rf-photoz-0.2.fits
, which we use as input into the generation of the spectral templates.
Specifically, we generate templates using the following procedure:
legacysurvey_lrg_ckc14z_kroupa01_charlot_sfhgrid01.fits
. In addition, we compute K-corrected absolute magnitudes in the DECaLS g-, r-, and z-band bandpasses and store them in the file legacysurvey_lrg_ckc14z_kroupa01_charlot_sfhgrid01_kcorr.z0.0.fits
. The code that generates these files is desi_legacysurvey_lrg_isedfit.
In [1]:
import os
import numpy as np
from astropy.table import Table, hstack
from scipy.spatial import cKDTree as KDTree
In [2]:
import matplotlib.pyplot as plt
from corner import corner
In [3]:
plt.style.use('seaborn-talk')
%matplotlib inline
In [4]:
seed = 123
rand = np.random.RandomState(seed)
In [5]:
isedfit_version = 'v1.0'
templates_version = 'v2.0'
In [6]:
isedfit_dir = os.path.join(os.getenv('DESI_ROOT'), 'spectro', 'templates', 'lrg_templates', 'isedfit', isedfit_version)
In [7]:
metadata_outfile = os.path.join(isedfit_dir, 'lrg-templates-isedfit-{}.fits'.format(isedfit_version))
In [8]:
def read_isedfit():
"""Read the iSEDfit fitting results."""
isedfile = os.path.join(isedfit_dir, 'legacysurvey_lrg_ckc14z_kroupa01_charlot_sfhgrid01.fits.gz')
kcorrfile = os.path.join(isedfit_dir, 'legacysurvey_lrg_ckc14z_kroupa01_charlot_sfhgrid01_kcorr.z0.0.fits.gz')
print('Reading {}'.format(isedfile))
ised = Table.read(isedfile)
print('Reading {}'.format(kcorrfile))
kcorr = Table.read(kcorrfile)
snrmin = 3.0
chi2min = 10
keep = np.where(
(ised['CHI2'] < chi2min) *
(np.sum(ised['MAGGIES'] * np.sqrt(ised['IVARMAGGIES']) > snrmin, axis=1) == 5)
)[0]
print('Read {} galaxies with chi2 < {} and S/N > {} in all 5 photometric bands.'.format(
len(keep), chi2min, snrmin))
cat = dict()
cat['redshift'] = kcorr['Z'][keep].data
cat['Mstar'] = ised['MSTAR_50'][keep].data
cat['Mg'] = kcorr['ABSMAG'][keep, 0].data
cat['Mr'] = kcorr['ABSMAG'][keep, 1].data
cat['Mz'] = kcorr['ABSMAG'][keep, 2].data
cat['gr'] = cat['Mg'] - cat['Mr']
cat['rz'] = cat['Mr'] - cat['Mz']
with np.errstate(invalid='ignore'):
cat['grobs'] = -2.5 * np.log10( ised['MAGGIES'][keep, 0].data / ised['MAGGIES'][keep, 1].data )
cat['rzobs'] = -2.5 * np.log10( ised['MAGGIES'][keep, 1].data / ised['MAGGIES'][keep, 2].data )
cat['zW1obs'] = -2.5 * np.log10( ised['MAGGIES'][keep, 2].data / ised['MAGGIES'][keep, 3].data )
#mm = - 2.5 * np.log10(ised['MAGGIES'][keep, 0].data)
#_ = plt.hist(mm, bins=100)
kcorr.remove_columns(['Z', 'ISEDFIT_ID', 'MAGGIES', 'IVARMAGGIES'])
out = hstack([ised[keep], kcorr[keep]])
return cat, out
In [9]:
cat, ised = read_isedfit()
In [10]:
print(cat.keys())
In [11]:
ised[:2]
Out[11]:
In [12]:
zlim, Mrlim, grlim, rzlim = (0.2, 1.1), (-21, -25), (0.2, 1.2), (0.2, 0.8)
In [13]:
def qa_rest_corner():
fig = corner(np.vstack((cat['redshift'], cat['Mr'], cat['gr'], cat['rz'])).T,
labels=('Redshift', r'$M_{0.0r}$', r'$^{0.0}(g - r)$', r'$^{0.0}(r - z)$'),
range=(zlim, Mrlim, grlim, rzlim))
In [14]:
qa_rest_corner()
In [15]:
def qa_rest():
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
ax1.hexbin(cat['redshift'], cat['Mr'], mincnt=1, bins='log',
cmap=plt.cm.get_cmap('RdYlBu'))
ax1.set_ylim(Mrlim)
ax1.set_xlim(zlim)
ax1.set_xlabel('Redshift')
ax1.set_ylabel(r'$M_{0.0r}$')
ax2.hexbin(cat['redshift'], cat['gr'], mincnt=1, bins='log',
cmap=plt.cm.get_cmap('RdYlBu'))
ax2.set_xlim(zlim)
ax2.set_ylim(grlim)
ax2.set_xlabel('Redshift')
ax2.set_ylabel(r'$^{0.0}(g - r)$')
ax3.hexbin(cat['rz'], cat['gr'], mincnt=1, bins='log',
cmap=plt.cm.get_cmap('RdYlBu'))
ax3.set_xlabel(r'$^{0.0}(r - z)$')
ax3.set_ylabel(r'$^{0.0}(g - r)$')
ax3.set_ylim(grlim)
ax3.set_xlim(rzlim)
hb = ax4.hexbin(cat['Mr'], cat['gr'], mincnt=1, bins='log',
cmap=plt.cm.get_cmap('RdYlBu'))
ax4.set_xlabel(r'$M_{0.0r}$')
ax4.set_ylabel(r'$^{0.0}(g - r)$')
ax4.set_xlim(Mrlim)
ax4.set_ylim(grlim)
cax = fig.add_axes([0.88, 0.15, 0.02, 0.7])
fig.colorbar(hb, cax=cax, label=r'$\log_{10}$ (Number of Galaxies)')
for aa in (ax1, ax2, ax3, ax4):
aa.grid(True)
plt.subplots_adjust(wspace=0.2, right=0.85)
In [16]:
qa_rest()
In [17]:
grobslim, rzobslim = (0.5, 3), (0.8, 2.2)
In [18]:
def qa_obs():
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
ax1.hexbin(cat['rzobs'], cat['grobs'], mincnt=1, bins='log',
cmap=plt.cm.get_cmap('RdYlBu'))
ax1.set_xlabel(r'$(r - z)_{\rm obs}$')
ax1.set_ylabel(r'$(g - r)_{\rm obs}$')
hb = ax2.hexbin(cat['rzobs'], cat['zW1obs'], mincnt=1, bins='log',
cmap=plt.cm.get_cmap('RdYlBu'))
cb = plt.colorbar(hb)
cb.set_label(r'$\log_{10}$ (Number of Galaxies)')
ax2.set_ylabel(r'$(r - z)_{\rm obs}$')
ax2.set_xlabel(r'$(z - W1)_{\rm obs}$')
for aa in (ax1, ax2):
aa.grid(True)
In [19]:
qa_obs()
Here's the black magic. The question is: what minimum set templates can be used to describe this population of LRGs. This problem is NP hard, although there are methods for solving it (see, e.g., the SetCoverPy algorithm). Here we opt for a much simpler procedure.
Based on the preceding plots, we have opted to use the rest-frame g-r and r-z color together with the r-band absolute magnitude to identify "like" galaxies. Specifically, we simply choose a random subset of NPERBIN galaxies in each 3D bin (containing at least MINCOUNT objects) of rest-frame color-color-magnitude space. This procedure has the benefit of being relatively simple and yielding diversity of templates.
In addition, the NPERBIN and MINCOUNT parameters (as well as the size of the bins) can be tuned to yield the desired number of total templates. Here, we opt for approximately ~1100 templates.
In [20]:
def group_templates(X_in, lim_in, delta_in):
"""Use scipy's binned_statistic_dd method to assigned galaxies to
fixed input bins.
"""
from scipy.stats import binned_statistic_dd
nbins = [np.ceil(np.ptp(lim) / delta).astype('int') for lim, delta in zip(lim_in, delta_in)]
X = np.vstack( X_in ).T
stat, edges, num = binned_statistic_dd(X, _, statistic='count', bins=nbins, range=lim_in)
index = [np.where(num == ii)[0] for ii in np.unique(num)]
count = np.array([len(aa) for aa in index])
return index, count
In [21]:
if False:
X_in = (cat['redshift'], cat['gr'], cat['rz'])
lim_in = (zlim, grlim, rzlim)
delta_in = (0.05, 0.05, 0.05)
else:
X_in = (cat['Mr'], cat['gr'], cat['rz'])
lim_in = (np.flip(Mrlim, 0), grlim, rzlim)
delta_in = (0.1, 0.05, 0.05)
In [22]:
%time index, count = group_templates(X_in, lim_in, delta_in)
In [23]:
print('Identified {} color-color bins with at least one galaxy:'.format(len(index)))
for mnmx in ((1, 5), (5, 10), (10, 30), (30, 100), (100, count.max()+1)):
cnt = np.sum( (count >= mnmx[0]) * (count < mnmx[1]) ).astype('int')
print(' {:03} bins with {} - {}'.format(cnt, mnmx[0], mnmx[1]))
In [24]:
def choose_templates(mincount=3, nperbin=2):
"""Return the indices of the chosen templates and the responsibility,
or the number of galaxies in that template's bin.
"""
if nperbin > mincount:
print('NPERBIN should be less than or equal to MINCOUNT.')
iarch = list()
norm = list()
good = np.where(count >= mincount)[0]
for gg in good:
iarch.append(rand.choice(index[gg], nperbin, replace=False))
iarch = np.hstack(iarch)
resp = count[good]
print('Chose {} templates to represent {} galaxies.'.format(len(iarch), np.sum(resp)))
return iarch, resp
In [25]:
iarch, resp = choose_templates()
In [26]:
def _markers():
size = 50 * (1+(resp - resp.min()) / resp.ptp())
shade = (cat['gr'][iarch] - cat['gr'][iarch].min()) / cat['gr'][iarch].ptp()
col = plt.cm.coolwarm(shade)
return size, col
In [27]:
def qa_templates():
"""Generate a color-color plot with the symbol size scaled by the responsibility.
"""
size, col = _markers()
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
ax1.scatter(cat['rzobs'], cat['grobs'], s=10, c='lightgray', alpha=0.5, edgecolor='k')
ax1.scatter(cat['rzobs'][iarch], cat['grobs'][iarch], c=col, marker='o', s=size,
alpha=0.9, edgecolor='k')
ax1.set_xlabel(r'$(r - z)_{\rm obs}$')
ax1.set_ylabel(r'$(g - r)_{\rm obs}$')
ax2.scatter(cat['zW1obs'], cat['rzobs'], s=10, c='lightgray', alpha=0.5, edgecolor='k')
ax2.scatter(cat['zW1obs'][iarch], cat['rzobs'][iarch], c=col, marker='o', s=size,
alpha=0.9, edgecolor='k')
ax2.set_ylabel(r'$(r - z)_{\rm obs}$')
ax2.set_xlabel(r'$(z - W1)_{\rm obs}$')
ax3.scatter(cat['rz'], cat['gr'], s=10, c='lightgray', alpha=0.5, edgecolor='k')
ax3.scatter(cat['rz'][iarch], cat['gr'][iarch], c=col, marker='o', s=size,
alpha=0.9, edgecolor='k')
ax3.set_xlabel(r'$^{0.0}(r - z)$')
ax3.set_ylabel(r'$^{0.0}(g - r)$')
ax3.set_ylim(grlim)
ax3.set_xlim(rzlim)
ax4.scatter(cat['Mr'], cat['gr'], s=10, c='lightgray', alpha=0.5, edgecolor='k')
ax4.scatter(cat['Mr'][iarch], cat['gr'][iarch], c=col, marker='o', s=size,
alpha=0.9, edgecolor='k')
ax4.set_xlabel(r'$M_{0.0r}$')
ax4.set_ylabel(r'$^{0.0}(g - r)$')
ax4.set_xlim(Mrlim)
ax4.set_ylim(grlim)
for aa in (ax1, ax2, ax3, ax4):
aa.grid(True)
fig.subplots_adjust(wspace=0.3)
In [28]:
qa_templates()
SFR100 is the star formation rate (SFR) averaged over the previous 100 Myr. Note that templates with higher SFRs are somewhat over-represented in the template set because we choose templates with equal probability, not weighted by the number of galaxies in each cell.
In [29]:
def qa_template_properties():
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), sharey=True)
ax1.hist(ised['MSTAR_50'], normed=True, bins=30, label='Full Sample')
ax1.hist(ised['MSTAR_50'][iarch], normed=True, bins=30, alpha=0.5,
label='Templates')
ax1.set_xlabel(r'$\log_{10}\ (M\ /\ M_{\odot})$')
ax1.set_ylabel('Relative Number')
ax2.hist(ised['SFR100_50'], normed=True, bins=30, label='Full Sample')
ax2.hist(ised['SFR100_50'][iarch], normed=True, bins=30, alpha=0.5,
label='Templates')
ax2.set_xlabel(r'$\log_{10}\ (SFR_{100}\ /\ M_{\odot}\ {\rm yr}^{-1})$')
ax2.legend(loc='upper right')
plt.subplots_adjust(wspace=0.02)
In [30]:
qa_template_properties()
In [31]:
print('Writing {}'.format(metadata_outfile))
ised[iarch].write(metadata_outfile, overwrite=True)
The spectra have to built in IDL using the script build_lrg_basis_templates, which we assume has happend.
In [32]:
from desisim.io import read_basis_templates
In [33]:
flux, wave, meta = read_basis_templates('LRG')
nt = len(meta)
print('Number of templates = {}'.format(nt))
In [34]:
meta
Out[34]:
In [35]:
def plot_subset(nplot=25, ncol=5):
"""Plot a random sampling of the basis templates."""
nspec, npix = flux.shape
nrow = np.ceil(nplot / ncol).astype('int')
these = rand.choice(nspec, nplot, replace=False)
fig, ax = plt.subplots(nrow, ncol, figsize=(2.2*ncol, 2.2*nrow), sharey=True, sharex=True)
for thisax, indx in zip(ax.flat, these):
thisax.plot(wave, flux[indx, :])
thisax.text(0.95, 0.93, '{:0d}'.format(indx), ha='right',
va='top', transform=thisax.transAxes, fontsize=11)
thisax.xaxis.set_major_locator(plt.MaxNLocator(3))
thisax.set_xscale('log')
thisax.set_yscale('log')
fig.subplots_adjust(wspace=0.05, hspace=0.05)
In [36]:
plot_subset()
In [37]:
def zgrid_colors():
"""Compute the colors of the templates on a fixed redshift grid."""
from speclite import filters
filt = filters.load_filters('decam2014-g', 'decam2014-r', 'decam2014-z', 'wise2010-W1')
zmin, zmax, dz = 0.0, 1.0, 0.1
nz = np.round( (zmax - zmin) / dz ).astype('i2')
print('Number of redshift points = {}'.format(nz))
cc = dict(
redshift = np.linspace(0.0, 2.0, nz),
gr = np.zeros( (nt, nz) ),
rz = np.zeros( (nt, nz) ),
zW1 = np.zeros( (nt, nz), )
)
for iz, red in enumerate(cc['redshift']):
zwave = wave.astype('float') * (1 + red)
phot = filt.get_ab_maggies(flux, zwave, mask_invalid=False)
cc['gr'][:, iz] = -2.5 * np.log10( phot['decam2014-g'] / phot['decam2014-r'] )
cc['rz'][:, iz] = -2.5 * np.log10( phot['decam2014-r'] / phot['decam2014-z'] )
cc['zW1'][:, iz] = -2.5 * np.log10( phot['decam2014-z'] / phot['wise2010-W1'] )
return cc
In [38]:
%time cc = zgrid_colors()
In [39]:
grrange = (0.0, 3.0)
rzrange = (0.0, 2.5)
zW1range = (-1, 4)
ntspace = 20 # spacing between model curves
In [40]:
def grz(pngfile=None):
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
hb = ax1.hexbin(cat['rzobs'], cat['grobs'], bins='log',
mincnt=1, extent=rzrange+grrange,
cmap=plt.cm.get_cmap('RdYlBu'))
ax1.set_xlabel(r'$(r - z)_{\rm obs}$')
ax1.set_ylabel(r'$(g - r)_{\rm obs}$')
ax1.set_xlim(rzrange)
ax1.set_ylim(grrange)
ax1.text(0.05, 0.9, 'Data', ha='left', va='bottom',
transform=ax1.transAxes, fontsize=14)
cb = fig.colorbar(hb, ax=ax1)
cb.set_label(r'log$_{10}$ (Number of Galaxies)')
for tt in np.arange(0, nt, ntspace):
ax2.plot(cc['rz'][tt, :], cc['gr'][tt, :], marker='s',
markersize=5, ls='-', alpha=0.5)
for tt in np.arange(0, nt, ntspace):
ax2.scatter(cc['rz'][tt, 0], cc['gr'][tt, 0], marker='o',
facecolors='none', s=80, edgecolors='k',
linewidth=2)
ax2.text(0.1, 0.05, 'z=0', ha='left', va='bottom',
transform=ax2.transAxes, fontsize=14)
ax2.text(0.05, 0.9, 'Models (z=0-1, dz=0.1)', ha='left', va='bottom',
transform=ax2.transAxes, fontsize=14)
ax2.set_xlim(rzrange)
ax2.set_ylim(grrange)
ax2.set_xlabel(r'$(r - z)_{\rm obs}$')
ax2.set_ylabel(r'$(g - r)_{\rm obs}$')
ax2.yaxis.set_label_position('right')
ax2.yaxis.tick_right()
plt.subplots_adjust(wspace=0.15)
if pngfile:
fig.savefig(pngfile)
In [41]:
grz()
In [42]:
def rzW1(pngfile=None):
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
hb = ax1.hexbin(cat['zW1obs'], cat['rzobs'], bins='log',
mincnt=1, extent=rzrange+grrange,
cmap=plt.cm.get_cmap('RdYlBu'))
ax1.set_ylabel(r'$(r - z)_{\rm obs}$')
ax1.set_xlabel(r'$(z - W1)_{\rm obs}$')
ax1.set_xlim(zW1range)
ax1.set_ylim(rzrange)
ax1.text(0.05, 0.9, 'Data', ha='left', va='bottom',
transform=ax1.transAxes, fontsize=14)
cb = fig.colorbar(hb, ax=ax1)
cb.set_label(r'log$_{10}$ (Number of Galaxies)')
for tt in np.arange(0, nt, ntspace):
ax2.plot(cc['zW1'][tt, :], cc['rz'][tt, :], marker='s',
markersize=5, ls='-', alpha=0.5)
for tt in np.arange(0, nt, ntspace):
ax2.scatter(cc['zW1'][tt, 0], cc['rz'][tt, 0], marker='o',
facecolors='none', s=80, edgecolors='k',
linewidth=2)
ax2.text(0.01, 0.05, 'z=0', ha='left', va='bottom',
transform=ax2.transAxes, fontsize=14)
ax2.text(0.05, 0.9, 'Models (z=0-1, dz=0.1)', ha='left', va='bottom',
transform=ax2.transAxes, fontsize=14)
ax2.set_xlim(zW1range)
ax2.set_ylim(rzrange)
ax2.set_xlabel(r'$(z - W1)_{\rm obs}$')
ax2.set_ylabel(r'$(r - z)_{\rm obs}$')
ax2.yaxis.set_label_position('right')
ax2.yaxis.tick_right()
plt.subplots_adjust(wspace=0.15)
if pngfile:
fig.savefig(pngfile)
In [43]:
rzW1()