This notebook is not a complete effort, although it does contain several potentially useful plots which show how QSO, ELG, BGS, and LRG target density varies with Galactic reddening and imaging depth (based on DR5).
The bottom of the notebook also contains a simple regression of target density versus reddening, which we use in `desitarget.mock.targets_truth`.
In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
In [2]:
import pandas as pd
from astropy.table import Table
In [3]:
from sklearn.mixture import GaussianMixture as GMM
In [4]:
import seaborn as sns
sns.set(style='white', font_scale=1.1, palette='deep')
In [5]:
%matplotlib inline
In [6]:
def read_infotable():
    """
    Read the DR5 healpixel info table and return it as a pandas DataFrame.

    Reads ``hp-info-dr5-0.17.1.fits`` from ``$DESI_ROOT/target/catalogs``,
    converts each per-target density column to log10, and drops the per-band
    depth-percentile columns.  Requires the DESI_ROOT environment variable
    to be set.
    """
    infofile = os.path.join(os.getenv('DESI_ROOT'), 'target', 'catalogs', 'hp-info-dr5-0.17.1.fits')
    info = Table.read(infofile)
    # Per-band depth-percentile columns are not needed downstream.
    cols = ['{}DEPTH_{}_PERCENTILES'.format(prefix, band) for band in ('G', 'R', 'Z') for prefix in ('PSF', 'GAL')]
    for target in ('ELG', 'LRG', 'QSO', 'BGS'):
        # log10 of zero-density healpixels yields -inf; silence the warning
        # here and filter the non-finite values later in the notebook.
        with np.errstate(all='ignore'):
            info['DENSITY_{}'.format(target)] = np.log10(info['DENSITY_{}'.format(target)])
    info.remove_columns(cols)
    return info.to_pandas()
In [7]:
# Load the DR5 healpixel summary table (log densities, depths, EBV).
_info = read_infotable()
_info
Out[7]:
In [8]:
minexp = 3  # minimum number of exposures required in each of g, r, z
In [9]:
# Keep healpixels with >= minexp exposures in each of g, r, and z AND with
# finite (i.e., non-zero) log target densities for all four target classes.
info = _info.loc[(_info['NEXP_G'] >= minexp) & (_info['NEXP_R'] >= minexp) &
                 (_info['NEXP_Z'] >= minexp) &
                 np.isfinite(_info['DENSITY_ELG']) &
                 np.isfinite(_info['DENSITY_LRG']) &
                 np.isfinite(_info['DENSITY_BGS']) &
                 np.isfinite(_info['DENSITY_QSO'])
                 #(_info['DENSITY_ELG'] < 5000) & (_info['DENSITY_ELG'] > 0) &
                 #(_info['DENSITY_LRG'] < 1500) & (_info['DENSITY_LRG'] > 0) &
                 #(_info['DENSITY_BGS'] < 6000) & (_info['DENSITY_BGS'] > 0) &
                 #(_info['DENSITY_QSO'] < 1000) & (_info['DENSITY_QSO'] > 0)
                 ]
print('There are {} / {} healpixels with >={} exposures in grz.'.format(len(info), len(_info), minexp))
In [10]:
def pairplot(target, only_dust=False, data=None):
    """
    Corner plot of (log) target density against imaging properties.

    Parameters
    ----------
    target : str
        Target class ('ELG', 'LRG', 'QSO', or 'BGS'); selects the
        'DENSITY_<target>' column.
    only_dust : bool, optional
        If True, plot density against E(B-V) only; otherwise also include
        the grz galaxy depths.
    data : DataFrame, optional
        Table to plot.  Defaults to the notebook-global `info` DataFrame
        (kept for backward compatibility; passing `data` explicitly avoids
        the hidden-global dependence).
    """
    if data is None:
        data = info  # notebook-global table of healpixel statistics
    if only_dust:
        cols = ['DENSITY_{}'.format(target), 'EBV']
    else:
        cols = ['DENSITY_{}'.format(target), 'GALDEPTH_G', 'GALDEPTH_R', 'GALDEPTH_Z', 'EBV']
    print(data[cols].describe())
    # NOTE(review): 'size' was renamed 'height' in seaborn 0.9 and removed in
    # later releases -- update this keyword if running with a newer seaborn.
    sns.pairplot(data, vars=cols, diag_kind='hist', plot_kws={'s': 3}, size=2)
In [11]:
# Corner plots of density vs. depth and reddening for each target class.
pairplot('ELG')
In [12]:
pairplot('LRG')
In [13]:
pairplot('QSO')
In [14]:
pairplot('BGS')
Regress just the QSOs against the dust reddening for now, to demonstrate the method. Unfortunately, a simple mixture of Gaussians does not appear to be a good representation of the data (i.e., the Bayesian information criterion, BIC, does not converge), so this approach requires more work.
In [15]:
ncomparray = np.arange(10, 50)  # grid of mixture component counts to scan with the BIC
In [16]:
def qa_bic(ncomp, bic, title):
    """Plot the Bayesian information criterion versus the number of
    Gaussian mixture components."""
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(ncomp, bic, marker='s', ls='-')
    ax.set(xlabel='Number of Gaussian Components',
           ylabel='Bayesian Information Criterion',
           title=title)
In [17]:
def getbic(target, ncomp=(3,), data=None):
    """
    Fit Gaussian mixture models to (density, EBV) and return the BIC.

    Parameters
    ----------
    target : str
        Target class ('ELG', 'LRG', 'QSO', or 'BGS'); selects the
        'DENSITY_<target>' column.
    ncomp : sequence of int, optional
        Component counts to try.  (The original mutable-list default was
        replaced with an equivalent tuple to avoid the shared mutable
        default pitfall.)
    data : DataFrame, optional
        Table to fit.  Defaults to the notebook-global `info` DataFrame,
        preserving the original behavior.

    Returns
    -------
    list of float
        Bayesian information criterion for each entry of `ncomp`.
    """
    if data is None:
        data = info  # fall back to the notebook-global healpixel table
    cols = ['DENSITY_{}'.format(target), 'EBV']
    X = data[cols]
    bic = [GMM(n_components=nc).fit(X).bic(X) for nc in ncomp]
    return bic
In [18]:
# Scan the BIC over the component grid for the QSOs (slow) and plot it.
%time bic_qso = getbic('QSO', ncomparray)
In [19]:
qa_bic(ncomparray, bic_qso, title='QSO')
In [20]:
qsocols = ['DENSITY_QSO', 'EBV']  # columns used in the QSO mixture model
In [21]:
# Fit a 25-component Gaussian mixture to (log QSO density, EBV).
mog = GMM(n_components=25).fit(info[qsocols])
In [22]:
print(info[qsocols].describe())
sns.pairplot(info[qsocols], plot_kws={'s': 10})
Out[22]:
In [24]:
# Draw 1000 samples from the mixture for a visual comparison with the data.
# NOTE(review): sample() returns an (X, y) tuple whose X is already 2-D, so
# the np.vstack call appears redundant -- confirm before removing.
samp = pd.DataFrame(np.vstack(mog.sample(1000)[0]), columns=qsocols)
In [25]:
print(samp.describe())
sns.pairplot(samp, plot_kws={'s': 10})
Out[25]:
In [26]:
def medxbin(x, y, binsize, minpts=20, xmin=None, xmax=None):
    """
    Compute the median (and other statistics) of y in fixed bins of x.

    Parameters
    ----------
    x, y : array_like
        Abscissa and ordinate values (same length).
    binsize : float
        Requested width of each bin along the x-axis.
    minpts : int, optional
        Minimum number of points a bin must contain for its statistics to
        be computed; sparser bins are dropped from the output.
    xmin, xmax : float, optional
        Range over which to lay down the bins; default to x.min()/x.max().

    Returns
    -------
    bins : ndarray
        Bin positions for the bins that were kept.
    stats : ndarray
        Structured array with fields 'med' (median), 'sig' (standard
        deviation), 'q25', and 'q75' (quartiles), one row per kept bin.
    """
    if xmin is None:  # 'is None', not '== None' (avoids element-wise comparison)
        xmin = x.min()
    if xmax is None:
        xmax = x.max()
    # Bug fix: the bin count must follow the requested [xmin, xmax] range.
    # The original used np.ptp(x) via 'from scipy import ptp', which no
    # longer exists in scipy and ignored user-supplied limits.
    nbin = int((xmax - xmin) / binsize)
    bins = np.linspace(xmin, xmax, nbin)
    idx = np.digitize(x, bins)
    stats = np.zeros(nbin, [('med', 'f4'), ('sig', 'f4'),
                            ('q25', 'f4'), ('q75', 'f4')])
    for kk in np.arange(nbin):
        npts = len(y[idx == kk])
        if npts > minpts:
            stats['med'][kk] = np.median(y[idx == kk])
            stats['sig'][kk] = np.std(y[idx == kk])
            stats['q25'][kk] = np.percentile(y[idx == kk], 25)
            stats['q75'][kk] = np.percentile(y[idx == kk], 75)
    # Keep only bins that received statistics (med != 0).  NOTE: this would
    # also drop a legitimate bin whose median is exactly zero.
    good = np.nonzero(stats['med'])
    stats = stats[good]
    return bins[good], stats
In [27]:
def density_vs_ebv(info, target, order=2, doplot=True, ylim=None):
    """
    Fit a polynomial to (log) target density versus Galactic reddening.

    Parameters
    ----------
    info : DataFrame
        Healpixel table with 'EBV' and 'DENSITY_<target>' columns (the
        density columns are log10, per read_infotable).
    target : str
        Target class ('ELG', 'LRG', 'QSO', or 'BGS').
    order : int, optional
        Polynomial order of the fit.  NOTE(review): the 'Slope, Intercept'
        labels printed below assume order=1; with the default order=2 the
        printed values are actually the two leading coefficients.
    doplot : bool, optional
        If True, plot the data, running median, and fitted polynomial.
    ylim : tuple, optional
        If given, (ymin, ymax) used both to clip points entering the fit
        and to set the plot's y-limits.

    Returns
    -------
    ndarray
        Polynomial coefficients from np.polyfit (highest power first).
    """
    xx, yy = info['EBV'].values, info['DENSITY_{}'.format(target)].values
    # Optionally clip outliers in log-density before fitting.
    if ylim:
        good = (yy > ylim[0]) * (yy < ylim[1])
    else:
        good = np.ones(len(yy), dtype='bool')
    # Running median of log-density in E(B-V) bins of width 0.01 mag.
    bins, stats = medxbin(xx[good], yy[good], 0.01, minpts=10, xmin=0)
    # Fit the polynomial to the binned medians, not the raw points.
    coeff = np.polyfit(bins, stats['med'], order)
    #from desiutil.funcfits import iter_fit
    #fit, mask = iter_fit(xx, yy, 'polynomial', order, sig_rej=10)
    #good = np.where(mask == 0)[0]
    #coeff = fit['coeff']
    #scatter = np.std(yy[good] - np.polyval(coeff, xx[good]))
    # Convert the mean log10 density back to deg^-2 and propagate the mean
    # log-space scatter: d(10**m) = ln(10) * 10**m * dm.
    med = 10**np.mean(stats['med'])
    sig = np.log(10) * med * np.mean(stats['sig'])
    # Scatter of the (clipped) points about the polynomial fit, in dex.
    scatter = np.std(yy[good] - np.polyval(coeff, xx[good]))
    print('{}: median={:.2f}+/-{:.2f} deg2'.format(target, med, sig))
    print(' Slope, Intercept, scatter = {:.5f}, {:.3f}, {:.3f}'.format(coeff[0], coeff[1],
                                                                       scatter))
    if doplot:
        fig, ax = plt.subplots()
        ax.scatter(xx, yy, s=5, alpha=0.5)
        # Running median with a +/- 1 sigma envelope.
        ax.plot(bins, stats['med'], lw=3, ls='-', color='orange')
        ax.plot(bins, stats['med'] + stats['sig'], lw=3, ls='--', color='orange')
        ax.plot(bins, stats['med'] - stats['sig'], lw=3, ls='--', color='orange')
        #ax.plot(bins, stats['q25'], lw=3, ls='--', color='orange')
        #ax.plot(bins, stats['q75'], lw=3, ls='--', color='orange')
        # Overplot the polynomial fit across the full E(B-V) range.
        xaxis = np.linspace(xx.min(), xx.max(), 100)
        ax.plot(xaxis, np.polyval(coeff, xaxis), color='red', lw=2, alpha=0.5)
        #ax.plot(xaxis, np.polyval(coeff, xaxis) + scatter)
        #ax.plot(xaxis, np.polyval(coeff, xaxis) - scatter)
        ax.set_xlabel(r'$E(B-V)$')
        ax.set_ylabel(r'$\log_{{10}}\ (N\ {}\ /\ {{\rm deg}}^2)$'.format(target))
        if ylim:
            ax.set_ylim(ylim)
    return coeff
In [28]:
# Linear (order=1) fits of log density vs. E(B-V) for each target class;
# the ylim cuts clip outlying healpixels before fitting.
coeff = density_vs_ebv(info, 'QSO', order=1)
In [32]:
coeff = density_vs_ebv(info, 'ELG', order=1, ylim=(2.5, 4))
In [30]:
coeff = density_vs_ebv(info, 'LRG', order=1, ylim=(1.7, 3.3))
In [31]:
coeff = density_vs_ebv(info, 'BGS', order=1, ylim=(2.5, 4))