Model the color, size, and shape of real galaxies (ELG, star forming) sampled by DESI and eBOSS Target Selection


In [1]:
import numpy as np
import os
import pandas as pd
import seaborn as sns
import fitsio
import sys
sys.path.append('/Users/kaylan1/PhdStudent/Research/desi/astrometry.net/astrometry')
from astrometry.util.fits import fits_table, merge_tables
from glob import glob

from sklearn import mixture

# to make this notebook's output stable across runs
np.random.seed(7)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

%load_ext autoreload
%autoreload 2

In [4]:
from obiwan.common import fits2pandas

In [5]:
def stack_tables(fns):
    cat=[]
    assert(len(fns) > 0)
    for fn in fns:
        assert(os.path.exists(fn))
        print('Stacking %s' % fn)
        cat.append( fits_table(fn) )
    return merge_tables(cat, columns='fillzero')

def flux2mag(nmgy):
    return -2.5 * (np.log10(nmgy) - 9)

In [405]:
def my_mixture(X,n_comp=None,cov_type='full'):
    # Compute density via Gaussian Mixtures
    # we'll try several numbers of clusters
    if n_comp is None:
        n_comp = np.arange(1, 16)
    gmms = [mixture.GaussianMixture(n_components=n,
                                    covariance_type= cov_type).fit(X) 
            for n in n_comp]
    BICs = [gmm.bic(X)/X.shape[0] for gmm in gmms]
    i_min = np.argmin(BICs)
    print("%d components" % n_comp[i_min])
    return gmms,i_min, n_comp, BICs

In [452]:
class GaussianMixtureModel(object):
    """
    John's class to read, write, and sample from a mixture model.
    
    Args: 
        weights,means,covars: array-like, from mixture fit
        covar_type: usually 'full'
        py: one of ['27','36']
    """
    def __init__(self, weights, means, covars, 
                 py=None,covar_type='full',is1D=False):
        assert(py in ['27','36'])
        self.py= py
        self.is1D= is1D
        self.weights_ = weights
        self.means_ = means
        self.covariances_ = covars
        if self.is1D:
            self.weights_ = self.weights_.reshape(-1,1)
            self.means_ = self.means_.reshape(-1,1)
            self.covariances_ = self.covariances_.reshape(-1,1,1)
        #    self.n_components, self.n_dimensions = self.means_.shape[0],1
        #else:
        self.n_components, self.n_dimensions = self.means_.shape
        #print(self.weights_.shape,self.covariances_.shape,len(self.covariances_.shape))
        self.covariance_type= covar_type
    
    @staticmethod
    def save(model, filename):
        for name,data in zip(['means','weights','covars'],
                     [model.means_, model.weights_,
                      model.covariances_]):
            fn= '%s_%s.txt' % (filename,name)
            np.savetxt(fn,data,delimiter=',')
            print('Wrote %s' % fn)

    @staticmethod
    def load(filename,py=None,is1D=False):
        d={name:np.loadtxt('%s_%s.txt' % (filename,name),delimiter=',')
           for name in ['means','weights','covars']}
        return GaussianMixtureModel(
                    d['weights'],d['means'],d['covars'],
                    covar_type='full',py=py,is1D=is1D)
    
    def sample(self, n_samples=1, random_state=None):
        assert(n_samples >= 1)
        self.n_samples= n_samples
        if random_state is None:
            random_state = np.random.RandomState()
        self.rng= random_state
        
        if self.py == '2.7':
            X= self.sample_py2()
        else:
            X,Y= self.sample_py3()
        return X
    
    def sample_py2(self):
        weight_cdf = np.cumsum(self.weights_)
        X = np.empty((self.n_samples, self.n_components))
        rand = self.rng.rand(self.n_samples)
        # decide which component to use for each sample
        comps = weight_cdf.searchsorted(rand)
        # for each component, generate all needed samples
        for comp in range(self.n_components):
            # occurrences of current component in X
            comp_in_X = (comp == comps)
            # number of those occurrences
            num_comp_in_X = comp_in_X.sum()
            if num_comp_in_X > 0:
                X[comp_in_X] = self.rng.multivariate_normal(
                    self.means_[comp], self.covariances_[comp], num_comp_in_X)
        return X
    
    def sample_py3(self):
        """Copied from sklearn's mixture.GaussianMixture().sample()"""
        print(self.weights_.shape)
        try:
            n_samples_comp = self.rng.multinomial(self.n_samples, self.weights_)
        except ValueError:
            self.weights_= np.reshape(self.weights_,len(self.weights_))
            n_samples_comp = self.rng.multinomial(self.n_samples, self.weights_)
        if self.covariance_type == 'full':
            X = np.vstack([
                self.rng.multivariate_normal(mean, covariance, int(sample))
                for (mean, covariance, sample) in zip(
                    self.means_, self.covariances_, n_samples_comp)])
        elif self.covariance_type == "tied":
            X = np.vstack([
                self.rng.multivariate_normal(mean, self.covariances_, int(sample))
                for (mean, sample) in zip(
                    self.means_, n_samples_comp)])
        else:
            X = np.vstack([
                mean + self.rng.randn(sample, n_features) * np.sqrt(covariance)
                for (mean, covariance, sample) in zip(
                    self.means_, self.covariances_, n_samples_comp)])

        y = np.concatenate([j * np.ones(sample, dtype=int)
                           for j, sample in enumerate(n_samples_comp)])

        return (X, y)

eBOSS

Exploratory Data analysis


In [781]:
fns_ngc= [os.path.join(os.environ['HOME'],'Downloads',
            'ebossDR3','redshift_tractorcats','eBOSS.ELG.obiwan.eboss%s.fits' % name)
          for name in ['23.v5_10_7']]
fns_sgc= [os.path.join(os.environ['HOME'],'Downloads',
            'ebossDR3','redshift_tractorcats','eBOSS.ELG.obiwan.eboss%s.fits' % name)
         for name in ['2122.v5_10_7']]

ngc=merge_tables([fits_table(fn)
                 for fn in fns_ngc],columns='fillzero')
ngc.set('field',np.array(['ngc']*len(ngc),dtype=object))
sgc=merge_tables([fits_table(fn)
                 for fn in fns_sgc],columns='fillzero')
sgc.set('field',np.array(['sgc']*len(sgc),dtype=object))

print(len(ngc),len(ngc.get_columns()))
print(len(sgc),len(sgc.get_columns()))
for col in sgc.get_columns():
    if not 'decam' in col:
        continue
    print(col,ngc.get(col).shape,sgc.get(col).shape)


Converted brickname from |S8 to <U8
Converted type from |S4 to <U4
Converted chunk from |S7 to <U7
Converted datadir from |S150 to <U150
Converted brickname from |S8 to <U8
Converted type from |S4 to <U4
Converted chunk from |S7 to <U7
Converted datadir from |S150 to <U150
48397 51
108060 57
decam_flux (48397, 6) (108060, 6)
decam_flux_ivar (48397, 6) (108060, 6)
decam_apflux (48397, 48) (108060, 6, 8)
decam_apflux_resid (48397, 48) (108060, 6, 8)
decam_apflux_ivar (48397, 48) (108060, 6, 8)
decam_mw_transmission (48397, 6) (108060, 6)
decam_nobs (48397, 6) (108060, 6)
decam_rchi2 (48397, 6) (108060, 6)
decam_fracflux (48397, 6) (108060, 6)
decam_fracmasked (48397, 6) (108060, 6)
decam_fracin (48397, 6) (108060, 6)
decam_anymask (48397, 6) (108060, 6)
decam_allmask (48397, 6) (108060, 6)
decam_psfsize (48397, 6) (108060, 6)

In [782]:
for col in set(sgc.get_columns()).intersection(set(ngc.get_columns())):
    if 'apflux' in col:
        print('Removing col: ',col)
        sgc.delete_column(col)
        ngc.delete_column(col)
a= merge_tables([ngc,sgc],columns='fillzero')


Removing col:  decam_apflux
Removing col:  decam_apflux_resid
Removing col:  decam_apflux_ivar

In [783]:
isStar= a.z_flag == -1
hasRedshift= a.z_flag == 1
isPrim= a.brick_primary == True
print('isStar & brick primary: %d/%d' % \
     (len(a[(isStar) & (isPrim)]),len(a)))
print('hasRedshift & brick primary: %d/%d' % \
     (len(a[(hasRedshift) & (isPrim)]),len(a)))
a.cut((hasRedshift) & (isPrim))


isStar & brick primary: 924/156457
hasRedshift & brick primary: 124818/156457

In [776]:
for key in ['sector_tsr','chunk',
            'plate','fiberid']:
    print(key,a.get(key)[0],type(a.get(key)[0]))


sector_tsr 0.54 <class 'numpy.float32'>
chunk eboss23 <class 'numpy.str_'>
plate 9564 <class 'numpy.int32'>
fiberid 504 <class 'numpy.int32'>

In [794]:
d={}
for i,b in zip([1,2,4],'grz'):
    d[b+'flux']= a.get('decam_flux')[:,i]
    d[b+'fluxivar']= a.get('decam_flux_ivar')[:,i]
    d[b+'psfsize']= a.get('decam_psfsize')[:,i]
    d[b+'_mw_transmission']= a.get('decam_mw_transmission')[:,i]
d['type']= a.type
d['shapeexp_r']= a.shapeexp_r
d['shapedev_r']= a.shapedev_r

d['redshift']= a.z
d['sector_tsr']= a.sector_tsr
d['field']= a.field

eboss= pd.DataFrame(d)
eboss= eboss.apply(lambda x: x.values.byteswap().newbyteorder())


for i,b in zip([1,2,4],'grz'):
    eboss[b]= flux2mag(eboss[b+'flux'].values/eboss[b+'_mw_transmission'].values)
eboss['g-r']= eboss['g'] - eboss['r']
eboss['r-z']= eboss['r'] - eboss['z']

# SDSS unique identifier is (PLATE,MJD,FIBERID)
for key in ['plate','mjd','fiberid']:
    assert(not key in eboss.columns)
    eboss[key]= a.get(key)


eboss.describe()


Out[794]:
g_mw_transmission gflux gfluxivar gpsfsize r_mw_transmission redshift rflux rfluxivar rpsfsize sector_tsr ... zfluxivar zpsfsize g r z g-r r-z plate mjd fiberid
count 124818.000000 124818.000000 124818.000000 124818.000000 124818.000000 124818.000000 124818.000000 124818.000000 124818.000000 124818.000000 ... 124818.000000 124818.000000 124818.000000 124818.000000 124818.000000 124818.000000 124818.000000 124818.000000 124818.000000 124818.000000
mean 0.887981 0.894358 663.596191 1.585073 0.922737 0.845346 1.695404 404.526794 1.262581 0.925419 ... 41.047676 1.154662 22.523006 21.882172 20.909605 0.640837 0.972565 9394.769817 57891.387572 498.775401
std 0.051016 0.233051 586.811768 0.182776 0.036416 0.220672 0.508343 534.675598 0.186244 0.116810 ... 35.968708 0.145182 0.253308 0.299463 0.361003 0.138784 0.179721 139.772085 152.686943 287.418148
min 0.541891 0.423581 0.000285 0.000000 0.661849 0.000010 0.813217 0.016720 0.000000 0.094000 ... 0.000004 0.000000 21.825092 20.921309 19.572298 0.345093 0.661255 9197.000000 57656.000000 1.000000
25% 0.870798 0.719299 258.765213 1.463014 0.911019 0.764202 1.324715 116.953539 1.139694 0.887000 ... 15.754657 1.049614 22.363008 21.690143 20.667189 0.523310 0.828516 9309.000000 57728.000000 247.000000
50% 0.904013 0.825104 546.302612 1.590574 0.934283 0.831748 1.570839 260.817749 1.223145 0.985000 ... 31.407959 1.124252 22.580585 21.921579 20.939089 0.639626 0.944961 9374.000000 57864.000000 500.000000
75% 0.921569 1.008793 895.015701 1.703754 0.946467 0.920407 1.945317 469.033813 1.336910 0.994000 ... 54.780305 1.229884 22.729773 22.106296 21.178267 0.758045 1.090259 9556.000000 58055.000000 748.000000
max 0.961263 1.770160 11717.317383 4.630108 0.973738 6.737325 4.008598 7636.661621 4.266237 1.000000 ... 569.374512 2.403814 22.899969 22.529026 21.801359 0.927851 1.704117 9630.000000 58086.000000 1000.000000

8 rows × 24 columns


In [795]:
print(eboss['type'].value_counts())
# Remove COMP
isCOMP= eboss['type'].str.strip().values == 'COMP'
eboss= eboss[~isCOMP]
print(eboss['type'].value_counts())
print(eboss['type'].value_counts()/eboss.shape[0])


EXP     73791
SIMP    34960
DEV     10442
PSF      3641
COMP     1984
Name: type, dtype: int64
EXP     73791
SIMP    34960
DEV     10442
PSF      3641
Name: type, dtype: int64
EXP     0.600738
SIMP    0.284612
DEV     0.085009
PSF     0.029642
Name: type, dtype: float64

In [796]:
grz_gt0= ((eboss['gflux'] > 0) &
          (eboss['rflux'] > 0) &
          (eboss['zflux'] > 0) &
          (eboss['gfluxivar'] > 0) &
          (eboss['rfluxivar'] > 0) &
          (eboss['zfluxivar'] > 0))
assert(eboss[grz_gt0].shape[0] == eboss.shape[0])

print(set(eboss['type']))
fwhm_or_rhalf= np.zeros(eboss.shape[0])-1 # arcsec
strip_type= eboss['type'].str.strip().values
isPSF= strip_type == 'PSF'
isEXP= pd.Series(strip_type).isin(['EXP','REX']).values
isSIMP= strip_type == 'SIMP'
isDEV= strip_type == 'DEV'
# rhalf ~ fwhm/2
fwhm_or_rhalf[isPSF]= np.mean(np.array([eboss.loc[isPSF,'gpsfsize'],
                                        eboss.loc[isPSF,'rpsfsize'],
                                        eboss.loc[isPSF,'zpsfsize']]),axis=0)/2
fwhm_or_rhalf[isSIMP]= 0.5
fwhm_or_rhalf[isEXP]= eboss.loc[isEXP,'shapeexp_r']
fwhm_or_rhalf[isDEV]= eboss.loc[isDEV,'shapedev_r']
eboss['fwhm_or_rhalf']= fwhm_or_rhalf


{'DEV ', 'SIMP', 'EXP ', 'PSF '}

DEV are larger than EXP


In [797]:
fig,ax= plt.subplots(1,3,figsize=(10,4))
sns.distplot(eboss['fwhm_or_rhalf'],ax=ax[0])
isSmall= (eboss['fwhm_or_rhalf'] < 10).values
sns.distplot(eboss.loc[isSmall,'fwhm_or_rhalf'],ax=ax[1])
ax[1].set_xlim(0,10)

sns.distplot(eboss.loc[(isSmall) & (isDEV),'fwhm_or_rhalf'],color='b',ax=ax[2],
             label='DEV')
sns.distplot(eboss.loc[(isSmall) & (~isDEV),'fwhm_or_rhalf'],color='r',ax=ax[2],
             label='not DEV')
ax[2].set_xlim(0,10)
ax[2].legend()


Out[797]:
<matplotlib.legend.Legend at 0x138337b38>

DEV are brighter than EXP


In [798]:
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
    for col in range(3):
        i+=1
        if i >= len(attrs):
            continue
        bins=None
        if attrs[i] == 'redshift':
            bins=np.linspace(0,2,20)
        if attrs[i] == 'fwhm_or_rhalf':
            bins=np.linspace(0,10,40)            
        _=ax[row,col].hist(eboss.loc[(~isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
                           color='b',bins=bins,label='not DEV')
        _=ax[row,col].hist(eboss.loc[(isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
                           color='r',bins=bins,label='isDEV')
        ax[row,col].set_xlabel(attrs[i])
        if i == 0:
            ax[row,col].legend(loc='upper left')
ax[1,0].set_xlim(0,2)

#gband
oii_emitters= [21.825, 22.825]
#ax[0,0].set_xlim(oii_emitters)
ax[0,0].axvline(oii_emitters[0],c='k',ls='--')
ax[0,0].axvline(oii_emitters[1],c='k',ls='--')

print('fraction NOT dev: ',eboss[~isDEV].shape[0]/eboss.shape[0])
print('fraction dev: ',eboss[isDEV].shape[0]/eboss.shape[0])


fraction NOT dev:  0.9149909634140384
fraction dev:  0.08500903658596154

NGC is incomplete


In [799]:
isNGC= eboss['field'] == 'ngc'

attrs= ['g','r','z','redshift','fwhm_or_rhalf']
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
    for col in range(3):
        i+=1
        if i >= len(attrs):
            continue
        bins=None
        if attrs[i] == 'redshift':
            bins=np.linspace(0,2,20)
        if attrs[i] == 'fwhm_or_rhalf':
            bins=np.linspace(0,10,40)            
        _=ax[row,col].hist(eboss.loc[(isNGC) & (isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
                           color='b',bins=bins,label='NGC')
        _=ax[row,col].hist(eboss.loc[(~isNGC) & (isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
                           color='r',bins=bins,label='SGC')
        ax[row,col].set_xlabel(attrs[i])
        if i == 0:
            ax[row,col].legend(loc='upper left')
ax[1,0].set_xlim(0,2)

#gband
oii_emitters= [21.825, 22.825]
#ax[0,0].set_xlim(oii_emitters)
ax[0,0].axvline(oii_emitters[0],c='k',ls='--')
ax[0,0].axvline(oii_emitters[1],c='k',ls='--')


Out[799]:
<matplotlib.lines.Line2D at 0x1427e8160>

In [800]:
fig,ax= plt.subplots()           
bins=None
_=ax.hist(eboss.loc[(isNGC) & (isDEV) & (isSmall),'g'],histtype='step',normed=True,
                  color='b',bins=bins,label='DEV in N')
_=ax.hist(eboss.loc[(~isNGC) & (isDEV) & (isSmall),'g'],histtype='step',normed=True,
                  color='r',bins=bins,label='DEV in S')
_=ax.hist(eboss.loc[(isNGC) & (~isDEV) & (isSmall),'g'],histtype='step',normed=True,
                  color='m',bins=bins,label='EXP in N')
_=ax.hist(eboss.loc[(~isNGC) & (~isDEV) & (isSmall),'g'],histtype='step',normed=True,
                  color='k',bins=bins,label='EXP in S')
ax.legend() #loc='upper left')

#gband
oii_emitters= [21.825, 22.825]
#ax[0,0].set_xlim(oii_emitters)
ax.axvline(oii_emitters[0],c='k',ls='--')
ax.axvline(oii_emitters[1],c='k',ls='--')


Out[800]:
<matplotlib.lines.Line2D at 0x17108efd0>

In [801]:
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
    for col in range(3):
        i+=1
        if i >= len(attrs):
            continue
        bins=None
        if attrs[i] == 'redshift':
            bins=np.linspace(0,2,20)
        if attrs[i] == 'fwhm_or_rhalf':
            bins=np.linspace(0,10,40)            
        _=ax[row,col].hist(eboss.loc[(~isNGC) & (isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
                           color='b',bins=bins,label='DEV in S')
        _=ax[row,col].hist(eboss.loc[(isNGC) & (~isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
                           color='r',bins=bins,label='EXP in N')
        _=ax[row,col].hist(eboss.loc[(~isNGC) & (~isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
                           color='k',bins=bins,label='ExP in S')
        ax[row,col].set_xlabel(attrs[i])
        if i == 0:
            ax[row,col].legend(loc='upper left')
ax[1,0].set_xlim(0,2)

#gband
oii_emitters= [21.825, 22.825]
#ax[0,0].set_xlim(oii_emitters)
ax[0,0].axvline(oii_emitters[0],c='k',ls='--')
ax[0,0].axvline(oii_emitters[1],c='k',ls='--')

print('Frac in NGC',eboss[(isNGC) & (isSmall)].shape[0]/eboss[(isSmall)].shape[0])
print('Frac in SGC',eboss[(~isNGC) & (isSmall)].shape[0]/eboss[(isSmall)].shape[0])


Frac in NGC 0.3051520987009596
Frac in SGC 0.6948479012990404

Need sample to extend outside eBOSS TS box

  • Mag Errors won't do it ecause median g,r mag err is 0.05. Some outliers in z dmag, but no matter 0.05 in two bands isn't large enough...
  • DR3-DEEP2 matched smaple in the larger of the two eBOSS boxes, but kinda need the redshift versus g,r,z,rhalf plots to look similar to the eBOSS spectra/tractor data

In [802]:
def ivar_ext_corr(flux_ivar, mw_trans):
    return flux_ivar * np.power(mw_trans,2)
    

def fluxErrorsToMagErrors(flux, flux_invvar):
    """From Dustin Lang's tractor.brightness module
    NanoMaggies().fluxErrorsToMagErrors
    """
    flux = np.atleast_1d(flux)
    flux_invvar = np.atleast_1d(flux_invvar)
    dflux = np.zeros(len(flux))
    okiv = (flux_invvar > 0)
    dflux[okiv] = (1./np.sqrt(flux_invvar[okiv]))
    okflux = (flux > 0)
    mag = np.zeros(len(flux))
    mag[okflux] = (flux2mag(flux[okflux]))
    dmag = np.zeros(len(flux))
    ok = (okiv * okflux)
    dmag[ok] = (np.abs((-2.5 / np.log(10.)) * dflux[ok] / flux[ok]))
    mag[np.logical_not(okflux)] = np.nan
    dmag[np.logical_not(ok)] = np.nan
    return mag.astype(np.float32), dmag.astype(np.float32)


exp_df= eboss.loc[(~isNGC) & (~isDEV) & (isSmall)]
a={}
for b in 'grz':
    #print(exp_df[b+'_mw_transmission'].values)
    flux_EC= exp_df[b+'flux']/exp_df[b+'_mw_transmission']
    ivar_EC= ivar_ext_corr(exp_df[b+'fluxivar'], 
                           exp_df[b+'_mw_transmission'])
    a[b],a[b+'_err']= fluxErrorsToMagErrors(flux_EC,ivar_EC)
    print('max diff',np.max(a[b] - exp_df[b]))


max diff 0.0
max diff 0.0
max diff 0.0

In [ ]:
exp_df['zfluxivar'] > 0.1

In [508]:
exp_df.loc[:,['gflux','gfluxivar','rflux','rfluxivar','zflux','zfluxivar',]].describe()


Out[508]:
gflux gfluxivar rflux rfluxivar zflux zfluxivar
count 77707.000000 77707.000000 77707.000000 77707.000000 77707.000000 77707.000000
mean 0.888922 881.916504 1.707809 548.811218 4.328659 52.991436
std 0.224979 612.729980 0.497227 593.549988 1.494015 37.039284
min 0.423581 15.386562 0.813217 4.425666 1.855282 0.000009
25% 0.722753 531.541199 1.348789 249.192551 3.235148 28.715761
50% 0.823451 766.096497 1.585970 381.044220 3.992064 44.390278
75% 0.997608 1069.143066 1.948330 586.753754 5.091161 66.973400
max 1.758362 11717.317383 4.004512 7636.661621 14.356393 569.374512

In [504]:
pd.DataFrame(a).describe()


Out[504]:
g g_err r r_err z z_err
count 77707.000000 77707.000000 77707.000000 77707.000000 77707.000000 77707.000000
mean 22.517084 0.048797 21.864561 0.036079 20.913521 0.044286
std 0.241931 0.017435 0.288296 0.015156 0.347103 0.199887
min 21.825092 0.008628 20.921309 0.004216 19.572298 0.005758
25% 22.367023 0.037267 21.682262 0.026140 20.679452 0.031045
50% 22.574741 0.046364 21.904720 0.034198 20.943573 0.040345
75% 22.715694 0.056969 22.078629 0.043640 21.171512 0.052133
max 22.824989 0.320735 22.450010 0.385943 21.730873 55.529587

In [507]:
len(a['g'][a['z_err'] > 50])


Out[507]:
1

In [672]:
fig,ax= plt.subplots(1,3,figsize=(10,4))
for i,b in zip(range(3),'grz'):
    bins=None
    if b == 'z':
        bins= np.arange(0,2,0.2)
    sns.distplot(a[b+'_err'],ax=ax[i],bins=bins)
    ax[i].set_xlabel(b)
for i in range(2): 
    ax[i].set_xlim(0,0.4)
ax[2].set_xlim(bins[0],bins[-1])
#plt.xlim(-0.5,0.5)


Out[672]:
(0.0, 1.8)

Model (only use SGC data)

Colors, sizes, shapes


In [803]:
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
    for col in range(3):
        i+=1
        if i >= len(attrs):
            continue
        bins=None
        if attrs[i] == 'redshift':
            bins=np.linspace(0,2,20)
        if attrs[i] == 'fwhm_or_rhalf':
            bins=np.linspace(0,10,40)            
        _=ax[row,col].hist(eboss.loc[(~isNGC) & (isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
                           color='b',bins=bins,label='DEV in S')
        _=ax[row,col].hist(eboss.loc[(~isNGC) & (~isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
                           color='r',bins=bins,label='EXP in S')
        ax[row,col].set_xlabel(attrs[i])
        if i == 0:
            ax[row,col].legend(loc='upper left')
ax[1,0].set_xlim(0,2)

#gband
oii_emitters= [21.825, 22.825]
#ax[0,0].set_xlim(oii_emitters)
ax[0,0].axvline(oii_emitters[0],c='k',ls='--')
ax[0,0].axvline(oii_emitters[1],c='k',ls='--')

num_in_s=dict(isDEV=eboss[(~isNGC) & (isDEV) & (isSmall)].shape[0],
              notDEV=eboss[(~isNGC) & (~isDEV) & (isSmall)].shape[0],
              total=eboss[(isSmall) & (~isNGC)].shape[0])
print('DEV in S: Number=%d, Frac=%.3f' % (num_in_s['isDEV'],num_in_s['isDEV']/num_in_s['total']))
print('EXP in S: Number=%d, Frac=%.3f' % (num_in_s['notDEV'],num_in_s['notDEV']/num_in_s['total']))


DEV in S: Number=7448, Frac=0.087
EXP in S: Number=77707, Frac=0.913

GMM for EXP


In [674]:
fit_cols= ['fwhm_or_rhalf', 'g-r', 'r-z','z','redshift']
eboss_notdev= eboss.loc[(~isNGC) & (~isDEV) & (isSmall), fit_cols]
eboss_dev= eboss.loc[(~isNGC) & (isDEV) & (isSmall), fit_cols]
X= eboss_notdev.values

gmms,i_min, n_comp, BICs= my_mixture(X)
plt.plot(n_comp,BICs)


15 components
Out[674]:
[<matplotlib.lines.Line2D at 0x22c3c4748>]

Cannot model power law (g-mag histogram) with Gaussians


In [474]:
fig,ax= plt.subplots(3,2,figsize=(13,8))

zbins=np.arange(0.,1.8,0.1)
fwhm_bins=np.arange(0,4,0.2)
for row,n in enumerate([6,9,12]):
    i= np.where(n_comp == n)[0][0]
    Xpred,Ypred= gmms[i].sample(10000)
    _,bins,_= ax[row,0].hist(eboss_notdev['redshift'],bins=zbins,histtype='step',color='k',normed=True)
    _=ax[row,0].hist(Xpred[:,-1],bins=zbins,histtype='step',color='b',normed=True)

    _,bins,_= ax[row,1].hist(eboss_notdev['fwhm_or_rhalf'],bins=fwhm_bins,histtype='step',color='k',normed=True,
                             label='data')
    _=ax[row,1].hist(Xpred[:,0],bins=fwhm_bins,histtype='step',color='b',normed=True,
                     label='pred')
    #ax[2].set_xlabel('fwhm_or_rhalf')
    
ax[2,0].set_xlabel('redshift')
ax[2,1].set_xlabel('fwhm_or_rhalf')
ax[0,1].legend()

### g,r,z
#fit_cols= ['fwhm_or_rhalf', 'g-r', 'r-z','z','redshift']
fig,ax= plt.subplots(3,3,figsize=(13,8))

for row,n in enumerate([6,9,12]):
    i= np.where(n_comp == n)[0][0]
    Xpred,Ypred= gmms[i].sample(10000)
    # each band: z,r,g
    _,bins,_= ax[row,0].hist(eboss_notdev['z'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,0].hist(Xpred[:,-2],bins=bins,histtype='step',color='b',normed=True)
    ax[row,0].set_xlabel('z')

    _,bins,_= ax[row,1].hist(eboss_notdev['r-z'] + eboss_notdev['z'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,1].hist(Xpred[:,-3] + Xpred[:,-2],bins=bins,histtype='step',color='b',normed=True)
    ax[row,1].set_xlabel('r')
    
    _,bins,_= ax[row,2].hist(eboss_notdev['g-r'] + eboss_notdev['r-z'] + eboss_notdev['z'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,2].hist(Xpred[:,-4] + Xpred[:,-3] + Xpred[:,-2],bins=bins,histtype='step',color='b',normed=True)
    ax[row,2].set_xlabel('g')
    
for i,b in zip(range(3),'zrg'):
    ax[-1,i].set_xlabel(b)



In [592]:
def ebossInSGC(rz,gr):
    return ((-0.068*rz + 0.457 < gr) &
            (gr < 0.112*rz + 0.773) &
            (0.218*gr + 0.571 < rz) & 
            (rz < -0.555*gr + 1.901))

def ebossInNGC(rz,gr):
    return ((-0.068*rz + 0.457 < gr) &
            (gr < 0.112*rz + 0.773) &
            (0.637*gr + 0.399 < rz) & 
            (rz < -0.555*gr + 1.901))

class EbossBox(object):
    def get_xy_pad(self,slope,pad=0):
        """Returns dx,dy"""
        theta= np.arctan(abs(slope))
        return pad*np.sin(theta), pad*np.cos(theta)
    
    def get_yint_pad(self,slope,pad=0):
        """Returns dx,dy"""
        theta= np.arctan(slope)
        return pad / np.cos(theta)

    def three_lines(self,rz,pad=0):
        slopes= np.array([-0.068,0.112, 1/(-0.555)])
        yints=  np.array([0.457,0.773,-1.901/(-0.555)])
        lines= []
        for cnt,slope,yint in zip(range(len(slopes)),slopes,yints):
            dy= 0
            #dx,dy= self.get_xy_pad(slope,pad)
            dy= self.get_yint_pad(slope,pad)
            if cnt == 0:
                dy *= -1
            #lines.append(slope*(rz-dx) + yint + dy)
            lines.append(slope*rz + yint + dy)
        return tuple(lines)
    
    def sgc_line(self,rz,pad=0):
        slope,yint= 1/0.218, -0.571/0.218
        dy=0.
        #dx,dy= self.get_xy_pad(slope,pad)
        dy= self.get_yint_pad(slope,pad)
        return slope*rz + yint + dy

    def ngc_line(self,rz,pad=0):
        slope,yint= 1/0.637, -0.399/0.637
        #dx,dy= self.get_xy_pad(slope,pad)
        dy= self.get_yint_pad(slope,pad)
        return slope*rz + yint + dy

    def SGC(self,rz, pad):
        """
        Args:
            rz: r-z
            pad: magnitudes of padding to expand TS box
        """
        d['y1'],d['y2'],d['y3']= self.three_lines(rz,pad) 
        d['y4']= self.sgc_line(rz,pad)
        return d
    
    def NGC(self,rz, pad):
        """
        Args:
            rz: r-z
            pad: magnitudes of padding to expand TS box
        """
        d['y1'],d['y2'],d['y3']= self.three_lines(rz,pad) 
        d['y4']= self.ngc_line(rz,pad)
        return d

TS box with padding (0.5 mag)


In [582]:
plt.scatter(df.loc[(~isNGC) & (~isDEV) & (isSmall),'r-z'],
              df.loc[(~isNGC) & (~isDEV) & (isSmall),'g-r'],
              c='b',s=10,alpha=0.5,label='S')
plt.scatter(df.loc[(isNGC) & (~isDEV) & (isSmall),'r-z'],
              df.loc[(isNGC) & (~isDEV) & (isSmall),'g-r'],
              c='r',s=10,alpha=0.5,label='N')

rz= np.linspace(0.5,2,num=20)
ngc_d= EbossBox().NGC(rz,pad=0.2)
plt.plot(rz,ngc_d['y1'],'k--',lw=2)
plt.plot(rz,ngc_d['y2'],'k--',lw=2)
plt.plot(rz,ngc_d['y3'],'k--',lw=2)
plt.plot(rz,ngc_d['y4'],'m--',lw=2)

sgc_d= EbossBox().SGC(rz,pad=0.2)
plt.plot(rz,sgc_d['y4'],'b--',lw=2)


plt.legend()
plt.xlim(0.5,2)
plt.ylim(0.2,1.3)
plt.xlabel('r-z')
plt.ylabel('g-r')


Out[582]:
<matplotlib.text.Text at 0x17516d710>

Add DR3-Deep2 to Model


In [879]:
DATA_DIR = os.path.join(os.environ['HOME'],'Downloads',
                        'truth')
dr3_fns= glob(os.path.join(DATA_DIR,
                'dr3.1/trimmed/decals-dr3.1-deep2-field*-trim.fits'))
dp2_fns= glob(os.path.join(DATA_DIR,
                'dr3.1/trimmed/deep2-field*-trim.fits'))                                                               
print(dr3_fns,dp2_fns)
###
def stack_tables(fns):
    cat=[]
    assert(len(fns) > 0)
    for fn in fns:
        assert(os.path.exists(fn))
        print('Stacking %s' % fn)
        cat.append( fits_table(fn) )
    return merge_tables(cat, columns='fillzero')

dr3= stack_tables(dr3_fns)
dp2= stack_tables(dp2_fns)
print(len(dr3),len(dp2))
#######
grz_gt0= (np.all(dr3.decam_flux[:,[1,2,4]] > 0,axis=1) &
          np.all(dr3.decam_flux_ivar[:,[1,2,4]] > 0,axis=1))
notCOMP= dr3.type != 'COMP'
###
fwhm_or_rhalf= np.zeros(len(dr3))-1 # arcsec
isPSF= dr3.type == 'PSF'
isEXP= dr3.type == 'EXP'
isSIMP= dr3.type == 'SIMP'
isDEV= dr3.type == 'DEV'
fwhm_or_rhalf[isPSF]= np.mean(dr3[isPSF].decam_psfsize[:,[1,2,4]],axis=1)
fwhm_or_rhalf[isSIMP]= 0.5
fwhm_or_rhalf[isEXP]= dr3[isEXP].shapeexp_r
fwhm_or_rhalf[isDEV]= dr3[isDEV].shapedev_r
dr3.set('fwhm_or_rhalf',fwhm_or_rhalf)
###
reshift_lims= [0,2.] # eBOSS 
###
keep= ((grz_gt0) & 
       (notCOMP) & 
       (fwhm_or_rhalf < 5) & 
       (dp2.zhelio > reshift_lims[0]) & 
       (dp2.zhelio < reshift_lims[1]))
dr3.cut(keep)
dp2.cut(keep)
###
d= {}
for i,b in zip([1,2,4],'grz'):
    d[b]= flux2mag(dr3.get('decam_flux')[:,i]/dr3.get('decam_mw_transmission')[:,i])
    #d[b+'_ivar']= flux2mag(dr3.get('decam_flux_ivar')[:,i]/dr3.get('decam_mw_transmission')[:,i])
d['redshift']= dp2.get('zhelio')
d['fwhm_or_rhalf']= dr3.fwhm_or_rhalf
d['type']= dr3.get('type')
dr3_deep2= pd.DataFrame(d)
###
print(df['type'].value_counts()/df.shape[0])
isDEV= dr3_deep2['type'] == 'DEV'
dr3_deep2['r-z']= dr3_deep2['r'] - dr3_deep2['z']
dr3_deep2['g-r']= dr3_deep2['g'] - dr3_deep2['r']
dr3_deep2['brickid']= dr3.brickid
dr3_deep2['objid']= dr3.objid


['/Users/kaylan1/Downloads/truth/dr3.1/trimmed/decals-dr3.1-deep2-field2-trim.fits', '/Users/kaylan1/Downloads/truth/dr3.1/trimmed/decals-dr3.1-deep2-field3-trim.fits', '/Users/kaylan1/Downloads/truth/dr3.1/trimmed/decals-dr3.1-deep2-field4-trim.fits'] ['/Users/kaylan1/Downloads/truth/dr3.1/trimmed/deep2-field2-trim.fits', '/Users/kaylan1/Downloads/truth/dr3.1/trimmed/deep2-field3-trim.fits', '/Users/kaylan1/Downloads/truth/dr3.1/trimmed/deep2-field4-trim.fits']
Stacking /Users/kaylan1/Downloads/truth/dr3.1/trimmed/decals-dr3.1-deep2-field2-trim.fits
Converted brickname from |S8 to <U8
Converted type from |S4 to <U4
Stacking /Users/kaylan1/Downloads/truth/dr3.1/trimmed/decals-dr3.1-deep2-field3-trim.fits
Converted brickname from |S8 to <U8
Converted type from |S4 to <U4
Stacking /Users/kaylan1/Downloads/truth/dr3.1/trimmed/decals-dr3.1-deep2-field4-trim.fits
Converted brickname from |S8 to <U8
Converted type from |S4 to <U4
Stacking /Users/kaylan1/Downloads/truth/dr3.1/trimmed/deep2-field2-trim.fits
Converted source from |S12 to <U12
Converted vis_morph from |S1 to <U1
Stacking /Users/kaylan1/Downloads/truth/dr3.1/trimmed/deep2-field3-trim.fits
Converted source from |S12 to <U12
Converted vis_morph from |S1 to <U1
Stacking /Users/kaylan1/Downloads/truth/dr3.1/trimmed/deep2-field4-trim.fits
Converted source from |S12 to <U12
Converted vis_morph from |S1 to <U1
28284 28284
EXP     0.600738
SIMP    0.284612
DEV     0.085009
PSF     0.029642
Name: type, dtype: float64

DR3-Deep2 for outside the TS box


In [880]:
def eboss_scatterplot(rz,gr,redshift,
                      fig,ax,pad=0.):
    """Given cleaned gr,gr, and redshifts, cuts to padded SGC region
    and plots scatter plot colored by redshift"""
    #inSGC= ebossInSGC(df['r-z'],df['g-r'])
    sgc_d= EbossBox().SGC(rz,pad=mag_pad)
    inSGC= ((gr > sgc_d['y1']) & 
            (gr < sgc_d['y2']) &
            (gr < sgc_d['y3']) &
            (gr < sgc_d['y4']))
    
    import matplotlib.colors as colors
    vmin= redshift[inSGC].min()
    vmax= redshift[inSGC].max()
    bounds= np.arange(vmin,vmax+0.2,0.2)
    norm = colors.BoundaryNorm(boundaries=bounds, ncolors=256)
    scat= ax.scatter(rz[inSGC], gr[inSGC],
                     c=redshift[inSGC],s=10,alpha=1,
                     vmin=vmin,vmax=vmax,norm=norm)
    rz_pts= np.linspace(0.4,2.1,num=20)
    sgc_d= EbossBox().SGC(rz_pts,pad=mag_pad)
    for key in ['y1','y2','y3','y4']:
        ax.plot(rz_pts,sgc_d[key],'k--',lw=2)
    # eBOSS Box
    sgc_d= EbossBox().SGC(rz_pts,pad=0.)
    for key in ['y1','y2','y3','y4']:
        ax.plot(rz_pts,sgc_d[key],'m--',lw=3)

    ax.set_xlabel('r-z')
    ax.set_ylabel('g-r')
    ax.set_xlim(rz_pts[0],rz_pts[-1])
    ax.set_ylim(0,1.2)
    print(len(rz))

    from mpl_toolkits.axes_grid1 import make_axes_locatable
    divider = make_axes_locatable(ax)
    cax = divider.append_axes('right', size='5%', pad=0.05)
    fig.colorbar(scat, cax=cax, orientation='vertical')

# eBOSS data
redshift_lims= [0,2] # eBOSS 
eboss_cut= ((~isNGC) & 
            (isSmall) &
            (eboss['redshift'] >= redshift_lims[0]) &
            (eboss['redshift'] <= redshift_lims[1]))
# type
isDEV= dict(dr3= dr3_deep2['type'].str.strip() == 'DEV',
            eboss= eboss_zcut['type'].str.strip() == 'DEV')
    

pad=0.2  
fig,ax=plt.subplots(2,2,figsize=(16,14))
eboss_scatterplot(dr3_deep2.loc[~isDEV['dr3'],'r-z'],dr3_deep2.loc[~isDEV['dr3'],'g-r'],
                  dr3_deep2.loc[~isDEV['dr3'],'redshift'],
                  fig,ax[0,0],pad=pad)
eboss_scatterplot(eboss.loc[(eboss_cut) & (~isDEV['eboss']),'r-z'],
                  eboss.loc[(eboss_cut) & (~isDEV['eboss']),'g-r'],
                  eboss.loc[(eboss_cut) & (~isDEV['eboss']),'redshift'],
                  fig,ax[0,1],pad=pad)
ax[0,0].set_title('EXP, DR3-DEEP2')
ax[0,1].set_title('EXP, eBOSS')

# DEV
eboss_scatterplot(dr3_deep2.loc[isDEV['dr3'],'r-z'],dr3_deep2.loc[isDEV['dr3'],'g-r'],
                  dr3_deep2.loc[isDEV['dr3'],'redshift'],
                  fig,ax[1,0],pad=pad)
eboss_scatterplot(eboss.loc[(eboss_cut) & (isDEV['eboss']),'r-z'],
                  eboss.loc[(eboss_cut) & (isDEV['eboss']),'g-r'],
                  eboss.loc[(eboss_cut) & (isDEV['eboss']),'redshift'],
                  fig,ax[1,1],pad=pad)
ax[1,0].set_title('DEV, DR3-DEEP2')
ax[1,1].set_title('DEV, eBOSS')


25473
77525
927
7439
Out[880]:
<matplotlib.text.Text at 0x1948a8630>

DR3-Deep2 reproduces SGC data when cut to TS box!


In [882]:
pad=0.
sgc_d= EbossBox().SGC(dr3_deep2['r-z'],pad=pad)
inSGC= ((dr3_deep2['g-r'] > sgc_d['y1']) & 
        (dr3_deep2['g-r'] < sgc_d['y2']) &
        (dr3_deep2['g-r'] < sgc_d['y3']) &
        (dr3_deep2['g-r'] < sgc_d['y4']))

reshift_lims= [0,2.] # eBOSS 
eboss_cut= ((~isNGC) & 
            (~isDEV['eboss']) & 
            (isSmall) &
            (eboss['redshift'] >= reshift_lims[0]) &
            (eboss['redshift'] <= reshift_lims[1]))

g_min,g_max= eboss.loc[eboss_cut,'g'].min(), eboss.loc[eboss_cut,'g'].max()
dr3_cut= ((inSGC) &
          (dr3_deep2['g'] >= g_min) &
          (dr3_deep2['g'] <= g_max + pad) &
          (~isDEV['dr3']))
    
    
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
bins=dict(g=np.linspace(21.5,23,20),
          r=np.linspace(20.5,23,20),
          z=np.linspace(19,22,20),
          redshift=np.linspace(0,2,20),
          fwhm_or_rhalf=np.linspace(0,5,20))
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
    for col in range(3):
        i+=1
        if i >= len(attrs):
            continue         
        _=ax[row,col].hist(dr3_deep2.loc[dr3_cut,attrs[i]],
                           histtype='step',normed=True,
                           color='b',bins=bins[attrs[i]],label='DR3-DP2 (EXP & eBOSS Box)')
        _=ax[row,col].hist(eboss.loc[eboss_cut,attrs[i]],
                           histtype='step',normed=True,
                           color='r',bins=bins[attrs[i]],label='eBOSS (EXP & SGC)')
        ax[row,col].set_xlabel(attrs[i])
        if i == 0:
            ax[row,col].legend(loc='upper left')

print('DR3-DP2: Number=%d' % dr3_deep2[dr3_cut].shape[0])
print('eBOSS: Number=%d' % eboss[eboss_cut].shape[0])


DR3-DP2: Number=374
eBOSS: Number=77525

Redshift correlations also reproduced!


In [883]:
## Reduce size of eboss_zcut so can plot
iboot=np.random.randint(0,eboss[eboss_cut].shape[0],size=5000)
kwargs= dict(shade=True, shade_lowest=False)
##

attrs= ['g','r','z','fwhm_or_rhalf']
xylims=dict(g=(21.7,23),
          r=(20.9,22.5),
          z=(19.8,22),
          redshift=(0.6,1.3),
          fwhm_or_rhalf=(0,2))
fig,ax= plt.subplots(2,2,figsize=(12,12))
i=-1
for row in range(2):
    for col in range(2):
        i+=1
        if i >= len(attrs):
            continue         
#         ax[row,col].scatter(eboss_zcut.loc[~isDEV['eboss'],attrs[i]],
#                             eboss_zcut.loc[~isDEV['eboss'],'redshift'],
#                             c='b',s=10,alpha=0.5,label='EXP, eBOSS')
#         ax[row,col].scatter(dr3_deep2_gcut[attrs[i]],
#                             dr3_deep2_gcut['redshift'],
#                             c='r',s=10,alpha=0.5,label='EXP, DR3-DP2')
        sns.kdeplot(eboss.loc[eboss_cut,attrs[i]].iloc[iboot],
                    eboss.loc[eboss_cut,'redshift'].iloc[iboot],
                    ax= ax[row,col], cmap='Blues', label='EXP, eBOSS',**kwargs)
        sns.kdeplot(dr3_deep2.loc[dr3_cut,attrs[i]],
                    dr3_deep2.loc[dr3_cut,'redshift'],
                    ax= ax[row,col], cmap="Reds", label='EXP, DR3-DP2',alpha=0.5,**kwargs)
        ax[row,col].set_xlim(xylims[attrs[i]])
        ax[row,col].set_ylim(xylims['redshift'])
        ax[row,col].set_xlabel(attrs[i])
ax[1,1].legend(['Blue: EXP, eBOSS',
                'Red: EXP, DR3-DP2'],loc='upper right')


Out[883]:
<matplotlib.legend.Legend at 0x18651bac8>

DEV from DR3-Deep2 span redshifts [0,2] and 0.2 mag deeper than SGC g-mag, so will can model this even though just 79 data points


In [884]:
pad=0.2
sgc_d= EbossBox().SGC(dr3_deep2['r-z'],pad=pad)
inSGC= ((dr3_deep2['g-r'] > sgc_d['y1']) & 
        (dr3_deep2['g-r'] < sgc_d['y2']) &
        (dr3_deep2['g-r'] < sgc_d['y3']) &
        (dr3_deep2['g-r'] < sgc_d['y4']))

reshift_lims= [0,2.] # eBOSS 
eboss_cut= ((~isNGC) & 
            (isDEV['eboss']) & 
            (isSmall) &
            (eboss['redshift'] >= reshift_lims[0]) &
            (eboss['redshift'] <= reshift_lims[1]))

g_min,g_max= eboss.loc[eboss_cut,'g'].min(), eboss.loc[eboss_cut,'g'].max()
dr3_cut= ((inSGC) &
          (dr3_deep2['g'] >= g_min - pad) &
          (dr3_deep2['g'] <= g_max + pad) &
          (isDEV['dr3']))
    
    
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
bins=dict(g=np.linspace(21.5,23,20),
          r=np.linspace(20.5,23,20),
          z=np.linspace(19,22,20),
          redshift=np.linspace(0,2,20),
          fwhm_or_rhalf=np.linspace(0,5,20))
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
    for col in range(3):
        i+=1
        if i >= len(attrs):
            continue         
        _=ax[row,col].hist(dr3_deep2.loc[dr3_cut,attrs[i]],
                           histtype='step',normed=True,
                           color='b',bins=bins[attrs[i]],label='DR3-DP2 (DEV & eBOSS Box)')
        _=ax[row,col].hist(eboss.loc[eboss_cut,attrs[i]],
                           histtype='step',normed=True,
                           color='r',bins=bins[attrs[i]],label='eBOSS (DEV & SGC)')
        ax[row,col].set_xlabel(attrs[i])
        if i == 0:
            ax[row,col].legend(loc='upper left')

print('DR3-DP2: Number DEV=%d' % dr3_deep2[dr3_cut].shape[0])
print('eBOSS: Number DEV=%d' % eboss[eboss_cut].shape[0])


DR3-DP2: Number DEV=85
eBOSS: Number DEV=7439

Same padding (0.2 mag) for EXP


In [885]:
pad=0.2
sgc_d= EbossBox().SGC(dr3_deep2['r-z'],pad=pad)
inSGC= ((dr3_deep2['g-r'] > sgc_d['y1']) & 
        (dr3_deep2['g-r'] < sgc_d['y2']) &
        (dr3_deep2['g-r'] < sgc_d['y3']) &
        (dr3_deep2['g-r'] < sgc_d['y4']))

redshift_lims= [0,2.] # eBOSS 
eboss_cut= ((~isNGC) & 
            (~isDEV['eboss']) & 
            (isSmall) &
            (eboss['redshift'] >= redshift_lims[0]) &
            (eboss['redshift'] <= redshift_lims[1]))

g_min,g_max= eboss.loc[eboss_cut,'g'].min(), eboss.loc[eboss_cut,'g'].max()
dr3_cut= ((inSGC) &
          (dr3_deep2['g'] >= g_min - pad) &
          (dr3_deep2['g'] <= g_max + pad) &
          (~isDEV['dr3']))
    
    
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
bins=dict(g=np.linspace(21.5,23,20),
          r=np.linspace(20.5,23,20),
          z=np.linspace(19,22,20),
          redshift=np.linspace(0,2,20),
          fwhm_or_rhalf=np.linspace(0,5,20))
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
    for col in range(3):
        i+=1
        if i >= len(attrs):
            continue         
        _=ax[row,col].hist(dr3_deep2.loc[dr3_cut,attrs[i]],
                           histtype='step',normed=True,
                           color='b',bins=bins[attrs[i]],label='DR3-DP2 (EXP & eBOSS Box)')
        _=ax[row,col].hist(eboss.loc[eboss_cut,attrs[i]],
                           histtype='step',normed=True,
                           color='r',bins=bins[attrs[i]],label='eBOSS (EXP & SGC)')
        ax[row,col].set_xlabel(attrs[i])
        if i == 0:
            ax[row,col].legend(loc='upper left')

print('DR3-DP2: Number=%d' % dr3_deep2[dr3_cut].shape[0])
print('eBOSS: Number=%d' % eboss[eboss_cut].shape[0])


DR3-DP2: Number=1066
eBOSS: Number=77525

4 Samples = Model (EXP/DEV and SGC/DR3-Deep2)


In [2]:
print('SGC: ~77k EXP, 7k DEV')
print('DR3-Deep2: ~1k EXP, 100 DEV')


SGC: ~77k EXP, 7k DEV
DR3-Deep2: ~1k EXP, 100 DEV

In [892]:
# Limits
## eBOSS
redshift_lims= [0,2.] # eBOSS 
rhalf_lim_exp= (0.262/2, 2.5) # Camera, Data
rhalf_lim_dev= (0.262/2, 5.) # Camera, Data
eboss_cut_gen= ((~isNGC) & 
                (isSmall) & 
                (eboss['redshift'] >= redshift_lims[0]) &
                (eboss['redshift'] <= redshift_lims[1]))
eboss_cut_exp= ((eboss_cut_gen) &
                (~isDEV['eboss']) & 
                (eboss['fwhm_or_rhalf'] >= rhalf_lim_exp[0]) &
                (eboss['fwhm_or_rhalf'] <= rhalf_lim_exp[1]))
eboss_cut_dev= ((eboss_cut_gen) &
                (isDEV['eboss']) & 
                (eboss['fwhm_or_rhalf'] >= rhalf_lim_dev[0]) &
                (eboss['fwhm_or_rhalf'] <= rhalf_lim_dev[1]))

## DR3-DP2
g_min= eboss.loc[(eboss_cut_exp) | (eboss_cut_dev),'g'].min()
g_max= eboss.loc[(eboss_cut_exp) | (eboss_cut_dev),'g'].max()
assert(pad==0.2)
dr3_cut_gen= ((inSGC) &
              (dr3_deep2['g'] >= g_min - pad) &
              (dr3_deep2['g'] <= g_max + pad) &
              (dr3_deep2['redshift'] >= redshift_lims[0]) &
              (dr3_deep2['redshift'] <= redshift_lims[1]))
dr3_cut_exp= ((dr3_cut_gen) &
              (~isDEV['dr3']) & 
              (dr3_deep2['fwhm_or_rhalf'] >= rhalf_lim_exp[0]) &
              (dr3_deep2['fwhm_or_rhalf'] <= rhalf_lim_exp[1]))
dr3_cut_dev= ((dr3_cut_gen) &
              (isDEV['dr3']) & 
              (dr3_deep2['fwhm_or_rhalf'] >= rhalf_lim_dev[0]) &
              (dr3_deep2['fwhm_or_rhalf'] <= rhalf_lim_dev[1]))
              
  

save_cols= ['g', 'r','z','fwhm_or_rhalf','redshift']

# DR3-DP2: in 0.2 padded eBOSS TS box
# unique_identfier
dr3_deep2['tractor_id']= dr3_deep2['brickid'].astype(str) + '-' + \
                         dr3_deep2['objid'].astype(str)  
fn='eboss_elg_dr3deep2_EXP.csv'
if not os.path.exists(fn):  
    (dr3_deep2.loc[dr3_cut_exp,:]
              .set_index('tractor_id')
              .loc[:,save_cols]
              #.head())
              .to_csv(fn, index_label='tractor_id'))
    print('Wrote %s' % fn)
fn='eboss_elg_dr3deep2_DEV.csv'
if not os.path.exists(fn):  
    (dr3_deep2.loc[dr3_cut_dev,:]
              .set_index('tractor_id')
              .loc[:,save_cols]
              #.head())
              .to_csv(fn, index_label='tractor_id'))
    print('Wrote %s' % fn)

# eBOSS data
# unique_identfier
eboss['sdss_id']= eboss['plate'].astype(str) + '-' + \
                    eboss['mjd'].astype(str) + '-' + \
                    eboss['fiberid'].astype(str) 
# Save ~ 77k eboss data cut to redshift and rhalf limits
# (not doing 10k random without relacement rows from the 77k eBOSS data points)
fn='eboss_elg_tsspectra_EXP.csv'
if not os.path.exists(fn):
    (eboss.loc[eboss_cut_exp,:]
          .set_index('sdss_id')
          .loc[:,save_cols]
          .to_csv(fn, index_label='sdss_id'))
    print('Wrote %s' % fn)
fn='eboss_elg_tsspectra_DEV.csv'
if not os.path.exists(fn):
    (eboss.loc[eboss_cut_dev,:]
          .set_index('sdss_id')
          .loc[:,save_cols]
          .to_csv(fn, index_label='sdss_id'))
    print('Wrote %s' % fn)


Wrote eboss_elg_dr3deep2_EXP.csv
Wrote eboss_elg_dr3deep2_DEV.csv
Wrote eboss_elg_tsspectra_EXP.csv
Wrote eboss_elg_tsspectra_DEV.csv

In [895]:
dr3dp2_exp= pd.read_csv('eboss_elg_dr3deep2_EXP.csv')
dr3dp2_dev= pd.read_csv('eboss_elg_dr3deep2_DEV.csv')
eboss_exp= pd.read_csv('eboss_elg_tsspectra_EXP.csv')
eboss_dev= pd.read_csv('eboss_elg_tsspectra_DEV.csv')
print(dr3dp2_exp.shape[0],dr3dp2_dev.shape[0])
print(eboss_exp.shape[0],eboss_dev.shape[0])
print(dr3dp2_exp.columns)
print(eboss_exp.columns)


1064 85
77367 7229
Index(['tractor_id', 'g', 'r', 'z', 'fwhm_or_rhalf', 'redshift'], dtype='object')
Index(['sdss_id', 'g', 'r', 'z', 'fwhm_or_rhalf', 'redshift'], dtype='object')

Model for EXP


In [898]:
cols= ['g', 'r', 'z', 'fwhm_or_rhalf','redshift']

fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
    for col in range(3):
        i+=1
        if i >= len(cols):
            continue 
        _=ax[row,col].hist(eboss_exp[cols[i]],
                           histtype='step',normed=True,
                           bins=30,color='b',label='eboss_exp') #bins[col[i]])
        _=ax[row,col].hist(dr3dp2_exp[cols[i]],
                           histtype='step',normed=True,
                           bins=30,color='r',label='dr3dp2_exp') #bins[col[i]])
        ax[row,col].set_xlabel(cols[i])
ax[0,2].legend()
# ax[1,0].set_xlim(-0.2,2)
# ax[1,1].set_xlim(0,2)

print('eboss redshift min,max=',eboss_exp['redshift'].min(),eboss_exp['redshift'].max())
print('dr3dp2 redshift min,max=',dr3dp2_exp['redshift'].min(),dr3dp2_exp['redshift'].max())
print('eboss rhalf min,max=',eboss_exp['fwhm_or_rhalf'].min(),eboss_exp['fwhm_or_rhalf'].max())
print('dr3dp2 rhalf min,max=',dr3dp2_exp['fwhm_or_rhalf'].min(),dr3dp2_exp['fwhm_or_rhalf'].max())


eboss redshift min,max= 5.69866460864e-05 1.6508961916
dr3dp2 redshift min,max= 0.000414778420236 1.93213009834
eboss rhalf min,max= 0.20438374579 2.4978723526
dr3dp2 rhalf min,max= 0.238977983594 2.45425891876

Model for DEV


In [899]:
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
    for col in range(3):
        i+=1
        if i >= len(cols):
            continue 
        _=ax[row,col].hist(eboss_dev[cols[i]],
                           histtype='step',normed=True,
                           bins=30,color='b',label='eboss_dev') #bins[col[i]])
        _=ax[row,col].hist(dr3dp2_dev[cols[i]],
                           histtype='step',normed=True,
                           bins=30,color='r',label='dr3dp2_dev') #bins[col[i]])
        ax[row,col].set_xlabel(cols[i])
ax[0,2].legend()
# ax[1,0].set_xlim(-0.2,2)
# ax[1,1].set_xlim(0,2)

print('eboss redshift min,max=',eboss_dev['redshift'].min(),eboss_dev['redshift'].max())
print('dr3dp2 redshift min,max=',dr3dp2_dev['redshift'].min(),dr3dp2_dev['redshift'].max())
print('eboss rhalf min,max=',eboss_dev['fwhm_or_rhalf'].min(),eboss_dev['fwhm_or_rhalf'].max())
print('dr3dp2 rhalf min,max=',dr3dp2_dev['fwhm_or_rhalf'].min(),dr3dp2_dev['fwhm_or_rhalf'].max())


eboss redshift min,max= 4.56776215287e-05 1.64284992218
dr3dp2 redshift min,max= 0.114836841822 1.53109943867
eboss rhalf min,max= 0.149582266808 4.99710035324
dr3dp2 rhalf min,max= 0.186883717775 4.76907682419

Takeing 10k without replacement samples of the 77k eboss data runs risk of not having enough redshift > 1.4 spectra examples


In [893]:
fig,ax= plt.subplots(2,3,figsize=(12,9))

ind= np.arange(eboss[eboss_cut_exp].shape[0])
for ishuff in range(20):
    i=-1
    for row in range(2):
        for col in range(3):
            i+=1
            if i >= len(attrs):
                continue 
            np.random.shuffle(ind)
            _=ax[row,col].hist(eboss.loc[eboss_cut_exp,attrs[i]].iloc[ind[:10000]],
                               histtype='step',normed=True,
                               bins=bins[attrs[i]])
            ax[row,col].set_xlabel(attrs[i])


n(z) for eBOSS ELGs

print('Right is the n(z) Anand Raichoor provided me, Left is my reproduction of it')


In [845]:
zbins= np.arange(0,2.+0.1,0.1)
print('zbins=',zbins)
print('number zs from EXP: ',eboss[(~isNGC) & (~isDEV['eboss']) & (isSmall)].shape[0])
print('number zs from DEV: ',eboss[(~isNGC) & (isDEV['eboss'])  & (isSmall)].shape[0])

fig,ax=plt.subplots(1,2,figsize=(8,5))
for i,wt,name in zip(range(2),[np.ones(eboss.shape[0]), eboss['sector_tsr'].values],
                              ['No wt', 'tsr']):
    nz= (eboss['redshift'] / wt)[(~isNGC) & (isSmall)]
    _=ax[i].hist(nz,histtype='step',normed=True,
              color='k',bins=zbins,label='n(z)')     
    _=ax[i].hist(df.loc[(~isNGC) & (isDEV['eboss']) & (isSmall),'redshift'],histtype='step',normed=True,
                color='b',bins=zbins,label='DEV in S')
    _=ax[i].hist(df.loc[(~isNGC) & (~isDEV['eboss']) & (isSmall),'redshift'],histtype='step',normed=True,
                       color='r',bins=zbins,label='EXP in N&S')
ax[1].legend()


zbins= [ 0.   0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9  1.   1.1  1.2  1.3  1.4
  1.5  1.6  1.7  1.8  1.9  2. ]
number zs from EXP:  77525
number zs from DEV:  7439
Out[845]:
<matplotlib.legend.Legend at 0x1222303c8>

In [846]:
df_z['wt']= 1/df_z['sector_tsr']

zbins= np.arange(-0.05,2.+0.1,0.1)
print('zbins=',zbins)
df_z['bins']= pd.cut(df_z['redshift'],bins=zbins)
nz= df_z.loc[:,['wt','bins']].groupby('bins').agg(np.sum)

zbins2= np.arange(0.,2.+0.1,0.1)
df_z['bins2']= pd.cut(df_z['redshift'],bins=zbins2)
nz2= df_z.loc[:,['wt','bins2']].groupby('bins2').agg(np.sum)

print('redshift bins=',zbins)
print('bin centers=', nz.index.categories.mid)
print('redshift bins2=',zbins2)
print('bin centers2=', nz2.index.categories.mid)
plt.step(nz.index.categories.mid,nz['wt'],where='mid',c='b',label='binc 0,0.1,...')
plt.step(nz2.index.categories.mid,nz2['wt'],where='mid',c='r',label='binc 0.5,0.15,...')
plt.xlabel('redshift')
plt.ylabel('Sum 1/tsr')
plt.title('SGC')
plt.legend()


zbins= [-0.05  0.05  0.15  0.25  0.35  0.45  0.55  0.65  0.75  0.85  0.95  1.05
  1.15  1.25  1.35  1.45  1.55  1.65  1.75  1.85  1.95  2.05]
redshift bins= [-0.05  0.05  0.15  0.25  0.35  0.45  0.55  0.65  0.75  0.85  0.95  1.05
  1.15  1.25  1.35  1.45  1.55  1.65  1.75  1.85  1.95  2.05]
bin centers= Float64Index([0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2,
              1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0],
             dtype='float64')
redshift bins2= [ 0.   0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9  1.   1.1  1.2  1.3  1.4
  1.5  1.6  1.7  1.8  1.9  2. ]
bin centers2= Float64Index([0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05,
              1.15, 1.25, 1.35, 1.45, 1.55, 1.65, 1.75, 1.85, 1.95],
             dtype='float64')
Out[846]:
<matplotlib.legend.Legend at 0x12226a2b0>

n(z) weighted by spectroscopic completness (1/TSR)


In [847]:
dz= 0.02
zbins= np.arange(0,2.+dz,dz)
df_z['bins']= pd.cut(df_z['redshift'],bins=zbins)
nz= df_z.loc[:,['wt','bins']].groupby('bins').agg(np.sum)
nz['wt']= nz['wt']/np.sum(nz['wt'])

print('redshift bins=',zbins)
print('bin centers=', nz.index.categories.mid)
plt.step(nz.index.categories.mid,nz['wt']/dz,where='mid',c='b')
plt.xlabel('redshift')
plt.ylabel('PDF(z), Sum(1/tsr) / max() / dz')
plt.title('SGC')

print('isNORMED?', np.sum(nz['wt']/dz) * dz)


redshift bins= [ 0.    0.02  0.04  0.06  0.08  0.1   0.12  0.14  0.16  0.18  0.2   0.22
  0.24  0.26  0.28  0.3   0.32  0.34  0.36  0.38  0.4   0.42  0.44  0.46
  0.48  0.5   0.52  0.54  0.56  0.58  0.6   0.62  0.64  0.66  0.68  0.7
  0.72  0.74  0.76  0.78  0.8   0.82  0.84  0.86  0.88  0.9   0.92  0.94
  0.96  0.98  1.    1.02  1.04  1.06  1.08  1.1   1.12  1.14  1.16  1.18
  1.2   1.22  1.24  1.26  1.28  1.3   1.32  1.34  1.36  1.38  1.4   1.42
  1.44  1.46  1.48  1.5   1.52  1.54  1.56  1.58  1.6   1.62  1.64  1.66
  1.68  1.7   1.72  1.74  1.76  1.78  1.8   1.82  1.84  1.86  1.88  1.9
  1.92  1.94  1.96  1.98  2.  ]
bin centers= Float64Index([0.01, 0.03, 0.05, 0.07, 0.09, 0.11, 0.13, 0.15, 0.17, 0.19, 0.21,
              0.23, 0.25, 0.27, 0.29, 0.31, 0.33, 0.35, 0.37, 0.39, 0.41, 0.43,
              0.45, 0.47, 0.49, 0.51, 0.53, 0.55, 0.57, 0.59, 0.61, 0.63, 0.65,
              0.67, 0.69, 0.71, 0.73, 0.75, 0.77, 0.79, 0.81, 0.83, 0.85, 0.87,
              0.89, 0.91, 0.93, 0.95, 0.97, 0.99, 1.01, 1.03, 1.05, 1.07, 1.09,
              1.11, 1.13, 1.15, 1.17, 1.19, 1.21, 1.23, 1.25, 1.27, 1.29, 1.31,
              1.33, 1.35, 1.37, 1.39, 1.41, 1.43, 1.45, 1.47, 1.49, 1.51, 1.53,
              1.55, 1.57, 1.59, 1.61, 1.63, 1.65, 1.67, 1.69, 1.71, 1.73, 1.75,
              1.77, 1.79, 1.81, 1.83, 1.85, 1.87, 1.89, 1.91, 1.93, 1.95, 1.97,
              1.99],
             dtype='float64')
isNORMED? 1.00000007629

Gaussian Mixture for n(z)


In [849]:
gmms,i_min, n_comp, BICs= my_mixture(np.array(X).reshape(-1,1),
                                     n_comp=np.arange(2,17))
plt.plot(n_comp,BICs)


3 components
Out[849]:
[<matplotlib.lines.Line2D at 0x1222bd898>]

Go with 10 components suff to overfit


In [850]:
fig,ax= plt.subplots(4,1,figsize=(7,12))

for col,n in enumerate([3,6,8,10]):
    i= np.where(n_comp == n)[0][0]
    Xpred,Ypred= gmms[i].sample(10000)
    _=ax[col].hist(X,bins=zbins,histtype='step',color='b',normed=True)
    _=ax[col].hist(Xpred,bins=zbins,histtype='step',color='r',normed=True)
    
ax[-1].set_xlabel('redshift')


Out[850]:
<matplotlib.text.Text at 0x1222a8d30>

In [851]:
gmm= gmms[np.where(n_comp == 10)[0][0]]
z_pred,pdf_pred= gmm.sample(10000)
_=plt.hist(X,bins=zbins,histtype='step',color='b',normed=True)
_=plt.hist(Xpred,bins=zbins,histtype='step',color='r',normed=True)


10 components reproduce true n(z)


In [848]:
nz_pdf= nz['wt'].values
redshifts= nz.index.categories.mid.values
X= []
for z,num in zip(redshifts, nz_pdf * 10000):
    if not np.isfinite(num):
        print('its nan',z,num)
        continue
    X += [z] * int(num)
# for z,num in zip(redshifts, nz_pdf * 10000):
#     print(z,int(num))
print(len(X),X[0])

plt.step(nz.index.categories.mid,nz['wt']/dz,where='mid',c='b',label='exact')
_=plt.hist(X,bins=zbins, normed=True,color='r',label='approx')
plt.xlabel('redshift')
plt.ylabel('PDF(z), Sum(1/tsr) / max() / dz')
plt.title('SGC')


its nan 1.67 nan
its nan 1.69 nan
its nan 1.71 nan
its nan 1.73 nan
its nan 1.75 nan
its nan 1.77 nan
its nan 1.79 nan
its nan 1.81 nan
its nan 1.83 nan
its nan 1.85 nan
its nan 1.87 nan
its nan 1.89 nan
its nan 1.91 nan
its nan 1.93 nan
its nan 1.95 nan
9958 0.01
Out[848]:
<matplotlib.text.Text at 0x122296908>

Save GM model


In [904]:
GaussianMixtureModel.save(gmm, filename='eboss_nz_elg')


Wrote eboss_nz_elg_means.txt
Wrote eboss_nz_elg_weights.txt
Wrote eboss_nz_elg_covars.txt

In [908]:
# Test load
gmm= GaussianMixtureModel.load('eboss_nz_elg',py='36',is1D=True)
redshifts= gmm.sample(10000)
len(redshifts)
sns.distplot(redshifts)


(10, 1)
Out[908]:
<matplotlib.axes._subplots.AxesSubplot at 0x137306c88>

Draw n(z) & Query KDTree for nearest redshift in sample


In [943]:
gmm= GaussianMixtureModel.load('eboss_nz_elg',py='36',is1D=True)
# These are in my google drive
dr3dp2_exp= pd.read_csv('eboss_elg_dr3deep2_EXP.csv')
dr3dp2_dev= pd.read_csv('eboss_elg_dr3deep2_DEV.csv')
eboss_exp= pd.read_csv('eboss_elg_tsspectra_EXP.csv')
eboss_dev= pd.read_csv('eboss_elg_tsspectra_DEV.csv')

dr3dp2_both= pd.concat([dr3dp2_exp,dr3dp2_dev],axis='rows')
assert(dr3dp2_both.shape[0] == dr3dp2_exp.shape[0] + dr3dp2_dev.shape[0])

from scipy import spatial
trees= dict(dr3dp2_exp= spatial.KDTree(dr3dp2_exp['redshift'].values.reshape(-1,1)),
            dr3dp2_dev= spatial.KDTree(dr3dp2_dev['redshift'].values.reshape(-1,1)),
            dr3dp2_both= spatial.KDTree(dr3dp2_both['redshift'].values.reshape(-1,1)),
            eboss_exp= spatial.KDTree(eboss_exp['redshift'].values.reshape(-1,1)),
            eboss_dev= spatial.KDTree(eboss_dev['redshift'].values.reshape(-1,1)))


def inEbossBox(rz,gr,pad=0.):
    sgc_d= EbossBox().SGC(rz,pad=pad)
    return ((gr > sgc_d['y1']) & 
            (gr < sgc_d['y2']) &
            (gr < sgc_d['y3']) &
            (gr < sgc_d['y4']))

inBox=dict(dr3dp2_exp=inEbossBox(dr3dp2_exp['r'] - dr3dp2_exp['z'],
                                 dr3dp2_exp['g'] - dr3dp2_exp['r']),
           dr3dp2_dev=inEbossBox(dr3dp2_dev['r'] - dr3dp2_dev['z'],
                                 dr3dp2_dev['g'] - dr3dp2_dev['r']))

In [977]:
T= fits_table()
# Draw z from n(z), if z not in [0,2] redraw, store z
redshifts= gmm.sample(10000).reshape(-1) 

def outside_lims(z):
    red_lims=[0.,2.]
    return ((z < red_lims[0]) | 
            (z > red_lims[1])) 

i=0
redraw= outside_lims(redshifts)
num= len(redshifts[redraw])
while num > 0:
    i+=1
    if i > 20:
        raise ValueError
    print('redrawing %d redshifts' % num)
    redshifts[redraw]= gmm.sample(num).reshape(-1)
    redraw= outside_lims(redshifts)
    num= len(redshifts[redraw])
# Store
T.set('redshift',redshifts)

# flip coin, 90% assign type = EXP, 10% assign type = DEV, store type
types= np.array(['EXP']*9 + ['DEV'])
T.set('type',types[np.random.randint(0,len(types),size=len(T))])

# in eboss TS box
# in dr3_deep2 exp+dev combined sample, find NN redshift
print('len T=',len(T))
_,i_both= trees['dr3dp2_both'].query(T.redshift.reshape(-1,1))
print('len i_both=',len(i_both))
inBox['dr3dp2_both']= inEbossBox(dr3dp2_both['r'].iloc[i_both] - dr3dp2_both['z'].iloc[i_both],
                                 dr3dp2_both['g'].iloc[i_both] - dr3dp2_both['r'].iloc[i_both])
print('len inBox=',len(inBox['dr3dp2_both']))

# Assign g,r,z,rhalf for NNs + unique id + NN redshift to see how close really is...
d= {}
mag_shapes= ['g','r','z','fwhm_or_rhalf']
for col in mag_shapes:
    d[col]= np.zeros(len(T))-1
d['nn_redshift']= np.zeros(len(T))-1
d['id']= np.zeros(len(T)).astype(str)
    
# inBox, use eBOSS data
keep= (inBox['dr3dp2_both']) & (T.type == 'EXP')
_,i_df= trees['eboss_exp'].query(T.redshift[keep].reshape(-1,1))
for col in mag_shapes:
    d[col][keep]= eboss_exp[col].iloc[i_df]
d['id'][keep]= eboss_exp['sdss_id'].iloc[i_df]
d['nn_redshift'][keep]= eboss_exp['redshift'].iloc[i_df]
    
keep= (inBox['dr3dp2_both']) & (T.type == 'DEV')
_,i_df= trees['eboss_dev'].query(T.redshift[keep].reshape(-1,1))
for col in mag_shapes:
    d[col][keep]= eboss_dev[col].iloc[i_df]
d['id'][keep]= eboss_dev['sdss_id'].iloc[i_df]
d['nn_redshift'][keep]= eboss_dev['redshift'].iloc[i_df]

# outBox, use DR3-Deep2 data
keep= (~inBox['dr3dp2_both']) & (T.type == 'EXP')
_,i_df= trees['dr3dp2_exp'].query(T.redshift[keep].reshape(-1,1))
for col in mag_shapes:
    d[col][keep]= dr3dp2_exp[col].iloc[i_df]
d['id'][keep]= dr3dp2_exp['tractor_id'].iloc[i_df]
d['nn_redshift'][keep]= dr3dp2_exp['redshift'].iloc[i_df]
    
keep= (~inBox['dr3dp2_both']) & (T.type == 'DEV')
_,i_df= trees['dr3dp2_dev'].query(T.redshift[keep].reshape(-1,1))
for col in mag_shapes:
    d[col][keep]= dr3dp2_dev[col].iloc[i_df]
d['id'][keep]= dr3dp2_dev['tractor_id'].iloc[i_df]
d['nn_redshift'][keep]= dr3dp2_dev['redshift'].iloc[i_df]

# Add sersic n
d['n']= np.zeros(len(T)) - 1
d['n'][T.type == 'EXP']= 1
d['n'][T.type == 'DEV']= 4

for col in mag_shapes + ['nn_redshift','n']:
    assert(np.all(d[col] > 0))
assert(np.all(pd.Series(d['id']).str.len() > 1))

for col in mag_shapes + ['nn_redshift','n']:
    T.set(col,d[col])
T.set('id',d['id'])
T.delete_column('type')


(10,)
len T= 10000
len i_both= 10000
len inBox= 10000

In [978]:
T.get_columns()


Out[978]:
['redshift', 'g', 'r', 'z', 'fwhm_or_rhalf', 'nn_redshift', 'n', 'id']

Nearest redshifts are nearly all within ~ 0.02


In [979]:
sns.distplot(T.nn_redshift - T.redshift)
pd.Series(T.nn_redshift - T.redshift).describe()


Out[979]:
count    1.000000e+04
mean     6.410920e-06
std      4.001393e-03
min     -6.803535e-02
25%     -1.384403e-05
50%      1.198784e-07
75%      2.445551e-05
max      7.804136e-02
dtype: float64

Correct fraction of DEV ~ 10%


In [980]:
pd.Series(T.n).value_counts()


Out[980]:
1.0    8937
4.0    1063
dtype: int64

EXP/DEV have correct color, shape, size AND eBOSS n(z)


In [983]:
cols= mag_shapes + ['redshift']

fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
    for col in range(3):
        i+=1
        if i >= len(cols):
            continue 
        _=ax[row,col].hist(T.get(cols[i])[T.n == 1],
                           histtype='step',normed=True,
                           bins=30,color='b',label='EXP')
        _=ax[row,col].hist(T.get(cols[i])[T.n == 4.],
                           histtype='step',normed=True,
                           bins=30,color='r',label='DEV')
        ax[row,col].set_xlabel(cols[i])
ax[1,1].legend()


Out[983]:
<matplotlib.legend.Legend at 0x1aede3780>

eBOSS DR3 CCDS


In [206]:
ccd_coadd= fits_table(os.path.join(os.environ['HOME'],'Downloads',
                         'ebossDR3','legacysurvey-3583p000-ccds.fits'))
#dr3_ccds= fits_table(os.path.join(os.environ['HOME'],'Downloads',
#                         'ebossDR3','survey-ccds-decals.fits.gz'))
#dr3_ccds= fits_table(os.path.join(os.environ['HOME'],'Downloads',
#                         'ebossDR3','survey-ccds-extra.fits.gz'))
dr3_ccds= fits_table(os.path.join(os.environ['HOME'],'Downloads',
                         'ebossDR3','survey-ccds-nondecals.fits.gz'))


Converted object from |S37 to <U37
Converted filter from |S1 to <U1
Converted date_obs from |S10 to <U10
Converted ut from |S15 to <U15
Converted ha from |S13 to <U13
Converted propid from |S10 to <U10
Converted ccdname from |S3 to <U3
Converted camera from |S5 to <U5
Converted expid from |S12 to <U12
Converted image_filename from |S61 to <U61
Converted plver from |S4 to <U4
Converted skyver from |S16 to <U16
Converted wcsver from |S1 to <U1
Converted psfver from |S12 to <U12
Converted skyplver from |S4 to <U4
Converted wcsplver from |S4 to <U4
Converted psfplver from |S4 to <U4
Converted object from |S35 to <U35
Converted filter from |S1 to <U1
Converted date_obs from |S10 to <U10
Converted ut from |S15 to <U15
Converted ha from |S13 to <U13
Converted propid from |S10 to <U10
Converted ccdname from |S3 to <U3
Converted camera from |S5 to <U5
Converted expid from |S12 to <U12
Converted image_filename from |S61 to <U61

In [207]:
print(len(ccd_coadd.get_columns()), len(dr3_ccds.get_columns()))
same_cols= set(ccd_coadd.get_columns()).intersection(set(dr3_ccds.get_columns()))
print(len(same_cols))


70 52
52

In [208]:
print(ccd_coadd.ccdname[0],ccd_coadd.image_filename[0],ccd_coadd.date_obs[0])
print(dr3_ccds.ccdname[0],dr3_ccds.image_filename[0])


S25 decam/CPDES82/c4d_130902_061728_ooi_g_v1.fits.fz              2013-09-02
S29 decam/NonDECaLS/CP20130129/c4d_130130_004145_ooi_r_v1.fits.fz

In [209]:
i= ((pd.Series(dr3_ccds.ccdname).str.strip() == 'S25') &
    (pd.Series(dr3_ccds.image_filename).str.strip() == \
               'decam/CPDES82/c4d_130902_061728_ooi_g_v1.fits.fz'))

In [210]:
dr3_ccds[i]


Out[210]:
<tabledata object with 1 rows and 52 columns: object=DES survey hex -12-14 tiling 4     , expnum=229686, exptime=90.0, filter=g, seeing=1.72676, date_obs=2013-09-02, mjd_obs=56537.2621324, ut=06:17:28.238325, ha=00:24:18.570 , airmass=1.16, propid=2012B-0001, zpt=25.2166, avsky=124.355, arawgain=4.6, fwhm=7.44643, crpix1=11167.8, crpix2=8436.0, crval1=358.871992422, crval2=-0.635496023898, cd1_1=-7.99839e-08, cd1_2=7.28623e-05, cd2_1=-7.28599e-05, cd2_2=-8.01916e-08, ccdnum=4, ccdname=S25, ccdzpt=25.2163, ccdzpta=25.2169, ccdzptb=25.2181, ccdphoff=0.263215, ccdphrms=0.0156626, ccdskyrms=6.17303, ccdskymag=21.6912, ccdskycounts=124.622, ccdraoff=-0.0104072, ccddecoff=-0.015203, ccdtransp=1.30667, ccdnstar=72, ccdnmatch=52, ccdnmatcha=30, ccdnmatchb=22, ccdmdncol=1.63293, temp=10.1, camera=decam, expid=00229686-S25, image_hdu=4, image_filename=decam/CPDES82/c4d_130902_061728_ooi_g_v1.fits.fz             , width=2046, height=4094, ra_bore=358.871992422, dec_bore=-0.635496023898, ra=358.407016199, dec=0.104954658094>

In [212]:
for col in list(same_cols):
    print(ccd_coadd.get(col)[0],dr3_ccds[i].get(col)[0])


229686 229686
124.622 124.622
-7.99839e-08 -7.99839e-08
22 22
-0.0104072 -0.0104072
decam decam
DES survey hex -12-14 tiling 4        DES survey hex -12-14 tiling 4     
00229686-S25 00229686-S25
25.2166 25.2166
7.44643 7.44643
g g
2012B-0001 2012B-0001
-8.01916e-08 -8.01916e-08
25.2169 25.2169
-0.635496023898 -0.635496023898
2046 2046
S25 S25
358.407016199 358.407016199
-0.635496023898 -0.635496023898
52 52
1.16 1.16
72 72
90.0 90.0
4 4
358.871992422 358.871992422
1.63293 1.63293
0.0156626 0.0156626
21.6912 21.6912
11167.8 11167.8
10.1 10.1
6.17303 6.17303
00:24:18.570  00:24:18.570 
56537.2621324 56537.2621324
8436.0 8436.0
06:17:28.238325 06:17:28.238325
25.2181 25.2181
0.263215 0.263215
4 4
4.6 4.6
decam/CPDES82/c4d_130902_061728_ooi_g_v1.fits.fz              decam/CPDES82/c4d_130902_061728_ooi_g_v1.fits.fz             
30 30
4094 4094
2013-09-02 2013-09-02
-7.28599e-05 -7.28599e-05
7.28623e-05 7.28623e-05
1.72676 1.72676
124.355 124.355
1.30667 1.30667
-0.015203 -0.015203
25.2163 25.2163
0.104954658094 0.104954658094
358.871992422 358.871992422

which bricks used same ccds eBOSS/DR3 and which did not (e.g. more in eBOSS)?

Is it always more in eBOSS?


In [213]:
allccds= [fits_table(os.path.join(os.environ['HOME'],'Downloads',
                            'ebossDR3','survey-ccds-%s.fits.gz' % name))
          for name in ['decals','extra','nondecals']]
allccds= merge_tables(allccds, columns='fillzero')


Converted object from |S35 to <U35
Converted filter from |S1 to <U1
Converted date_obs from |S10 to <U10
Converted ut from |S15 to <U15
Converted ha from |S13 to <U13
Converted propid from |S10 to <U10
Converted ccdname from |S3 to <U3
Converted camera from |S5 to <U5
Converted expid from |S12 to <U12
Converted image_filename from |S61 to <U61
Converted object from |S37 to <U37
Converted filter from |S1 to <U1
Converted date_obs from |S10 to <U10
Converted ut from |S15 to <U15
Converted ha from |S13 to <U13
Converted propid from |S10 to <U10
Converted ccdname from |S3 to <U3
Converted camera from |S5 to <U5
Converted expid from |S12 to <U12
Converted image_filename from |S61 to <U61
Converted object from |S35 to <U35
Converted filter from |S1 to <U1
Converted date_obs from |S10 to <U10
Converted ut from |S15 to <U15
Converted ha from |S13 to <U13
Converted propid from |S10 to <U10
Converted ccdname from |S3 to <U3
Converted camera from |S5 to <U5
Converted expid from |S12 to <U12
Converted image_filename from |S61 to <U61

In [230]:
expids= pd.Series(.expid).str.split('-').str[0].values
ccdnames= pd.Series(dr3_ccds.ccdname).str.strip().values


Out[230]:
7944

Rongpu's Stellar Density model

  • f(b,l) + 3 params for normalization
  • first try: normalize to avg star counts in some bricks at specifc b,l
    • 12 < G_gaia < 16
    • does model predict nstars similar to number in tractor cats?
  • improve on first try: give Rongpu a new G_gaia range for the stars I'm intereseted in and get the new normalizataion params from him

In [143]:
def nstars(l, b):
    """Rongpu Zhou's model
    https://desi.lbl.gov/DocDB/cgi-bin/private/RetrieveFile?docid=2966
    """
    c1, c2, c3 =  [0.56099869, 0.08483852,  0.428828]
    return (1/np.sin(np.abs(b)/180*np.pi)-1) * \
            (1+c1*np.cos(l/180*np.pi)+c2*np.cos(2*l/180*np.pi))+c3

    
xv, yv = np.meshgrid(np.linspace(-180,180,1000),np.linspace(-80,80,1000))
n= nstars(xv,yv)
n.shape


Out[143]:
(1000, 1000)

In [155]:
xv, yv = np.meshgrid(np.linspace(0,20,1000),np.linspace(30,50,1000))
n= nstars(xv,yv)
print(np.median(n))

vmin,vmax= np.percentile(n,q=5),np.percentile(n,q=95)
plt.imshow(np.log10(n),vmin=np.log10(vmin),vmax=np.log10(vmax))
plt.colorbar()


1.33336159055
Out[155]:
<matplotlib.colorbar.Colorbar at 0x119064b70>

In [149]:
vmin,vmax= np.percentile(n,q=5),np.percentile(n,q=95)
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].imshow(n,vmin=vmin,vmax=vmax)
plt.colorbar(ax=ax[0])

ax[1].imshow(np.log10(n),vmin=np.log10(vmin),vmax=np.log10(vmax))
ax[1].colorbar()


---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-149-988af8044db7> in <module>()
      2 fig,ax= plt.subplots(1,2,figsize=(10,5))
      3 ax[0].imshow(n,vmin=vmin,vmax=vmax)
----> 4 plt.colorbar(ax=ax[0])
      5 
      6 ax[1].imshow(np.log10(n),vmin=np.log10(vmin),vmax=np.log10(vmax))

~/miniconda3/envs/mlbook/lib/python3.6/site-packages/matplotlib/pyplot.py in colorbar(mappable, cax, ax, **kw)
   2252         mappable = gci()
   2253         if mappable is None:
-> 2254             raise RuntimeError('No mappable was found to use for colorbar '
   2255                                'creation. First define a mappable such as '
   2256                                'an image (with imshow) or a contour set ('

RuntimeError: No mappable was found to use for colorbar creation. First define a mappable such as an image (with imshow) or a contour set (with contourf).

In [150]:
vmin,vmax= np.percentile(n,q=5),np.percentile(n,q=95)
plt.imshow(n,vmin=vmin,vmax=vmax)
plt.colorbar()


Out[150]:
<matplotlib.colorbar.Colorbar at 0x1193c27b8>

In [151]:
plt.imshow(np.log10(n),vmin=np.log10(vmin),vmax=np.log10(vmax))
plt.colorbar()


Out[151]:
<matplotlib.colorbar.Colorbar at 0x119778320>

In [146]:
sns.distplot(np.log10(n.flatten()))
#plt.xlim(-1,1)
print(n.min(),n.max())


0.436909073748 1176.345523

In [ ]:
#sns.distplot(np.log10(n.flatten()))

DESI

Use DR5 data


In [583]:
DATA_DIR = os.path.join(os.environ['HOME'],'Downloads',
                        'truth')
dr5_fns= glob(os.path.join(DATA_DIR,
                'dr5.0/trimmed/decals-dr5.0-deep2-field*-trim.fits'))
dp2_fns= glob(os.path.join(DATA_DIR,
                'dr5.0/trimmed/deep2-field*-trim.fits'))                                                               
print(dr5_fns,dp2_fns)

dr5= stack_tables(dr5_fns)
dp2= stack_tables(dp2_fns)


['/Users/kaylan1/Downloads/truth/dr5.0/trimmed/decals-dr5.0-deep2-field2-trim.fits', '/Users/kaylan1/Downloads/truth/dr5.0/trimmed/decals-dr5.0-deep2-field3-trim.fits', '/Users/kaylan1/Downloads/truth/dr5.0/trimmed/decals-dr5.0-deep2-field4-trim.fits'] ['/Users/kaylan1/Downloads/truth/dr5.0/trimmed/deep2-field2-trim.fits', '/Users/kaylan1/Downloads/truth/dr5.0/trimmed/deep2-field3-trim.fits', '/Users/kaylan1/Downloads/truth/dr5.0/trimmed/deep2-field4-trim.fits']
Stacking /Users/kaylan1/Downloads/truth/dr5.0/trimmed/decals-dr5.0-deep2-field2-trim.fits
Converted brickname from |S8 to <U8
Converted type from |S4 to <U4
Converted wise_coadd_id from |S8 to <U8
Stacking /Users/kaylan1/Downloads/truth/dr5.0/trimmed/decals-dr5.0-deep2-field3-trim.fits
Converted brickname from |S8 to <U8
Converted type from |S4 to <U4
Converted wise_coadd_id from |S8 to <U8
Stacking /Users/kaylan1/Downloads/truth/dr5.0/trimmed/decals-dr5.0-deep2-field4-trim.fits
Converted brickname from |S8 to <U8
Converted type from |S4 to <U4
Converted wise_coadd_id from |S8 to <U8
Stacking /Users/kaylan1/Downloads/truth/dr5.0/trimmed/deep2-field2-trim.fits
Converted source from |S12 to <U12
Converted vis_morph from |S1 to <U1
Stacking /Users/kaylan1/Downloads/truth/dr5.0/trimmed/deep2-field3-trim.fits
Converted source from |S12 to <U12
Converted vis_morph from |S1 to <U1
Stacking /Users/kaylan1/Downloads/truth/dr5.0/trimmed/deep2-field4-trim.fits
Converted source from |S12 to <U12
Converted vis_morph from |S1 to <U1

In [584]:
def get_xy_pad(slope,pad):
    """Returns dx,dy"""
    theta= np.arctan(abs(slope))
    return pad*np.sin(theta), pad*np.cos(theta)

def y1_line(rz,pad=None):
    slope,yint= 1.15,-0.15
    if pad: 
        dx,dy= get_xy_pad(slope,pad)
        return slope*(rz+dx) + yint + dy
    else:
        return slope*rz + yint
    
def y2_line(rz,pad=None):
    slope,yint= -1.2,1.6
    if pad: 
        dx,dy= get_xy_pad(slope,pad)
        return slope*(rz-dx) + yint + dy
    else:
        return slope*rz + yint

def get_ELG_box(rz,gr, pad=None):
    """
    Args:
        rz: r-z
        gr: g-r
        pad: magnitudes of padding to expand TS box
    """
    x1,y1= rz,y1_line(rz)
    x2,y2= rz,y2_line(rz)
    x3,y3= np.array([0.3]*len(rz)),gr
    x4,y4= np.array([1.6]*len(rz)),gr
    if pad:
        dx,dy= get_xy_pad(1.15,pad)
        x1,y1= x1-dx,y1+dy
        dx,dy= get_xy_pad(-1.2,pad)
        x2,y2= x2+dx,y2+dy
        x3 -= pad
        x4 += pad
    return dict(x1=x1, y1=y1,
                x2=x2, y2=y2,
                x3=x3, y3=y3,
                x4=x4, y4=y4)

In [585]:
grz_gt0= ((dr5.flux_g > 0) &
          (dr5.flux_r > 0) &
          (dr5.flux_z > 0) &
          (dr5.flux_ivar_g > 0) &
          (dr5.flux_ivar_r > 0) &
          (dr5.flux_ivar_z > 0))
#redshift_gt0= dp2.zhelio > 0
complDP2_buff= ((dp2.zhelio >= 0.8-0.2) & 
                (dp2.zhelio <= 1.4+0.2))

fwhm_or_rhalf= np.zeros(len(dr5))-1 # arcsec
isPSF= np.char.strip(dr5.type) == 'PSF'
isEXP= pd.Series(np.char.strip(dr5.type)).isin(['EXP','REX'])
isSIMP= np.char.strip(dr5.type) == 'SIMP'
isDEV= np.char.strip(dr5.type) == 'DEV'
isCOMP= np.char.strip(dr5.type) == 'COMP'
# rhalf ~ fwhm/2
fwhm_or_rhalf[isPSF]= np.mean(np.array([dr5[isPSF].psfsize_g,
                                        dr5[isPSF].psfsize_r,
                                        dr5[isPSF].psfsize_z]),axis=0)/2
fwhm_or_rhalf[isSIMP]= 0.5
fwhm_or_rhalf[isEXP]= dr5[isEXP].shapeexp_r
fwhm_or_rhalf[isDEV]= dr5[isDEV].shapedev_r
dr5.set('fwhm_or_rhalf',fwhm_or_rhalf)

print(len(dr5),len(dp2))
print(len(dr5[grz_gt0]), len(dr5[isDEV]), len(dr5[isCOMP]))
print(len(dr5[fwhm_or_rhalf < 5]),len(dr5[complDP2_buff]))

print(set(dr5.type))
print(pd.Series(dr5.type).value_counts()/len(dr5))


28300 28300
27707 769 78
28283 23211
{'EXP', 'COMP', 'DEV', 'PSF', 'REX'}
REX     0.506325
PSF     0.324912
EXP     0.138834
DEV     0.027173
COMP    0.002756
dtype: float64

In [586]:
keep= ((grz_gt0) & 
       (isCOMP == False) &
       (fwhm_or_rhalf < 5) & 
       (complDP2_buff)) 
dr5.cut(keep)
dp2.cut(keep)
len(dr5)


Out[586]:
22705

In [587]:
d= {}
for i,b in zip([1,2,4],'grz'):
    d[b]= flux2mag(dr5.get('flux_'+b)/dr5.get('mw_transmission_'+b))
    #d[b+'_ivar']= flux2mag(dr5.get('decam_flux_ivar')[:,i]/dr5.get('decam_mw_transmission')[:,i])  
d['redshift']= dp2.get('zhelio')
d['fwhm_or_rhalf']= dr5.fwhm_or_rhalf
d['type']= dr5.get('type')
df= pd.DataFrame(d)
df['g-r']= df['g'] - df['r']
df['r-z']= df['r'] - df['z']

# TS box
pad= get_ELG_box(df['r-z'].values,df['g-r'].values,pad=0.5)
inBox= ((df['g-r'] <= y1_line(df['r-z'],pad=0.5)) &
        (df['g-r'] <= y2_line(df['r-z'],pad=0.5)) & 
        (df['r-z'] >= 0.3 - 0.5) & 
        (df['r-z'] <= 1.6 + 0.5))

# separate out DEV, important galaxy type but in minority for Deep 2
isDEV= df['type'] == 'DEV'
print(df.loc[isDEV,'type'].value_counts())
print(df.loc[isDEV == False,'type'].value_counts())


DEV    654
Name: type, dtype: int64
REX    12046
PSF     6621
EXP     3384
Name: type, dtype: int64

in TS box


In [588]:
fig,ax=plt.subplots()
ax.scatter(df.loc[inBox,'r-z'], df.loc[inBox,'g-r'],alpha=0.5)
ax.plot(pad['x1'],pad['y1'],'r--')
ax.plot(pad['x2'],pad['y2'],c='r',ls='--',lw=2)
ax.plot(pad['x3'],pad['y3'],c='r',ls='--',lw=2)
ax.plot(pad['x4'],pad['y4'],c='r',ls='--',lw=2)
ax.set_xlabel('r-z')
ax.set_ylabel('g-r')
ax.set_ylim(-0.3,2)
ax.set_xlim(-0.6,2.2)


Out[588]:
(-0.6, 2.2)

Properties != func(TS box)


In [112]:
attrs= ['redshift','g','r','z','fwhm_or_rhalf']

fig,ax= plt.subplots(2,3,figsize=(12,10))
i=-1
for row in range(2):
    for col in range(3):
        i+=1
        if i >= len(attrs):
            continue
        _=ax[row,col].hist(df.loc[inBox,attrs[i]],histtype='step',normed=True,
                           color='b',label='inBox')
        _=ax[row,col].hist(df[attrs[i]],histtype='step',normed=True,
                           color='r',label='All')
        ax[row,col].set_xlabel(attrs[i])
        if i == 0:
            ax[row,col].legend()


DEV are larger and brighter than EXP


In [113]:
attrs= ['redshift','g','r','z','fwhm_or_rhalf']

fig,ax= plt.subplots(2,3,figsize=(12,10))
i=-1
for row in range(2):
    for col in range(3):
        i+=1
        if i >= len(attrs):
            continue
        _=ax[row,col].hist(df.loc[(inBox) & (isDEV),attrs[i]],histtype='step',normed=True,
                           color='b',label='inBox, isDEV')
        _=ax[row,col].hist(df.loc[(inBox) & (~isDEV),attrs[i]],histtype='step',normed=True,
                           color='r',label='inBox, notDEV')
        ax[row,col].set_xlabel(attrs[i])
        if i == 0:
            ax[row,col].legend()



In [160]:
fit_cols= ['fwhm_or_rhalf', 'g-r', 'r-z','z','redshift']
df_dev= df.loc[(inBox) & (isDEV),fit_cols]
df_notdev= df.loc[(inBox) & (~isDEV),fit_cols]
# Carry on with NOT DEV
df_notdev.hist(bins=20,figsize=(12,8))


Out[160]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x128365a58>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1240ee898>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x123cf5eb8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11d172780>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x126857f28>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x126857f60>]], dtype=object)

Model as Gaussian Mixture

Fit model


In [115]:
X= df_notdev.values
gmms,i_min, n_comp, BICs= my_mixture(X)
plt.plot(n_comp,BICs)


14 components
Out[115]:
[<matplotlib.lines.Line2D at 0x120a4b550>]

Minimum components for good model is 8 (don't want to overfit)


In [120]:
fig,ax= plt.subplots(3,2,figsize=(13,5))

for row,n in enumerate([8,10,12]):
    i= np.where(n_comp == n)[0][0]
    Xpred,Ypred= gmms[i].sample(10000)
    _,bins,_= ax[row,0].hist(df_notdev['redshift'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,0].hist(Xpred[:,-1],bins=20,histtype='step',color='b',normed=True)
    #ax[1].set_xlabel('redshift')

    _,bins,_= ax[row,1].hist(df_notdev['fwhm_or_rhalf'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,1].hist(Xpred[:,0],bins=20,histtype='step',color='b',normed=True)
    #ax[2].set_xlabel('fwhm_or_rhalf')
    
ax[2,0].set_xlabel('redshift')
ax[2,1].set_xlabel('fwhm_or_rhalf')


Out[120]:
<matplotlib.text.Text at 0x121b18710>

In [121]:
fig,ax= plt.subplots(3,3,figsize=(16,5))

for row,n in enumerate([8,10,12]):
    i= np.where(n_comp == n)[0][0]
    Xpred,Ypred= gmms[i].sample(10000)
    _,bins,_= ax[row,0].hist(df_notdev['z'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,0].hist(Xpred[:,-2],bins=20,histtype='step',color='b',normed=True)

    _,bins,_= ax[row,1].hist(df_notdev['r-z']+df_notdev['z'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,1].hist(Xpred[:,-3]+Xpred[:,-2],bins=20,histtype='step',color='b',normed=True)

    _,bins,_= ax[row,2].hist(df_notdev['g-r'] + df_notdev['r-z']+df_notdev['z'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,2].hist(Xpred[:,-4] + Xpred[:,-3] + Xpred[:,-2],bins=20,histtype='step',color='b',normed=True)
    
ax[2,0].set_xlabel('z')
ax[2,1].set_xlabel('r')
ax[2,2].set_xlabel('g')


Out[121]:
<matplotlib.text.Text at 0x121f69d68>

In [117]:
# smallest n_comp feel is ok, don't overfit!
gmm= gmms[np.where(n_comp == 8)[0][0]]
logprob= gmm.score_samples(X)
responsibilities = gmm.predict_proba(X)

Xpred,Ypred= gmm.sample(10000)
d={}
for i,col in enumerate(fit_cols):
    d[col]= Xpred[:,i]
df_pred= pd.DataFrame(d)

Model in TS region


In [77]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_notdev['r-z'],df_notdev['g-r'],color='b',alpha=0.05)
ax[1].scatter(df_pred['r-z'],df_pred['g-r'],color='r',alpha=0.05)
for i in range(2):
    ax[i].set_xlabel('r-z')
    ax[i].set_xlim(-0.5,2.5)
    ax[i].set_ylim(-1,2)
ax[0].set_ylabel('g-r')


Out[77]:
<matplotlib.text.Text at 0x11c6a6e80>

In [79]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['z'],df_notdev['fwhm_or_rhalf'],color='b',alpha=0.05)
ax[1].scatter(df_pred['z'],df_pred['fwhm_or_rhalf'],color='r',alpha=0.05)
for i in range(2):
    ax[i].set_xlabel('z mag')
ax[0].set_ylabel('fwhm_or_rhalf')
for i in range(2):
    ax[i].set_xlim(19,26)
    ax[i].set_ylim(-1,4.5)


Redshift correlations


In [80]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_notdev['z'],df_notdev['redshift'],color='b',alpha=0.05)
ax[1].scatter(df_pred['z'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
    ax[i].set_xlabel('z mag')
ax[0].set_ylabel('redshift')
for i in range(2):
    ax[i].set_xlim(19,26)
    ax[i].set_ylim(0.5,1.7)



In [94]:
fig,ax= plt.subplots(2,2,figsize=(10,10))
ax[0,0].scatter(df_notdev['r-z'],df_notdev['redshift'],color='b',alpha=0.05)
ax[0,1].scatter(df_pred['r-z'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
    ax[0,i].set_xlabel('r-z')
    ax[0,i].set_xlim(-0.6,3)
    ax[0,i].set_ylim(0.2,2)
    
ax[1,0].scatter(df_notdev['g-r'],df_notdev['redshift'],color='b',alpha=0.05)
ax[1,1].scatter(df_pred['g-r'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
    ax[1,i].set_xlabel('g-r')
    ax[1,i].set_xlim(-2,2)
    ax[1,i].set_ylim(0.2,2)
    
for i in range(2):
    ax[i,0].set_ylabel('redshift')



In [96]:
sns.distplot(df_new['fwhm_or_rhalf'])
df_new['fwhm_or_rhalf'].describe()


Out[96]:
count    15919.000000
mean         0.537243
std          0.271235
min          0.113250
25%          0.320844
50%          0.536520
75%          0.678899
max          4.954905
Name: fwhm_or_rhalf, dtype: float64

In [97]:
print(fit_cols)


['fwhm_or_rhalf', 'g-r', 'r-z', 'z', 'redshift']

Sample = Model

Starting sample of 10k


In [124]:
Xpred,Ypred= gmm.sample(10000)

fig,ax= plt.subplots(2,3,figsize=(14,10))

_=ax[0,0].hist(Xpred[:,3],bins=20,histtype='step',color='r',normed=True)
ax[0,0].set_xlabel('z mag')

_=ax[0,1].hist(np.sum(Xpred[:,2:3+1],axis=1),bins=20,histtype='step',color='r',normed=True)
ax[0,1].set_xlabel('r mag')

_=ax[0,2].hist(np.sum(Xpred[:,1:3+1],axis=1),bins=20,histtype='step',color='r',normed=True)
ax[0,2].set_xlabel('g mag')

_=ax[1,0].hist(Xpred[:,-1],bins=20,histtype='step',color='r',normed=True)
ax[1,0].set_xlabel('redshift')

_=ax[1,1].hist(Xpred[:,0],bins=20,histtype='step',color='r',normed=True)
ax[1,1].set_xlabel('fwhm_or_rhalf')


Out[124]:
<matplotlib.text.Text at 0x12390b208>

Resample until 10k within redshift and size limits


In [133]:
def ELGcuts(Xpred):
    """
    Args:
        Xpred: (N,5) array-like
            5 columns are: 
            fit_cols= ['fwhm_or_rhalf', 'g-r', 'r-z','z','redshift']
    """
    red_lim= (0.6,1.6) # DESI
    rhalf_lim= (0.262/2,2.) # Camera, Data
    g,r,z= tuple(np.array([24.0,23.4,22.5])+0.5)
    return ((Xpred[:,0] < rhalf_lim[0]) |
            (Xpred[:,0] > rhalf_lim[1]) |
            (Xpred[:,-1] < red_lim[0]) |
            (Xpred[:,-1] > red_lim[1]) |
            (Xpred[:,3] >= z) | #beyond mag limit
            (np.sum(Xpred[:,2:3+1],axis=1) >= r) |
            (np.sum(Xpred[:,1:3+1],axis=1) >= g)
            )

Xpred,Ypred= gmm.sample(10000)
i=0
outLimit= ELGcuts(Xpred)
num= len(Ypred[outLimit])
while num > 0:
    i+=1
    if i > 20:
        raise ValueError
    print(num)
    Xpred[outLimit,:],Ypred[outLimit]= gmm.sample(num)
    outLimit= ELGcuts(Xpred)
    num= len(Ypred[outLimit])


4345
1934
881
382
177
88
39
17
7
3
1
1
1

In [134]:
d={}
for i,col in enumerate(fit_cols):
    d[col]= Xpred[:,i]
df_fin= pd.DataFrame(d)

In [135]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['r-z'],df_new['g-r'],color='b',alpha=0.05)
ax[1].scatter(df_fin['r-z'],df_fin['g-r'],color='r',alpha=0.05)
for i in range(2):
    ax[i].set_xlabel('r-z')
    ax[i].set_xlim(-0.5,2.5)
    ax[i].set_ylim(-1,2)
ax[0].set_ylabel('g-r')


Out[135]:
<matplotlib.text.Text at 0x123d1a358>

The GM mixture produced a sample that was then filtered to our redshift and mag limits


In [137]:
fig,ax= plt.subplots(2,3,figsize=(14,10))

_,bins,_= ax[0,0].hist(df_new['z'],bins=20,histtype='step',color='b',normed=True)
_=ax[0,0].hist(df_fin['z'],bins=20,histtype='step',color='r',normed=True)
ax[0,0].set_xlabel('z mag')

_,bins,_= ax[0,1].hist(df_new['r-z']+df_new['z'],bins=20,histtype='step',color='b',normed=True)
_=ax[0,1].hist(df_fin['r-z']+df_fin['z'],bins=20,histtype='step',color='r',normed=True)
ax[0,1].set_xlabel('r mag')

_,bins,_= ax[0,2].hist(df_new['g-r']+df_new['r-z']+df_new['z'],bins=20,histtype='step',color='b',normed=True)
_=ax[0,2].hist(df_fin['g-r']+df_fin['r-z']+df_fin['z'],bins=20,histtype='step',color='r',normed=True)
ax[0,2].set_xlabel('g mag')


_,bins,_= ax[1,0].hist(df_new['redshift'],bins=20,histtype='step',color='b',normed=True)
_=ax[1,0].hist(df_fin['redshift'],bins=20,histtype='step',color='r',normed=True)
ax[1,0].set_xlabel('redshift')

_,bins,_= ax[1,1].hist(df_new['fwhm_or_rhalf'],bins=20,histtype='step',color='b',normed=True)
_=ax[1,1].hist(df_fin['fwhm_or_rhalf'],bins=20,histtype='step',color='r',normed=True)
ax[1,1].set_xlabel('fwhm_or_rhalf')


Out[137]:
<matplotlib.text.Text at 0x12404f5f8>

Save the model produced sample


In [138]:
df_fin.columns
a=fits_table()
a.set('id',np.arange(len(df_fin)))
a.set('rhalf',df_fin['fwhm_or_rhalf'].values)
a.set('redshift',df_fin['redshift'].values)
a.set('z',df_fin['z'].values)
a.set('r',df_fin['r-z'].values + a.z)
a.set('g',df_fin['g-r'].values + a.r)
a.writeto('elg_sample_5dim_10k.fits')

Model DEV

Fit Mixture


In [170]:
X= df_dev.values
print('Training Size=',X.shape[0])
gmms,i_min, n_comp, BICs= my_mixture(X,n_comp=np.arange(1,14))
plt.plot(n_comp,BICs)


Training Size= 225
2 components
Out[170]:
[<matplotlib.lines.Line2D at 0x128f70358>]

Cannot build a good model, simply not nough DEV data points


In [174]:
fig,ax= plt.subplots(3,2,figsize=(13,5))

for row,n in enumerate([2,6,10]):
    i= np.where(n_comp == n)[0][0]
    Xpred,Ypred= gmms[i].sample(10000)
    _,bins,_= ax[row,0].hist(df_dev['redshift'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,0].hist(Xpred[:,-1],bins=20,histtype='step',color='b',normed=True)
    #ax[1].set_xlabel('redshift')

    _,bins,_= ax[row,1].hist(df_dev['fwhm_or_rhalf'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,1].hist(Xpred[:,0],bins=20,histtype='step',color='b',normed=True)
    #ax[2].set_xlabel('fwhm_or_rhalf')
    
ax[2,0].set_xlabel('redshift')
ax[2,1].set_xlabel('fwhm_or_rhalf')


Out[174]:
<matplotlib.text.Text at 0x129deef28>

In [175]:
fig,ax= plt.subplots(3,3,figsize=(16,5))

for row,n in enumerate([2,6,10]):
    i= np.where(n_comp == n)[0][0]
    Xpred,Ypred= gmms[i].sample(10000)
    _,bins,_= ax[row,0].hist(df_dev['z'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,0].hist(Xpred[:,-2],bins=20,histtype='step',color='b',normed=True)

    _,bins,_= ax[row,1].hist(df_dev['r-z']+df_dev['z'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,1].hist(Xpred[:,-3]+Xpred[:,-2],bins=20,histtype='step',color='b',normed=True)

    _,bins,_= ax[row,2].hist(df_dev['g-r'] + df_dev['r-z']+df_dev['z'],bins=20,histtype='step',color='k',normed=True)
    _=ax[row,2].hist(Xpred[:,-4] + Xpred[:,-3] + Xpred[:,-2],bins=20,histtype='step',color='b',normed=True)
    
ax[2,0].set_xlabel('z')
ax[2,1].set_xlabel('r')
ax[2,2].set_xlabel('g')


Out[175]:
<matplotlib.text.Text at 0x12a389518>

Model the DESI n(z)

100,000 random draws from sanches & kirkby, w/out replacement


In [232]:
z1e5= fits_table(os.path.join(DATA_DIR,'sanchez_kirkby_z1e5.fits'))

In [233]:
_=plt.hist(z1e5.z_cosmo,bins=20,histtype='step',color='b',normed=True)


Fit Gaussian Mixture


In [234]:
X= z1e5.z_cosmo.reshape(-1,1)
gmms,i_min, n_comp, BICs= my_mixture(X)
plt.plot(n_comp,BICs)


4 components
Out[234]:
[<matplotlib.lines.Line2D at 0x11d4f5278>]

In [235]:
fig,ax= plt.subplots(1,3,figsize=(13,5))

for col,n in enumerate([4,8,12]):
    i= np.where(n_comp == n)[0][0]
    Xpred,Ypred= gmms[i].sample(10000)
    _=ax[col].hist(z1e5.z_cosmo,bins=20,histtype='step',color='b',normed=True)
    _=ax[col].hist(Xpred,bins=20,histtype='step',color='r',normed=True)
    
ax[2].set_xlabel('redshift')


Out[235]:
<matplotlib.text.Text at 0x11d5104a8>

12 compoents to intentionally overfit (we want the exact n(z))


In [236]:
gmm= gmms[12]
z_pred,pdf_pred= gmm.sample(100000)
_=plt.hist(z1e5.z_cosmo,bins=20,histtype='step',color='b',normed=True)
_=plt.hist(z_pred,bins=20,histtype='step',color='r',normed=True)


Save the model


In [239]:
GaussianMixtureModel.save(gmm, filename='elg_nz', py='36')


Wrote elg_nz_means_36.txt
Wrote elg_nz_weights_36.txt
Wrote elg_nz_covars_36.txt

Build our final sample (from the DR3-Deep2 and n(z) model)


In [5]:
from scipy import spatial
sample_5d_10k=fits_table('elg_sample_5dim_10k.fits')
from obiwan.common import fits2pandas
sample_5d_10k= fits2pandas(sample_5d_10k)

tree = spatial.KDTree(sample_5d_10k['redshift'].values.reshape(-1,1))

In [253]:
gmm.covariances_.shape


Out[253]:
(15, 1, 1)

In [9]:
model= GaussianMixtureModel.load(filename='elg_nz',py='36',is1D=True)

In [13]:
z= model.sample(1000)
_,ind= tree.query(z)
randoms= sample_5d_10k.iloc[ind]

print(len(sample_5d_10k),len(z),len(randoms))
print(randoms.columns), print(sample_5d_10k.columns)


(15,)
10000 1000 1000
Index(['g', 'id', 'r', 'redshift', 'rhalf', 'z'], dtype='object')
Index(['g', 'id', 'r', 'redshift', 'rhalf', 'z'], dtype='object')
Out[13]:
(None, None)

In [18]:
for i in range(10):
    print(z[i],randoms['redshift'].iloc[i])


[ 1.17863478] 1.17876047504
[ 1.17322301] 1.17322153666
[ 1.1506008] 1.15054300718
[ 1.19767244] 1.19775757811
[ 1.19893259] 1.19892718065
[ 1.15093723] 1.15096243379
[ 1.15994011] 1.15990993527
[ 1.15066201] 1.15067316369
[ 1.13602797] 1.13600715492
[ 1.15877533] 1.1587258549

In [306]:
# Randoms table 
a=fits_table()
a.set('id',np.arange(len(randoms)))
a.set('id_5d10k_sample',randoms['id'].values) 
a.set('rhalf',randoms['rhalf'].values)
a.set('redshift',randoms['redshift'].values)
a.set('z',randoms['z'].values)
a.set('r',randoms['r'].values)
a.set('g',randoms['g'].values)
a.writeto('randoms_1.fits')

Final sample has DR3-Deep2 color, shape, size AND DESI n(z)


In [139]:
fig,ax= plt.subplots(1,3,figsize=(12,5))
ax[0].scatter(df_fin['r-z'],df_fin['g-r'],color='b',alpha=0.05)
ax[1].scatter(randoms['r-z'],randoms['g-r'],color='r',alpha=0.05)
_=ax[2].hist(z_10k.z_cosmo,bins=20,histtype='step',color='b',normed=True)
_=ax[2].hist(randoms['redshift'],bins=20,histtype='step',color='r',normed=True)
for i in range(2):
    ax[i].set_xlabel('r-z')
    ax[i].set_xlim(-0.5,2.5)
    ax[i].set_ylim(-1,2)
ax[0].set_ylabel('g-r')
ax[2].set_xlabel('redshift')


Out[139]:
<matplotlib.text.Text at 0x122cdfa20>

In [8]:
tenk= fits_table('../../etc/elg_sample_5dim_10k.fits')

42% is in the TS box


In [14]:
gr= tenk.g - tenk.r
rz= tenk.r - tenk.z
inBox= ((gr <= y1_line(rz)) &
        (gr <= y2_line(rz)) & 
        (rz >= 0.3) & 
        (rz <= 1.6))
plt.scatter(rz,gr,color='b',alpha=0.1)
plt.scatter(rz[inBox],gr[inBox],color='r',alpha=0.1)
print(len(rz[inBox])/float(len(rz)))


0.4201

Cosmos Randoms


In [29]:
r= fits_table('/Users/kaylan1/Downloads/randoms_seed_1_startid_1.fits')
ra,dec= [149.7, 150.8],[1.6, 2.7]
r.get_columns()


Out[29]:
['id', 'ra', 'dec', 'n', 'rhalf', 'g', 'r', 'z', 'ba', 'pa']

In [36]:
plt.scatter(r.ra,r.dec,s=2,alpha=0.01)
for i in range(2):
    plt.axvline(ra[i],ls='--',c='k')
    plt.axhline(dec[i],ls='--',c='k')



In [31]:
set(r.n), len(r.n[r.n == 1]), len(r.n[r.n == 4]), set(r.rhalf)


Out[31]:
({1.0, 4.0}, 120000, 120000, {0.5})

In [32]:
len(r),len(set(r.id))


Out[32]:
(240000, 240000)

In [37]:
plt.scatter(r.ba,r.pa,s=2,alpha=0.01)


Out[37]:
<matplotlib.collections.PathCollection at 0x1188bcd30>

In [34]:
limit= dict(g=24.0,
            r=23.4,
            z=22.5)
for b,color in zip('grz','gbk'):
    lo,hi= limit[b]-2, limit[b]+0.5
    bins= np.linspace(lo-0.5,hi+0.5,num=30)
    _= plt.hist(r.get(b),bins=bins,color=color,histtype='step',
                label=b,lw=2)
    plt.axvline(lo,ls='--',c=color)
    plt.axvline(hi,ls='--',c=color)
plt.legend()


Out[34]:
<matplotlib.legend.Legend at 0x116319d68>

In [35]:
def isElg(g,r,z):
    return ((r < 23.4) &
            (r-z > 0.3) &
            (r-z < 1.6) &
            (g-r < 1.15*(r-z) - 0.15) &
            (g-r < 1.6 - 1.2*(r-z))
           )
print('fraction TS ELG = %f' % (len(r[isElg(r.g,r.r,r.z)])/len(r)))


fraction TS ELG = 0.119608

In [ ]: