In [1]:
import numpy as np
import os
import pandas as pd
import seaborn as sns
import fitsio
import sys
sys.path.append('/Users/kaylan1/PhdStudent/Research/desi/astrometry.net/astrometry')
from astrometry.util.fits import fits_table, merge_tables
from glob import glob
from sklearn import mixture
# to make this notebook's output stable across runs
np.random.seed(7)
# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
%load_ext autoreload
%autoreload 2
In [4]:
from obiwan.common import fits2pandas
In [5]:
def stack_tables(fns):
cat=[]
assert(len(fns) > 0)
for fn in fns:
assert(os.path.exists(fn))
print('Stacking %s' % fn)
cat.append( fits_table(fn) )
return merge_tables(cat, columns='fillzero')
def flux2mag(nmgy):
return -2.5 * (np.log10(nmgy) - 9)
In [405]:
def my_mixture(X,n_comp=None,cov_type='full'):
# Compute density via Gaussian Mixtures
# we'll try several numbers of clusters
if n_comp is None:
n_comp = np.arange(1, 16)
gmms = [mixture.GaussianMixture(n_components=n,
covariance_type= cov_type).fit(X)
for n in n_comp]
BICs = [gmm.bic(X)/X.shape[0] for gmm in gmms]
i_min = np.argmin(BICs)
print("%d components" % n_comp[i_min])
return gmms,i_min, n_comp, BICs
In [452]:
class GaussianMixtureModel(object):
"""
John's class to read, write, and sample from a mixture model.
Args:
weights,means,covars: array-like, from mixture fit
covar_type: usually 'full'
py: one of ['27','36']
"""
def __init__(self, weights, means, covars,
py=None,covar_type='full',is1D=False):
assert(py in ['27','36'])
self.py= py
self.is1D= is1D
self.weights_ = weights
self.means_ = means
self.covariances_ = covars
if self.is1D:
self.weights_ = self.weights_.reshape(-1,1)
self.means_ = self.means_.reshape(-1,1)
self.covariances_ = self.covariances_.reshape(-1,1,1)
# self.n_components, self.n_dimensions = self.means_.shape[0],1
#else:
self.n_components, self.n_dimensions = self.means_.shape
#print(self.weights_.shape,self.covariances_.shape,len(self.covariances_.shape))
self.covariance_type= covar_type
@staticmethod
def save(model, filename):
for name,data in zip(['means','weights','covars'],
[model.means_, model.weights_,
model.covariances_]):
fn= '%s_%s.txt' % (filename,name)
np.savetxt(fn,data,delimiter=',')
print('Wrote %s' % fn)
@staticmethod
def load(filename,py=None,is1D=False):
d={name:np.loadtxt('%s_%s.txt' % (filename,name),delimiter=',')
for name in ['means','weights','covars']}
return GaussianMixtureModel(
d['weights'],d['means'],d['covars'],
covar_type='full',py=py,is1D=is1D)
def sample(self, n_samples=1, random_state=None):
assert(n_samples >= 1)
self.n_samples= n_samples
if random_state is None:
random_state = np.random.RandomState()
self.rng= random_state
if self.py == '2.7':
X= self.sample_py2()
else:
X,Y= self.sample_py3()
return X
def sample_py2(self):
weight_cdf = np.cumsum(self.weights_)
X = np.empty((self.n_samples, self.n_components))
rand = self.rng.rand(self.n_samples)
# decide which component to use for each sample
comps = weight_cdf.searchsorted(rand)
# for each component, generate all needed samples
for comp in range(self.n_components):
# occurrences of current component in X
comp_in_X = (comp == comps)
# number of those occurrences
num_comp_in_X = comp_in_X.sum()
if num_comp_in_X > 0:
X[comp_in_X] = self.rng.multivariate_normal(
self.means_[comp], self.covariances_[comp], num_comp_in_X)
return X
def sample_py3(self):
"""Copied from sklearn's mixture.GaussianMixture().sample()"""
print(self.weights_.shape)
try:
n_samples_comp = self.rng.multinomial(self.n_samples, self.weights_)
except ValueError:
self.weights_= np.reshape(self.weights_,len(self.weights_))
n_samples_comp = self.rng.multinomial(self.n_samples, self.weights_)
if self.covariance_type == 'full':
X = np.vstack([
self.rng.multivariate_normal(mean, covariance, int(sample))
for (mean, covariance, sample) in zip(
self.means_, self.covariances_, n_samples_comp)])
elif self.covariance_type == "tied":
X = np.vstack([
self.rng.multivariate_normal(mean, self.covariances_, int(sample))
for (mean, sample) in zip(
self.means_, n_samples_comp)])
else:
X = np.vstack([
mean + self.rng.randn(sample, n_features) * np.sqrt(covariance)
for (mean, covariance, sample) in zip(
self.means_, self.covariances_, n_samples_comp)])
y = np.concatenate([j * np.ones(sample, dtype=int)
for j, sample in enumerate(n_samples_comp)])
return (X, y)
In [781]:
fns_ngc= [os.path.join(os.environ['HOME'],'Downloads',
'ebossDR3','redshift_tractorcats','eBOSS.ELG.obiwan.eboss%s.fits' % name)
for name in ['23.v5_10_7']]
fns_sgc= [os.path.join(os.environ['HOME'],'Downloads',
'ebossDR3','redshift_tractorcats','eBOSS.ELG.obiwan.eboss%s.fits' % name)
for name in ['2122.v5_10_7']]
ngc=merge_tables([fits_table(fn)
for fn in fns_ngc],columns='fillzero')
ngc.set('field',np.array(['ngc']*len(ngc),dtype=object))
sgc=merge_tables([fits_table(fn)
for fn in fns_sgc],columns='fillzero')
sgc.set('field',np.array(['sgc']*len(sgc),dtype=object))
print(len(ngc),len(ngc.get_columns()))
print(len(sgc),len(sgc.get_columns()))
for col in sgc.get_columns():
if not 'decam' in col:
continue
print(col,ngc.get(col).shape,sgc.get(col).shape)
In [782]:
for col in set(sgc.get_columns()).intersection(set(ngc.get_columns())):
if 'apflux' in col:
print('Removing col: ',col)
sgc.delete_column(col)
ngc.delete_column(col)
a= merge_tables([ngc,sgc],columns='fillzero')
In [783]:
isStar= a.z_flag == -1
hasRedshift= a.z_flag == 1
isPrim= a.brick_primary == True
print('isStar & brick primary: %d/%d' % \
(len(a[(isStar) & (isPrim)]),len(a)))
print('hasRedshift & brick primary: %d/%d' % \
(len(a[(hasRedshift) & (isPrim)]),len(a)))
a.cut((hasRedshift) & (isPrim))
In [776]:
for key in ['sector_tsr','chunk',
'plate','fiberid']:
print(key,a.get(key)[0],type(a.get(key)[0]))
In [794]:
d={}
for i,b in zip([1,2,4],'grz'):
d[b+'flux']= a.get('decam_flux')[:,i]
d[b+'fluxivar']= a.get('decam_flux_ivar')[:,i]
d[b+'psfsize']= a.get('decam_psfsize')[:,i]
d[b+'_mw_transmission']= a.get('decam_mw_transmission')[:,i]
d['type']= a.type
d['shapeexp_r']= a.shapeexp_r
d['shapedev_r']= a.shapedev_r
d['redshift']= a.z
d['sector_tsr']= a.sector_tsr
d['field']= a.field
eboss= pd.DataFrame(d)
eboss= eboss.apply(lambda x: x.values.byteswap().newbyteorder())
for i,b in zip([1,2,4],'grz'):
eboss[b]= flux2mag(eboss[b+'flux'].values/eboss[b+'_mw_transmission'].values)
eboss['g-r']= eboss['g'] - eboss['r']
eboss['r-z']= eboss['r'] - eboss['z']
# SDSS unique identifier is (PLATE,MJD,FIBERID)
for key in ['plate','mjd','fiberid']:
assert(not key in eboss.columns)
eboss[key]= a.get(key)
eboss.describe()
Out[794]:
In [795]:
print(eboss['type'].value_counts())
# Remove COMP
isCOMP= eboss['type'].str.strip().values == 'COMP'
eboss= eboss[~isCOMP]
print(eboss['type'].value_counts())
print(eboss['type'].value_counts()/eboss.shape[0])
In [796]:
grz_gt0= ((eboss['gflux'] > 0) &
(eboss['rflux'] > 0) &
(eboss['zflux'] > 0) &
(eboss['gfluxivar'] > 0) &
(eboss['rfluxivar'] > 0) &
(eboss['zfluxivar'] > 0))
assert(eboss[grz_gt0].shape[0] == eboss.shape[0])
print(set(eboss['type']))
fwhm_or_rhalf= np.zeros(eboss.shape[0])-1 # arcsec
strip_type= eboss['type'].str.strip().values
isPSF= strip_type == 'PSF'
isEXP= pd.Series(strip_type).isin(['EXP','REX']).values
isSIMP= strip_type == 'SIMP'
isDEV= strip_type == 'DEV'
# rhalf ~ fwhm/2
fwhm_or_rhalf[isPSF]= np.mean(np.array([eboss.loc[isPSF,'gpsfsize'],
eboss.loc[isPSF,'rpsfsize'],
eboss.loc[isPSF,'zpsfsize']]),axis=0)/2
fwhm_or_rhalf[isSIMP]= 0.5
fwhm_or_rhalf[isEXP]= eboss.loc[isEXP,'shapeexp_r']
fwhm_or_rhalf[isDEV]= eboss.loc[isDEV,'shapedev_r']
eboss['fwhm_or_rhalf']= fwhm_or_rhalf
In [797]:
fig,ax= plt.subplots(1,3,figsize=(10,4))
sns.distplot(eboss['fwhm_or_rhalf'],ax=ax[0])
isSmall= (eboss['fwhm_or_rhalf'] < 10).values
sns.distplot(eboss.loc[isSmall,'fwhm_or_rhalf'],ax=ax[1])
ax[1].set_xlim(0,10)
sns.distplot(eboss.loc[(isSmall) & (isDEV),'fwhm_or_rhalf'],color='b',ax=ax[2],
label='DEV')
sns.distplot(eboss.loc[(isSmall) & (~isDEV),'fwhm_or_rhalf'],color='r',ax=ax[2],
label='not DEV')
ax[2].set_xlim(0,10)
ax[2].legend()
Out[797]:
In [798]:
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
bins=None
if attrs[i] == 'redshift':
bins=np.linspace(0,2,20)
if attrs[i] == 'fwhm_or_rhalf':
bins=np.linspace(0,10,40)
_=ax[row,col].hist(eboss.loc[(~isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
color='b',bins=bins,label='not DEV')
_=ax[row,col].hist(eboss.loc[(isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
color='r',bins=bins,label='isDEV')
ax[row,col].set_xlabel(attrs[i])
if i == 0:
ax[row,col].legend(loc='upper left')
ax[1,0].set_xlim(0,2)
#gband
oii_emitters= [21.825, 22.825]
#ax[0,0].set_xlim(oii_emitters)
ax[0,0].axvline(oii_emitters[0],c='k',ls='--')
ax[0,0].axvline(oii_emitters[1],c='k',ls='--')
print('fraction NOT dev: ',eboss[~isDEV].shape[0]/eboss.shape[0])
print('fraction dev: ',eboss[isDEV].shape[0]/eboss.shape[0])
In [799]:
isNGC= eboss['field'] == 'ngc'
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
bins=None
if attrs[i] == 'redshift':
bins=np.linspace(0,2,20)
if attrs[i] == 'fwhm_or_rhalf':
bins=np.linspace(0,10,40)
_=ax[row,col].hist(eboss.loc[(isNGC) & (isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
color='b',bins=bins,label='NGC')
_=ax[row,col].hist(eboss.loc[(~isNGC) & (isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
color='r',bins=bins,label='SGC')
ax[row,col].set_xlabel(attrs[i])
if i == 0:
ax[row,col].legend(loc='upper left')
ax[1,0].set_xlim(0,2)
#gband
oii_emitters= [21.825, 22.825]
#ax[0,0].set_xlim(oii_emitters)
ax[0,0].axvline(oii_emitters[0],c='k',ls='--')
ax[0,0].axvline(oii_emitters[1],c='k',ls='--')
Out[799]:
In [800]:
fig,ax= plt.subplots()
bins=None
_=ax.hist(eboss.loc[(isNGC) & (isDEV) & (isSmall),'g'],histtype='step',normed=True,
color='b',bins=bins,label='DEV in N')
_=ax.hist(eboss.loc[(~isNGC) & (isDEV) & (isSmall),'g'],histtype='step',normed=True,
color='r',bins=bins,label='DEV in S')
_=ax.hist(eboss.loc[(isNGC) & (~isDEV) & (isSmall),'g'],histtype='step',normed=True,
color='m',bins=bins,label='EXP in N')
_=ax.hist(eboss.loc[(~isNGC) & (~isDEV) & (isSmall),'g'],histtype='step',normed=True,
color='k',bins=bins,label='EXP in S')
ax.legend() #loc='upper left')
#gband
oii_emitters= [21.825, 22.825]
#ax[0,0].set_xlim(oii_emitters)
ax.axvline(oii_emitters[0],c='k',ls='--')
ax.axvline(oii_emitters[1],c='k',ls='--')
Out[800]:
In [801]:
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
bins=None
if attrs[i] == 'redshift':
bins=np.linspace(0,2,20)
if attrs[i] == 'fwhm_or_rhalf':
bins=np.linspace(0,10,40)
_=ax[row,col].hist(eboss.loc[(~isNGC) & (isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
color='b',bins=bins,label='DEV in S')
_=ax[row,col].hist(eboss.loc[(isNGC) & (~isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
color='r',bins=bins,label='EXP in N')
_=ax[row,col].hist(eboss.loc[(~isNGC) & (~isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
color='k',bins=bins,label='ExP in S')
ax[row,col].set_xlabel(attrs[i])
if i == 0:
ax[row,col].legend(loc='upper left')
ax[1,0].set_xlim(0,2)
#gband
oii_emitters= [21.825, 22.825]
#ax[0,0].set_xlim(oii_emitters)
ax[0,0].axvline(oii_emitters[0],c='k',ls='--')
ax[0,0].axvline(oii_emitters[1],c='k',ls='--')
print('Frac in NGC',eboss[(isNGC) & (isSmall)].shape[0]/eboss[(isSmall)].shape[0])
print('Frac in SGC',eboss[(~isNGC) & (isSmall)].shape[0]/eboss[(isSmall)].shape[0])
In [802]:
def ivar_ext_corr(flux_ivar, mw_trans):
return flux_ivar * np.power(mw_trans,2)
def fluxErrorsToMagErrors(flux, flux_invvar):
"""From Dustin Lang's tractor.brightness module
NanoMaggies().fluxErrorsToMagErrors
"""
flux = np.atleast_1d(flux)
flux_invvar = np.atleast_1d(flux_invvar)
dflux = np.zeros(len(flux))
okiv = (flux_invvar > 0)
dflux[okiv] = (1./np.sqrt(flux_invvar[okiv]))
okflux = (flux > 0)
mag = np.zeros(len(flux))
mag[okflux] = (flux2mag(flux[okflux]))
dmag = np.zeros(len(flux))
ok = (okiv * okflux)
dmag[ok] = (np.abs((-2.5 / np.log(10.)) * dflux[ok] / flux[ok]))
mag[np.logical_not(okflux)] = np.nan
dmag[np.logical_not(ok)] = np.nan
return mag.astype(np.float32), dmag.astype(np.float32)
exp_df= eboss.loc[(~isNGC) & (~isDEV) & (isSmall)]
a={}
for b in 'grz':
#print(exp_df[b+'_mw_transmission'].values)
flux_EC= exp_df[b+'flux']/exp_df[b+'_mw_transmission']
ivar_EC= ivar_ext_corr(exp_df[b+'fluxivar'],
exp_df[b+'_mw_transmission'])
a[b],a[b+'_err']= fluxErrorsToMagErrors(flux_EC,ivar_EC)
print('max diff',np.max(a[b] - exp_df[b]))
In [ ]:
exp_df['zfluxivar'] > 0.1
In [508]:
exp_df.loc[:,['gflux','gfluxivar','rflux','rfluxivar','zflux','zfluxivar',]].describe()
Out[508]:
In [504]:
pd.DataFrame(a).describe()
Out[504]:
In [507]:
len(a['g'][a['z_err'] > 50])
Out[507]:
In [672]:
fig,ax= plt.subplots(1,3,figsize=(10,4))
for i,b in zip(range(3),'grz'):
bins=None
if b == 'z':
bins= np.arange(0,2,0.2)
sns.distplot(a[b+'_err'],ax=ax[i],bins=bins)
ax[i].set_xlabel(b)
for i in range(2):
ax[i].set_xlim(0,0.4)
ax[2].set_xlim(bins[0],bins[-1])
#plt.xlim(-0.5,0.5)
Out[672]:
In [803]:
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
bins=None
if attrs[i] == 'redshift':
bins=np.linspace(0,2,20)
if attrs[i] == 'fwhm_or_rhalf':
bins=np.linspace(0,10,40)
_=ax[row,col].hist(eboss.loc[(~isNGC) & (isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
color='b',bins=bins,label='DEV in S')
_=ax[row,col].hist(eboss.loc[(~isNGC) & (~isDEV) & (isSmall),attrs[i]],histtype='step',normed=True,
color='r',bins=bins,label='EXP in S')
ax[row,col].set_xlabel(attrs[i])
if i == 0:
ax[row,col].legend(loc='upper left')
ax[1,0].set_xlim(0,2)
#gband
oii_emitters= [21.825, 22.825]
#ax[0,0].set_xlim(oii_emitters)
ax[0,0].axvline(oii_emitters[0],c='k',ls='--')
ax[0,0].axvline(oii_emitters[1],c='k',ls='--')
num_in_s=dict(isDEV=eboss[(~isNGC) & (isDEV) & (isSmall)].shape[0],
notDEV=eboss[(~isNGC) & (~isDEV) & (isSmall)].shape[0],
total=eboss[(isSmall) & (~isNGC)].shape[0])
print('DEV in S: Number=%d, Frac=%.3f' % (num_in_s['isDEV'],num_in_s['isDEV']/num_in_s['total']))
print('EXP in S: Number=%d, Frac=%.3f' % (num_in_s['notDEV'],num_in_s['notDEV']/num_in_s['total']))
In [674]:
fit_cols= ['fwhm_or_rhalf', 'g-r', 'r-z','z','redshift']
eboss_notdev= eboss.loc[(~isNGC) & (~isDEV) & (isSmall), fit_cols]
eboss_dev= eboss.loc[(~isNGC) & (isDEV) & (isSmall), fit_cols]
X= eboss_notdev.values
gmms,i_min, n_comp, BICs= my_mixture(X)
plt.plot(n_comp,BICs)
Out[674]:
In [474]:
fig,ax= plt.subplots(3,2,figsize=(13,8))
zbins=np.arange(0.,1.8,0.1)
fwhm_bins=np.arange(0,4,0.2)
for row,n in enumerate([6,9,12]):
i= np.where(n_comp == n)[0][0]
Xpred,Ypred= gmms[i].sample(10000)
_,bins,_= ax[row,0].hist(eboss_notdev['redshift'],bins=zbins,histtype='step',color='k',normed=True)
_=ax[row,0].hist(Xpred[:,-1],bins=zbins,histtype='step',color='b',normed=True)
_,bins,_= ax[row,1].hist(eboss_notdev['fwhm_or_rhalf'],bins=fwhm_bins,histtype='step',color='k',normed=True,
label='data')
_=ax[row,1].hist(Xpred[:,0],bins=fwhm_bins,histtype='step',color='b',normed=True,
label='pred')
#ax[2].set_xlabel('fwhm_or_rhalf')
ax[2,0].set_xlabel('redshift')
ax[2,1].set_xlabel('fwhm_or_rhalf')
ax[0,1].legend()
### g,r,z
#fit_cols= ['fwhm_or_rhalf', 'g-r', 'r-z','z','redshift']
fig,ax= plt.subplots(3,3,figsize=(13,8))
for row,n in enumerate([6,9,12]):
i= np.where(n_comp == n)[0][0]
Xpred,Ypred= gmms[i].sample(10000)
# each band: z,r,g
_,bins,_= ax[row,0].hist(eboss_notdev['z'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,0].hist(Xpred[:,-2],bins=bins,histtype='step',color='b',normed=True)
ax[row,0].set_xlabel('z')
_,bins,_= ax[row,1].hist(eboss_notdev['r-z'] + eboss_notdev['z'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,1].hist(Xpred[:,-3] + Xpred[:,-2],bins=bins,histtype='step',color='b',normed=True)
ax[row,1].set_xlabel('r')
_,bins,_= ax[row,2].hist(eboss_notdev['g-r'] + eboss_notdev['r-z'] + eboss_notdev['z'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,2].hist(Xpred[:,-4] + Xpred[:,-3] + Xpred[:,-2],bins=bins,histtype='step',color='b',normed=True)
ax[row,2].set_xlabel('g')
for i,b in zip(range(3),'zrg'):
ax[-1,i].set_xlabel(b)
In [592]:
def ebossInSGC(rz,gr):
return ((-0.068*rz + 0.457 < gr) &
(gr < 0.112*rz + 0.773) &
(0.218*gr + 0.571 < rz) &
(rz < -0.555*gr + 1.901))
def ebossInNGC(rz,gr):
return ((-0.068*rz + 0.457 < gr) &
(gr < 0.112*rz + 0.773) &
(0.637*gr + 0.399 < rz) &
(rz < -0.555*gr + 1.901))
class EbossBox(object):
def get_xy_pad(self,slope,pad=0):
"""Returns dx,dy"""
theta= np.arctan(abs(slope))
return pad*np.sin(theta), pad*np.cos(theta)
def get_yint_pad(self,slope,pad=0):
"""Returns dx,dy"""
theta= np.arctan(slope)
return pad / np.cos(theta)
def three_lines(self,rz,pad=0):
slopes= np.array([-0.068,0.112, 1/(-0.555)])
yints= np.array([0.457,0.773,-1.901/(-0.555)])
lines= []
for cnt,slope,yint in zip(range(len(slopes)),slopes,yints):
dy= 0
#dx,dy= self.get_xy_pad(slope,pad)
dy= self.get_yint_pad(slope,pad)
if cnt == 0:
dy *= -1
#lines.append(slope*(rz-dx) + yint + dy)
lines.append(slope*rz + yint + dy)
return tuple(lines)
def sgc_line(self,rz,pad=0):
slope,yint= 1/0.218, -0.571/0.218
dy=0.
#dx,dy= self.get_xy_pad(slope,pad)
dy= self.get_yint_pad(slope,pad)
return slope*rz + yint + dy
def ngc_line(self,rz,pad=0):
slope,yint= 1/0.637, -0.399/0.637
#dx,dy= self.get_xy_pad(slope,pad)
dy= self.get_yint_pad(slope,pad)
return slope*rz + yint + dy
def SGC(self,rz, pad):
"""
Args:
rz: r-z
pad: magnitudes of padding to expand TS box
"""
d['y1'],d['y2'],d['y3']= self.three_lines(rz,pad)
d['y4']= self.sgc_line(rz,pad)
return d
def NGC(self,rz, pad):
"""
Args:
rz: r-z
pad: magnitudes of padding to expand TS box
"""
d['y1'],d['y2'],d['y3']= self.three_lines(rz,pad)
d['y4']= self.ngc_line(rz,pad)
return d
In [582]:
plt.scatter(df.loc[(~isNGC) & (~isDEV) & (isSmall),'r-z'],
df.loc[(~isNGC) & (~isDEV) & (isSmall),'g-r'],
c='b',s=10,alpha=0.5,label='S')
plt.scatter(df.loc[(isNGC) & (~isDEV) & (isSmall),'r-z'],
df.loc[(isNGC) & (~isDEV) & (isSmall),'g-r'],
c='r',s=10,alpha=0.5,label='N')
rz= np.linspace(0.5,2,num=20)
ngc_d= EbossBox().NGC(rz,pad=0.2)
plt.plot(rz,ngc_d['y1'],'k--',lw=2)
plt.plot(rz,ngc_d['y2'],'k--',lw=2)
plt.plot(rz,ngc_d['y3'],'k--',lw=2)
plt.plot(rz,ngc_d['y4'],'m--',lw=2)
sgc_d= EbossBox().SGC(rz,pad=0.2)
plt.plot(rz,sgc_d['y4'],'b--',lw=2)
plt.legend()
plt.xlim(0.5,2)
plt.ylim(0.2,1.3)
plt.xlabel('r-z')
plt.ylabel('g-r')
Out[582]:
In [879]:
DATA_DIR = os.path.join(os.environ['HOME'],'Downloads',
'truth')
dr3_fns= glob(os.path.join(DATA_DIR,
'dr3.1/trimmed/decals-dr3.1-deep2-field*-trim.fits'))
dp2_fns= glob(os.path.join(DATA_DIR,
'dr3.1/trimmed/deep2-field*-trim.fits'))
print(dr3_fns,dp2_fns)
###
def stack_tables(fns):
cat=[]
assert(len(fns) > 0)
for fn in fns:
assert(os.path.exists(fn))
print('Stacking %s' % fn)
cat.append( fits_table(fn) )
return merge_tables(cat, columns='fillzero')
dr3= stack_tables(dr3_fns)
dp2= stack_tables(dp2_fns)
print(len(dr3),len(dp2))
#######
grz_gt0= (np.all(dr3.decam_flux[:,[1,2,4]] > 0,axis=1) &
np.all(dr3.decam_flux_ivar[:,[1,2,4]] > 0,axis=1))
notCOMP= dr3.type != 'COMP'
###
fwhm_or_rhalf= np.zeros(len(dr3))-1 # arcsec
isPSF= dr3.type == 'PSF'
isEXP= dr3.type == 'EXP'
isSIMP= dr3.type == 'SIMP'
isDEV= dr3.type == 'DEV'
fwhm_or_rhalf[isPSF]= np.mean(dr3[isPSF].decam_psfsize[:,[1,2,4]],axis=1)
fwhm_or_rhalf[isSIMP]= 0.5
fwhm_or_rhalf[isEXP]= dr3[isEXP].shapeexp_r
fwhm_or_rhalf[isDEV]= dr3[isDEV].shapedev_r
dr3.set('fwhm_or_rhalf',fwhm_or_rhalf)
###
reshift_lims= [0,2.] # eBOSS
###
keep= ((grz_gt0) &
(notCOMP) &
(fwhm_or_rhalf < 5) &
(dp2.zhelio > reshift_lims[0]) &
(dp2.zhelio < reshift_lims[1]))
dr3.cut(keep)
dp2.cut(keep)
###
d= {}
for i,b in zip([1,2,4],'grz'):
d[b]= flux2mag(dr3.get('decam_flux')[:,i]/dr3.get('decam_mw_transmission')[:,i])
#d[b+'_ivar']= flux2mag(dr3.get('decam_flux_ivar')[:,i]/dr3.get('decam_mw_transmission')[:,i])
d['redshift']= dp2.get('zhelio')
d['fwhm_or_rhalf']= dr3.fwhm_or_rhalf
d['type']= dr3.get('type')
dr3_deep2= pd.DataFrame(d)
###
print(df['type'].value_counts()/df.shape[0])
isDEV= dr3_deep2['type'] == 'DEV'
dr3_deep2['r-z']= dr3_deep2['r'] - dr3_deep2['z']
dr3_deep2['g-r']= dr3_deep2['g'] - dr3_deep2['r']
dr3_deep2['brickid']= dr3.brickid
dr3_deep2['objid']= dr3.objid
In [880]:
def eboss_scatterplot(rz,gr,redshift,
fig,ax,pad=0.):
"""Given cleaned gr,gr, and redshifts, cuts to padded SGC region
and plots scatter plot colored by redshift"""
#inSGC= ebossInSGC(df['r-z'],df['g-r'])
sgc_d= EbossBox().SGC(rz,pad=mag_pad)
inSGC= ((gr > sgc_d['y1']) &
(gr < sgc_d['y2']) &
(gr < sgc_d['y3']) &
(gr < sgc_d['y4']))
import matplotlib.colors as colors
vmin= redshift[inSGC].min()
vmax= redshift[inSGC].max()
bounds= np.arange(vmin,vmax+0.2,0.2)
norm = colors.BoundaryNorm(boundaries=bounds, ncolors=256)
scat= ax.scatter(rz[inSGC], gr[inSGC],
c=redshift[inSGC],s=10,alpha=1,
vmin=vmin,vmax=vmax,norm=norm)
rz_pts= np.linspace(0.4,2.1,num=20)
sgc_d= EbossBox().SGC(rz_pts,pad=mag_pad)
for key in ['y1','y2','y3','y4']:
ax.plot(rz_pts,sgc_d[key],'k--',lw=2)
# eBOSS Box
sgc_d= EbossBox().SGC(rz_pts,pad=0.)
for key in ['y1','y2','y3','y4']:
ax.plot(rz_pts,sgc_d[key],'m--',lw=3)
ax.set_xlabel('r-z')
ax.set_ylabel('g-r')
ax.set_xlim(rz_pts[0],rz_pts[-1])
ax.set_ylim(0,1.2)
print(len(rz))
from mpl_toolkits.axes_grid1 import make_axes_locatable
divider = make_axes_locatable(ax)
cax = divider.append_axes('right', size='5%', pad=0.05)
fig.colorbar(scat, cax=cax, orientation='vertical')
# eBOSS data
redshift_lims= [0,2] # eBOSS
eboss_cut= ((~isNGC) &
(isSmall) &
(eboss['redshift'] >= redshift_lims[0]) &
(eboss['redshift'] <= redshift_lims[1]))
# type
isDEV= dict(dr3= dr3_deep2['type'].str.strip() == 'DEV',
eboss= eboss_zcut['type'].str.strip() == 'DEV')
pad=0.2
fig,ax=plt.subplots(2,2,figsize=(16,14))
eboss_scatterplot(dr3_deep2.loc[~isDEV['dr3'],'r-z'],dr3_deep2.loc[~isDEV['dr3'],'g-r'],
dr3_deep2.loc[~isDEV['dr3'],'redshift'],
fig,ax[0,0],pad=pad)
eboss_scatterplot(eboss.loc[(eboss_cut) & (~isDEV['eboss']),'r-z'],
eboss.loc[(eboss_cut) & (~isDEV['eboss']),'g-r'],
eboss.loc[(eboss_cut) & (~isDEV['eboss']),'redshift'],
fig,ax[0,1],pad=pad)
ax[0,0].set_title('EXP, DR3-DEEP2')
ax[0,1].set_title('EXP, eBOSS')
# DEV
eboss_scatterplot(dr3_deep2.loc[isDEV['dr3'],'r-z'],dr3_deep2.loc[isDEV['dr3'],'g-r'],
dr3_deep2.loc[isDEV['dr3'],'redshift'],
fig,ax[1,0],pad=pad)
eboss_scatterplot(eboss.loc[(eboss_cut) & (isDEV['eboss']),'r-z'],
eboss.loc[(eboss_cut) & (isDEV['eboss']),'g-r'],
eboss.loc[(eboss_cut) & (isDEV['eboss']),'redshift'],
fig,ax[1,1],pad=pad)
ax[1,0].set_title('DEV, DR3-DEEP2')
ax[1,1].set_title('DEV, eBOSS')
Out[880]:
In [882]:
pad=0.
sgc_d= EbossBox().SGC(dr3_deep2['r-z'],pad=pad)
inSGC= ((dr3_deep2['g-r'] > sgc_d['y1']) &
(dr3_deep2['g-r'] < sgc_d['y2']) &
(dr3_deep2['g-r'] < sgc_d['y3']) &
(dr3_deep2['g-r'] < sgc_d['y4']))
reshift_lims= [0,2.] # eBOSS
eboss_cut= ((~isNGC) &
(~isDEV['eboss']) &
(isSmall) &
(eboss['redshift'] >= reshift_lims[0]) &
(eboss['redshift'] <= reshift_lims[1]))
g_min,g_max= eboss.loc[eboss_cut,'g'].min(), eboss.loc[eboss_cut,'g'].max()
dr3_cut= ((inSGC) &
(dr3_deep2['g'] >= g_min) &
(dr3_deep2['g'] <= g_max + pad) &
(~isDEV['dr3']))
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
bins=dict(g=np.linspace(21.5,23,20),
r=np.linspace(20.5,23,20),
z=np.linspace(19,22,20),
redshift=np.linspace(0,2,20),
fwhm_or_rhalf=np.linspace(0,5,20))
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
_=ax[row,col].hist(dr3_deep2.loc[dr3_cut,attrs[i]],
histtype='step',normed=True,
color='b',bins=bins[attrs[i]],label='DR3-DP2 (EXP & eBOSS Box)')
_=ax[row,col].hist(eboss.loc[eboss_cut,attrs[i]],
histtype='step',normed=True,
color='r',bins=bins[attrs[i]],label='eBOSS (EXP & SGC)')
ax[row,col].set_xlabel(attrs[i])
if i == 0:
ax[row,col].legend(loc='upper left')
print('DR3-DP2: Number=%d' % dr3_deep2[dr3_cut].shape[0])
print('eBOSS: Number=%d' % eboss[eboss_cut].shape[0])
In [883]:
## Reduce size of eboss_zcut so can plot
iboot=np.random.randint(0,eboss[eboss_cut].shape[0],size=5000)
kwargs= dict(shade=True, shade_lowest=False)
##
attrs= ['g','r','z','fwhm_or_rhalf']
xylims=dict(g=(21.7,23),
r=(20.9,22.5),
z=(19.8,22),
redshift=(0.6,1.3),
fwhm_or_rhalf=(0,2))
fig,ax= plt.subplots(2,2,figsize=(12,12))
i=-1
for row in range(2):
for col in range(2):
i+=1
if i >= len(attrs):
continue
# ax[row,col].scatter(eboss_zcut.loc[~isDEV['eboss'],attrs[i]],
# eboss_zcut.loc[~isDEV['eboss'],'redshift'],
# c='b',s=10,alpha=0.5,label='EXP, eBOSS')
# ax[row,col].scatter(dr3_deep2_gcut[attrs[i]],
# dr3_deep2_gcut['redshift'],
# c='r',s=10,alpha=0.5,label='EXP, DR3-DP2')
sns.kdeplot(eboss.loc[eboss_cut,attrs[i]].iloc[iboot],
eboss.loc[eboss_cut,'redshift'].iloc[iboot],
ax= ax[row,col], cmap='Blues', label='EXP, eBOSS',**kwargs)
sns.kdeplot(dr3_deep2.loc[dr3_cut,attrs[i]],
dr3_deep2.loc[dr3_cut,'redshift'],
ax= ax[row,col], cmap="Reds", label='EXP, DR3-DP2',alpha=0.5,**kwargs)
ax[row,col].set_xlim(xylims[attrs[i]])
ax[row,col].set_ylim(xylims['redshift'])
ax[row,col].set_xlabel(attrs[i])
ax[1,1].legend(['Blue: EXP, eBOSS',
'Red: EXP, DR3-DP2'],loc='upper right')
Out[883]:
In [884]:
pad=0.2
sgc_d= EbossBox().SGC(dr3_deep2['r-z'],pad=pad)
inSGC= ((dr3_deep2['g-r'] > sgc_d['y1']) &
(dr3_deep2['g-r'] < sgc_d['y2']) &
(dr3_deep2['g-r'] < sgc_d['y3']) &
(dr3_deep2['g-r'] < sgc_d['y4']))
reshift_lims= [0,2.] # eBOSS
eboss_cut= ((~isNGC) &
(isDEV['eboss']) &
(isSmall) &
(eboss['redshift'] >= reshift_lims[0]) &
(eboss['redshift'] <= reshift_lims[1]))
g_min,g_max= eboss.loc[eboss_cut,'g'].min(), eboss.loc[eboss_cut,'g'].max()
dr3_cut= ((inSGC) &
(dr3_deep2['g'] >= g_min - pad) &
(dr3_deep2['g'] <= g_max + pad) &
(isDEV['dr3']))
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
bins=dict(g=np.linspace(21.5,23,20),
r=np.linspace(20.5,23,20),
z=np.linspace(19,22,20),
redshift=np.linspace(0,2,20),
fwhm_or_rhalf=np.linspace(0,5,20))
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
_=ax[row,col].hist(dr3_deep2.loc[dr3_cut,attrs[i]],
histtype='step',normed=True,
color='b',bins=bins[attrs[i]],label='DR3-DP2 (DEV & eBOSS Box)')
_=ax[row,col].hist(eboss.loc[eboss_cut,attrs[i]],
histtype='step',normed=True,
color='r',bins=bins[attrs[i]],label='eBOSS (DEV & SGC)')
ax[row,col].set_xlabel(attrs[i])
if i == 0:
ax[row,col].legend(loc='upper left')
print('DR3-DP2: Number DEV=%d' % dr3_deep2[dr3_cut].shape[0])
print('eBOSS: Number DEV=%d' % eboss[eboss_cut].shape[0])
In [885]:
pad=0.2
sgc_d= EbossBox().SGC(dr3_deep2['r-z'],pad=pad)
inSGC= ((dr3_deep2['g-r'] > sgc_d['y1']) &
(dr3_deep2['g-r'] < sgc_d['y2']) &
(dr3_deep2['g-r'] < sgc_d['y3']) &
(dr3_deep2['g-r'] < sgc_d['y4']))
redshift_lims= [0,2.] # eBOSS
eboss_cut= ((~isNGC) &
(~isDEV['eboss']) &
(isSmall) &
(eboss['redshift'] >= redshift_lims[0]) &
(eboss['redshift'] <= redshift_lims[1]))
g_min,g_max= eboss.loc[eboss_cut,'g'].min(), eboss.loc[eboss_cut,'g'].max()
dr3_cut= ((inSGC) &
(dr3_deep2['g'] >= g_min - pad) &
(dr3_deep2['g'] <= g_max + pad) &
(~isDEV['dr3']))
attrs= ['g','r','z','redshift','fwhm_or_rhalf']
bins=dict(g=np.linspace(21.5,23,20),
r=np.linspace(20.5,23,20),
z=np.linspace(19,22,20),
redshift=np.linspace(0,2,20),
fwhm_or_rhalf=np.linspace(0,5,20))
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
_=ax[row,col].hist(dr3_deep2.loc[dr3_cut,attrs[i]],
histtype='step',normed=True,
color='b',bins=bins[attrs[i]],label='DR3-DP2 (EXP & eBOSS Box)')
_=ax[row,col].hist(eboss.loc[eboss_cut,attrs[i]],
histtype='step',normed=True,
color='r',bins=bins[attrs[i]],label='eBOSS (EXP & SGC)')
ax[row,col].set_xlabel(attrs[i])
if i == 0:
ax[row,col].legend(loc='upper left')
print('DR3-DP2: Number=%d' % dr3_deep2[dr3_cut].shape[0])
print('eBOSS: Number=%d' % eboss[eboss_cut].shape[0])
In [2]:
print('SGC: ~77k EXP, 7k DEV')
print('DR3-Deep2: ~1k EXP, 100 DEV')
In [892]:
# Limits
## eBOSS
redshift_lims= [0,2.] # eBOSS
rhalf_lim_exp= (0.262/2, 2.5) # Camera, Data
rhalf_lim_dev= (0.262/2, 5.) # Camera, Data
eboss_cut_gen= ((~isNGC) &
(isSmall) &
(eboss['redshift'] >= redshift_lims[0]) &
(eboss['redshift'] <= redshift_lims[1]))
eboss_cut_exp= ((eboss_cut_gen) &
(~isDEV['eboss']) &
(eboss['fwhm_or_rhalf'] >= rhalf_lim_exp[0]) &
(eboss['fwhm_or_rhalf'] <= rhalf_lim_exp[1]))
eboss_cut_dev= ((eboss_cut_gen) &
(isDEV['eboss']) &
(eboss['fwhm_or_rhalf'] >= rhalf_lim_dev[0]) &
(eboss['fwhm_or_rhalf'] <= rhalf_lim_dev[1]))
## DR3-DP2
g_min= eboss.loc[(eboss_cut_exp) | (eboss_cut_dev),'g'].min()
g_max= eboss.loc[(eboss_cut_exp) | (eboss_cut_dev),'g'].max()
assert(pad==0.2)
dr3_cut_gen= ((inSGC) &
(dr3_deep2['g'] >= g_min - pad) &
(dr3_deep2['g'] <= g_max + pad) &
(dr3_deep2['redshift'] >= redshift_lims[0]) &
(dr3_deep2['redshift'] <= redshift_lims[1]))
dr3_cut_exp= ((dr3_cut_gen) &
(~isDEV['dr3']) &
(dr3_deep2['fwhm_or_rhalf'] >= rhalf_lim_exp[0]) &
(dr3_deep2['fwhm_or_rhalf'] <= rhalf_lim_exp[1]))
dr3_cut_dev= ((dr3_cut_gen) &
(isDEV['dr3']) &
(dr3_deep2['fwhm_or_rhalf'] >= rhalf_lim_dev[0]) &
(dr3_deep2['fwhm_or_rhalf'] <= rhalf_lim_dev[1]))
save_cols= ['g', 'r','z','fwhm_or_rhalf','redshift']
# DR3-DP2: in 0.2 padded eBOSS TS box
# unique_identfier
dr3_deep2['tractor_id']= dr3_deep2['brickid'].astype(str) + '-' + \
dr3_deep2['objid'].astype(str)
fn='eboss_elg_dr3deep2_EXP.csv'
if not os.path.exists(fn):
(dr3_deep2.loc[dr3_cut_exp,:]
.set_index('tractor_id')
.loc[:,save_cols]
#.head())
.to_csv(fn, index_label='tractor_id'))
print('Wrote %s' % fn)
fn='eboss_elg_dr3deep2_DEV.csv'
if not os.path.exists(fn):
(dr3_deep2.loc[dr3_cut_dev,:]
.set_index('tractor_id')
.loc[:,save_cols]
#.head())
.to_csv(fn, index_label='tractor_id'))
print('Wrote %s' % fn)
# eBOSS data
# unique_identfier
eboss['sdss_id']= eboss['plate'].astype(str) + '-' + \
eboss['mjd'].astype(str) + '-' + \
eboss['fiberid'].astype(str)
# Save ~ 77k eboss data cut to redshift and rhalf limits
# (not doing 10k random without relacement rows from the 77k eBOSS data points)
fn='eboss_elg_tsspectra_EXP.csv'
if not os.path.exists(fn):
(eboss.loc[eboss_cut_exp,:]
.set_index('sdss_id')
.loc[:,save_cols]
.to_csv(fn, index_label='sdss_id'))
print('Wrote %s' % fn)
fn='eboss_elg_tsspectra_DEV.csv'
if not os.path.exists(fn):
(eboss.loc[eboss_cut_dev,:]
.set_index('sdss_id')
.loc[:,save_cols]
.to_csv(fn, index_label='sdss_id'))
print('Wrote %s' % fn)
In [895]:
dr3dp2_exp= pd.read_csv('eboss_elg_dr3deep2_EXP.csv')
dr3dp2_dev= pd.read_csv('eboss_elg_dr3deep2_DEV.csv')
eboss_exp= pd.read_csv('eboss_elg_tsspectra_EXP.csv')
eboss_dev= pd.read_csv('eboss_elg_tsspectra_DEV.csv')
print(dr3dp2_exp.shape[0],dr3dp2_dev.shape[0])
print(eboss_exp.shape[0],eboss_dev.shape[0])
print(dr3dp2_exp.columns)
print(eboss_exp.columns)
In [898]:
cols= ['g', 'r', 'z', 'fwhm_or_rhalf','redshift']
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(cols):
continue
_=ax[row,col].hist(eboss_exp[cols[i]],
histtype='step',normed=True,
bins=30,color='b',label='eboss_exp') #bins[col[i]])
_=ax[row,col].hist(dr3dp2_exp[cols[i]],
histtype='step',normed=True,
bins=30,color='r',label='dr3dp2_exp') #bins[col[i]])
ax[row,col].set_xlabel(cols[i])
ax[0,2].legend()
# ax[1,0].set_xlim(-0.2,2)
# ax[1,1].set_xlim(0,2)
print('eboss redshift min,max=',eboss_exp['redshift'].min(),eboss_exp['redshift'].max())
print('dr3dp2 redshift min,max=',dr3dp2_exp['redshift'].min(),dr3dp2_exp['redshift'].max())
print('eboss rhalf min,max=',eboss_exp['fwhm_or_rhalf'].min(),eboss_exp['fwhm_or_rhalf'].max())
print('dr3dp2 rhalf min,max=',dr3dp2_exp['fwhm_or_rhalf'].min(),dr3dp2_exp['fwhm_or_rhalf'].max())
In [899]:
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(cols):
continue
_=ax[row,col].hist(eboss_dev[cols[i]],
histtype='step',normed=True,
bins=30,color='b',label='eboss_dev') #bins[col[i]])
_=ax[row,col].hist(dr3dp2_dev[cols[i]],
histtype='step',normed=True,
bins=30,color='r',label='dr3dp2_dev') #bins[col[i]])
ax[row,col].set_xlabel(cols[i])
ax[0,2].legend()
# ax[1,0].set_xlim(-0.2,2)
# ax[1,1].set_xlim(0,2)
print('eboss redshift min,max=',eboss_dev['redshift'].min(),eboss_dev['redshift'].max())
print('dr3dp2 redshift min,max=',dr3dp2_dev['redshift'].min(),dr3dp2_dev['redshift'].max())
print('eboss rhalf min,max=',eboss_dev['fwhm_or_rhalf'].min(),eboss_dev['fwhm_or_rhalf'].max())
print('dr3dp2 rhalf min,max=',dr3dp2_dev['fwhm_or_rhalf'].min(),dr3dp2_dev['fwhm_or_rhalf'].max())
In [893]:
fig,ax= plt.subplots(2,3,figsize=(12,9))
ind= np.arange(eboss[eboss_cut_exp].shape[0])
for ishuff in range(20):
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
np.random.shuffle(ind)
_=ax[row,col].hist(eboss.loc[eboss_cut_exp,attrs[i]].iloc[ind[:10000]],
histtype='step',normed=True,
bins=bins[attrs[i]])
ax[row,col].set_xlabel(attrs[i])
In [845]:
zbins= np.arange(0,2.+0.1,0.1)
print('zbins=',zbins)
print('number zs from EXP: ',eboss[(~isNGC) & (~isDEV['eboss']) & (isSmall)].shape[0])
print('number zs from DEV: ',eboss[(~isNGC) & (isDEV['eboss']) & (isSmall)].shape[0])
fig,ax=plt.subplots(1,2,figsize=(8,5))
for i,wt,name in zip(range(2),[np.ones(eboss.shape[0]), eboss['sector_tsr'].values],
['No wt', 'tsr']):
nz= (eboss['redshift'] / wt)[(~isNGC) & (isSmall)]
_=ax[i].hist(nz,histtype='step',normed=True,
color='k',bins=zbins,label='n(z)')
_=ax[i].hist(df.loc[(~isNGC) & (isDEV['eboss']) & (isSmall),'redshift'],histtype='step',normed=True,
color='b',bins=zbins,label='DEV in S')
_=ax[i].hist(df.loc[(~isNGC) & (~isDEV['eboss']) & (isSmall),'redshift'],histtype='step',normed=True,
color='r',bins=zbins,label='EXP in N&S')
ax[1].legend()
Out[845]:
In [846]:
df_z['wt']= 1/df_z['sector_tsr']
zbins= np.arange(-0.05,2.+0.1,0.1)
print('zbins=',zbins)
df_z['bins']= pd.cut(df_z['redshift'],bins=zbins)
nz= df_z.loc[:,['wt','bins']].groupby('bins').agg(np.sum)
zbins2= np.arange(0.,2.+0.1,0.1)
df_z['bins2']= pd.cut(df_z['redshift'],bins=zbins2)
nz2= df_z.loc[:,['wt','bins2']].groupby('bins2').agg(np.sum)
print('redshift bins=',zbins)
print('bin centers=', nz.index.categories.mid)
print('redshift bins2=',zbins2)
print('bin centers2=', nz2.index.categories.mid)
plt.step(nz.index.categories.mid,nz['wt'],where='mid',c='b',label='binc 0,0.1,...')
plt.step(nz2.index.categories.mid,nz2['wt'],where='mid',c='r',label='binc 0.5,0.15,...')
plt.xlabel('redshift')
plt.ylabel('Sum 1/tsr')
plt.title('SGC')
plt.legend()
Out[846]:
In [847]:
dz= 0.02
zbins= np.arange(0,2.+dz,dz)
df_z['bins']= pd.cut(df_z['redshift'],bins=zbins)
nz= df_z.loc[:,['wt','bins']].groupby('bins').agg(np.sum)
nz['wt']= nz['wt']/np.sum(nz['wt'])
print('redshift bins=',zbins)
print('bin centers=', nz.index.categories.mid)
plt.step(nz.index.categories.mid,nz['wt']/dz,where='mid',c='b')
plt.xlabel('redshift')
plt.ylabel('PDF(z), Sum(1/tsr) / max() / dz')
plt.title('SGC')
print('isNORMED?', np.sum(nz['wt']/dz) * dz)
In [849]:
gmms,i_min, n_comp, BICs= my_mixture(np.array(X).reshape(-1,1),
n_comp=np.arange(2,17))
plt.plot(n_comp,BICs)
Out[849]:
In [850]:
fig,ax= plt.subplots(4,1,figsize=(7,12))
for col,n in enumerate([3,6,8,10]):
i= np.where(n_comp == n)[0][0]
Xpred,Ypred= gmms[i].sample(10000)
_=ax[col].hist(X,bins=zbins,histtype='step',color='b',normed=True)
_=ax[col].hist(Xpred,bins=zbins,histtype='step',color='r',normed=True)
ax[-1].set_xlabel('redshift')
Out[850]:
In [851]:
gmm= gmms[np.where(n_comp == 10)[0][0]]
z_pred,pdf_pred= gmm.sample(10000)
_=plt.hist(X,bins=zbins,histtype='step',color='b',normed=True)
_=plt.hist(Xpred,bins=zbins,histtype='step',color='r',normed=True)
In [848]:
nz_pdf= nz['wt'].values
redshifts= nz.index.categories.mid.values
X= []
for z,num in zip(redshifts, nz_pdf * 10000):
if not np.isfinite(num):
print('its nan',z,num)
continue
X += [z] * int(num)
# for z,num in zip(redshifts, nz_pdf * 10000):
# print(z,int(num))
print(len(X),X[0])
plt.step(nz.index.categories.mid,nz['wt']/dz,where='mid',c='b',label='exact')
_=plt.hist(X,bins=zbins, normed=True,color='r',label='approx')
plt.xlabel('redshift')
plt.ylabel('PDF(z), Sum(1/tsr) / max() / dz')
plt.title('SGC')
Out[848]:
In [904]:
GaussianMixtureModel.save(gmm, filename='eboss_nz_elg')
In [908]:
# Test load
gmm= GaussianMixtureModel.load('eboss_nz_elg',py='36',is1D=True)
redshifts= gmm.sample(10000)
len(redshifts)
sns.distplot(redshifts)
Out[908]:
In [943]:
gmm= GaussianMixtureModel.load('eboss_nz_elg',py='36',is1D=True)
# These are in my google drive
dr3dp2_exp= pd.read_csv('eboss_elg_dr3deep2_EXP.csv')
dr3dp2_dev= pd.read_csv('eboss_elg_dr3deep2_DEV.csv')
eboss_exp= pd.read_csv('eboss_elg_tsspectra_EXP.csv')
eboss_dev= pd.read_csv('eboss_elg_tsspectra_DEV.csv')
dr3dp2_both= pd.concat([dr3dp2_exp,dr3dp2_dev],axis='rows')
assert(dr3dp2_both.shape[0] == dr3dp2_exp.shape[0] + dr3dp2_dev.shape[0])
from scipy import spatial
trees= dict(dr3dp2_exp= spatial.KDTree(dr3dp2_exp['redshift'].values.reshape(-1,1)),
dr3dp2_dev= spatial.KDTree(dr3dp2_dev['redshift'].values.reshape(-1,1)),
dr3dp2_both= spatial.KDTree(dr3dp2_both['redshift'].values.reshape(-1,1)),
eboss_exp= spatial.KDTree(eboss_exp['redshift'].values.reshape(-1,1)),
eboss_dev= spatial.KDTree(eboss_dev['redshift'].values.reshape(-1,1)))
def inEbossBox(rz,gr,pad=0.):
sgc_d= EbossBox().SGC(rz,pad=pad)
return ((gr > sgc_d['y1']) &
(gr < sgc_d['y2']) &
(gr < sgc_d['y3']) &
(gr < sgc_d['y4']))
inBox=dict(dr3dp2_exp=inEbossBox(dr3dp2_exp['r'] - dr3dp2_exp['z'],
dr3dp2_exp['g'] - dr3dp2_exp['r']),
dr3dp2_dev=inEbossBox(dr3dp2_dev['r'] - dr3dp2_dev['z'],
dr3dp2_dev['g'] - dr3dp2_dev['r']))
In [977]:
T= fits_table()
# Draw z from n(z), if z not in [0,2] redraw, store z
redshifts= gmm.sample(10000).reshape(-1)
def outside_lims(z):
red_lims=[0.,2.]
return ((z < red_lims[0]) |
(z > red_lims[1]))
i=0
redraw= outside_lims(redshifts)
num= len(redshifts[redraw])
while num > 0:
i+=1
if i > 20:
raise ValueError
print('redrawing %d redshifts' % num)
redshifts[redraw]= gmm.sample(num).reshape(-1)
redraw= outside_lims(redshifts)
num= len(redshifts[redraw])
# Store
T.set('redshift',redshifts)
# flip coin, 90% assign type = EXP, 10% assign type = DEV, store type
types= np.array(['EXP']*9 + ['DEV'])
T.set('type',types[np.random.randint(0,len(types),size=len(T))])
# in eboss TS box
# in dr3_deep2 exp+dev combined sample, find NN redshift
print('len T=',len(T))
_,i_both= trees['dr3dp2_both'].query(T.redshift.reshape(-1,1))
print('len i_both=',len(i_both))
inBox['dr3dp2_both']= inEbossBox(dr3dp2_both['r'].iloc[i_both] - dr3dp2_both['z'].iloc[i_both],
dr3dp2_both['g'].iloc[i_both] - dr3dp2_both['r'].iloc[i_both])
print('len inBox=',len(inBox['dr3dp2_both']))
# Assign g,r,z,rhalf for NNs + unique id + NN redshift to see how close really is...
d= {}
mag_shapes= ['g','r','z','fwhm_or_rhalf']
for col in mag_shapes:
d[col]= np.zeros(len(T))-1
d['nn_redshift']= np.zeros(len(T))-1
d['id']= np.zeros(len(T)).astype(str)
# inBox, use eBOSS data
keep= (inBox['dr3dp2_both']) & (T.type == 'EXP')
_,i_df= trees['eboss_exp'].query(T.redshift[keep].reshape(-1,1))
for col in mag_shapes:
d[col][keep]= eboss_exp[col].iloc[i_df]
d['id'][keep]= eboss_exp['sdss_id'].iloc[i_df]
d['nn_redshift'][keep]= eboss_exp['redshift'].iloc[i_df]
keep= (inBox['dr3dp2_both']) & (T.type == 'DEV')
_,i_df= trees['eboss_dev'].query(T.redshift[keep].reshape(-1,1))
for col in mag_shapes:
d[col][keep]= eboss_dev[col].iloc[i_df]
d['id'][keep]= eboss_dev['sdss_id'].iloc[i_df]
d['nn_redshift'][keep]= eboss_dev['redshift'].iloc[i_df]
# outBox, use DR3-Deep2 data
keep= (~inBox['dr3dp2_both']) & (T.type == 'EXP')
_,i_df= trees['dr3dp2_exp'].query(T.redshift[keep].reshape(-1,1))
for col in mag_shapes:
d[col][keep]= dr3dp2_exp[col].iloc[i_df]
d['id'][keep]= dr3dp2_exp['tractor_id'].iloc[i_df]
d['nn_redshift'][keep]= dr3dp2_exp['redshift'].iloc[i_df]
keep= (~inBox['dr3dp2_both']) & (T.type == 'DEV')
_,i_df= trees['dr3dp2_dev'].query(T.redshift[keep].reshape(-1,1))
for col in mag_shapes:
d[col][keep]= dr3dp2_dev[col].iloc[i_df]
d['id'][keep]= dr3dp2_dev['tractor_id'].iloc[i_df]
d['nn_redshift'][keep]= dr3dp2_dev['redshift'].iloc[i_df]
# Add sersic n
d['n']= np.zeros(len(T)) - 1
d['n'][T.type == 'EXP']= 1
d['n'][T.type == 'DEV']= 4
for col in mag_shapes + ['nn_redshift','n']:
assert(np.all(d[col] > 0))
assert(np.all(pd.Series(d['id']).str.len() > 1))
for col in mag_shapes + ['nn_redshift','n']:
T.set(col,d[col])
T.set('id',d['id'])
T.delete_column('type')
In [978]:
T.get_columns()
Out[978]:
In [979]:
sns.distplot(T.nn_redshift - T.redshift)
pd.Series(T.nn_redshift - T.redshift).describe()
Out[979]:
In [980]:
pd.Series(T.n).value_counts()
Out[980]:
In [983]:
cols= mag_shapes + ['redshift']
fig,ax= plt.subplots(2,3,figsize=(12,9))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(cols):
continue
_=ax[row,col].hist(T.get(cols[i])[T.n == 1],
histtype='step',normed=True,
bins=30,color='b',label='EXP')
_=ax[row,col].hist(T.get(cols[i])[T.n == 4.],
histtype='step',normed=True,
bins=30,color='r',label='DEV')
ax[row,col].set_xlabel(cols[i])
ax[1,1].legend()
Out[983]:
In [206]:
ccd_coadd= fits_table(os.path.join(os.environ['HOME'],'Downloads',
'ebossDR3','legacysurvey-3583p000-ccds.fits'))
#dr3_ccds= fits_table(os.path.join(os.environ['HOME'],'Downloads',
# 'ebossDR3','survey-ccds-decals.fits.gz'))
#dr3_ccds= fits_table(os.path.join(os.environ['HOME'],'Downloads',
# 'ebossDR3','survey-ccds-extra.fits.gz'))
dr3_ccds= fits_table(os.path.join(os.environ['HOME'],'Downloads',
'ebossDR3','survey-ccds-nondecals.fits.gz'))
In [207]:
print(len(ccd_coadd.get_columns()), len(dr3_ccds.get_columns()))
same_cols= set(ccd_coadd.get_columns()).intersection(set(dr3_ccds.get_columns()))
print(len(same_cols))
In [208]:
print(ccd_coadd.ccdname[0],ccd_coadd.image_filename[0],ccd_coadd.date_obs[0])
print(dr3_ccds.ccdname[0],dr3_ccds.image_filename[0])
In [209]:
i= ((pd.Series(dr3_ccds.ccdname).str.strip() == 'S25') &
(pd.Series(dr3_ccds.image_filename).str.strip() == \
'decam/CPDES82/c4d_130902_061728_ooi_g_v1.fits.fz'))
In [210]:
dr3_ccds[i]
Out[210]:
In [212]:
for col in list(same_cols):
print(ccd_coadd.get(col)[0],dr3_ccds[i].get(col)[0])
In [213]:
allccds= [fits_table(os.path.join(os.environ['HOME'],'Downloads',
'ebossDR3','survey-ccds-%s.fits.gz' % name))
for name in ['decals','extra','nondecals']]
allccds= merge_tables(allccds, columns='fillzero')
In [230]:
expids= pd.Series(.expid).str.split('-').str[0].values
ccdnames= pd.Series(dr3_ccds.ccdname).str.strip().values
Out[230]:
In [143]:
def nstars(l, b):
"""Rongpu Zhou's model
https://desi.lbl.gov/DocDB/cgi-bin/private/RetrieveFile?docid=2966
"""
c1, c2, c3 = [0.56099869, 0.08483852, 0.428828]
return (1/np.sin(np.abs(b)/180*np.pi)-1) * \
(1+c1*np.cos(l/180*np.pi)+c2*np.cos(2*l/180*np.pi))+c3
xv, yv = np.meshgrid(np.linspace(-180,180,1000),np.linspace(-80,80,1000))
n= nstars(xv,yv)
n.shape
Out[143]:
In [155]:
xv, yv = np.meshgrid(np.linspace(0,20,1000),np.linspace(30,50,1000))
n= nstars(xv,yv)
print(np.median(n))
vmin,vmax= np.percentile(n,q=5),np.percentile(n,q=95)
plt.imshow(np.log10(n),vmin=np.log10(vmin),vmax=np.log10(vmax))
plt.colorbar()
Out[155]:
In [149]:
vmin,vmax= np.percentile(n,q=5),np.percentile(n,q=95)
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].imshow(n,vmin=vmin,vmax=vmax)
plt.colorbar(ax=ax[0])
ax[1].imshow(np.log10(n),vmin=np.log10(vmin),vmax=np.log10(vmax))
ax[1].colorbar()
In [150]:
vmin,vmax= np.percentile(n,q=5),np.percentile(n,q=95)
plt.imshow(n,vmin=vmin,vmax=vmax)
plt.colorbar()
Out[150]:
In [151]:
plt.imshow(np.log10(n),vmin=np.log10(vmin),vmax=np.log10(vmax))
plt.colorbar()
Out[151]:
In [146]:
sns.distplot(np.log10(n.flatten()))
#plt.xlim(-1,1)
print(n.min(),n.max())
In [ ]:
#sns.distplot(np.log10(n.flatten()))
In [583]:
DATA_DIR = os.path.join(os.environ['HOME'],'Downloads',
'truth')
dr5_fns= glob(os.path.join(DATA_DIR,
'dr5.0/trimmed/decals-dr5.0-deep2-field*-trim.fits'))
dp2_fns= glob(os.path.join(DATA_DIR,
'dr5.0/trimmed/deep2-field*-trim.fits'))
print(dr5_fns,dp2_fns)
dr5= stack_tables(dr5_fns)
dp2= stack_tables(dp2_fns)
In [584]:
def get_xy_pad(slope,pad):
"""Returns dx,dy"""
theta= np.arctan(abs(slope))
return pad*np.sin(theta), pad*np.cos(theta)
def y1_line(rz,pad=None):
slope,yint= 1.15,-0.15
if pad:
dx,dy= get_xy_pad(slope,pad)
return slope*(rz+dx) + yint + dy
else:
return slope*rz + yint
def y2_line(rz,pad=None):
slope,yint= -1.2,1.6
if pad:
dx,dy= get_xy_pad(slope,pad)
return slope*(rz-dx) + yint + dy
else:
return slope*rz + yint
def get_ELG_box(rz,gr, pad=None):
"""
Args:
rz: r-z
gr: g-r
pad: magnitudes of padding to expand TS box
"""
x1,y1= rz,y1_line(rz)
x2,y2= rz,y2_line(rz)
x3,y3= np.array([0.3]*len(rz)),gr
x4,y4= np.array([1.6]*len(rz)),gr
if pad:
dx,dy= get_xy_pad(1.15,pad)
x1,y1= x1-dx,y1+dy
dx,dy= get_xy_pad(-1.2,pad)
x2,y2= x2+dx,y2+dy
x3 -= pad
x4 += pad
return dict(x1=x1, y1=y1,
x2=x2, y2=y2,
x3=x3, y3=y3,
x4=x4, y4=y4)
In [585]:
grz_gt0= ((dr5.flux_g > 0) &
(dr5.flux_r > 0) &
(dr5.flux_z > 0) &
(dr5.flux_ivar_g > 0) &
(dr5.flux_ivar_r > 0) &
(dr5.flux_ivar_z > 0))
#redshift_gt0= dp2.zhelio > 0
complDP2_buff= ((dp2.zhelio >= 0.8-0.2) &
(dp2.zhelio <= 1.4+0.2))
fwhm_or_rhalf= np.zeros(len(dr5))-1 # arcsec
isPSF= np.char.strip(dr5.type) == 'PSF'
isEXP= pd.Series(np.char.strip(dr5.type)).isin(['EXP','REX'])
isSIMP= np.char.strip(dr5.type) == 'SIMP'
isDEV= np.char.strip(dr5.type) == 'DEV'
isCOMP= np.char.strip(dr5.type) == 'COMP'
# rhalf ~ fwhm/2
fwhm_or_rhalf[isPSF]= np.mean(np.array([dr5[isPSF].psfsize_g,
dr5[isPSF].psfsize_r,
dr5[isPSF].psfsize_z]),axis=0)/2
fwhm_or_rhalf[isSIMP]= 0.5
fwhm_or_rhalf[isEXP]= dr5[isEXP].shapeexp_r
fwhm_or_rhalf[isDEV]= dr5[isDEV].shapedev_r
dr5.set('fwhm_or_rhalf',fwhm_or_rhalf)
print(len(dr5),len(dp2))
print(len(dr5[grz_gt0]), len(dr5[isDEV]), len(dr5[isCOMP]))
print(len(dr5[fwhm_or_rhalf < 5]),len(dr5[complDP2_buff]))
print(set(dr5.type))
print(pd.Series(dr5.type).value_counts()/len(dr5))
In [586]:
keep= ((grz_gt0) &
(isCOMP == False) &
(fwhm_or_rhalf < 5) &
(complDP2_buff))
dr5.cut(keep)
dp2.cut(keep)
len(dr5)
Out[586]:
In [587]:
d= {}
for i,b in zip([1,2,4],'grz'):
d[b]= flux2mag(dr5.get('flux_'+b)/dr5.get('mw_transmission_'+b))
#d[b+'_ivar']= flux2mag(dr5.get('decam_flux_ivar')[:,i]/dr5.get('decam_mw_transmission')[:,i])
d['redshift']= dp2.get('zhelio')
d['fwhm_or_rhalf']= dr5.fwhm_or_rhalf
d['type']= dr5.get('type')
df= pd.DataFrame(d)
df['g-r']= df['g'] - df['r']
df['r-z']= df['r'] - df['z']
# TS box
pad= get_ELG_box(df['r-z'].values,df['g-r'].values,pad=0.5)
inBox= ((df['g-r'] <= y1_line(df['r-z'],pad=0.5)) &
(df['g-r'] <= y2_line(df['r-z'],pad=0.5)) &
(df['r-z'] >= 0.3 - 0.5) &
(df['r-z'] <= 1.6 + 0.5))
# separate out DEV, important galaxy type but in minority for Deep 2
isDEV= df['type'] == 'DEV'
print(df.loc[isDEV,'type'].value_counts())
print(df.loc[isDEV == False,'type'].value_counts())
In [588]:
fig,ax=plt.subplots()
ax.scatter(df.loc[inBox,'r-z'], df.loc[inBox,'g-r'],alpha=0.5)
ax.plot(pad['x1'],pad['y1'],'r--')
ax.plot(pad['x2'],pad['y2'],c='r',ls='--',lw=2)
ax.plot(pad['x3'],pad['y3'],c='r',ls='--',lw=2)
ax.plot(pad['x4'],pad['y4'],c='r',ls='--',lw=2)
ax.set_xlabel('r-z')
ax.set_ylabel('g-r')
ax.set_ylim(-0.3,2)
ax.set_xlim(-0.6,2.2)
Out[588]:
In [112]:
attrs= ['redshift','g','r','z','fwhm_or_rhalf']
fig,ax= plt.subplots(2,3,figsize=(12,10))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
_=ax[row,col].hist(df.loc[inBox,attrs[i]],histtype='step',normed=True,
color='b',label='inBox')
_=ax[row,col].hist(df[attrs[i]],histtype='step',normed=True,
color='r',label='All')
ax[row,col].set_xlabel(attrs[i])
if i == 0:
ax[row,col].legend()
In [113]:
attrs= ['redshift','g','r','z','fwhm_or_rhalf']
fig,ax= plt.subplots(2,3,figsize=(12,10))
i=-1
for row in range(2):
for col in range(3):
i+=1
if i >= len(attrs):
continue
_=ax[row,col].hist(df.loc[(inBox) & (isDEV),attrs[i]],histtype='step',normed=True,
color='b',label='inBox, isDEV')
_=ax[row,col].hist(df.loc[(inBox) & (~isDEV),attrs[i]],histtype='step',normed=True,
color='r',label='inBox, notDEV')
ax[row,col].set_xlabel(attrs[i])
if i == 0:
ax[row,col].legend()
In [160]:
fit_cols= ['fwhm_or_rhalf', 'g-r', 'r-z','z','redshift']
df_dev= df.loc[(inBox) & (isDEV),fit_cols]
df_notdev= df.loc[(inBox) & (~isDEV),fit_cols]
# Carry on with NOT DEV
df_notdev.hist(bins=20,figsize=(12,8))
Out[160]:
In [115]:
X= df_notdev.values
gmms,i_min, n_comp, BICs= my_mixture(X)
plt.plot(n_comp,BICs)
Out[115]:
In [120]:
fig,ax= plt.subplots(3,2,figsize=(13,5))
for row,n in enumerate([8,10,12]):
i= np.where(n_comp == n)[0][0]
Xpred,Ypred= gmms[i].sample(10000)
_,bins,_= ax[row,0].hist(df_notdev['redshift'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,0].hist(Xpred[:,-1],bins=20,histtype='step',color='b',normed=True)
#ax[1].set_xlabel('redshift')
_,bins,_= ax[row,1].hist(df_notdev['fwhm_or_rhalf'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,1].hist(Xpred[:,0],bins=20,histtype='step',color='b',normed=True)
#ax[2].set_xlabel('fwhm_or_rhalf')
ax[2,0].set_xlabel('redshift')
ax[2,1].set_xlabel('fwhm_or_rhalf')
Out[120]:
In [121]:
fig,ax= plt.subplots(3,3,figsize=(16,5))
for row,n in enumerate([8,10,12]):
i= np.where(n_comp == n)[0][0]
Xpred,Ypred= gmms[i].sample(10000)
_,bins,_= ax[row,0].hist(df_notdev['z'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,0].hist(Xpred[:,-2],bins=20,histtype='step',color='b',normed=True)
_,bins,_= ax[row,1].hist(df_notdev['r-z']+df_notdev['z'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,1].hist(Xpred[:,-3]+Xpred[:,-2],bins=20,histtype='step',color='b',normed=True)
_,bins,_= ax[row,2].hist(df_notdev['g-r'] + df_notdev['r-z']+df_notdev['z'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,2].hist(Xpred[:,-4] + Xpred[:,-3] + Xpred[:,-2],bins=20,histtype='step',color='b',normed=True)
ax[2,0].set_xlabel('z')
ax[2,1].set_xlabel('r')
ax[2,2].set_xlabel('g')
Out[121]:
In [117]:
# smallest n_comp feel is ok, don't overfit!
gmm= gmms[np.where(n_comp == 8)[0][0]]
logprob= gmm.score_samples(X)
responsibilities = gmm.predict_proba(X)
Xpred,Ypred= gmm.sample(10000)
d={}
for i,col in enumerate(fit_cols):
d[col]= Xpred[:,i]
df_pred= pd.DataFrame(d)
In [77]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_notdev['r-z'],df_notdev['g-r'],color='b',alpha=0.05)
ax[1].scatter(df_pred['r-z'],df_pred['g-r'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('r-z')
ax[i].set_xlim(-0.5,2.5)
ax[i].set_ylim(-1,2)
ax[0].set_ylabel('g-r')
Out[77]:
In [79]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['z'],df_notdev['fwhm_or_rhalf'],color='b',alpha=0.05)
ax[1].scatter(df_pred['z'],df_pred['fwhm_or_rhalf'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('z mag')
ax[0].set_ylabel('fwhm_or_rhalf')
for i in range(2):
ax[i].set_xlim(19,26)
ax[i].set_ylim(-1,4.5)
In [80]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_notdev['z'],df_notdev['redshift'],color='b',alpha=0.05)
ax[1].scatter(df_pred['z'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('z mag')
ax[0].set_ylabel('redshift')
for i in range(2):
ax[i].set_xlim(19,26)
ax[i].set_ylim(0.5,1.7)
In [94]:
fig,ax= plt.subplots(2,2,figsize=(10,10))
ax[0,0].scatter(df_notdev['r-z'],df_notdev['redshift'],color='b',alpha=0.05)
ax[0,1].scatter(df_pred['r-z'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
ax[0,i].set_xlabel('r-z')
ax[0,i].set_xlim(-0.6,3)
ax[0,i].set_ylim(0.2,2)
ax[1,0].scatter(df_notdev['g-r'],df_notdev['redshift'],color='b',alpha=0.05)
ax[1,1].scatter(df_pred['g-r'],df_pred['redshift'],color='r',alpha=0.05)
for i in range(2):
ax[1,i].set_xlabel('g-r')
ax[1,i].set_xlim(-2,2)
ax[1,i].set_ylim(0.2,2)
for i in range(2):
ax[i,0].set_ylabel('redshift')
In [96]:
sns.distplot(df_new['fwhm_or_rhalf'])
df_new['fwhm_or_rhalf'].describe()
Out[96]:
In [97]:
print(fit_cols)
In [124]:
Xpred,Ypred= gmm.sample(10000)
fig,ax= plt.subplots(2,3,figsize=(14,10))
_=ax[0,0].hist(Xpred[:,3],bins=20,histtype='step',color='r',normed=True)
ax[0,0].set_xlabel('z mag')
_=ax[0,1].hist(np.sum(Xpred[:,2:3+1],axis=1),bins=20,histtype='step',color='r',normed=True)
ax[0,1].set_xlabel('r mag')
_=ax[0,2].hist(np.sum(Xpred[:,1:3+1],axis=1),bins=20,histtype='step',color='r',normed=True)
ax[0,2].set_xlabel('g mag')
_=ax[1,0].hist(Xpred[:,-1],bins=20,histtype='step',color='r',normed=True)
ax[1,0].set_xlabel('redshift')
_=ax[1,1].hist(Xpred[:,0],bins=20,histtype='step',color='r',normed=True)
ax[1,1].set_xlabel('fwhm_or_rhalf')
Out[124]:
In [133]:
def ELGcuts(Xpred):
"""
Args:
Xpred: (N,5) array-like
5 columns are:
fit_cols= ['fwhm_or_rhalf', 'g-r', 'r-z','z','redshift']
"""
red_lim= (0.6,1.6) # DESI
rhalf_lim= (0.262/2,2.) # Camera, Data
g,r,z= tuple(np.array([24.0,23.4,22.5])+0.5)
return ((Xpred[:,0] < rhalf_lim[0]) |
(Xpred[:,0] > rhalf_lim[1]) |
(Xpred[:,-1] < red_lim[0]) |
(Xpred[:,-1] > red_lim[1]) |
(Xpred[:,3] >= z) | #beyond mag limit
(np.sum(Xpred[:,2:3+1],axis=1) >= r) |
(np.sum(Xpred[:,1:3+1],axis=1) >= g)
)
Xpred,Ypred= gmm.sample(10000)
i=0
outLimit= ELGcuts(Xpred)
num= len(Ypred[outLimit])
while num > 0:
i+=1
if i > 20:
raise ValueError
print(num)
Xpred[outLimit,:],Ypred[outLimit]= gmm.sample(num)
outLimit= ELGcuts(Xpred)
num= len(Ypred[outLimit])
In [134]:
d={}
for i,col in enumerate(fit_cols):
d[col]= Xpred[:,i]
df_fin= pd.DataFrame(d)
In [135]:
fig,ax= plt.subplots(1,2,figsize=(10,5))
ax[0].scatter(df_new['r-z'],df_new['g-r'],color='b',alpha=0.05)
ax[1].scatter(df_fin['r-z'],df_fin['g-r'],color='r',alpha=0.05)
for i in range(2):
ax[i].set_xlabel('r-z')
ax[i].set_xlim(-0.5,2.5)
ax[i].set_ylim(-1,2)
ax[0].set_ylabel('g-r')
Out[135]:
In [137]:
fig,ax= plt.subplots(2,3,figsize=(14,10))
_,bins,_= ax[0,0].hist(df_new['z'],bins=20,histtype='step',color='b',normed=True)
_=ax[0,0].hist(df_fin['z'],bins=20,histtype='step',color='r',normed=True)
ax[0,0].set_xlabel('z mag')
_,bins,_= ax[0,1].hist(df_new['r-z']+df_new['z'],bins=20,histtype='step',color='b',normed=True)
_=ax[0,1].hist(df_fin['r-z']+df_fin['z'],bins=20,histtype='step',color='r',normed=True)
ax[0,1].set_xlabel('r mag')
_,bins,_= ax[0,2].hist(df_new['g-r']+df_new['r-z']+df_new['z'],bins=20,histtype='step',color='b',normed=True)
_=ax[0,2].hist(df_fin['g-r']+df_fin['r-z']+df_fin['z'],bins=20,histtype='step',color='r',normed=True)
ax[0,2].set_xlabel('g mag')
_,bins,_= ax[1,0].hist(df_new['redshift'],bins=20,histtype='step',color='b',normed=True)
_=ax[1,0].hist(df_fin['redshift'],bins=20,histtype='step',color='r',normed=True)
ax[1,0].set_xlabel('redshift')
_,bins,_= ax[1,1].hist(df_new['fwhm_or_rhalf'],bins=20,histtype='step',color='b',normed=True)
_=ax[1,1].hist(df_fin['fwhm_or_rhalf'],bins=20,histtype='step',color='r',normed=True)
ax[1,1].set_xlabel('fwhm_or_rhalf')
Out[137]:
In [138]:
df_fin.columns
a=fits_table()
a.set('id',np.arange(len(df_fin)))
a.set('rhalf',df_fin['fwhm_or_rhalf'].values)
a.set('redshift',df_fin['redshift'].values)
a.set('z',df_fin['z'].values)
a.set('r',df_fin['r-z'].values + a.z)
a.set('g',df_fin['g-r'].values + a.r)
a.writeto('elg_sample_5dim_10k.fits')
In [170]:
X= df_dev.values
print('Training Size=',X.shape[0])
gmms,i_min, n_comp, BICs= my_mixture(X,n_comp=np.arange(1,14))
plt.plot(n_comp,BICs)
Out[170]:
In [174]:
fig,ax= plt.subplots(3,2,figsize=(13,5))
for row,n in enumerate([2,6,10]):
i= np.where(n_comp == n)[0][0]
Xpred,Ypred= gmms[i].sample(10000)
_,bins,_= ax[row,0].hist(df_dev['redshift'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,0].hist(Xpred[:,-1],bins=20,histtype='step',color='b',normed=True)
#ax[1].set_xlabel('redshift')
_,bins,_= ax[row,1].hist(df_dev['fwhm_or_rhalf'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,1].hist(Xpred[:,0],bins=20,histtype='step',color='b',normed=True)
#ax[2].set_xlabel('fwhm_or_rhalf')
ax[2,0].set_xlabel('redshift')
ax[2,1].set_xlabel('fwhm_or_rhalf')
Out[174]:
In [175]:
fig,ax= plt.subplots(3,3,figsize=(16,5))
for row,n in enumerate([2,6,10]):
i= np.where(n_comp == n)[0][0]
Xpred,Ypred= gmms[i].sample(10000)
_,bins,_= ax[row,0].hist(df_dev['z'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,0].hist(Xpred[:,-2],bins=20,histtype='step',color='b',normed=True)
_,bins,_= ax[row,1].hist(df_dev['r-z']+df_dev['z'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,1].hist(Xpred[:,-3]+Xpred[:,-2],bins=20,histtype='step',color='b',normed=True)
_,bins,_= ax[row,2].hist(df_dev['g-r'] + df_dev['r-z']+df_dev['z'],bins=20,histtype='step',color='k',normed=True)
_=ax[row,2].hist(Xpred[:,-4] + Xpred[:,-3] + Xpred[:,-2],bins=20,histtype='step',color='b',normed=True)
ax[2,0].set_xlabel('z')
ax[2,1].set_xlabel('r')
ax[2,2].set_xlabel('g')
Out[175]:
In [232]:
z1e5= fits_table(os.path.join(DATA_DIR,'sanchez_kirkby_z1e5.fits'))
In [233]:
_=plt.hist(z1e5.z_cosmo,bins=20,histtype='step',color='b',normed=True)
In [234]:
X= z1e5.z_cosmo.reshape(-1,1)
gmms,i_min, n_comp, BICs= my_mixture(X)
plt.plot(n_comp,BICs)
Out[234]:
In [235]:
fig,ax= plt.subplots(1,3,figsize=(13,5))
for col,n in enumerate([4,8,12]):
i= np.where(n_comp == n)[0][0]
Xpred,Ypred= gmms[i].sample(10000)
_=ax[col].hist(z1e5.z_cosmo,bins=20,histtype='step',color='b',normed=True)
_=ax[col].hist(Xpred,bins=20,histtype='step',color='r',normed=True)
ax[2].set_xlabel('redshift')
Out[235]:
In [236]:
gmm= gmms[12]
z_pred,pdf_pred= gmm.sample(100000)
_=plt.hist(z1e5.z_cosmo,bins=20,histtype='step',color='b',normed=True)
_=plt.hist(z_pred,bins=20,histtype='step',color='r',normed=True)
In [239]:
GaussianMixtureModel.save(gmm, filename='elg_nz', py='36')
In [5]:
from scipy import spatial
sample_5d_10k=fits_table('elg_sample_5dim_10k.fits')
from obiwan.common import fits2pandas
sample_5d_10k= fits2pandas(sample_5d_10k)
tree = spatial.KDTree(sample_5d_10k['redshift'].values.reshape(-1,1))
In [253]:
gmm.covariances_.shape
Out[253]:
In [9]:
model= GaussianMixtureModel.load(filename='elg_nz',py='36',is1D=True)
In [13]:
z= model.sample(1000)
_,ind= tree.query(z)
randoms= sample_5d_10k.iloc[ind]
print(len(sample_5d_10k),len(z),len(randoms))
print(randoms.columns), print(sample_5d_10k.columns)
Out[13]:
In [18]:
for i in range(10):
print(z[i],randoms['redshift'].iloc[i])
In [306]:
# Randoms table
a=fits_table()
a.set('id',np.arange(len(randoms)))
a.set('id_5d10k_sample',randoms['id'].values)
a.set('rhalf',randoms['rhalf'].values)
a.set('redshift',randoms['redshift'].values)
a.set('z',randoms['z'].values)
a.set('r',randoms['r'].values)
a.set('g',randoms['g'].values)
a.writeto('randoms_1.fits')
In [139]:
fig,ax= plt.subplots(1,3,figsize=(12,5))
ax[0].scatter(df_fin['r-z'],df_fin['g-r'],color='b',alpha=0.05)
ax[1].scatter(randoms['r-z'],randoms['g-r'],color='r',alpha=0.05)
_=ax[2].hist(z_10k.z_cosmo,bins=20,histtype='step',color='b',normed=True)
_=ax[2].hist(randoms['redshift'],bins=20,histtype='step',color='r',normed=True)
for i in range(2):
ax[i].set_xlabel('r-z')
ax[i].set_xlim(-0.5,2.5)
ax[i].set_ylim(-1,2)
ax[0].set_ylabel('g-r')
ax[2].set_xlabel('redshift')
Out[139]:
In [8]:
tenk= fits_table('../../etc/elg_sample_5dim_10k.fits')
In [14]:
gr= tenk.g - tenk.r
rz= tenk.r - tenk.z
inBox= ((gr <= y1_line(rz)) &
(gr <= y2_line(rz)) &
(rz >= 0.3) &
(rz <= 1.6))
plt.scatter(rz,gr,color='b',alpha=0.1)
plt.scatter(rz[inBox],gr[inBox],color='r',alpha=0.1)
print(len(rz[inBox])/float(len(rz)))
In [29]:
r= fits_table('/Users/kaylan1/Downloads/randoms_seed_1_startid_1.fits')
ra,dec= [149.7, 150.8],[1.6, 2.7]
r.get_columns()
Out[29]:
In [36]:
plt.scatter(r.ra,r.dec,s=2,alpha=0.01)
for i in range(2):
plt.axvline(ra[i],ls='--',c='k')
plt.axhline(dec[i],ls='--',c='k')
In [31]:
set(r.n), len(r.n[r.n == 1]), len(r.n[r.n == 4]), set(r.rhalf)
Out[31]:
In [32]:
len(r),len(set(r.id))
Out[32]:
In [37]:
plt.scatter(r.ba,r.pa,s=2,alpha=0.01)
Out[37]:
In [34]:
limit= dict(g=24.0,
r=23.4,
z=22.5)
for b,color in zip('grz','gbk'):
lo,hi= limit[b]-2, limit[b]+0.5
bins= np.linspace(lo-0.5,hi+0.5,num=30)
_= plt.hist(r.get(b),bins=bins,color=color,histtype='step',
label=b,lw=2)
plt.axvline(lo,ls='--',c=color)
plt.axvline(hi,ls='--',c=color)
plt.legend()
Out[34]:
In [35]:
def isElg(g,r,z):
return ((r < 23.4) &
(r-z > 0.3) &
(r-z < 1.6) &
(g-r < 1.15*(r-z) - 0.15) &
(g-r < 1.6 - 1.2*(r-z))
)
print('fraction TS ELG = %f' % (len(r[isElg(r.g,r.r,r.z)])/len(r)))
In [ ]: