In [1]:
# This changes the current directory to the base saga directory - make sure to run this first!
# This is necessary to be able to import the py files and use the right directories,
# while keeping all the notebooks in their own directory.
import os
import sys
if 'saga_base_dir' not in locals():
saga_base_dir = os.path.abspath('..')
if saga_base_dir not in sys.path:
os.chdir(saga_base_dir)
In [2]:
from __future__ import print_function
import hosts
import targeting
import mmthecto
import numpy as np
from astropy import units as u
from astropy.coordinates import *
from astropy import table
from astropy.table import Table
from astropy.io import fits
In [3]:
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib import rcParams
rcParams['image.interpolation'] = 'none'
rcParams['figure.figsize'] = (16, 10)
In [4]:
hostlst = hosts.get_saga_hosts_from_google(useobservingsummary=True) #'named' hosts
In [5]:
candidates = """
NSAID RA DEC Name
150887 23.2455 4.53406 N/A
150578. 23.0824 12.3229 N/A
149781. 22.3942 -3.43167 OBrother
150307. 22.9283 -5.49535 N/A
61945.0 23.6087 0.297265 AnaK
149977. 22.6248 10.5315 N/A
153017. 23.9904 20.7498 N/A
126115. 0.0663717 20.7524 N/A
127226. 0.579656 -8.39644 N/A
129387. 1.40109 12.9175 N/A
129237. 1.36327 17.5922 N/A
132339. 2.63655 -1.31876 Narnia
130133. 1.65181 -7.51266 N/A
131531. 2.25767 6.00248 N/A
130625. 1.88703 4.19576 N/A
"""
candidates = Table.read(candidates,format='ascii')
hostdct = dict([(h.name, h) for h in hostlst])
candhosts = [hostdct[name] if name in hostdct else hosts.NSAHost(int(nsaid))
for nsaid, name in zip(candidates['NSAID'], candidates['Name'])]
candhosts
Out[5]:
In [6]:
# now set to the latest base catalogs
for h in candhosts:
h.fnsdss = 'catalogs/base_sql_nsa{0}.fits.gz'.format(h.nsaid)
h._cached_sdss = None
In [7]:
# actually make sure they're loaded here
for h in candhosts:
h.get_sdss_catalog()
h.get_sdss_catalog() # just to see
Out[7]:
In [8]:
p_column_for_ranking = 'PROBABILITY_CLASS_1' #this is *just* SDSS
In [9]:
hostnames_to_target = [150887, 150578, 'OBrother', 150307, 'AnaK', 149977, 153017, 129387, 129237, 'Narnia', 126115]
hosts_to_target = []
for hostname in hostnames_to_target:
if isinstance(hostname, basestring):
for host in candhosts:
if host.name == hostname:
hosts_to_target.append(host)
break
else:
raise ValueError('could not find host named {0}'.format(hostname))
else:
for host in candhosts:
if host.nsaid == hostname:
hosts_to_target.append(host)
break
else:
raise ValueError('could not find host with nsa number {0}'.format(hostname))
hosts_to_target
Out[9]:
In [10]:
for hr, hnm in sorted([(h.coords.ra.hour, h.name) for h in hosts_to_target]):
print('Host', hnm, 'is at', hr, 'hours of RA')
In [11]:
mlpred = Table.read('catalogs/SAGAobjid_PredictionAug28_SDSS_SDSSwise_v4removed_combinedPs.fits.gz')
mlpred['OBJID'].name = 'objID'
mlpred
Out[11]:
In [12]:
joined_catalogs = {}
for h in hosts_to_target:
#filter on manual remove lists *and* REMOVE!=-1
cat = targeting.remove_targets_with_remlist(h.get_sdss_catalog(), h, 'catalogs/TargetRemoveAug28_2015.csv')
pre_joined = table.join(cat, mlpred, join_type='left', keys='objID')
tokeep = pre_joined['REMOVE']==-1
notobserved = pre_joined['ZQUALITY']<3
print('removed', np.sum(~tokeep),'for remove list and', np.sum(~notobserved),'b/c observed')
joined_catalogs[h] = pre_joined[tokeep¬observed]
highp = pre_joined[p_column_for_ranking]>0.01
print('n w/ ML pred>0.01 and observed:', np.sum(highp&~notobserved), 'not yet obs:', np.sum(highp¬observed))
In [22]:
# fix up the rhost columns, which are wrong near the 0-360 boundary
for h in hosts_to_target:
cat = joined_catalogs[h]
catsc = SkyCoord(u.Quantity(cat['ra'], u.deg), u.Quantity(cat['dec'], u.deg))
seps = catsc.separation(h.coords)
dsep = seps - u.Quantity(cat['RHOST_ARCM'], u.arcmin)
if np.abs(np.mean(dsep)) > 1*u.arcsec:
print('Host', h.name, 'has >10" offsets from calculated and catalog separation. Issues? Showing histogram')
plt.figure()
plt.hist(dsep.to(u.arcsec), bins=100, histtype='step')
plt.axvline(0, color='k')
plt.axvline(np.mean(dsep).to(u.arcsec).value, color='r', ls='--')
plt.title(h.name)
joined_catalogs[h]['RHOST_ARCM'][:] = seps.to(u.arcmin).value
joined_catalogs[h]['RHOST_KPC'][:] = (seps.radian*h.dist).to(u.kpc).value
In [115]:
# compute ranks, which includes filtering on mag
rankdct = {}
for h in hosts_to_target:
cat = joined_catalogs[h]
highprob = cat[p_column_for_ranking].filled(0) > 0.5
medprob = (cat[p_column_for_ranking].filled(0) > 0.05) & ~highprob
lowprob = (cat[p_column_for_ranking].filled(0) > 0.01) & ~highprob & ~medprob
inside = cat['RHOST_KPC']<300
rankdct[h] = ranks = np.ones(len(cat), dtype=int)*-1 #the -1 will be re-set at the end
# rank 1 reserved for flux stars
ranks[inside&highprob] = 2
ranks[~inside&highprob] = 3
ranks[inside&medprob] = 4
ranks[~inside&medprob] = 5
ranks[inside&lowprob] = 6
ranks[~inside&lowprob] = 7
validtarget = (cat['r'] < 21.5) & (cat['fibermag_r']<23) & (cat['phot_sg']=='GALAXY')
other = ranks < 0
colorcutmsk = targeting.colorcut_mask(cat, targeting.bossanova_color_cuts)
ranks[validtarget&other&colorcutmsk] = 8
ranks[validtarget&other&~colorcutmsk] = 9
#informational
for h, ranks in rankdct.items():
cat = joined_catalogs[h]
hranks = dict(enumerate(np.bincount(ranks[ranks>-1])))
del hranks[0]
del hranks[1]
hranks[-1] = np.sum(ranks==-1)
print(h.name,'rank counts:', hranks)
nother = np.sum(ranks==8)
nprob = np.sum((ranks<8)&(ranks>0))
nother_obs = 270-np.sum((ranks<8)&(ranks>0)&(cat['RHOST_ARCM']<30))
print('fraction not "other": {0:.1%}, nprob_obs={1}\n'.format(nprob/(nprob+nother), nother_obs))
In [116]:
# quick checks: are the ML-selected objects meeting the fibermag and r cuts?
for h in hosts_to_target:
cat = joined_catalogs[h]
ranks = rankdct[h]
plt.figure()
msk = (ranks>0)&(ranks<8)
plt.subplot(1,2,1)
plt.hist(cat[msk]['r'],bins=25, histtype='step')
plt.axvline(21,c='r')
plt.xlabel('r')
plt.title(h.name, fontsize=24)
plt.subplot(1,2,2)
plt.hist(cat[msk]['fibermag_r'],bins=25, histtype='step')
plt.axvline(23,c='r')
plt.xlabel('fibermag_r')
plt.title(h.name, fontsize=24)
plt.tight_layout()
In [117]:
generated_cats = {}
for h in hosts_to_target:
print('On host', h.name)
sys.stdout.flush()
cat = joined_catalogs[h]
ranks = rankdct[h]
fnout = 'mmthecto/{0}_sep2015.cat'.format(h.name)
fluxfnout = fnout.replace('.cat', '.fluxstars')
print('Going to write', fnout)
msk = (cat['RHOST_ARCM']<40) & (ranks>0) & (ranks<9)
generated_cats[h] = mmthecto.generate_catalog(h, cat[msk], ranks[msk],
repeatflux=3, removefluxdistance=1*u.arcmin,
fnout=fnout, fluxfnout=fluxfnout)
In [23]:
redmappertargs = table.Table.read('catalogs/redmagic_targets_0828.fit')
redmappertargs
Out[23]:
In [24]:
# sanity checks to make sure redmapper targets are in the catalog
rmscs = SkyCoord(redmappertargs['RA']*u.deg, redmappertargs['DEC']*u.deg)
for h in hosts_to_target:
jcat = joined_catalogs[h]
rmhs = redmappertargs[rmscs.separation(h.coords)<2*u.deg]
incat = np.in1d(rmhs['OBJID'], jcat['objID'])
print(h.name, np.sum(incat), np.sum(~incat))
incat2 = np.in1d(rmhs['OBJID'], h.get_sdss_catalog()['objID'])
print(np.sum(incat2), np.sum(~incat2))
catinrmmsk = np.in1d(jcat['objID'], redmappertargs['OBJID'])
print(np.sum(catinrmmsk), np.sum(~catinrmmsk))
In [25]:
# compute ranks, which includes filtering on mag
rankdct = {}
for h in hosts_to_target:
cat = joined_catalogs[h]
highprob = cat[p_column_for_ranking].filled(0) > 0.5
medprob = (cat[p_column_for_ranking].filled(0) > 0.05) & ~highprob
lowprob = (cat[p_column_for_ranking].filled(0) > 0.01) & ~highprob & ~medprob
inside = cat['RHOST_KPC']<300
rankdct[h] = ranks = np.ones(len(cat), dtype=int)*-1 #the -1 will be re-set at the end
# rank 1 reserved for flux stars
ranks[inside&highprob] = 2
ranks[~inside&highprob] = 3
ranks[inside&medprob] = 4
ranks[~inside&medprob] = 5
ranks[inside&lowprob] = 6
ranks[~inside&lowprob] = 7
validtarget = (cat['r'] < 21.5) & (cat['fibermag_r']<23) & (cat['phot_sg']=='GALAXY')
other = ranks < 0
colorcutmsk = targeting.colorcut_mask(cat, targeting.bossanova_color_cuts)
ranks[validtarget&other&colorcutmsk] = 9
ranks[validtarget&other&~colorcutmsk] = 10
redmappertarg = np.in1d(cat['objID'], redmappertargs['OBJID'])
rg7 = ranks>7
ranks[validtarget&redmappertarg&rg7] = 8
#informational
for h, ranks in rankdct.items():
cat = joined_catalogs[h]
hranks = dict(enumerate(np.bincount(ranks[ranks>-1])))
del hranks[0]
del hranks[1]
hranks[-1] = np.sum(ranks==-1)
print(h.name,'rank counts:', hranks)
nother = np.sum(ranks==8)
nprob = np.sum((ranks<8)&(ranks>0))
nother_obs = 270-np.sum((ranks<8)&(ranks>0)&(cat['RHOST_ARCM']<30))
print('fraction not "other": {0:.1%}, nprob_obs={1}\n'.format(nprob/(nprob+nother), nother_obs))
In [36]:
hh.coords.ra.deg
Out[36]:
In [28]:
generated_cats = {}
for h in hosts_to_target:
print('On host', h.name)
sys.stdout.flush()
cat = joined_catalogs[h]
ranks = rankdct[h]
def names_from_suffix(suffix, verbose=True):
fnout = 'mmthecto/{0}_{1}.cat'.format(h.name, suffix)
fluxfnout = fnout.replace('.cat', '.fluxstars')
if verbose:
print('Going to write', fnout, fluxfnout)
return fnout, fluxfnout
fnout, fluxfnout = names_from_suffix('sep2015_w_redmapper')
msk = (cat['RHOST_ARCM']<40) & (ranks>0) & (ranks<10)
generated_cats[h] = gcat = mmthecto.generate_catalog(h, cat[msk], ranks[msk],
repeatflux=3, removefluxdistance=20*u.arcsec,
fnout=fnout, fluxfnout=fluxfnout)
check_msk = (gcat['type'] == 'TARGET')&(gcat['rank']=='8')&(gcat['object']!='qso_candidate')
gids = np.array(gcat[check_msk]['object']).astype(int)
print('Check: N in redmapper:', np.sum(np.in1d(gids, redmappertargs['OBJID'])), 'out of n that are rank 8:', len(gids))
print('')
#these can be used to apply fibermag cuts... but at least when last run *all* of the high-p object already meet the cut
#fnout, fluxfnout = names_from_suffix('sep2015_w_redmapper_fibermag22p5')
#brightfibmag = cat['fibermag_r']<22.5
#print('fibermag<22.5 cut removes', np.sum((1<ranks)&(ranks<8)&msk&~brightfibmag), 'of', np.sum((1<ranks)&(ranks<8)&msk))
#generated_cats[h] = gids = mmthecto.generate_catalog(h, cat[msk&brightfibmag], ranks[msk&brightfibmag],
# repeatflux=3, removefluxdistance=20*u.arcsec,
# fnout=fnout, fluxfnout=fluxfnout)
#fnout, fluxfnout = names_from_suffix('sep2015_w_redmapper_fibermag23')
#brightfibmag = cat['fibermag_r']<23
#print('fibermag<23 cut removes', np.sum((1<ranks)&(ranks<8)&msk&~brightfibmag), 'of', np.sum((1<ranks)&(ranks<8)&msk))
#generated_cats[h] = gids = mmthecto.generate_catalog(h, cat[msk&brightfibmag], ranks[msk&brightfibmag],
# repeatflux=3, removefluxdistance=20*u.arcsec,
# fnout=fnout, fluxfnout=fluxfnout)
In [435]:
#now *just* for AnaK we manually insert a line for the high-Z QSO candidate and the decals LSB
print(SkyCoord(354.42221684 *u.deg, 0.22783437*u.deg).to_string('hmsdms',sep=':'))
linestoadd = ['23:37:41.332\t0:13:40.204\tqso_candidate\t7\tTARGET\t21.89',
'23:37:07.90\t0:12:40.86\tDECALS_target_1\t2\tTARGET\t21.3']
anakfns = !ls mmthecto/AnaK_sep2015_w_redmapper*.cat
for catfn in anakfns:
print('Adding to', catfn)
lines = []
with open(catfn) as f:
for l in f:
lines.append(l)
with open(catfn, 'w') as f:
for l in lines:
f.write(l)
for linetoadd in linestoadd:
f.write('\n')
f.write(linetoadd)
In [399]:
for h in hosts_to_target:
gc = generated_cats[h]
fixrank = []
for r in gc['rank']:
try:
fixrank.append(int(r))
except ValueError:
fixrank.append(-2)
fixrank = np.array(fixrank)
msk = fixrank==1#(fixrank>1)&(fixrank<8)
#targeting.sampled_imagelist(gc['ra'][msk], gc['dec'][msk], names=gc['rank'][msk],n=200)
wrapat = 180*u.deg if h.ra>358 else 360*u.deg
plt.figure()
plt.scatter(Angle(gc['ra'][msk], u.deg).wrap_at(wrapat).deg, gc['dec'][msk], color='b')
plt.xlim(h.coords.ra.wrap_at(wrapat).deg-1, h.coords.ra.wrap_at(wrapat).deg+1)
plt.ylim(h.coords.dec.deg-1, h.coords.dec.deg+1)
plt.title(h.name)
In [400]:
for h in hosts_to_target:
fnout = 'mmthecto/{0}_sep2015_w_redmapper.cat'.format(h.name)
fluxfnout = fnout.replace('.cat', '.fluxstars')
cattab = Table.read(fnout, format='ascii',data_start=2)
fxtab = Table.read(fluxfnout, format='ascii')
catsc = SkyCoord(Longitude(cattab['ra'], u.hourangle), Latitude(cattab['dec'], u.deg))
fxsc = SkyCoord(fxtab['RA']*u.deg, fxtab['DEC']*u.deg)
catfx = catsc[cattab['rank']==1]
plt.figure()
plt.title(h.name)
plt.scatter(catfx.ra.deg, catfx.dec.deg, color='r') #in the catalog
plt.scatter(fxsc.ra.deg, fxsc.dec.deg, edgecolor='k', facecolor='none', marker='s',s=100)
In [401]:
for h in hosts_to_target:
fnout = 'mmthecto/{0}_sep2015_w_redmapper.cat'.format(h.name)
cattab = Table.read(fnout, format='ascii',data_start=2)
msk = (cattab['type'] == 'TARGET')&(cattab['rank']>1)&(cattab['rank']<10)&(cattab['object']!='qso_candidate')
ids = np.array(cattab[msk]['object']).astype(int)
print('n w/id in redmagic:', np.sum(np.in1d(ids, redmappertargs['OBJID'])),
'of', np.sum(cattab['rank']==8), 'rank 8 objects')
In [440]:
for h in hosts_to_target:
fnout = 'mmthecto/{0}_sep2015_w_redmapper.cat'.format(h.name)
cattab = Table.read(fnout, format='ascii',data_start=2)
msk = (cattab['type'] == 'TARGET')&(cattab['rank']>1)&(cattab['rank']<10)&(cattab['object']!='qso_candidate')
ids = np.array(cattab[msk]['object']).astype(int)
scat = h.get_sdss_catalog()
jcat = joined_catalogs[h]
scatincat = scat[np.in1d(scat['objID'], ids)]
jcatincat = jcat[np.in1d(jcat['objID'], ids)]
print(h.name)
print('targs w/ zqal>2:', np.sum(scatincat['ZQUALITY']>2), 'of', len(scatincat))
print('targs w/ remove!=-1:', np.sum(scatincat['REMOVE']!=-1), 'of', len(scatincat))
print('targs w/ fibermag>22.5', np.sum(scatincat['fibermag_r']>22.5), 'of', len(scatincat))
goodp = jcatincat[p_column_for_ranking]>0.01
print('high-p targs w/ fibermag>22.5', np.sum((jcatincat['fibermag_r']>22.5)&goodp), 'of', np.sum(goodp))
#scatincat[scatincat['ZQUALITY']>2].show_in_browser()
In [70]:
mlpred = Table.read('catalogs/SAGAobjid_Prediction_SDSSwise_gt0.txt.fits.gz')
objids = mlpred['OBJID'].astype(int)
mlpred.remove_column('OBJID')
mlpred.add_column(table.Column(name='objID', data=objids), index=0)
mlpred
Out[70]:
In [74]:
joined_catalogs = {}
for h in candhosts:
joined_catalogs[h] = table.join(h.get_sdss_catalog(), mlpred, join_type='left', keys='objID')
In [184]:
rankdct = {}
for h in candhosts:
print('On host', h.name)
sys.stdout.flush()
cat = joined_catalogs[h]
highprob = cat['PROBABILITY_CLASS_1'].filled(0) > 0.3
medprob = (cat['PROBABILITY_CLASS_1'].filled(0) > 0.05) & ~highprob
inside = cat['RHOST_KPC']<300
rankdct[h] = ranks = np.ones(len(cat))*6 # all not set below are 5
ranks[~inside] = 5 #up-weight those inside
ranks[highprob&inside] = 1
ranks[medprob&inside] = 2
ranks[highprob&~inside] = 3
ranks[medprob&~inside] = 4
fnout = 'mmthecto/{0}_sep2015_test.cat'.format(h.name)
fluxfnout = fnout.replace('.cat', '.fluxstars')
print('Going to write', fnout)
msk = ranks<5
stoutcat = mmthecto.generate_catalog(h, cat[msk], ranks[msk]+1,
repeatflux=5, removefluxdistance=1*u.arcmin,
fnout=fnout)#, fluxfnout=fluxfnout)
In [164]:
from matplotlib import patches
def scplot(sc, **kwargs):
x = sc.ra.wrap_at(180*u.deg).deg
y = sc.dec
kwargs.setdefault('edgecolor', 'none')
kwargs.setdefault('s', 5)
kwargs.setdefault('alpha', .5)
return plt.scatter(x, y, **kwargs)
In [176]:
for i, h in enumerate(candhosts):
cat = joined_catalogs[h]
sc = SkyCoord(u.Quantity(cat['ra'], u.deg), u.Quantity(cat['dec'], u.deg))
highprob = cat['PROBABILITY_CLASS_1'].filled(0) > 0.3
medprob = (cat['PROBABILITY_CLASS_1'].filled(0) > 0.05) & ~highprob
inside = cat['RHOST_KPC']<300
ranks = np.ones(len(cat))*6 # all not set below are 5
ranks[~inside] = 5 #up-weight those inside
ranks[highprob&inside] = 1
ranks[medprob&inside] = 2
ranks[highprob&~inside] = 3
ranks[medprob&~inside] = 4
fig = plt.figure()
ax = fig.add_subplot(111)
scplot(sc[ranks==6], color='c', label='rank=6')
scplot(sc[ranks==5], color='k', label='rank=5')
scplot(sc[ranks==4], color='m', alpha=1,s=30, label='rank=4')
scplot(sc[ranks==3], color='b', alpha=1,s=30, label='rank=3')
scplot(sc[ranks==2], color='g', alpha=1,s=30, label='rank=2')
scplot(sc[ranks==1], color='r', alpha=1,s=30, label='rank=1')
plt.legend(loc=0)
ell = patches.Ellipse((Angle(h.ra, u.deg).wrap_at(180*u.deg).deg, h.dec), 1/np.cos(h.dec*u.deg), 1, facecolor='none', lw=3, edgecolor='r')
ax.add_patch(ell)
plt.title(h.name)
Testing on the hectospec xfitfibs confirms the fairly obvious from above: we can get basically everything within the FOV with a high ML prob
In [47]:
n153017 = [h for h in hosts_to_target if h.nsaid == 153017][0]
cat = joined_catalogs[n153017]
arcmin_cat = u.Quantity(cat['RHOST_ARCM'], u.arcmin)
kpc_cat = u.Quantity(cat['RHOST_KPC'], u.kpc)
catsc = SkyCoord(u.Quantity(cat['ra'], u.deg), u.Quantity(cat['dec'], u.deg))
seps = catsc.separation(n153017.coords)
dsep = seps - arcmin_cat
sep_kpc = seps.radian*n153017.dist
dsep_kpc = sep_kpc - kpc_cat
In [44]:
plt.hist(dsep, bins=100, histtype='step')
None
In [45]:
plt.hist(dsep[np.abs(dsep)<10*u.deg].arcsec, bins=100, histtype='step')
None
In [51]:
plt.hist(dsep_kpc[np.abs(dsep)<10*u.deg].to(u.kpc), bins=100, histtype='step')
None