In [1]:
# This changes the current directory to the base saga directory - make sure to run this first!
# This is necessary to be able to import the py files and use the right directories,
# while keeping all the notebooks in their own directory.
import os
import sys
if 'saga_base_dir' not in locals():
saga_base_dir = os.path.abspath('..')
if saga_base_dir not in sys.path:
os.chdir(saga_base_dir)
In [2]:
from __future__ import print_function, division
from collections import Counter, OrderedDict
import hosts
import targeting
import mmthecto
import numpy as np
from astropy import units as u
from astropy.coordinates import *
from astropy import table
from astropy.visualization import hist as ahist
In [3]:
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib import rcParams
rcParams['image.interpolation'] = 'none'
rcParams['figure.figsize'] = (16, 10)
In [4]:
hostlst = hosts.get_saga_hosts_from_google() #'named' hosts
In [24]:
hosts_to_target = [h for h in hostlst if h.name in ('Sopranos', 'StarTrek', 'Alice', 'MobyDick')]
assert len(hosts_to_target)==4
hosts_to_target
Out[24]:
In [6]:
# now set to the latest base catalogs
for h in hosts_to_target:
h.fnsdss = 'SAGADropbox/base_catalogs/base_sql_nsa{0}.fits.gz'.format(h.nsaid)
h._cached_sdss = None
In [7]:
# actually make sure they're loaded here
for h in hosts_to_target:
h.get_sdss_catalog()
h.get_sdss_catalog().colnames # just to see
Out[7]:
In [8]:
# these are the already-observed objects
spectra = h.load_and_reprocess_sdss_catalog('SAGADropbox/data/saga_spectra_clean_jan15.fits.gz')
In [9]:
# diagnostic just to see how many are in the remove list
for h in hosts_to_target:
print(h.name, Counter(h.get_sdss_catalog()['REMOVE']))
Important difference from previous runs: ML probablities are now in the catalogs!
In [10]:
p_column_for_ranking = 'PROBABILITY_CLASS1' #this is *just* SDSS
In [11]:
#how many have an decent ML ranking?
print ("Fraction good ML, not good ML, no ML")
for h in hosts_to_target:
p = h.get_sdss_catalog()[p_column_for_ranking]
goodp = p>10**-4
patall = p!=-1
print(h.name, np.sum(goodp&patall)/len(p), np.sum(~goodp&patall)/len(p), np.sum(~patall)/len(p))
Note that the ranking scheme also functions as the "filtering" step - anything <=0 doesn't get written
In [12]:
# compute ranks, which includes filtering on mag
rankdct = {}
for h in hosts_to_target:
cat = h.get_sdss_catalog()
photgood = (cat['r'] < 21.5) & (cat['fibermag_r']<23) & (cat['phot_sg']=='GALAXY')
rankdct[h] = ranks = np.zeros(len(cat), dtype=int) #the 0s will be re-set at the end
inside = cat['RHOST_KPC']<300
# now set the ml prob targets
# highprob = cat[p_column_for_ranking] > 0.5
# medprob = (cat[p_column_for_ranking] > 0.05) & ~highprob
# lowprob = (cat[p_column_for_ranking] > 0.01) & ~highprob & ~medprob
highprob = cat[p_column_for_ranking] > 0.1
medprob = (cat[p_column_for_ranking] > 0.01) & ~highprob
lowprob = (cat[p_column_for_ranking] > 0.001) & ~highprob & ~medprob
lowestprob = (cat[p_column_for_ranking] > 0.0001) & ~highprob & ~medprob & ~lowprob
# rank 1 reserved for flux stars
ranks[inside&highprob] = 2
ranks[~inside&highprob] = 3
ranks[inside&medprob] = 4
ranks[~inside&medprob] = 5
ranks[inside&lowprob] = 6
ranks[~inside&lowprob] = 7
ranks[inside&lowestprob] = 8
ranks[~inside&lowestprob] = 9
notML = ranks == 0
colorcutmsk = targeting.colorcut_mask(cat, targeting.bossanova_color_cuts)
ranks[photgood¬ML&colorcutmsk] = 10
ranks[photgood¬ML&~colorcutmsk] = 11
#set to-remove objects to ranks<0
tokeep = cat['REMOVE']==-1
ranks[~tokeep] = -cat['REMOVE'][~tokeep] # sets the REMOVE objects to -their remove value
# and the downloaded rem list
remmsk = targeting.remove_targets_with_remlist(cat, h, maskonly=True)
ranks[~remmsk] = -5
# anything that's not ML should have the flag cuts applied (rank<=-10)
for i, (flag, present) in enumerate([('SATURATED', False),
('BAD_COUNTS_ERROR', False),
('BINNED1', True)]):
msk = cat[flag]==0 if present else cat[flag]!=0
ranks[msk¬ML] = -(i+10)
# finally, remove already-observed (rank<=-100)
spec_this_host = spectra[spectra['HOST_NSAID']==h.nsaid]
spec_this_host = spec_this_host[np.in1d(spec_this_host['OBJID'], cat['OBJID'])]
zq = cat['ZQUALITY'].copy()
for i, zqi in zip(spec_this_host['OBJID'], spec_this_host['ZQUALITY']):
zq[cat['OBJID']==i] = zqi
ranks[zq>2] = -100 - zq[zq>2]
if 1237651735756996681 in cat['OBJID']:
print('Found manual add in', h.name, 'setting rank to 2')
ranks[cat['OBJID']==1237651735756996681] = 2
#informational
print(h.name,'rank counts:', Counter(ranks))
In [13]:
bins = 25
for h in hosts_to_target:
cat = h.get_sdss_catalog()
ranks = rankdct[h]
plt.figure()
msk = cat[p_column_for_ranking] > 0.01
plt.subplot(1,2,1)
ahist(cat[msk]['r'],bins=bins, histtype='step')
plt.axvline(21,c='r')
plt.xlabel('r')
plt.title(h.name, fontsize=24)
plt.subplot(1,2,2)
ahist(cat[msk]['fibermag_r'],bins=bins, histtype='step')
plt.axvline(23,c='r')
plt.xlabel('fibermag_r')
plt.title(h.name, fontsize=24)
plt.tight_layout()
In [14]:
for h in hosts_to_target:
cat = h.get_sdss_catalog()
ranks = rankdct[h]
plt.figure()
msk_ml = (ranks>1)&(ranks<8)
msk_nml = (ranks>7)
plt.scatter(cat['ra'][msk_nml], cat['dec'][msk_nml], lw=0, alpha=.3, c=ranks[msk_nml],s=8, vmin=2, vmax=np.max(ranks))
plt.scatter(cat['ra'][msk_ml], cat['dec'][msk_ml], lw=1, alpha=.9, c=ranks[msk_ml],s=20, vmin=2, vmax=np.max(ranks))
plt.colorbar()
plt.title(h.name, fontsize=24)
plt.tight_layout()
In [15]:
h = [h for h in hosts_to_target if h.name=='Alice']
assert len(h)==1
h = h[0]
In [16]:
cat = h.get_sdss_catalog()
ranks = rankdct[h]
fibmag = cat['fibermag_r']
msk = (ranks>0)&(fibmag>22)#&(ranks<7)
targeting.sampled_imagelist(cat[msk], None, 200, names=['r={0[0]}_fm={0[1]}'.format(t) for t in zip(ranks[msk],fibmag[msk])])
np.sum(msk)
Out[16]:
In [74]:
generated_cats = {}
for h in hosts_to_target:
print('On host', h.name)
sys.stdout.flush()
cat = h.get_sdss_catalog()
ranks = rankdct[h]
fnout = 'mmthecto/{0}_feb2016.cat'.format(h.name)
fluxfnout = fnout.replace('.cat', '.fluxstars')
print('Going to write', fnout)
msk = (cat['RHOST_ARCM']<40) & (ranks>0) & (ranks<11)
generated_cats[h] = mmthecto.generate_catalog(h, cat[msk], ranks[msk],
repeatflux=3, removefluxdistance=(1*u.arcmin,ranks[msk]<9),
fnout=fnout, fluxfnout=fluxfnout)
In [77]:
h = [h for h in hosts_to_target if h.name=='Sopranos']
assert len(h)==1
h = h[0]
In [62]:
gcat = generated_cats[h]
ranknum = gcat['rank'].copy()
ranknum[gcat['rank']==''] = '-1'
ranknum = ranknum.astype(int)
ranknum = gcat['rank'].copy()
ranknum[gcat['rank']==''] = '-1'
ranknum = ranknum.astype(int)
{r: np.sum(r==ranknum) for r in set(ranknum)}
Out[62]:
In [79]:
msk = (ranknum>1)&(ranknum<6) # ML targets
#msk = (ranknum>6) #non-ML targets
msk = ranknum==1 # flux stars
gcatm = gcat[msk]
targeting.sampled_imagelist(gcatm[np.argsort(gcatm['ra'])], None, 200, names=gcat['rank'][msk])
len(gcatm)
Out[79]:
In [84]:
gcat
Out[84]:
In [91]:
for h in hosts_to_target:
gcat = generated_cats[h]
fmsk = gcat['rank']=='1'
gmsk = gcat['type'] == 'guide'
plt.figure()
plt.title(h.name)
plt.scatter(gcat['ra'][fmsk], gcat['dec'][fmsk],lw=0, label='flux')
plt.scatter(gcat['ra'][gmsk], gcat['dec'][gmsk], c='r',s=5, lw=0, label='guide')
plt.legend()
In [83]:
#identify clumps of nearby objects - note that this is slow for large sets of objects
def find_clumps(host, rankrng=(1, 10), show_in_sdss=True):
msk = (ranknum>rankrng[0])&(ranknum<rankrng[1]) # ML targets
gcatm = generated_cats[host][msk]
gcatmsc = SkyCoord(gcatm['ra'], gcatm['dec'], unit=u.deg)
seps = u.Quantity([c.separation(gcatmsc) for c in gcatmsc])
pairs = zip(*np.where((seps>0.1*u.arcsec)&(seps<10*u.arcsec)))
pairs = [pair for pair in pairs if pair[0]<pair[1]]
clumps = []
for p1, p2 in pairs:
for clump in clumps:
if p1 in clump:
clump.append(p2)
break
elif p2 in clump:
clump.append(p1)
break
else:
clumps.append([p1, p2])
if show_in_sdss:
for cl in clumps:
targeting.sampled_imagelist(gcatm[np.array(cl)], None, None, names=cl)
return clumps
for h in hosts_to_target:
clumps = find_clumps(h, show_in_sdss=False)
if clumps:
print(h.name, 'clumps:', clumps)
else:
print(h.name, 'is clump-less in ML targets')