In [1]:
    
# This changes the current directory to the base saga directory - make sure to run this first!
# This is necessary to be able to import the py files and use the right directories,
# while keeping all the notebooks in their own directory.
import os
import sys
if 'saga_base_dir' not in locals():
    saga_base_dir = os.path.abspath('..')
if saga_base_dir not in sys.path:
    os.chdir(saga_base_dir)
    
In [2]:
    
from __future__ import print_function, division
from collections import Counter, OrderedDict
import hosts
import targeting
import mmthecto
import numpy as np
from astropy import units as u
from astropy.coordinates import *
from astropy import table
from astropy.visualization import hist as ahist
    
In [3]:
    
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib import rcParams
rcParams['image.interpolation'] = 'none'
rcParams['figure.figsize'] = (16, 10)
    
In [4]:
    
hostlst = hosts.get_saga_hosts_from_google() #'named' hosts
    
    
In [24]:
    
hosts_to_target = [h for h in hostlst if h.name in ('Sopranos', 'StarTrek', 'Alice', 'MobyDick')]
assert len(hosts_to_target)==4
hosts_to_target
    
    Out[24]:
In [6]:
    
# now set to the latest base catalogs
for h in hosts_to_target:
    h.fnsdss = 'SAGADropbox/base_catalogs/base_sql_nsa{0}.fits.gz'.format(h.nsaid)
    h._cached_sdss = None
    
In [7]:
    
# actually make sure they're loaded here
for h in hosts_to_target:
    h.get_sdss_catalog()
h.get_sdss_catalog().colnames # just to see
    
    Out[7]:
In [8]:
    
# these are the already-observed objects
spectra = h.load_and_reprocess_sdss_catalog('SAGADropbox/data/saga_spectra_clean_jan15.fits.gz')
    
In [9]:
    
# diagnostic just to see how many are in the remove list
for h in hosts_to_target:
    print(h.name, Counter(h.get_sdss_catalog()['REMOVE']))
    
    
Important difference from previous runs: ML probablities are now in the catalogs!
In [10]:
    
p_column_for_ranking = 'PROBABILITY_CLASS1' #this is *just* SDSS
    
In [11]:
    
#how many have an decent ML ranking?
print ("Fraction good ML, not good ML, no ML")
for h in hosts_to_target:
    p = h.get_sdss_catalog()[p_column_for_ranking]
    goodp = p>10**-4
    patall = p!=-1
    print(h.name, np.sum(goodp&patall)/len(p), np.sum(~goodp&patall)/len(p), np.sum(~patall)/len(p))
    
    
Note that the ranking scheme also functions as the "filtering" step - anything <=0 doesn't get written
In [12]:
    
# compute ranks, which includes filtering on mag
rankdct = {}
for h in hosts_to_target:
    cat = h.get_sdss_catalog()
    
    photgood = (cat['r'] < 21.5) & (cat['fibermag_r']<23) & (cat['phot_sg']=='GALAXY')
    
    
    rankdct[h] = ranks = np.zeros(len(cat), dtype=int) #the 0s will be re-set at the end
    
    inside = cat['RHOST_KPC']<300
    # now set the ml prob targets
#     highprob = cat[p_column_for_ranking] > 0.5
#     medprob = (cat[p_column_for_ranking] > 0.05) & ~highprob
#     lowprob = (cat[p_column_for_ranking] > 0.01) & ~highprob & ~medprob
    highprob = cat[p_column_for_ranking] > 0.1
    medprob = (cat[p_column_for_ranking] > 0.01) & ~highprob
    lowprob = (cat[p_column_for_ranking] > 0.001) & ~highprob & ~medprob
    lowestprob = (cat[p_column_for_ranking] > 0.0001) & ~highprob & ~medprob & ~lowprob
    
    # rank 1 reserved for flux stars
    ranks[inside&highprob] = 2
    ranks[~inside&highprob] = 3
    ranks[inside&medprob] = 4
    ranks[~inside&medprob] = 5
    ranks[inside&lowprob] = 6
    ranks[~inside&lowprob] = 7
    ranks[inside&lowestprob] = 8
    ranks[~inside&lowestprob] = 9
    
    
    notML = ranks == 0
    colorcutmsk = targeting.colorcut_mask(cat,  targeting.bossanova_color_cuts)
    
    ranks[photgood¬ML&colorcutmsk] = 10
    ranks[photgood¬ML&~colorcutmsk] = 11
    
    #set to-remove objects to ranks<0
    tokeep = cat['REMOVE']==-1
    ranks[~tokeep] = -cat['REMOVE'][~tokeep] # sets the REMOVE objects to -their remove value
    # and the downloaded rem list
    remmsk = targeting.remove_targets_with_remlist(cat, h, maskonly=True)
    ranks[~remmsk] = -5
    
    # anything that's not ML should have the flag cuts applied (rank<=-10)
    for i, (flag, present) in enumerate([('SATURATED', False), 
                                         ('BAD_COUNTS_ERROR', False), 
                                         ('BINNED1', True)]):
        msk = cat[flag]==0 if present else cat[flag]!=0
        ranks[msk¬ML] = -(i+10)
    
    
    # finally, remove already-observed (rank<=-100)
    spec_this_host = spectra[spectra['HOST_NSAID']==h.nsaid]
    spec_this_host = spec_this_host[np.in1d(spec_this_host['OBJID'], cat['OBJID'])]
    
    zq = cat['ZQUALITY'].copy()
    for i, zqi in zip(spec_this_host['OBJID'], spec_this_host['ZQUALITY']):
        zq[cat['OBJID']==i] = zqi
    ranks[zq>2] = -100 - zq[zq>2]
    
    
    if 1237651735756996681 in cat['OBJID']:
        print('Found manual add in', h.name, 'setting rank to 2')
        ranks[cat['OBJID']==1237651735756996681] = 2
    
    #informational
    print(h.name,'rank counts:', Counter(ranks))
    
    
In [13]:
    
bins = 25
for h in hosts_to_target:
    cat = h.get_sdss_catalog()
    ranks = rankdct[h]
    plt.figure()
    
    msk = cat[p_column_for_ranking] > 0.01
    plt.subplot(1,2,1)
    ahist(cat[msk]['r'],bins=bins, histtype='step')
    plt.axvline(21,c='r')
    plt.xlabel('r')
    plt.title(h.name, fontsize=24)
    
    plt.subplot(1,2,2)
    ahist(cat[msk]['fibermag_r'],bins=bins, histtype='step')
    plt.axvline(23,c='r')
    plt.xlabel('fibermag_r')
    plt.title(h.name, fontsize=24)
    
    plt.tight_layout()
    
    
    
    
    
In [14]:
    
for h in hosts_to_target:
    cat = h.get_sdss_catalog()
    ranks = rankdct[h]
    plt.figure()
    
    msk_ml = (ranks>1)&(ranks<8)
    msk_nml = (ranks>7)
    
    plt.scatter(cat['ra'][msk_nml], cat['dec'][msk_nml], lw=0, alpha=.3, c=ranks[msk_nml],s=8, vmin=2, vmax=np.max(ranks))
    plt.scatter(cat['ra'][msk_ml], cat['dec'][msk_ml], lw=1, alpha=.9, c=ranks[msk_ml],s=20, vmin=2, vmax=np.max(ranks))
    
    plt.colorbar()
    
    
    plt.title(h.name, fontsize=24)
    
    plt.tight_layout()
    
    
    
    
    
In [15]:
    
h = [h for h in hosts_to_target if h.name=='Alice']
assert len(h)==1
h = h[0]
    
In [16]:
    
cat = h.get_sdss_catalog()
ranks = rankdct[h]
fibmag = cat['fibermag_r']
msk = (ranks>0)&(fibmag>22)#&(ranks<7)
targeting.sampled_imagelist(cat[msk], None, 200, names=['r={0[0]}_fm={0[1]}'.format(t) for t in zip(ranks[msk],fibmag[msk])])
np.sum(msk)
    
    Out[16]:
In [74]:
    
generated_cats = {}
for h in hosts_to_target:
    print('On host', h.name)
    sys.stdout.flush()
    cat = h.get_sdss_catalog()
    ranks = rankdct[h]
    
    fnout = 'mmthecto/{0}_feb2016.cat'.format(h.name)
    fluxfnout = fnout.replace('.cat', '.fluxstars')
    print('Going to write', fnout)
    
    msk = (cat['RHOST_ARCM']<40) & (ranks>0) & (ranks<11)
    generated_cats[h] = mmthecto.generate_catalog(h, cat[msk], ranks[msk], 
                                         repeatflux=3, removefluxdistance=(1*u.arcmin,ranks[msk]<9),
                                         fnout=fnout, fluxfnout=fluxfnout)
    
    
In [77]:
    
h = [h for h in hosts_to_target if h.name=='Sopranos']
assert len(h)==1
h = h[0]
    
In [62]:
    
gcat = generated_cats[h]
ranknum = gcat['rank'].copy()
ranknum[gcat['rank']==''] = '-1'
ranknum = ranknum.astype(int)
ranknum = gcat['rank'].copy()
ranknum[gcat['rank']==''] = '-1'
ranknum = ranknum.astype(int)
{r: np.sum(r==ranknum) for r in set(ranknum)}
    
    Out[62]:
In [79]:
    
msk = (ranknum>1)&(ranknum<6)  # ML targets
#msk = (ranknum>6) #non-ML targets
msk = ranknum==1 # flux stars
gcatm = gcat[msk]
targeting.sampled_imagelist(gcatm[np.argsort(gcatm['ra'])], None, 200, names=gcat['rank'][msk])
len(gcatm)
    
    Out[79]:
In [84]:
    
gcat
    
    Out[84]:
In [91]:
    
for h in hosts_to_target:
    gcat = generated_cats[h]
    fmsk = gcat['rank']=='1'
    gmsk = gcat['type'] == 'guide'
    
    plt.figure()
    plt.title(h.name)
    plt.scatter(gcat['ra'][fmsk], gcat['dec'][fmsk],lw=0, label='flux')
    plt.scatter(gcat['ra'][gmsk], gcat['dec'][gmsk], c='r',s=5, lw=0, label='guide')
    plt.legend()
    
    
    
    
    
In [83]:
    
#identify clumps of nearby objects - note that this is slow for large sets of objects
def find_clumps(host, rankrng=(1, 10), show_in_sdss=True):
    msk = (ranknum>rankrng[0])&(ranknum<rankrng[1])  # ML targets
    gcatm = generated_cats[host][msk]
    gcatmsc = SkyCoord(gcatm['ra'], gcatm['dec'], unit=u.deg)
    seps = u.Quantity([c.separation(gcatmsc) for c in gcatmsc])
    pairs = zip(*np.where((seps>0.1*u.arcsec)&(seps<10*u.arcsec)))
    pairs = [pair for pair in pairs if pair[0]<pair[1]]
    clumps = []
    for p1, p2 in pairs:
        for clump in clumps:
            if p1 in clump:
                clump.append(p2)
                break
            elif p2 in clump:
                clump.append(p1)
                break
        else:
            clumps.append([p1, p2])
    if show_in_sdss:
        for cl in clumps:
            targeting.sampled_imagelist(gcatm[np.array(cl)], None, None, names=cl)
    return clumps
for h in hosts_to_target:
    clumps = find_clumps(h, show_in_sdss=False)
    if clumps:
        print(h.name, 'clumps:', clumps)
    else:
        print(h.name, 'is clump-less in ML targets')