In [1]:

    
# This changes the current directory to the base saga directory - make sure to run this first!
# This is necessary to be able to import the py files and use the right directories,
# while keeping all the notebooks in their own directory.
import os
import sys

if 'saga_base_dir' not in locals():
    saga_base_dir = os.path.abspath('..')

if saga_base_dir not in sys.path:
    os.chdir(saga_base_dir)



In [2]:

    
from __future__ import print_function, division

from collections import Counter, OrderedDict

import hosts
import targeting
import mmthecto
import numpy as np

from astropy import units as u
from astropy.coordinates import *
from astropy import table
from astropy.visualization import hist as ahist



In [3]:

    
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib import rcParams

rcParams['image.interpolation'] = 'none'
rcParams['figure.figsize'] = (16, 10)

Choose hosts and Load stuff



In [4]:

    
hostlst = hosts.get_saga_hosts_from_google() #'named' hosts









    



Using cached version of google hosts list from file "hosts_dl.pkl2"



In [24]:

    
hosts_to_target = [h for h in hostlst if h.name in ('Sopranos', 'StarTrek', 'Alice', 'MobyDick')]
assert len(hosts_to_target)==4
hosts_to_target









    Out[24]:





[<hosts.NSAHost object w/ name 'Alice' AKA: ['NGC4030', 'NSA140594']>,
 <hosts.NSAHost object w/ name 'Sopranos' AKA: ['NGC4045', 'NSA13927']>,
 <hosts.NSAHost object w/ name 'MobyDick' AKA: ['NGC3067', 'NSA85746']>,
 <hosts.NSAHost object w/ name 'StarTrek' AKA: ['NGC2543', 'NSA33446']>]



In [6]:

    
# now set to the latest base catalogs
for h in hosts_to_target:
    h.fnsdss = 'SAGADropbox/base_catalogs/base_sql_nsa{0}.fits.gz'.format(h.nsaid)
    h._cached_sdss = None



In [7]:

    
# actually make sure they're loaded here
for h in hosts_to_target:
    h.get_sdss_catalog()
h.get_sdss_catalog().colnames # just to see









    Out[7]:





['OBJID',
 'RA',
 'DEC',
 'PHOTPTYPE',
 'PHOT_SG',
 'FLAGS',
 'SATURATED',
 'BAD_COUNTS_ERROR',
 'BINNED1',
 'u',
 'g',
 'r',
 'i',
 'z',
 'u_err',
 'g_err',
 'r_err',
 'i_err',
 'z_err',
 'MODELMAGERR_U',
 'MODELMAGERR_G',
 'MODELMAGERR_R',
 'MODELMAGERR_I',
 'MODELMAGERR_Z',
 'EXTINCTION_U',
 'EXTINCTION_G',
 'EXTINCTION_R',
 'EXTINCTION_I',
 'EXTINCTION_Z',
 'DERED_U',
 'DERED_G',
 'DERED_R',
 'DERED_I',
 'DERED_Z',
 'PETRORAD_U',
 'PETRORAD_G',
 'PETRORAD_R',
 'PETRORAD_I',
 'PETRORAD_Z',
 'PETRORADERR_U',
 'PETRORADERR_G',
 'PETRORADERR_R',
 'PETRORADERR_I',
 'PETRORADERR_Z',
 'DEVRAD_U',
 'DEVRADERR_U',
 'DEVRAD_G',
 'DEVRADERR_G',
 'DEVRAD_R',
 'DEVRADERR_R',
 'DEVRAD_I',
 'DEVRADERR_I',
 'DEVRAD_Z',
 'DEVRADERR_Z',
 'DEVAB_U',
 'DEVAB_G',
 'DEVAB_R',
 'DEVAB_I',
 'DEVAB_Z',
 'CMODELMAG_U',
 'CMODELMAGERR_U',
 'CMODELMAG_G',
 'CMODELMAGERR_G',
 'CMODELMAG_R',
 'CMODELMAGERR_R',
 'CMODELMAG_I',
 'CMODELMAGERR_I',
 'CMODELMAG_Z',
 'CMODELMAGERR_Z',
 'PSFMAG_U',
 'PSFMAGERR_U',
 'PSFMAG_G',
 'PSFMAGERR_G',
 'PSFMAG_R',
 'PSFMAGERR_R',
 'PSFMAG_I',
 'PSFMAGERR_I',
 'PSFMAG_Z',
 'PSFMAGERR_Z',
 'FIBERMAG_U',
 'FIBERMAGERR_U',
 'FIBERMAG_G',
 'FIBERMAGERR_G',
 'FIBERMAG_R',
 'FIBERMAGERR_R',
 'FIBERMAG_I',
 'FIBERMAGERR_I',
 'FIBERMAG_Z',
 'FIBERMAGERR_Z',
 'FRACDEV_U',
 'FRACDEV_G',
 'FRACDEV_R',
 'FRACDEV_I',
 'FRACDEV_Z',
 'Q_U',
 'U_U',
 'Q_G',
 'U_G',
 'Q_R',
 'U_R',
 'Q_I',
 'U_I',
 'Q_Z',
 'U_Z',
 'EXPAB_U',
 'EXPRAD_U',
 'EXPPHI_U',
 'EXPAB_G',
 'EXPRAD_G',
 'EXPPHI_G',
 'EXPAB_R',
 'EXPRAD_R',
 'EXPPHI_R',
 'EXPAB_I',
 'EXPRAD_I',
 'EXPPHI_I',
 'EXPAB_Z',
 'EXPRAD_Z',
 'EXPPHI_Z',
 'FIBER2MAG_R',
 'FIBER2MAGERR_R',
 'EXPMAG_R',
 'EXPMAGERR_R',
 'PETROR50_R',
 'PETROR90_R',
 'PETROMAG_R',
 'SB_EXP_R',
 'SB_PETRO_R',
 'J',
 'JERR',
 'H',
 'HERR',
 'K',
 'KERR',
 'SPEC_Z',
 'SPEC_Z_ERR',
 'SPEC_Z_WARN',
 'PHOTOZ',
 'PHOTOZ_ERR',
 'W1',
 'W1ERR',
 'W2',
 'W2ERR',
 'HOST_RA',
 'HOST_DEC',
 'HOST_DIST',
 'HOST_VHOST',
 'HOST_MK',
 'HOST_NSAID',
 'HOST_FLAG',
 'HOST_SAGA_NAME',
 'RHOST_ARCM',
 'RHOST_KPC',
 'OBJ_NSAID',
 'SATS',
 'PROBABILITY_CLASS1',
 'RESCALED_PROBABILITY_CLASS1',
 'REMOVE',
 'TELNAME',
 'MASKNAME',
 'ZQUALITY',
 'SPECOBJID',
 'SPEC_REPEAT',
 'Ai',
 'fibermag_z',
 'rhost',
 'fibermag_r',
 'fibermag_u',
 'Ag',
 'fibermag_i',
 'Az',
 'Ar',
 'dec',
 'Au',
 'fibermag_g',
 'type',
 'ra',
 'objID',
 'psf_r',
 'psf_u',
 'psf_z',
 'rhost_kpc',
 'phot_sg',
 'psf_g',
 'psf_i',
 'U',
 'B',
 'V',
 'R',
 'I',
 'psf_U',
 'psf_B',
 'psf_V',
 'psf_R',
 'psf_I']



In [8]:

    
# these are the already-observed objects
spectra = h.load_and_reprocess_sdss_catalog('SAGADropbox/data/saga_spectra_clean_jan15.fits.gz')



In [9]:

    
# diagnostic just to see how many are in the remove list
for h in hosts_to_target:
    print(h.name, Counter(h.get_sdss_catalog()['REMOVE']))









    



Alice Counter({-1: 78413, 3: 4377, 2: 417, 4: 79, 1: 13})
Sopranos Counter({-1: 66695, 3: 4090, 2: 757, 4: 160, 1: 15})
MobyDick Counter({-1: 79385, 3: 5848, 2: 201, 4: 77, 1: 24})
StarTrek Counter({-1: 87382, 3: 9274, 4: 183, 2: 142, 1: 7})

Generate ranks

Important difference from previous runs: ML probablities are now in the catalogs!



In [10]:

    
p_column_for_ranking = 'PROBABILITY_CLASS1' #this is *just* SDSS



In [11]:

    
#how many have an decent ML ranking?
print ("Fraction good ML, not good ML, no ML")
for h in hosts_to_target:
    p = h.get_sdss_catalog()[p_column_for_ranking]
    goodp = p>10**-4
    patall = p!=-1
    print(h.name, np.sum(goodp&patall)/len(p), np.sum(~goodp&patall)/len(p), np.sum(~patall)/len(p))









    



Fraction good ML, not good ML, no ML
Alice 0.00402165692265 0.995846288671 0.000132054406415
Sopranos 0.00585635205042 0.994032098387 0.000111549562865
MobyDick 0.0044075524639 0.995580756416 1.16911205939e-05
StarTrek 0.00570173629727 0.994277642595 2.06211077659e-05

Note that the ranking scheme also functions as the "filtering" step - anything <=0 doesn't get written



In [12]:

    
# compute ranks, which includes filtering on mag
rankdct = {}
for h in hosts_to_target:
    cat = h.get_sdss_catalog()
    
    photgood = (cat['r'] < 21.5) & (cat['fibermag_r']<23) & (cat['phot_sg']=='GALAXY')
    
    
    rankdct[h] = ranks = np.zeros(len(cat), dtype=int) #the 0s will be re-set at the end
    
    inside = cat['RHOST_KPC']<300
    # now set the ml prob targets
#     highprob = cat[p_column_for_ranking] > 0.5
#     medprob = (cat[p_column_for_ranking] > 0.05) & ~highprob
#     lowprob = (cat[p_column_for_ranking] > 0.01) & ~highprob & ~medprob
    highprob = cat[p_column_for_ranking] > 0.1
    medprob = (cat[p_column_for_ranking] > 0.01) & ~highprob
    lowprob = (cat[p_column_for_ranking] > 0.001) & ~highprob & ~medprob
    lowestprob = (cat[p_column_for_ranking] > 0.0001) & ~highprob & ~medprob & ~lowprob
    
    # rank 1 reserved for flux stars
    ranks[inside&highprob] = 2
    ranks[~inside&highprob] = 3
    ranks[inside&medprob] = 4
    ranks[~inside&medprob] = 5
    ranks[inside&lowprob] = 6
    ranks[~inside&lowprob] = 7
    ranks[inside&lowestprob] = 8
    ranks[~inside&lowestprob] = 9
    
    
    notML = ranks == 0
    colorcutmsk = targeting.colorcut_mask(cat,  targeting.bossanova_color_cuts)
    
    ranks[photgood&notML&colorcutmsk] = 10
    ranks[photgood&notML&~colorcutmsk] = 11
    
    #set to-remove objects to ranks<0
    tokeep = cat['REMOVE']==-1
    ranks[~tokeep] = -cat['REMOVE'][~tokeep] # sets the REMOVE objects to -their remove value
    # and the downloaded rem list
    remmsk = targeting.remove_targets_with_remlist(cat, h, maskonly=True)
    ranks[~remmsk] = -5
    
    # anything that's not ML should have the flag cuts applied (rank<=-10)
    for i, (flag, present) in enumerate([('SATURATED', False), 
                                         ('BAD_COUNTS_ERROR', False), 
                                         ('BINNED1', True)]):
        msk = cat[flag]==0 if present else cat[flag]!=0
        ranks[msk&notML] = -(i+10)
    
    
    # finally, remove already-observed (rank<=-100)
    spec_this_host = spectra[spectra['HOST_NSAID']==h.nsaid]
    spec_this_host = spec_this_host[np.in1d(spec_this_host['OBJID'], cat['OBJID'])]
    
    zq = cat['ZQUALITY'].copy()
    for i, zqi in zip(spec_this_host['OBJID'], spec_this_host['ZQUALITY']):
        zq[cat['OBJID']==i] = zqi
    ranks[zq>2] = -100 - zq[zq>2]
    
    
    if 1237651735756996681 in cat['OBJID']:
        print('Found manual add in', h.name, 'setting rank to 2')
        ranks[cat['OBJID']==1237651735756996681] = 2
    
    #informational
    print(h.name,'rank counts:', Counter(ranks))









    



Removed 20 objects for Alice
Alice rank counts: Counter({0: 58926, 10: 11122, 11: 5421, -104: 2503, -10: 2494, -12: 1812, -2: 410, -105: 237, -103: 115, -4: 72, -3: 56, 8: 29, 9: 28, 7: 21, -5: 20, 5: 12, 6: 11, 4: 7, 3: 2, 2: 1})
Removed 21 objects for Sopranos
Found manual add in Sopranos setting rank to 2
Sopranos rank counts: Counter({0: 48213, 10: 11178, 11: 5601, -10: 2227, -12: 1782, -104: 1398, -2: 747, -105: 171, -4: 148, -3: 73, 9: 46, 8: 43, 7: 22, -5: 22, 5: 15, 6: 13, 4: 9, 2: 4, -103: 4, 3: 1})
Removed 30 objects for MobyDick
MobyDick rank counts: Counter({0: 61395, 10: 10267, 11: 6348, -10: 3681, -12: 2057, -104: 1152, -2: 198, -3: 103, -103: 74, -4: 74, 9: 47, 8: 35, 7: 30, -5: 30, 6: 20, 5: 17, 4: 5, 3: 2})
Removed 16 objects for StarTrek
StarTrek rank counts: Counter({0: 72800, 10: 7804, -10: 7103, 11: 4740, -12: 1936, -104: 1758, -3: 220, -4: 180, -2: 137, 9: 91, -103: 88, 7: 49, 5: 28, 8: 17, -5: 16, 6: 13, 3: 4, 2: 2, 4: 2})

are the ML-selected objects meeting the fibermag and r cuts?



In [13]:

    
bins = 25
for h in hosts_to_target:
    cat = h.get_sdss_catalog()
    ranks = rankdct[h]

    plt.figure()
    
    msk = cat[p_column_for_ranking] > 0.01
    plt.subplot(1,2,1)
    ahist(cat[msk]['r'],bins=bins, histtype='step')
    plt.axvline(21,c='r')
    plt.xlabel('r')
    plt.title(h.name, fontsize=24)
    
    plt.subplot(1,2,2)
    ahist(cat[msk]['fibermag_r'],bins=bins, histtype='step')
    plt.axvline(23,c='r')
    plt.xlabel('fibermag_r')
    plt.title(h.name, fontsize=24)
    
    plt.tight_layout()

Plot the catalogs as a whole



In [14]:

    
for h in hosts_to_target:
    cat = h.get_sdss_catalog()
    ranks = rankdct[h]

    plt.figure()
    
    msk_ml = (ranks>1)&(ranks<8)
    msk_nml = (ranks>7)
    
    plt.scatter(cat['ra'][msk_nml], cat['dec'][msk_nml], lw=0, alpha=.3, c=ranks[msk_nml],s=8, vmin=2, vmax=np.max(ranks))
    plt.scatter(cat['ra'][msk_ml], cat['dec'][msk_ml], lw=1, alpha=.9, c=ranks[msk_ml],s=20, vmin=2, vmax=np.max(ranks))
    
    plt.colorbar()
    
    
    plt.title(h.name, fontsize=24)
    
    plt.tight_layout()

Inspect some objects in the catalog



In [15]:

    
h = [h for h in hosts_to_target if h.name=='Alice']
assert len(h)==1
h = h[0]



In [16]:

    
cat = h.get_sdss_catalog()
ranks = rankdct[h]

fibmag = cat['fibermag_r']
msk = (ranks>0)&(fibmag>22)#&(ranks<7)

targeting.sampled_imagelist(cat[msk], None, 200, names=['r={0[0]}_fm={0[1]}'.format(t) for t in zip(ranks[msk],fibmag[msk])])
np.sum(msk)









    Out[16]:





3060

Generate catalogs



In [74]:

    
generated_cats = {}
for h in hosts_to_target:
    print('On host', h.name)
    sys.stdout.flush()

    cat = h.get_sdss_catalog()
    ranks = rankdct[h]
    
    fnout = 'mmthecto/{0}_feb2016.cat'.format(h.name)
    fluxfnout = fnout.replace('.cat', '.fluxstars')
    print('Going to write', fnout)
    
    msk = (cat['RHOST_ARCM']<40) & (ranks>0) & (ranks<11)
    generated_cats[h] = mmthecto.generate_catalog(h, cat[msk], ranks[msk], 
                                         repeatflux=3, removefluxdistance=(1*u.arcmin,ranks[msk]<9),
                                         fnout=fnout, fluxfnout=fluxfnout)









    



On host Alice
Going to write mmthecto/Alice_feb2016.cat
Including 4659 targets
Found 118 Flux stars
Found 314 guide stars
On host Sopranos
Going to write mmthecto/Sopranos_feb2016.cat
Including 5259 targets
Found 88 Flux stars
Found 317 guide stars
On host MobyDick
Going to write mmthecto/MobyDick_feb2016.cat
Including 3999 targets
Found 57 Flux stars
Found 265 guide stars
On host StarTrek
Going to write mmthecto/StarTrek_feb2016.cat
Including 3077 targets
Found 64 Flux stars
Removing 1 Flux stars too close to program stars
Found 563 guide stars

Inspect a output catalog



In [77]:

    
h = [h for h in hosts_to_target if h.name=='Sopranos']
assert len(h)==1
h = h[0]



In [62]:

    
gcat = generated_cats[h]

ranknum = gcat['rank'].copy()
ranknum[gcat['rank']==''] = '-1'
ranknum = ranknum.astype(int)

ranknum = gcat['rank'].copy()
ranknum[gcat['rank']==''] = '-1'
ranknum = ranknum.astype(int)
{r: np.sum(r==ranknum) for r in set(ranknum)}









    Out[62]:





{-1: 563, 1: 186, 2: 2, 4: 2, 5: 10, 6: 13, 7: 15, 8: 17, 9: 26, 10: 2992}



In [79]:

    
msk = (ranknum>1)&(ranknum<6)  # ML targets
#msk = (ranknum>6) #non-ML targets
msk = ranknum==1 # flux stars

gcatm = gcat[msk]
targeting.sampled_imagelist(gcatm[np.argsort(gcatm['ra'])], None, 200, names=gcat['rank'][msk])
len(gcatm)









    Out[79]:





186

check flux and guide stars to make sure there are enough



In [84]:

    
gcat









    Out[84]:




<Table length=3829>

ra dec object rank type mag
float32 float32 str50 str2 str20 float32
123.287 36.2782 1237657607497384508 6 TARGET 20.31
123.289 36.2292 1237657607497319278 10 TARGET 21.47
123.201 36.2187 1237657607497319440 10 TARGET 21.27
123.277 35.6522 1237674290757501217 10 TARGET 21.43
123.224 35.6531 1237674290757501291 10 TARGET 20.74
123.375 35.66 1237674290757566977 10 TARGET 21.33
123.393 35.6624 1237674290757567008 10 TARGET 21.16
123.45 35.6747 1237674290757567109 10 TARGET 20.58
123.202 35.651 1237674290757501469 10 TARGET 20.37
123.199 35.6533 1237674290757501465 10 TARGET 21.4
... ... ... ... ... ...
123.009 36.6693 1237657608034320419 guide 14.12
123.271 36.7106 1237657608034385966 guide 14.87
123.253 36.761 1237657608034385991 guide 14.57
123.526 36.9663 1237657608034517019 guide 14.73
123.592 37.0964 1237657608034582563 guide 14.95
122.431 36.7958 1237657608571060356 guide 14.3
122.609 36.9708 1237657608571191322 guide 14.86
122.746 37.0484 1237657608571191535 guide 14.78
122.621 36.9797 1237657608571191359 guide 14.89
122.983 37.1142 1237657608571322407 guide 14.61



In [91]:

    
for h in hosts_to_target:
    gcat = generated_cats[h]
    fmsk = gcat['rank']=='1'
    gmsk = gcat['type'] == 'guide'
    
    plt.figure()
    plt.title(h.name)
    plt.scatter(gcat['ra'][fmsk], gcat['dec'][fmsk],lw=0, label='flux')
    plt.scatter(gcat['ra'][gmsk], gcat['dec'][gmsk], c='r',s=5, lw=0, label='guide')
    plt.legend()

Clump searching



In [83]:

    
#identify clumps of nearby objects - note that this is slow for large sets of objects

def find_clumps(host, rankrng=(1, 10), show_in_sdss=True):
    msk = (ranknum>rankrng[0])&(ranknum<rankrng[1])  # ML targets
    gcatm = generated_cats[host][msk]

    gcatmsc = SkyCoord(gcatm['ra'], gcatm['dec'], unit=u.deg)
    seps = u.Quantity([c.separation(gcatmsc) for c in gcatmsc])

    pairs = zip(*np.where((seps>0.1*u.arcsec)&(seps<10*u.arcsec)))
    pairs = [pair for pair in pairs if pair[0]<pair[1]]

    clumps = []
    for p1, p2 in pairs:
        for clump in clumps:
            if p1 in clump:
                clump.append(p2)
                break
            elif p2 in clump:
                clump.append(p1)
                break
        else:
            clumps.append([p1, p2])
    if show_in_sdss:
        for cl in clumps:
            targeting.sampled_imagelist(gcatm[np.array(cl)], None, None, names=cl)
    return clumps

for h in hosts_to_target:
    clumps = find_clumps(h, show_in_sdss=False)
    if clumps:
        print(h.name, 'clumps:', clumps)
    else:
        print(h.name, 'is clump-less in ML targets')









    



Alice is clump-less in ML targets
Sopranos is clump-less in ML targets
MobyDick is clump-less in ML targets
StarTrek clumps: [[12, 13], [33, 34], [35, 36]]

ra	dec	object	rank	type	mag
float32	float32	str50	str2	str20	float32
123.287	36.2782	1237657607497384508	6	TARGET	20.31
123.289	36.2292	1237657607497319278	10	TARGET	21.47
123.201	36.2187	1237657607497319440	10	TARGET	21.27
123.277	35.6522	1237674290757501217	10	TARGET	21.43
123.224	35.6531	1237674290757501291	10	TARGET	20.74
123.375	35.66	1237674290757566977	10	TARGET	21.33
123.393	35.6624	1237674290757567008	10	TARGET	21.16
123.45	35.6747	1237674290757567109	10	TARGET	20.58
123.202	35.651	1237674290757501469	10	TARGET	20.37
123.199	35.6533	1237674290757501465	10	TARGET	21.4
...	...	...	...	...	...
123.009	36.6693	1237657608034320419		guide	14.12
123.271	36.7106	1237657608034385966		guide	14.87
123.253	36.761	1237657608034385991		guide	14.57
123.526	36.9663	1237657608034517019		guide	14.73
123.592	37.0964	1237657608034582563		guide	14.95
122.431	36.7958	1237657608571060356		guide	14.3
122.609	36.9708	1237657608571191322		guide	14.86
122.746	37.0484	1237657608571191535		guide	14.78
122.621	36.9797	1237657608571191359		guide	14.89
122.983	37.1142	1237657608571322407		guide	14.61