In [1]:
# This changes the current directory to the base saga directory - make sure to run this first!
# This is necessary to be able to import the py files and use the right directories,
# while keeping all the notebooks in their own directory.
import os
import sys

if 'saga_base_dir' not in locals():
    saga_base_dir = os.path.abspath('..')

if saga_base_dir not in sys.path:
    os.chdir(saga_base_dir)

In [2]:
from __future__ import print_function, division

from collections import Counter, OrderedDict

import hosts
import targeting
import mmthecto
import numpy as np

from astropy import units as u
from astropy.coordinates import *
from astropy import table
from astropy.visualization import hist as ahist

In [3]:
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib import rcParams

rcParams['image.interpolation'] = 'none'
rcParams['figure.figsize'] = (16, 10)

Choose hosts and Load stuff


In [4]:
hostlst = hosts.get_saga_hosts_from_google() #'named' hosts


Using cached version of google hosts list from file "hosts_dl.pkl2"

In [24]:
hosts_to_target = [h for h in hostlst if h.name in ('Sopranos', 'StarTrek', 'Alice', 'MobyDick')]
assert len(hosts_to_target)==4
hosts_to_target


Out[24]:
[<hosts.NSAHost object w/ name 'Alice' AKA: ['NGC4030', 'NSA140594']>,
 <hosts.NSAHost object w/ name 'Sopranos' AKA: ['NGC4045', 'NSA13927']>,
 <hosts.NSAHost object w/ name 'MobyDick' AKA: ['NGC3067', 'NSA85746']>,
 <hosts.NSAHost object w/ name 'StarTrek' AKA: ['NGC2543', 'NSA33446']>]

In [6]:
# now set to the latest base catalogs
for h in hosts_to_target:
    h.fnsdss = 'SAGADropbox/base_catalogs/base_sql_nsa{0}.fits.gz'.format(h.nsaid)
    h._cached_sdss = None

In [7]:
# actually make sure they're loaded here
for h in hosts_to_target:
    h.get_sdss_catalog()
h.get_sdss_catalog().colnames # just to see


Out[7]:
['OBJID',
 'RA',
 'DEC',
 'PHOTPTYPE',
 'PHOT_SG',
 'FLAGS',
 'SATURATED',
 'BAD_COUNTS_ERROR',
 'BINNED1',
 'u',
 'g',
 'r',
 'i',
 'z',
 'u_err',
 'g_err',
 'r_err',
 'i_err',
 'z_err',
 'MODELMAGERR_U',
 'MODELMAGERR_G',
 'MODELMAGERR_R',
 'MODELMAGERR_I',
 'MODELMAGERR_Z',
 'EXTINCTION_U',
 'EXTINCTION_G',
 'EXTINCTION_R',
 'EXTINCTION_I',
 'EXTINCTION_Z',
 'DERED_U',
 'DERED_G',
 'DERED_R',
 'DERED_I',
 'DERED_Z',
 'PETRORAD_U',
 'PETRORAD_G',
 'PETRORAD_R',
 'PETRORAD_I',
 'PETRORAD_Z',
 'PETRORADERR_U',
 'PETRORADERR_G',
 'PETRORADERR_R',
 'PETRORADERR_I',
 'PETRORADERR_Z',
 'DEVRAD_U',
 'DEVRADERR_U',
 'DEVRAD_G',
 'DEVRADERR_G',
 'DEVRAD_R',
 'DEVRADERR_R',
 'DEVRAD_I',
 'DEVRADERR_I',
 'DEVRAD_Z',
 'DEVRADERR_Z',
 'DEVAB_U',
 'DEVAB_G',
 'DEVAB_R',
 'DEVAB_I',
 'DEVAB_Z',
 'CMODELMAG_U',
 'CMODELMAGERR_U',
 'CMODELMAG_G',
 'CMODELMAGERR_G',
 'CMODELMAG_R',
 'CMODELMAGERR_R',
 'CMODELMAG_I',
 'CMODELMAGERR_I',
 'CMODELMAG_Z',
 'CMODELMAGERR_Z',
 'PSFMAG_U',
 'PSFMAGERR_U',
 'PSFMAG_G',
 'PSFMAGERR_G',
 'PSFMAG_R',
 'PSFMAGERR_R',
 'PSFMAG_I',
 'PSFMAGERR_I',
 'PSFMAG_Z',
 'PSFMAGERR_Z',
 'FIBERMAG_U',
 'FIBERMAGERR_U',
 'FIBERMAG_G',
 'FIBERMAGERR_G',
 'FIBERMAG_R',
 'FIBERMAGERR_R',
 'FIBERMAG_I',
 'FIBERMAGERR_I',
 'FIBERMAG_Z',
 'FIBERMAGERR_Z',
 'FRACDEV_U',
 'FRACDEV_G',
 'FRACDEV_R',
 'FRACDEV_I',
 'FRACDEV_Z',
 'Q_U',
 'U_U',
 'Q_G',
 'U_G',
 'Q_R',
 'U_R',
 'Q_I',
 'U_I',
 'Q_Z',
 'U_Z',
 'EXPAB_U',
 'EXPRAD_U',
 'EXPPHI_U',
 'EXPAB_G',
 'EXPRAD_G',
 'EXPPHI_G',
 'EXPAB_R',
 'EXPRAD_R',
 'EXPPHI_R',
 'EXPAB_I',
 'EXPRAD_I',
 'EXPPHI_I',
 'EXPAB_Z',
 'EXPRAD_Z',
 'EXPPHI_Z',
 'FIBER2MAG_R',
 'FIBER2MAGERR_R',
 'EXPMAG_R',
 'EXPMAGERR_R',
 'PETROR50_R',
 'PETROR90_R',
 'PETROMAG_R',
 'SB_EXP_R',
 'SB_PETRO_R',
 'J',
 'JERR',
 'H',
 'HERR',
 'K',
 'KERR',
 'SPEC_Z',
 'SPEC_Z_ERR',
 'SPEC_Z_WARN',
 'PHOTOZ',
 'PHOTOZ_ERR',
 'W1',
 'W1ERR',
 'W2',
 'W2ERR',
 'HOST_RA',
 'HOST_DEC',
 'HOST_DIST',
 'HOST_VHOST',
 'HOST_MK',
 'HOST_NSAID',
 'HOST_FLAG',
 'HOST_SAGA_NAME',
 'RHOST_ARCM',
 'RHOST_KPC',
 'OBJ_NSAID',
 'SATS',
 'PROBABILITY_CLASS1',
 'RESCALED_PROBABILITY_CLASS1',
 'REMOVE',
 'TELNAME',
 'MASKNAME',
 'ZQUALITY',
 'SPECOBJID',
 'SPEC_REPEAT',
 'Ai',
 'fibermag_z',
 'rhost',
 'fibermag_r',
 'fibermag_u',
 'Ag',
 'fibermag_i',
 'Az',
 'Ar',
 'dec',
 'Au',
 'fibermag_g',
 'type',
 'ra',
 'objID',
 'psf_r',
 'psf_u',
 'psf_z',
 'rhost_kpc',
 'phot_sg',
 'psf_g',
 'psf_i',
 'U',
 'B',
 'V',
 'R',
 'I',
 'psf_U',
 'psf_B',
 'psf_V',
 'psf_R',
 'psf_I']

In [8]:
# these are the already-observed objects
spectra = h.load_and_reprocess_sdss_catalog('SAGADropbox/data/saga_spectra_clean_jan15.fits.gz')

In [9]:
# diagnostic just to see how many are in the remove list
for h in hosts_to_target:
    print(h.name, Counter(h.get_sdss_catalog()['REMOVE']))


Alice Counter({-1: 78413, 3: 4377, 2: 417, 4: 79, 1: 13})
Sopranos Counter({-1: 66695, 3: 4090, 2: 757, 4: 160, 1: 15})
MobyDick Counter({-1: 79385, 3: 5848, 2: 201, 4: 77, 1: 24})
StarTrek Counter({-1: 87382, 3: 9274, 4: 183, 2: 142, 1: 7})

Generate ranks

Important difference from previous runs: ML probablities are now in the catalogs!


In [10]:
p_column_for_ranking = 'PROBABILITY_CLASS1' #this is *just* SDSS

In [11]:
#how many have an decent ML ranking?
print ("Fraction good ML, not good ML, no ML")
for h in hosts_to_target:
    p = h.get_sdss_catalog()[p_column_for_ranking]
    goodp = p>10**-4
    patall = p!=-1
    print(h.name, np.sum(goodp&patall)/len(p), np.sum(~goodp&patall)/len(p), np.sum(~patall)/len(p))


Fraction good ML, not good ML, no ML
Alice 0.00402165692265 0.995846288671 0.000132054406415
Sopranos 0.00585635205042 0.994032098387 0.000111549562865
MobyDick 0.0044075524639 0.995580756416 1.16911205939e-05
StarTrek 0.00570173629727 0.994277642595 2.06211077659e-05

Note that the ranking scheme also functions as the "filtering" step - anything <=0 doesn't get written


In [12]:
# compute ranks, which includes filtering on mag
rankdct = {}
for h in hosts_to_target:
    cat = h.get_sdss_catalog()
    
    photgood = (cat['r'] < 21.5) & (cat['fibermag_r']<23) & (cat['phot_sg']=='GALAXY')
    
    
    rankdct[h] = ranks = np.zeros(len(cat), dtype=int) #the 0s will be re-set at the end
    
    inside = cat['RHOST_KPC']<300
    # now set the ml prob targets
#     highprob = cat[p_column_for_ranking] > 0.5
#     medprob = (cat[p_column_for_ranking] > 0.05) & ~highprob
#     lowprob = (cat[p_column_for_ranking] > 0.01) & ~highprob & ~medprob
    highprob = cat[p_column_for_ranking] > 0.1
    medprob = (cat[p_column_for_ranking] > 0.01) & ~highprob
    lowprob = (cat[p_column_for_ranking] > 0.001) & ~highprob & ~medprob
    lowestprob = (cat[p_column_for_ranking] > 0.0001) & ~highprob & ~medprob & ~lowprob
    
    # rank 1 reserved for flux stars
    ranks[inside&highprob] = 2
    ranks[~inside&highprob] = 3
    ranks[inside&medprob] = 4
    ranks[~inside&medprob] = 5
    ranks[inside&lowprob] = 6
    ranks[~inside&lowprob] = 7
    ranks[inside&lowestprob] = 8
    ranks[~inside&lowestprob] = 9
    
    
    notML = ranks == 0
    colorcutmsk = targeting.colorcut_mask(cat,  targeting.bossanova_color_cuts)
    
    ranks[photgood&notML&colorcutmsk] = 10
    ranks[photgood&notML&~colorcutmsk] = 11
    
    #set to-remove objects to ranks<0
    tokeep = cat['REMOVE']==-1
    ranks[~tokeep] = -cat['REMOVE'][~tokeep] # sets the REMOVE objects to -their remove value
    # and the downloaded rem list
    remmsk = targeting.remove_targets_with_remlist(cat, h, maskonly=True)
    ranks[~remmsk] = -5
    
    # anything that's not ML should have the flag cuts applied (rank<=-10)
    for i, (flag, present) in enumerate([('SATURATED', False), 
                                         ('BAD_COUNTS_ERROR', False), 
                                         ('BINNED1', True)]):
        msk = cat[flag]==0 if present else cat[flag]!=0
        ranks[msk&notML] = -(i+10)
    
    
    # finally, remove already-observed (rank<=-100)
    spec_this_host = spectra[spectra['HOST_NSAID']==h.nsaid]
    spec_this_host = spec_this_host[np.in1d(spec_this_host['OBJID'], cat['OBJID'])]
    
    zq = cat['ZQUALITY'].copy()
    for i, zqi in zip(spec_this_host['OBJID'], spec_this_host['ZQUALITY']):
        zq[cat['OBJID']==i] = zqi
    ranks[zq>2] = -100 - zq[zq>2]
    
    
    if 1237651735756996681 in cat['OBJID']:
        print('Found manual add in', h.name, 'setting rank to 2')
        ranks[cat['OBJID']==1237651735756996681] = 2
    
    #informational
    print(h.name,'rank counts:', Counter(ranks))


Removed 20 objects for Alice
Alice rank counts: Counter({0: 58926, 10: 11122, 11: 5421, -104: 2503, -10: 2494, -12: 1812, -2: 410, -105: 237, -103: 115, -4: 72, -3: 56, 8: 29, 9: 28, 7: 21, -5: 20, 5: 12, 6: 11, 4: 7, 3: 2, 2: 1})
Removed 21 objects for Sopranos
Found manual add in Sopranos setting rank to 2
Sopranos rank counts: Counter({0: 48213, 10: 11178, 11: 5601, -10: 2227, -12: 1782, -104: 1398, -2: 747, -105: 171, -4: 148, -3: 73, 9: 46, 8: 43, 7: 22, -5: 22, 5: 15, 6: 13, 4: 9, 2: 4, -103: 4, 3: 1})
Removed 30 objects for MobyDick
MobyDick rank counts: Counter({0: 61395, 10: 10267, 11: 6348, -10: 3681, -12: 2057, -104: 1152, -2: 198, -3: 103, -103: 74, -4: 74, 9: 47, 8: 35, 7: 30, -5: 30, 6: 20, 5: 17, 4: 5, 3: 2})
Removed 16 objects for StarTrek
StarTrek rank counts: Counter({0: 72800, 10: 7804, -10: 7103, 11: 4740, -12: 1936, -104: 1758, -3: 220, -4: 180, -2: 137, 9: 91, -103: 88, 7: 49, 5: 28, 8: 17, -5: 16, 6: 13, 3: 4, 2: 2, 4: 2})

are the ML-selected objects meeting the fibermag and r cuts?


In [13]:
bins = 25
for h in hosts_to_target:
    cat = h.get_sdss_catalog()
    ranks = rankdct[h]

    plt.figure()
    
    msk = cat[p_column_for_ranking] > 0.01
    plt.subplot(1,2,1)
    ahist(cat[msk]['r'],bins=bins, histtype='step')
    plt.axvline(21,c='r')
    plt.xlabel('r')
    plt.title(h.name, fontsize=24)
    
    plt.subplot(1,2,2)
    ahist(cat[msk]['fibermag_r'],bins=bins, histtype='step')
    plt.axvline(23,c='r')
    plt.xlabel('fibermag_r')
    plt.title(h.name, fontsize=24)
    
    plt.tight_layout()


Plot the catalogs as a whole


In [14]:
for h in hosts_to_target:
    cat = h.get_sdss_catalog()
    ranks = rankdct[h]

    plt.figure()
    
    msk_ml = (ranks>1)&(ranks<8)
    msk_nml = (ranks>7)
    
    plt.scatter(cat['ra'][msk_nml], cat['dec'][msk_nml], lw=0, alpha=.3, c=ranks[msk_nml],s=8, vmin=2, vmax=np.max(ranks))
    plt.scatter(cat['ra'][msk_ml], cat['dec'][msk_ml], lw=1, alpha=.9, c=ranks[msk_ml],s=20, vmin=2, vmax=np.max(ranks))
    
    plt.colorbar()
    
    
    plt.title(h.name, fontsize=24)
    
    plt.tight_layout()


Inspect some objects in the catalog


In [15]:
h = [h for h in hosts_to_target if h.name=='Alice']
assert len(h)==1
h = h[0]

In [16]:
cat = h.get_sdss_catalog()
ranks = rankdct[h]

fibmag = cat['fibermag_r']
msk = (ranks>0)&(fibmag>22)#&(ranks<7)

targeting.sampled_imagelist(cat[msk], None, 200, names=['r={0[0]}_fm={0[1]}'.format(t) for t in zip(ranks[msk],fibmag[msk])])
np.sum(msk)


Out[16]:
3060

Generate catalogs


In [74]:
generated_cats = {}
for h in hosts_to_target:
    print('On host', h.name)
    sys.stdout.flush()

    cat = h.get_sdss_catalog()
    ranks = rankdct[h]
    
    fnout = 'mmthecto/{0}_feb2016.cat'.format(h.name)
    fluxfnout = fnout.replace('.cat', '.fluxstars')
    print('Going to write', fnout)
    
    msk = (cat['RHOST_ARCM']<40) & (ranks>0) & (ranks<11)
    generated_cats[h] = mmthecto.generate_catalog(h, cat[msk], ranks[msk], 
                                         repeatflux=3, removefluxdistance=(1*u.arcmin,ranks[msk]<9),
                                         fnout=fnout, fluxfnout=fluxfnout)


On host Alice
Going to write mmthecto/Alice_feb2016.cat
Including 4659 targets
Found 118 Flux stars
Found 314 guide stars
On host Sopranos
Going to write mmthecto/Sopranos_feb2016.cat
Including 5259 targets
Found 88 Flux stars
Found 317 guide stars
On host MobyDick
Going to write mmthecto/MobyDick_feb2016.cat
Including 3999 targets
Found 57 Flux stars
Found 265 guide stars
On host StarTrek
Going to write mmthecto/StarTrek_feb2016.cat
Including 3077 targets
Found 64 Flux stars
Removing 1 Flux stars too close to program stars
Found 563 guide stars

Inspect a output catalog


In [77]:
h = [h for h in hosts_to_target if h.name=='Sopranos']
assert len(h)==1
h = h[0]

In [62]:
gcat = generated_cats[h]

ranknum = gcat['rank'].copy()
ranknum[gcat['rank']==''] = '-1'
ranknum = ranknum.astype(int)

ranknum = gcat['rank'].copy()
ranknum[gcat['rank']==''] = '-1'
ranknum = ranknum.astype(int)
{r: np.sum(r==ranknum) for r in set(ranknum)}


Out[62]:
{-1: 563, 1: 186, 2: 2, 4: 2, 5: 10, 6: 13, 7: 15, 8: 17, 9: 26, 10: 2992}

In [79]:
msk = (ranknum>1)&(ranknum<6)  # ML targets
#msk = (ranknum>6) #non-ML targets
msk = ranknum==1 # flux stars

gcatm = gcat[msk]
targeting.sampled_imagelist(gcatm[np.argsort(gcatm['ra'])], None, 200, names=gcat['rank'][msk])
len(gcatm)


Out[79]:
186

check flux and guide stars to make sure there are enough


In [84]:
gcat


Out[84]:
<Table length=3829>
radecobjectranktypemag
float32float32str50str2str20float32
123.28736.278212376576074973845086TARGET20.31
123.28936.2292123765760749731927810TARGET21.47
123.20136.2187123765760749731944010TARGET21.27
123.27735.6522123767429075750121710TARGET21.43
123.22435.6531123767429075750129110TARGET20.74
123.37535.66123767429075756697710TARGET21.33
123.39335.6624123767429075756700810TARGET21.16
123.4535.6747123767429075756710910TARGET20.58
123.20235.651123767429075750146910TARGET20.37
123.19935.6533123767429075750146510TARGET21.4
..................
123.00936.66931237657608034320419guide14.12
123.27136.71061237657608034385966guide14.87
123.25336.7611237657608034385991guide14.57
123.52636.96631237657608034517019guide14.73
123.59237.09641237657608034582563guide14.95
122.43136.79581237657608571060356guide14.3
122.60936.97081237657608571191322guide14.86
122.74637.04841237657608571191535guide14.78
122.62136.97971237657608571191359guide14.89
122.98337.11421237657608571322407guide14.61

In [91]:
for h in hosts_to_target:
    gcat = generated_cats[h]
    fmsk = gcat['rank']=='1'
    gmsk = gcat['type'] == 'guide'
    
    plt.figure()
    plt.title(h.name)
    plt.scatter(gcat['ra'][fmsk], gcat['dec'][fmsk],lw=0, label='flux')
    plt.scatter(gcat['ra'][gmsk], gcat['dec'][gmsk], c='r',s=5, lw=0, label='guide')
    plt.legend()


Clump searching


In [83]:
#identify clumps of nearby objects - note that this is slow for large sets of objects

def find_clumps(host, rankrng=(1, 10), show_in_sdss=True):
    msk = (ranknum>rankrng[0])&(ranknum<rankrng[1])  # ML targets
    gcatm = generated_cats[host][msk]

    gcatmsc = SkyCoord(gcatm['ra'], gcatm['dec'], unit=u.deg)
    seps = u.Quantity([c.separation(gcatmsc) for c in gcatmsc])

    pairs = zip(*np.where((seps>0.1*u.arcsec)&(seps<10*u.arcsec)))
    pairs = [pair for pair in pairs if pair[0]<pair[1]]

    clumps = []
    for p1, p2 in pairs:
        for clump in clumps:
            if p1 in clump:
                clump.append(p2)
                break
            elif p2 in clump:
                clump.append(p1)
                break
        else:
            clumps.append([p1, p2])
    if show_in_sdss:
        for cl in clumps:
            targeting.sampled_imagelist(gcatm[np.array(cl)], None, None, names=cl)
    return clumps

for h in hosts_to_target:
    clumps = find_clumps(h, show_in_sdss=False)
    if clumps:
        print(h.name, 'clumps:', clumps)
    else:
        print(h.name, 'is clump-less in ML targets')


Alice is clump-less in ML targets
Sopranos is clump-less in ML targets
MobyDick is clump-less in ML targets
StarTrek clumps: [[12, 13], [33, 34], [35, 36]]