realfast analysis


In [ ]:
import logging
# Reset the root logger so that logging.basicConfig() below installs a fresh
# default handler (basicConfig does nothing while the root logger has handlers).
logger = logging.getLogger()
# Iterate over a copy: removeHandler() mutates logger.handlers, and removing
# entries while looping over the live list skips every other handler.
for hdlr in list(logger.handlers):
    logger.removeHandler(hdlr)
logging.basicConfig()

import glob, os, pickle
from bokeh.plotting import show, output_notebook
from ipywidgets import interact, FloatSlider, Text, Dropdown
from rtpipe import interactive
from time import asctime
import activegit
import numpy as np

In [ ]:
# Direct bokeh output to this notebook; run before any of the show() calls below.
output_notebook()

In [ ]:
# Record where and when this notebook was run.
print('Working directory: {0}'.format(os.getcwd()))
print('Run on {0}'.format(asctime()))

# An explicit "fileroot" environment variable takes precedence; otherwise the
# merged candidate/noise pickles are discovered in the working directory.
fileroot = os.environ.get('fileroot')
if fileroot is not None:
    print('Setting fileroot to {0} from environment variable.\n'.format(fileroot))
    candsfile = 'cands_{0}_merge.pkl'.format(fileroot)
    noisefile = 'noise_{0}_merge.pkl'.format(fileroot)
else:
    sdmdir = os.getcwd()
    print('Setting sdmdir to current directory {0}\n'.format(os.path.abspath(sdmdir)))
    # glob returns names in arbitrary order; sort so that "first" is reproducible
    candsfiles = sorted(glob.glob('cands_*_merge.pkl'))
    noisefiles = sorted(glob.glob('noise_*_merge.pkl'))
    if not candsfiles or not noisefiles:
        # fail with an explicit message instead of an IndexError further down
        raise IOError('No cands_*_merge.pkl / noise_*_merge.pkl file set found in {0}'.format(
            os.path.abspath(sdmdir)))
    if len(candsfiles) == 1 and len(noisefiles) == 1:
        print('Found one cands/noise file set')
    else:
        print('Found multiple cands/noise file sets. Taking first.')

    candsfile = candsfiles[0]
    # Slice off the literal prefix and suffix. str.lstrip/rstrip treat their
    # argument as a *set of characters*, which also eats leading/trailing
    # characters of the fileroot itself (e.g. 'cands_sample_merge.pkl' -> '').
    fileroot = candsfile[len('cands_'):-len('_merge.pkl')]
    # Prefer the noise file belonging to the same fileroot as the cands file.
    matchingnoise = 'noise_{0}_merge.pkl'.format(fileroot)
    noisefile = matchingnoise if matchingnoise in noisefiles else noisefiles[0]
print('Set: \n\t candsfile {} \n\t noisefile {} \n\t fileroot {} '.format(candsfile, noisefile, fileroot))

Prepare data and noise plot


In [ ]:
# Read the merged candidates file; with returnstate=True the result is unpacked
# into three values (loc, prop and the state d) that the cells below rely on.
# NOTE(review): candsfile is a .pkl, presumably unpickled inside rtpipe -- only
# load files from a trusted source.
loc, prop, d = interactive.read_candidates(candsfile, returnstate=True)
# Build the data structure consumed by the filtering and plotting functions below.
data = interactive.readdata(d=d, cands=(loc, prop))

Calculate classifier score from latest training set


In [ ]:
# Locate the activegit repository: $agdir if set, otherwise ~/code/alnotebook.
agdir = os.environ.get('agdir')
if agdir is None:
    # expanduser also works where the HOME environment variable is not set
    agdir = os.path.join(os.path.expanduser('~'), 'code', 'alnotebook')

# Scoring is best-effort: on failure the notebook continues without
# data['score'], but the reason is reported instead of being swallowed.
try:
    ag = activegit.ActiveGit(agdir)
    clf = ag.read_classifier()
    # NOTE(review): columns of prop fed to the classifier -- confirm they match
    # the features the classifier was trained on.
    statfeats = [0,4,5,6,7,8]
    data['score'] = clf.predict_proba((np.nan_to_num(prop[:,statfeats])))[:,1]  # take real score
except Exception as exc:  # not a bare except: let KeyboardInterrupt/SystemExit through
    print('Failure when parsing activegit repo or applying classification.')
    print('\t{0}: {1}'.format(type(exc).__name__, exc))

Remove bad times and cands, then calculate indices


In [ ]:
# plot-symbol index lists (circles, crosses, edges); filled in by filterdata
plinds = dict(cir=[], cro=[], edg=[])

def filterdata(threshold, ignorestr):
    """ Iteratively filter bad times and set indices for later plotting

    threshold is passed to interactive.findhight to flag times with high counts.
    ignorestr is a comma-delimited string of time pairs (see parseignoret).
    Updates plinds and data['zs'] in place, prints a summary, and rewrites
    ignorestr.txt when ignorestr differs from the stored value.
    """

    ignoret = parseignoret(ignorestr)
    hight, highcount = interactive.findhight(data, ignoret=ignoret, threshold=threshold)
    plinds['cir'] = interactive.calcinds(data, 6., ignoret=ignoret) # positive cands
    plinds['cro'] = interactive.calcinds(data, -6., ignoret=ignoret) # negative cands
    plinds['edg'] = interactive.calcinds(data, 7., ignoret=ignoret) # cands with png plots
    allinds = plinds['cir'] + plinds['cro'] + plinds['edg']
    sortinds = sorted(set(allinds))
    print('Selected {} ({} linked) points.'.format(len(sortinds), len(plinds['edg'])))

    print('Estimated total on target time: {} s\n'.format(interactive.calcontime(
        data, inds=allinds)))

    # these must get rescaled when cands are ignored
    data['zs'] = interactive.normprob(d, data['snrs'], inds=sortinds)

    # print high 1s bin counts
    if len(hight):
        print('High times \t High counts:')
        for hightime, count in zip(hight, highcount):
            print('{0}\t{1}'.format(hightime, count))
    else:
        print('No high 1s bin counts.')
    print('\n')

    # print high cands and their times
    biginds = np.argsort(data['abssnr'][sortinds])[-5:]
    print('Top 5 absnr candidates and times:')
    for ind in biginds:
        print(data['snrs'][sortinds][ind], data['time'][sortinds][ind])
    print('\n')

    # update ignorestr.txt if changed; context managers close the file handles
    ignorestr0 = ''
    if os.path.exists('ignorestr.txt'):
        with open('ignorestr.txt', 'r') as stored:
            ignorestr0 = stored.read()
    if ignorestr != ignorestr0:
        print('Updating ignorestr.txt with {0}'.format(ignorestr))
        with open('ignorestr.txt', 'w') as stored:
            stored.write(ignorestr)

def parseignoret(ignorestr):
    """ Parse comma-delimited string into a list of (int, int) pairs

    A string without any comma gives an empty list.
    Values are paired in order: "a,b,c,d" gives [(a, b), (c, d)].
    """

    if ',' not in ignorestr:
        return []

    fields = ignorestr.split(',')
    assert len(fields) % 2 == 0, 'ignorestr be pairs of comma-delimited values.'

    pairs = []
    for first in range(0, len(fields), 2):
        pairs.append((int(fields[first]), int(fields[first + 1])))
    return pairs

# set ignorestr from ignorestr.txt if it exists (closing the file after reading)
ignorestr = ''
if os.path.exists('ignorestr.txt'):
    with open('ignorestr.txt', 'r') as stored:
        ignorestr = stored.read()

# set widgets
threshw = FloatSlider(value=15, min=5, max=20, description='Threshold to find bad times', padding=10)
textw = Text(value=ignorestr, description='Times to ignore (comma-delimited)', padding=10)

filterdata(15, ignorestr) # run once to prefill notebook
hndl = interact(filterdata, threshold=threshw, ignorestr=textw, __manual=True) # set up interaction

Generate candidate and noise plots


In [ ]:
def displayplot(plottype, sizespec):
    """ Generate interactive plot

    plottype is one of 'all', 'dmt', 'norm', 'loc', 'stat'.
    sizespec is "<source>_<powerlaw>": a key of data and an integer passed
    to interactive.calcsize as plaw (e.g. 'snrs_3').
    Sets data['sizes'] and shows the plot; returns nothing.
    """

    plotdict = {'dmt': interactive.plotdmt, 'norm': interactive.plotnorm,
               'loc': interactive.plotloc, 'stat': interactive.plotstat,
               'all': interactive.plotall}
    sizedict = {'dmt': [900,500], 'norm': [700, 700], 'loc': [700,700],
                'stat': [700,700]}

    sortinds = sorted(set(plinds['cir'] + plinds['cro'] + plinds['edg']))
    sizesrc, plaw = sizespec.split('_')
    try:
        sizevals = data[sizesrc]
    except KeyError:
        # e.g. 'score' is absent when the classifier scoring step failed
        print('No "{0}" values available to set symbol sizes. Choose another size spec.'.format(sizesrc))
        return
    data['sizes'] = interactive.calcsize(sizevals, inds=sortinds, plaw=int(plaw))

    plotargs = dict(circleinds=plinds['cir'], crossinds=plinds['cro'],
                    edgeinds=plinds['edg'], url_path='../files',
                    fileroot=fileroot)
    if plottype != 'all':
        # single-panel plots get an explicit size; 'all' is called without one
        plotargs['plot_width'], plotargs['plot_height'] = sizedict[plottype]
    pl = plotdict[plottype](data, **plotargs)
    show(pl)

# Size spec choices are "<data key>_<power law>" strings, which displayplot splits on '_'.
sizespecw = Dropdown(value='snrs_3', options=['snrs_3', 'snrs_5', 'score_5', 'score_7'],
                     description='Sizes spec ("source_powerlaw")', padding=10)
# Plot type choices correspond to the keys of plotdict inside displayplot.
plotw = Dropdown(options=['all', 'dmt', 'norm', 'loc', 'stat'], description='Type of plot to make', padding=10)

displayplot('all', 'snrs_3') # run once to prefill notebook
hndl = interact(displayplot, plottype=plotw, sizespec=sizespecw, __manual=True)  # set up interaction

In [ ]:
# Build the noise plot from the noise and candidate files selected above, then display it.
noiseplot = interactive.plotnoise(noisefile, candsfile, plot_width=950, plot_height=400)
hndl = show(noiseplot)

Take notes


In [ ]:
def addcomment(commentstr):
    """ Overwrite commentstr.txt in the working directory with commentstr """

    with open('commentstr.txt', 'w') as notes:
        notes.write(commentstr)

# Prefill the comment box from commentstr.txt if it exists (closing the file after reading).
commentstr = ''
if os.path.exists('commentstr.txt'):
    with open('commentstr.txt', 'r') as notes:
        commentstr = notes.read()
textc = Text(value=commentstr, description='Add Comments', padding=20)
hndl = interact(addcomment, commentstr=textc, __manual=True)

Optional refinement analysis

Requires downloading the data from the archive into the working directory.


In [ ]: