realfast analysis



In [ ]:

    
import logging

# Start from a clean root logger: logging.basicConfig() does nothing when the
# root logger already has handlers, so existing ones are removed first.
logger = logging.getLogger()
# Iterate over a copy. removeHandler() mutates logger.handlers, and removing
# items from the list being iterated skips every other handler.
for hdlr in list(logger.handlers):
    logger.removeHandler(hdlr)
logging.basicConfig()

import glob, os, pickle
from bokeh.plotting import show, output_notebook
from ipywidgets import interact, FloatSlider, Text, Dropdown
from rtpipe import interactive
from time import asctime
import activegit
import numpy as np



In [ ]:

    
# Configure bokeh (imported from bokeh.plotting above) to render plots in the
# notebook; the later show() calls rely on this having been run.
output_notebook()



In [ ]:

    
# Report the run context, then work out which merged candidate/noise pickles
# to analyse. Results are left in the notebook namespace as:
#   fileroot  -- common root name of the data set
#   candsfile -- 'cands_<fileroot>_merge.pkl'
#   noisefile -- 'noise_<fileroot>_merge.pkl'
print('Working directory: {0}'.format(os.getcwd()))
print('Run on {0}'.format(asctime()))
try:
    # Preferred: the fileroot is supplied through the "fileroot" environment variable.
    fileroot = os.environ['fileroot']
    print('Setting fileroot to {0} from environment variable.\n'.format(fileroot))
    candsfile = 'cands_{0}_merge.pkl'.format(fileroot)
    noisefile = 'noise_{0}_merge.pkl'.format(fileroot)
except KeyError:
    # Fallback: discover the merged files in the current working directory.
    sdmdir = os.getcwd()
    print('Setting sdmdir to current directory {0}\n'.format(os.path.abspath(sdmdir)))
    # glob returns names in arbitrary order; sort so that "first" is deterministic
    # and the cands/noise picks line up with each other.
    candsfiles = sorted(glob.glob('cands_*_merge.pkl'))
    noisefiles = sorted(glob.glob('noise_*_merge.pkl'))
    if not candsfiles or not noisefiles:
        # Fail with an explicit message instead of an IndexError further down.
        raise IOError('No cands_*_merge.pkl / noise_*_merge.pkl file set found in {0}'.format(sdmdir))
    if len(candsfiles) == 1 and len(noisefiles) == 1:
        print('Found one cands/noise file set')
    else:
        print('Found multiple cands/noise file sets. Taking first.')

    candsfile = candsfiles[0]
    noisefile = noisefiles[0]
    # Cut the fixed prefix and suffix off by length. str.rstrip/lstrip remove
    # *sets of characters*, not substrings, and would also eat leading/trailing
    # characters of the fileroot itself (e.g. 'cands_sample_merge.pkl' -> '').
    fileroot = candsfile[len('cands_'):-len('_merge.pkl')]
print('Set: \n\t candsfile {} \n\t noisefile {} \n\t fileroot {} '.format(candsfile, noisefile, fileroot))

Prepare data and noise plot



In [ ]:

    
# Read the merged candidates file. With returnstate=True three values come back;
# presumably candidate locations (loc), candidate properties (prop) and the
# pipeline state (d) -- TODO confirm against rtpipe.interactive.read_candidates.
loc, prop, d = interactive.read_candidates(candsfile, returnstate=True)
# Build the structure used by all later cells; it is indexed by string keys
# below (e.g. data['snrs'], data['score']).
data = interactive.readdata(d=d, cands=(loc, prop))

Calculate classifier score from latest training set



In [ ]:

    
# Locate the activegit repository: the "agdir" environment variable wins,
# otherwise fall back to ~/code/alnotebook.
try:
    agdir = os.environ['agdir']
except KeyError:
    # expanduser('~') uses $HOME when it is set and still resolves a home
    # directory when it is not, instead of raising KeyError.
    home = os.path.expanduser('~')
    agdir = os.path.join(home, 'code', 'alnotebook')

# Scoring is best-effort: when it fails, data['score'] is simply not set and the
# rest of the notebook continues. Only ordinary errors are caught, so that
# KeyboardInterrupt/SystemExit still propagate, and the reason is reported.
try:
    ag = activegit.ActiveGit(agdir)
    clf = ag.read_classifier()
    statfeats = [0,4,5,6,7,8]  # columns of prop passed to the classifier
    data['score'] = clf.predict_proba((np.nan_to_num(prop[:,statfeats])))[:,1]  # take real score
except Exception as exc:
    print('Failure when parsing activegit repo or applying classification.')
    print('\t{0}: {1}'.format(type(exc).__name__, exc))

Remove bad times and cands, then calculate indices



In [ ]:

    
plinds = {'cir': [], 'cro': [], 'edg': []}  # initialize plot symbols

def filterdata(threshold, ignorestr):
    """ Filter bad times and set indices for later plotting.

    threshold is passed to interactive.findhight to find high-count times.
    ignorestr is a comma-delimited string of integer pairs (see parseignoret);
    the resulting pairs are passed as ignoret to the rtpipe helpers.

    Side effects:
      - replaces the 'cir', 'cro' and 'edg' entries of the module-level plinds
      - sets data['zs']
      - prints a summary (selection size, on-target time, high-count times,
        top 5 candidates)
      - rewrites ignorestr.txt in the working directory when ignorestr differs
        from its current content
    """

    ignoret = parseignoret(ignorestr)
    hight, highcount = interactive.findhight(data, ignoret=ignoret, threshold=threshold)
    plinds['cir'] = interactive.calcinds(data, 6., ignoret=ignoret) # positive cands
    plinds['cro'] = interactive.calcinds(data, -6., ignoret=ignoret) # negative cands
    plinds['edg'] = interactive.calcinds(data, 7., ignoret=ignoret) # cands with png plots

    # allinds may hold duplicates; sortinds is the unique, ordered selection
    allinds = plinds['cir'] + plinds['cro'] + plinds['edg']
    sortinds = sorted(set(allinds))
    print('Selected {} ({} linked) points.'.format(len(sortinds), len(plinds['edg'])))

    print('Estimated total on target time: {} s\n'.format(interactive.calcontime(
        data, inds=allinds)))

    # these must get rescaled when cands are ignored
    data['zs'] = interactive.normprob(d, data['snrs'], inds=sortinds)

    # print high 1s bin counts
    if len(hight):
        print('High times \t High counts:')
        for hightime, count in zip(hight, highcount):
            print('{0}\t{1}'.format(hightime, count))
    else:
        print('No high 1s bin counts.')
    print('\n')

    # print high cands and their times
    biginds = np.argsort(data['abssnr'][sortinds])[-5:]
    selsnrs = data['snrs'][sortinds]   # index the selection once, not per iteration
    seltimes = data['time'][sortinds]
    print('Top 5 absnr candidates and times:')
    for ind in biginds:
        print(selsnrs[ind], seltimes[ind])
    print('\n')

    # update ignorestr.txt if changed
    ignorestr0 = ''
    if os.path.exists('ignorestr.txt'):
        # context manager so the read handle is closed deterministically
        with open('ignorestr.txt', 'r') as fobj:
            ignorestr0 = fobj.read()
    if ignorestr != ignorestr0:
        print('Updating ignorestr.txt with {0}'.format(ignorestr))
        with open('ignorestr.txt', 'w') as fobj:
            fobj.write(ignorestr)

def parseignoret(ignorestr):
    """ Parse a comma-delimited string into a list of integer pairs.

    'a,b,c,d' becomes [(a, b), (c, d)]. A string without any comma (including
    the empty string) yields an empty list.

    Raises AssertionError if the number of comma-delimited values is odd and
    ValueError (from int) if a value is not an integer.
    """

    if ',' not in ignorestr:
        return []

    ignorelist = ignorestr.split(',')
    # Raise explicitly rather than via an assert statement: asserts are removed
    # under "python -O", which would turn bad input into an IndexError below.
    if len(ignorelist) % 2:
        raise AssertionError('ignorestr must be pairs of comma-delimited values.')
    return [(int(ignorelist[i]), int(ignorelist[i+1])) for i in range(0, len(ignorelist), 2)]

# set ignorestr from the previous session, if one was saved
ignorestr = ''
if os.path.exists('ignorestr.txt'):
    # context manager so the file handle is closed deterministically
    with open('ignorestr.txt', 'r') as fobj:
        ignorestr = fobj.read()

# set widgets
threshw = FloatSlider(value=15, min=5, max=20, description='Threshold to find bad times', padding=10)
textw = Text(value=ignorestr, description='Times to ignore (comma-delimited)', padding=10)

filterdata(15, ignorestr) # run once to prefill notebook
hndl = interact(filterdata, threshold=threshw, ignorestr=textw, __manual=True) # set up interaction

Generate candidate and noise plots



In [ ]:

    
def displayplot(plottype, sizespec):
    """ Generate interactive plot.

    plottype is one of 'all', 'dmt', 'norm', 'loc', 'stat' and selects the
    rtpipe.interactive plotting function.
    sizespec has the form "<source>_<powerlaw>" (e.g. 'snrs_3'): <source> is the
    key of data used for symbol sizes and <powerlaw> is an integer passed to
    interactive.calcsize as plaw.

    Sets data['sizes'] and shows the plot. Raises KeyError for an unknown
    plottype or a size source missing from data.
    """

    plotdict = {'dmt': interactive.plotdmt, 'norm': interactive.plotnorm,
               'loc': interactive.plotloc, 'stat': interactive.plotstat,
               'all': interactive.plotall}
    sizedict = {'dmt': [900,500], 'norm': [700, 700], 'loc': [700,700],
                'stat': [700,700]}

    sortinds = sorted(set(plinds['cir'] + plinds['cro'] + plinds['edg']))
    # split on the last underscore only, so a source key may itself contain one
    sizesrc, plaw = sizespec.rsplit('_', 1)
    try:
        sizevals = data[sizesrc]
    except KeyError:
        # e.g. 'score' is only set when the classifier cell succeeded
        raise KeyError('Size source "{0}" not found in data; "score" requires a '
                       'successful classifier step.'.format(sizesrc))
    data['sizes'] = interactive.calcsize(sizevals, inds=sortinds, plaw=int(plaw))

    # arguments shared by every plot type
    plotargs = dict(circleinds=plinds['cir'], crossinds=plinds['cro'],
                    edgeinds=plinds['edg'], url_path='../files',
                    fileroot=fileroot)
    if plottype != 'all':
        # single-panel plots get an explicit size; 'all' is called without one
        plotargs['plot_width'], plotargs['plot_height'] = sizedict[plottype]
    pl = plotdict[plottype](data, **plotargs)
    show(pl)

# Widgets feeding displayplot: the size spec is "<data key>_<power law>", which
# displayplot splits on '_'; the plot type selects the plotting function.
sizespecw = Dropdown(value='snrs_3', options=['snrs_3', 'snrs_5', 'score_5', 'score_7'],
                     description='Sizes spec ("source_powerlaw")', padding=10)
plotw = Dropdown(options=['all', 'dmt', 'norm', 'loc', 'stat'], description='Type of plot to make', padding=10)

displayplot('all', 'snrs_3') # run once to prefill notebook
hndl = interact(displayplot, plottype=plotw, sizespec=sizespecw, __manual=True)  # set up interaction



In [ ]:

    
# Build the noise plot from the merged noise and candidate files chosen above,
# then display it with bokeh's show().
noiseplot = interactive.plotnoise(noisefile, candsfile, plot_width=950, plot_height=400)
hndl = show(noiseplot)

Take notes



In [ ]:

    
def addcomment(commentstr):
    """ Overwrite commentstr.txt in the working directory with commentstr. """
    fobj = open('commentstr.txt', 'w')
    try:
        fobj.write(commentstr)
    finally:
        # always release the handle, also when the write fails
        fobj.close()

# Prefill the comment box with previously saved notes, if any.
commentstr = ''
if os.path.exists('commentstr.txt'):
    # context manager so the file handle is closed deterministically
    with open('commentstr.txt', 'r') as fobj:
        commentstr = fobj.read()
textc = Text(value=commentstr, description='Add Comments', padding=20)
# addcomment rewrites commentstr.txt with the text entered in the widget
hndl = interact(addcomment, commentstr=textc, __manual=True)

Requires downloading data from the archive into the working directory



In [ ]:

realfast analysis

Prepare data and noise plot

Calculate classifier score from latest training set

Remove bad times and cands, then calculate indices

Generate candidate and noise plots

Take notes

Optional refinement analysis

Requires downloading data from the archive into the working directory