notebook.community

Edit and run



In [256]:

    
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from pulsar_tools import disp_delay
import pandas as pd
import sys
import bokeh.io
from bokeh.models import CustomJS, ColumnDataSource, BoxSelectTool, Slider
from bokeh.plotting import figure, gridplot, show, vplot
import ipywidgets

bokeh.io.output_notebook()









    





    
        
        Loading BokehJS ...



In [257]:

    
def load_file(filename):
    if filename == None:
        print "No filename supplied to read..."

    elif filename.endswith('.singlepulse'):
        print "Generating singlepulse file Data Frame..."
        data = pd.read_csv(filename)
        return data

    elif filename.endswith('.flag'):
        print "Generating flags list..."

        flags = np.genfromtxt(filename ,comments="#", autostrip=True)

        if len(flags) == 0:
            print "No flags/bad times provided. No times in final output will be masked."

        return flags

    else:
        print "File name suplied is not recognised. Must be either .singlepulse, .bad or .flag"



In [258]:

    
def obs_stats(time, flags):
    """
    Print the masker observation stats, i.e. how much time has been masked
    """
    flag_time = 0

    # BWM: if there is only 1 masked region, flags is a list, 
    # if there are 2+ masked regions, flags is a list of lists.
    if any(isinstance(l, np.ndarray) for l in flags):
        for flag in flags:
            flag_time += (float(flag[1]) - float(flag[0]))
    else:
        flag_time = float(flags[1]) - float(flags[0])
    
    percent = flag_time * 100 / time
    print "{0:.2f} seconds flagged from {1:.2f} seconds of data ({2:.2f} percent)".format(flag_time, time, percent)



In [259]:

    
def flagfile(basename, max_DM=2097.2, freq_l=0.169615, freq_h=0.200335, padding=3):
    """
    This function takes in a text file of bad 0 DM times and
    writes out one flagged over the correct de-dispersive smearing
    times, looking for overlaps along the way. There must be a text file named
    basename.bad with rows indicating bad times for this to work. 
    """
    from subprocess import check_call

    # BWM: originally planned to move this to the load_file function, 
    #  but left it incase we JUST want to call flagfile
    bads = np.genfromtxt(basename+'.bad', comments='#', autostrip=True)

    # BWM: again because how np.genfromtxt works, if there is only 1 bad line, we get a list, 
    # if there are 2+ bad lines we get a list of lists. So have to check for np.ndarray 
    # instances and change method accordingly.
    

    i = 0 # initialize counter for new list
    flags = []

    if any(isinstance(b, np.ndarray) for b in bads):
        for bad in bads:
            start = bad[0] - (padding + disp_delay(freq1=freq_l, freq2=freq_h, DM=max_DM)/1000.0)
            if start < 0:
                start = 0
            stop = bad[1] + padding
            if len(flags) > 0:
                if start <= flags[-1][1]:
                    flags[-1][1] = stop
                else:
                    flags.append([start, stop])
            else:
                flags.append([start, stop])
    
    else:
        # if there is a no bad regions (defaulted) then don't put any padding in
        if bads[0] == bads[1]:
            padding = 0
            max_DM = 0

        start = bads[0] - (padding + disp_delay(freq1=freq_l, freq2=freq_h, DM=max_DM)/1000.0)
        if start < 0:
            start = 0
        stop = bads[1] + padding
        if len(flags) > 0:
            if start <= flags[-1][1]:
                flags[-1][1] = stop
            else:
                flags.append([start, stop])
        else:
            flags.append([start, stop])
    # save new file  as basename.flag
    np.savetxt(basename+'.flag', flags, fmt='%d')
    # call flag.sh script to creat masked singlepulse file
    check_call(['flag.sh', basename])
    #Popen(['flag.sh', basename]).communicate()[0]



In [260]:

    
def singlepulse_plot(basename=None, StatPlots=False, raw=False, threshold=5.0):
    """
    Plots up the flagged data as in PRESTO single_pulse_search output
    """
    if basename == None:
        print "No file name provided. Quitting..."
        sys.exit(0)
        
    if raw:
        data = load_file(basename + '.singlepulse')
    else:
        try:
            flagfile(basename) # BWM: should we be providing appropriate freqs and DM for this?
        except:
            print "No {}.bad file given. Creating one with entry [0 0]".format(basename)
            f=open('{}.bad'.format(basename),'w')
            f.write('0 0')
            f.close()
            print "Saved {}.bad".format(basename)
            print "Retrying..."
            flagfile(basename)

        data = load_file(basename + '_flagged.singlepulse')
        flags = load_file(basename + '.flag')

    # ensure only using data points with sigma > threshold
    data = data[data['Sigma']>threshold]

    # calculate boxcar sizes
    boxcar = data['Downfact'].values * data['dt'].values

    #cm = plt.cm.get_cmap('gist_rainbow')

    Size = 5 * data['Sigma'].values/4.0
    percentile = np.percentile(Size, 99.5)
    Size[np.where(Size > 5 * percentile)] = 5 * percentile/4.0

    TOOLS = "resize,pan,wheel_zoom,box_zoom,reset,lasso_select,box_select"
    color = "navy"   
    
    # Function to create colormap based on whatever data is passed. 
    # Uses gist_rainbow as base map
    def make_cmap(scale_data):
        colors = [(int(r),int(g),int(b)) for r,g,b,_ in 255*mpl.cm.gist_rainbow(mpl.colors.Normalize()(scale_data))]
        return ["#{0:02X}{1:02X}{2:02X}".format(c[0],c[1],c[2]) for c in colors]   

    color_map = make_cmap(boxcar)


    src = ColumnDataSource(data=dict(time=data['Time'].values, dm=data['DM'].values, sigma=data['Sigma'].values,\
                                         size=Size, cmap=color_map, downfact=data['Downfact'].values.astype(float)))

    minDM, maxDM = min(data['DM']),max(data['DM'])
    timeseries = figure(plot_width=900, plot_height=400, webgl=True, tools=TOOLS, toolbar_location='right',y_range=(0.9*minDM,1.1*maxDM))
    timeseries.select(BoxSelectTool).select_every_mousemove = False
    ts = timeseries.scatter('time', 'dm', source=src, marker='o', size='size', fill_color='cmap', line_color='cmap')
    timeseries.xaxis.axis_label = 'Time (s)'
    timeseries.yaxis.axis_label = 'DM (pc/cc)'
    
    
    
    
    
    if StatPlots:
        
        sigma_bins = int(0.1 * len(set(data['Sigma'].values)))
        histL,edgesL = np.histogram(data['Sigma'].values, bins=100)
        histLzeros = np.zeros(len(edgesL)-1)
        vmaxL = max(histL)*1.1
        top_left = figure(plot_width=300, plot_height=300 ,webgl=True, tools=TOOLS, y_range=(0,vmaxL))
        top_left.quad(top=histL, bottom=0, left=edgesL[:-1], right=edgesL[1:], line_color=color, fill_color=color)
        top_left.xaxis.axis_label = 'S/N'
        top_left.yaxis.axis_label = 'Number of pulses'

        dm_bins = int(0.5 * len(set(data['DM'].values)))
        histM,edgesM = np.histogram(data['DM'].values, bins=100)
        histMzeros = np.zeros(len(edgesM)-1)
        vmaxM = max(histM)*1.1
        top_mid = figure(plot_width=300, plot_height=300, webgl=True, tools=TOOLS, x_range=timeseries.y_range, y_range=(0,vmaxM))
        top_mid.quad(top=histM, bottom=0, left=edgesM[:-1], right=edgesM[1:], line_color=color, fill_color=color)
        top_mid.xaxis.axis_label = 'DM (pc/cc)'
        top_mid.yaxis.axis_label = 'Number of pulses'

        top_right = figure(plot_width=300, plot_height=300, webgl=True, tools=TOOLS,\
                           x_range=top_mid.x_range)
        top_right.scatter('dm', 'sigma', source=src, marker='o',color='cmap')
        top_right.xaxis.axis_label = 'DM (pc/cc)'
        top_right.yaxis.axis_label = 'S/N'
        
        stat_figures = [top_left, top_mid, top_right]
        
        
        
    def update_hists(attr, old, new):
        inds = np.array(new['1d']['indices'])
        
        if len(inds) == 0:
            histL = histLzeros
            histM = histMzeros
        elif len(inds) == len(data['Sigma'].values):
            histL = histLzeros
        elif len(inds) == len(data['DM'].values):
            histM = histMzeros
        else:
            histL,_ = np.histogram(data['Sigma'].values[inds], bins=100)
            histM,_ = np.histogram(data['DM'].values[inds], bins=100)
        
        
        top_left.data_source.data["top"] = histL
        top_mid.data_source.data["top"] = histM       
        
        
        
    if StatPlots: 
        plots = [gridplot([stat_figures], toolbar_location='right'), timeseries]
        show(vplot(*plots))

    else:
        show(timeseries)
        
    ts.data_source.on_change('selected',update_hists)



In [261]:

    
"""
Now to set up the desired parameters. These will be passed to singlepulse_plot to create the interactive window.

Users edit the following:
"""

# base name of single pulse file (typically observation ID).
# e.g. if you produced a file named "test.singlpulse" from running the sort_singlepulse tool, the basename="test"
basename = 1099414416

# whether you want to plot the statistical output as well as the DM vs. Time. Default is to not plot statistics.
stat_plots = True

# do you want to plot flagged data or the raw data? Default is to plot flagged data.
raw = False

# what signal-to-noise threshold do you want to enforce?
threshold = 6.0



In [262]:

    
singlepulse_plot(basename=str(basename), StatPlots=stat_plots, raw=raw, threshold=threshold)









    



Generating singlepulse file Data Frame...
Generating flags list...



In [ ]: