In [256]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from pulsar_tools import disp_delay
import pandas as pd
import sys
import bokeh.io
from bokeh.models import CustomJS, ColumnDataSource, BoxSelectTool, Slider
from bokeh.plotting import figure, gridplot, show, vplot
import ipywidgets
bokeh.io.output_notebook()
In [257]:
def load_file(filename):
if filename == None:
print "No filename supplied to read..."
elif filename.endswith('.singlepulse'):
print "Generating singlepulse file Data Frame..."
data = pd.read_csv(filename)
return data
elif filename.endswith('.flag'):
print "Generating flags list..."
flags = np.genfromtxt(filename ,comments="#", autostrip=True)
if len(flags) == 0:
print "No flags/bad times provided. No times in final output will be masked."
return flags
else:
print "File name suplied is not recognised. Must be either .singlepulse, .bad or .flag"
In [258]:
def obs_stats(time, flags):
"""
Print the masker observation stats, i.e. how much time has been masked
"""
flag_time = 0
# BWM: if there is only 1 masked region, flags is a list,
# if there are 2+ masked regions, flags is a list of lists.
if any(isinstance(l, np.ndarray) for l in flags):
for flag in flags:
flag_time += (float(flag[1]) - float(flag[0]))
else:
flag_time = float(flags[1]) - float(flags[0])
percent = flag_time * 100 / time
print "{0:.2f} seconds flagged from {1:.2f} seconds of data ({2:.2f} percent)".format(flag_time, time, percent)
In [259]:
def flagfile(basename, max_DM=2097.2, freq_l=0.169615, freq_h=0.200335, padding=3):
"""
This function takes in a text file of bad 0 DM times and
writes out one flagged over the correct de-dispersive smearing
times, looking for overlaps along the way. There must be a text file named
basename.bad with rows indicating bad times for this to work.
"""
from subprocess import check_call
# BWM: originally planned to move this to the load_file function,
# but left it incase we JUST want to call flagfile
bads = np.genfromtxt(basename+'.bad', comments='#', autostrip=True)
# BWM: again because how np.genfromtxt works, if there is only 1 bad line, we get a list,
# if there are 2+ bad lines we get a list of lists. So have to check for np.ndarray
# instances and change method accordingly.
i = 0 # initialize counter for new list
flags = []
if any(isinstance(b, np.ndarray) for b in bads):
for bad in bads:
start = bad[0] - (padding + disp_delay(freq1=freq_l, freq2=freq_h, DM=max_DM)/1000.0)
if start < 0:
start = 0
stop = bad[1] + padding
if len(flags) > 0:
if start <= flags[-1][1]:
flags[-1][1] = stop
else:
flags.append([start, stop])
else:
flags.append([start, stop])
else:
# if there is a no bad regions (defaulted) then don't put any padding in
if bads[0] == bads[1]:
padding = 0
max_DM = 0
start = bads[0] - (padding + disp_delay(freq1=freq_l, freq2=freq_h, DM=max_DM)/1000.0)
if start < 0:
start = 0
stop = bads[1] + padding
if len(flags) > 0:
if start <= flags[-1][1]:
flags[-1][1] = stop
else:
flags.append([start, stop])
else:
flags.append([start, stop])
# save new file as basename.flag
np.savetxt(basename+'.flag', flags, fmt='%d')
# call flag.sh script to creat masked singlepulse file
check_call(['flag.sh', basename])
#Popen(['flag.sh', basename]).communicate()[0]
In [260]:
def singlepulse_plot(basename=None, StatPlots=False, raw=False, threshold=5.0):
"""
Plots up the flagged data as in PRESTO single_pulse_search output
"""
if basename == None:
print "No file name provided. Quitting..."
sys.exit(0)
if raw:
data = load_file(basename + '.singlepulse')
else:
try:
flagfile(basename) # BWM: should we be providing appropriate freqs and DM for this?
except:
print "No {}.bad file given. Creating one with entry [0 0]".format(basename)
f=open('{}.bad'.format(basename),'w')
f.write('0 0')
f.close()
print "Saved {}.bad".format(basename)
print "Retrying..."
flagfile(basename)
data = load_file(basename + '_flagged.singlepulse')
flags = load_file(basename + '.flag')
# ensure only using data points with sigma > threshold
data = data[data['Sigma']>threshold]
# calculate boxcar sizes
boxcar = data['Downfact'].values * data['dt'].values
#cm = plt.cm.get_cmap('gist_rainbow')
Size = 5 * data['Sigma'].values/4.0
percentile = np.percentile(Size, 99.5)
Size[np.where(Size > 5 * percentile)] = 5 * percentile/4.0
TOOLS = "resize,pan,wheel_zoom,box_zoom,reset,lasso_select,box_select"
color = "navy"
# Function to create colormap based on whatever data is passed.
# Uses gist_rainbow as base map
def make_cmap(scale_data):
colors = [(int(r),int(g),int(b)) for r,g,b,_ in 255*mpl.cm.gist_rainbow(mpl.colors.Normalize()(scale_data))]
return ["#{0:02X}{1:02X}{2:02X}".format(c[0],c[1],c[2]) for c in colors]
color_map = make_cmap(boxcar)
src = ColumnDataSource(data=dict(time=data['Time'].values, dm=data['DM'].values, sigma=data['Sigma'].values,\
size=Size, cmap=color_map, downfact=data['Downfact'].values.astype(float)))
minDM, maxDM = min(data['DM']),max(data['DM'])
timeseries = figure(plot_width=900, plot_height=400, webgl=True, tools=TOOLS, toolbar_location='right',y_range=(0.9*minDM,1.1*maxDM))
timeseries.select(BoxSelectTool).select_every_mousemove = False
ts = timeseries.scatter('time', 'dm', source=src, marker='o', size='size', fill_color='cmap', line_color='cmap')
timeseries.xaxis.axis_label = 'Time (s)'
timeseries.yaxis.axis_label = 'DM (pc/cc)'
if StatPlots:
sigma_bins = int(0.1 * len(set(data['Sigma'].values)))
histL,edgesL = np.histogram(data['Sigma'].values, bins=100)
histLzeros = np.zeros(len(edgesL)-1)
vmaxL = max(histL)*1.1
top_left = figure(plot_width=300, plot_height=300 ,webgl=True, tools=TOOLS, y_range=(0,vmaxL))
top_left.quad(top=histL, bottom=0, left=edgesL[:-1], right=edgesL[1:], line_color=color, fill_color=color)
top_left.xaxis.axis_label = 'S/N'
top_left.yaxis.axis_label = 'Number of pulses'
dm_bins = int(0.5 * len(set(data['DM'].values)))
histM,edgesM = np.histogram(data['DM'].values, bins=100)
histMzeros = np.zeros(len(edgesM)-1)
vmaxM = max(histM)*1.1
top_mid = figure(plot_width=300, plot_height=300, webgl=True, tools=TOOLS, x_range=timeseries.y_range, y_range=(0,vmaxM))
top_mid.quad(top=histM, bottom=0, left=edgesM[:-1], right=edgesM[1:], line_color=color, fill_color=color)
top_mid.xaxis.axis_label = 'DM (pc/cc)'
top_mid.yaxis.axis_label = 'Number of pulses'
top_right = figure(plot_width=300, plot_height=300, webgl=True, tools=TOOLS,\
x_range=top_mid.x_range)
top_right.scatter('dm', 'sigma', source=src, marker='o',color='cmap')
top_right.xaxis.axis_label = 'DM (pc/cc)'
top_right.yaxis.axis_label = 'S/N'
stat_figures = [top_left, top_mid, top_right]
def update_hists(attr, old, new):
inds = np.array(new['1d']['indices'])
if len(inds) == 0:
histL = histLzeros
histM = histMzeros
elif len(inds) == len(data['Sigma'].values):
histL = histLzeros
elif len(inds) == len(data['DM'].values):
histM = histMzeros
else:
histL,_ = np.histogram(data['Sigma'].values[inds], bins=100)
histM,_ = np.histogram(data['DM'].values[inds], bins=100)
top_left.data_source.data["top"] = histL
top_mid.data_source.data["top"] = histM
if StatPlots:
plots = [gridplot([stat_figures], toolbar_location='right'), timeseries]
show(vplot(*plots))
else:
show(timeseries)
ts.data_source.on_change('selected',update_hists)
In [261]:
"""
Now to set up the desired parameters. These will be passed to singlepulse_plot to create the interactive window.
Users edit the following:
"""
# base name of single pulse file (typically observation ID).
# e.g. if you produced a file named "test.singlpulse" from running the sort_singlepulse tool, the basename="test"
basename = 1099414416
# whether you want to plot the statistical output as well as the DM vs. Time. Default is to not plot statistics.
stat_plots = True
# do you want to plot flagged data or the raw data? Default is to plot flagged data.
raw = False
# what signal-to-noise threshold do you want to enforce?
threshold = 6.0
In [262]:
singlepulse_plot(basename=str(basename), StatPlots=stat_plots, raw=raw, threshold=threshold)
In [ ]: