In [1]:
# Basic setup for displaying bokeh plots in jupyter:
# `figure` builds a plot, `show` renders it, and `output_notebook` (called
# below) tells bokeh to render inside the notebook.
from bokeh.plotting import figure 
from bokeh.io import output_notebook, show
# Additional requirements for Stacked Bar plot.
from bokeh.core.properties import value
from bokeh.models import ColumnDataSource
# Must run before the first show() call so plots are embedded in cell output.
output_notebook()


Loading BokehJS ...

In [2]:
# NumPy / pickle imports, plus the working directory. The pickle files loaded
# in this notebook are opened by bare filename, i.e. relative to this directory.
import numpy as np
try:
    import cPickle as pickle  # Python 2: C-accelerated implementation
except ImportError:
    import pickle  # Python 3: cPickle no longer exists as a separate module
import os
cwd = os.getcwd()
print(cwd)


/Users/kevin/dev/nml/paper_platform/scripts

In [3]:
# Raw data: timing records of the single-genome runs.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
# The context manager closes the file handle once loading is done.
with open('2018-04-03-14-01-41-627705_singlerun_100.p', 'rb') as fh:
    raws = pickle.load(fh)
print(type(raws))
print(len(raws))
# First record only, to show the structure without dumping the whole list.
print(raws[0])


<type 'list'>
100
{'/media/kevin/d640/enterobase_db/ESC_BA1121AA_AS.fasta': [{u'job_ptstx1_datastruct': [u'Tue, 03 Apr 2018 18:02:37 GMT', u'Tue, 03 Apr 2018 18:02:38 GMT', 1.0]}, {u'job_amr_dict': [u'Tue, 03 Apr 2018 18:03:30 GMT', u'Tue, 03 Apr 2018 18:03:30 GMT', 0.0]}, {u'job_ectyper_beautify_serotype': [u'Tue, 03 Apr 2018 18:01:57 GMT', u'Tue, 03 Apr 2018 18:01:58 GMT', 1.0]}, {u'job_ectyper_beautify_vf': [u'Tue, 03 Apr 2018 18:01:55 GMT', u'Tue, 03 Apr 2018 18:01:55 GMT', 0.0]}, {u'job_ectyper_datastruct_vf': [u'Tue, 03 Apr 2018 18:01:55 GMT', u'Tue, 03 Apr 2018 18:01:56 GMT', 1.0]}, {u'job_pteae': [u'Tue, 03 Apr 2018 18:01:56 GMT', u'Tue, 03 Apr 2018 18:03:28 GMT', 92.0]}, {u'job_ptstx1_dict': [u'Tue, 03 Apr 2018 18:02:37 GMT', u'Tue, 03 Apr 2018 18:02:37 GMT', 0.0]}, {u'job_ectyper_serotype': [u'Tue, 03 Apr 2018 18:01:52 GMT', u'Tue, 03 Apr 2018 18:01:57 GMT', 5.0]}, {u'job_pteae_dict': [u'Tue, 03 Apr 2018 18:03:28 GMT', u'Tue, 03 Apr 2018 18:03:28 GMT', 0.0]}, {u'job_ptstx1_beautify': [u'Tue, 03 Apr 2018 18:02:37 GMT', u'Tue, 03 Apr 2018 18:02:38 GMT', 1.0]}, {u'job_ptstx2_datastruct': [u'Tue, 03 Apr 2018 18:02:37 GMT', u'Tue, 03 Apr 2018 18:02:39 GMT', 2.0]}, {u'job_pteae_beautify': [u'Tue, 03 Apr 2018 18:03:28 GMT', u'Tue, 03 Apr 2018 18:03:28 GMT', 0.0]}, {u'job_turtle': [u'Tue, 03 Apr 2018 18:01:51 GMT', u'Tue, 03 Apr 2018 18:01:52 GMT', 1.0]}, {u'job_id': [u'Tue, 03 Apr 2018 18:01:51 GMT', u'Tue, 03 Apr 2018 18:01:52 GMT', 1.0]}, {u'job_ectyper_vf': [u'Tue, 03 Apr 2018 18:01:52 GMT', u'Tue, 03 Apr 2018 18:01:55 GMT', 3.0]}, {u'job_amr_beautify': [u'Tue, 03 Apr 2018 18:03:30 GMT', u'Tue, 03 Apr 2018 18:03:31 GMT', 1.0]}, {u'job_ectyper_datastruct_serotype': [u'Tue, 03 Apr 2018 18:01:57 GMT', u'Tue, 03 Apr 2018 18:01:58 GMT', 1.0]}, {u'job_ptstx2': [u'Tue, 03 Apr 2018 18:01:56 GMT', u'Tue, 03 Apr 2018 18:02:36 GMT', 40.0]}, {u'job_ptstx1': [u'Tue, 03 Apr 2018 18:02:36 GMT', u'Tue, 03 Apr 2018 18:02:37 GMT', 1.0]}, {u'job_amr': [u'Tue, 03 Apr 2018 18:01:52 GMT', u'Tue, 03 
Apr 2018 18:03:30 GMT', 98.0]}, {u'job_amr_datastruct': [u'Tue, 03 Apr 2018 18:03:30 GMT', u'Tue, 03 Apr 2018 18:03:30 GMT', 0.0]}, {u'job_ptstx2_dict': [u'Tue, 03 Apr 2018 18:02:36 GMT', u'Tue, 03 Apr 2018 18:02:37 GMT', 1.0]}, {u'job_qc': [u'Tue, 03 Apr 2018 18:01:51 GMT', u'Tue, 03 Apr 2018 18:01:51 GMT', 0.0]}, {u'job_pteae_datastruct': [u'Tue, 03 Apr 2018 18:03:28 GMT', u'Tue, 03 Apr 2018 18:03:29 GMT', 1.0]}, {u'job_ptstx2_beautify': [u'Tue, 03 Apr 2018 18:02:37 GMT', u'Tue, 03 Apr 2018 18:02:38 GMT', 1.0]}, {u'total': [u'Tue, 03 Apr 2018 18:01:51 GMT', u'Tue, 03 Apr 2018 18:03:31 GMT', 100.0]}]}

In [4]:
# Tabulate runtimes per module.
# Each entry of `raws` is {fasta_path: [{job_name: [start, end, runtime]}, ...]};
# every job's runtime (index 2) is added to the module its name matches.

# Ordered (substring, module) rules. The first matching substring wins, so the
# order matters: e.g. 'job_ptstx1_datastruct' must hit 'stx1' before anything else.
module_rules = [
    ('stx1', 'stx1'),
    ('stx2', 'stx2'),
    ('eae', 'eae'),
    ('vf', 'vf'),
    ('amr', 'amr'),
    ('serotype', 'serotype'),
    ('job_id', 'databaseid'),
    ('turtle', 'databaseid'),
    ('job_qc', 'qc'),
    ('total', 'total'),
]

# One list per module; position i holds the module's summed runtime for run i.
parsed = {
    'qc':[],
    'databaseid':[],
    'serotype':[],
    'vf':[],
    'amr':[],
    'stx1':[],
    'stx2':[],
    'eae':[],
    'total': []
}
for outerd in raws:
    # Per-run accumulators, one per module, all starting at zero.
    run_totals = dict.fromkeys(parsed, 0)
    # next(iter(...)) fetches the single entry on both Python 2 and Python 3
    # (dict.values()[0] only works on Python 2).
    for d in next(iter(outerd.values())):
        job_name, timing = next(iter(d.items()))
        for needle, module in module_rules:
            if needle in job_name:
                run_totals[module] += timing[2]
                break
        # Jobs matching no rule are ignored.
    for module in parsed:
        parsed[module].append(run_totals[module])

In [5]:
import numpy as np
from bokeh.models import ColumnDataSource
from bokeh.models.glyphs import Line

# Marker colour used for each module's points.
colormap = {
    'qc': 'red',
    'databaseid': 'green',
    'serotype': 'blueviolet',
    'vf': 'crimson',
    'amr': 'firebrick',
    'stx1': 'darksalmon',
    'stx2': 'darkorange',
    'eae': 'darkgoldenrod',
    'total': 'blue',
}

# Flatten `parsed` into three parallel sequences (colour, run index, runtime).
# The number of colour entries per module is taken from the first module's list.
n_runs = len(list(parsed.values())[0])
colors = []
x_runs = []
y_times = []
for module, runtimes in parsed.items():
    colors.extend([colormap[module]] * n_runs)
    x_runs.extend(range(len(runtimes)))
    y_times.extend(runtimes)

p = figure(title = "Timings for Individual Runs",width=500,
           height=500,)
p.xaxis.axis_label = 'Run #'
p.yaxis.axis_label = 'Runtime (seconds)'

mean_total = np.mean(parsed['total'])
print(mean_total)

# Horizontal reference line at the mean total runtime, one point per run.
total_count = len(parsed['total'])
lines_source = ColumnDataSource(data=dict(
    y=[mean_total] * total_count,
    x=list(range(total_count)),
))
line = Line(x='x',y='y', line_color="#666699", line_width=2)

p.add_glyph(lines_source, line)

p.circle(x_runs, y_times,
         color=colors, fill_alpha=0.2, size=10)

show(p)


129.53

In [6]:
# Mean runtime of every module across all runs in `parsed`.
avgs = dict((module, np.mean(runtimes)) for module, runtimes in parsed.items())
print(avgs)


{'databaseid': 35.38, 'amr': 93.78, 'vf': 4.24, 'stx1': 4.26, 'stx2': 2.88, 'eae': 28.83, 'serotype': 6.24, 'qc': 0.69, 'total': 129.53}

In [7]:
# Plot a histogram of one module's runtimes.
hist_module = 'amr'  # key of `parsed` whose runtimes are plotted
x = np.array(parsed[hist_module])
print(x.mean())
# density=True normalises the bars so the histogram integrates to 1.
hist, edges = np.histogram(x, density=True, bins=50)

# The title is built from hist_module so it always names the data actually plotted.
p1 = figure(title="Histogram of %s Runtimes per Genome" % hist_module.upper(),
            tools="save",)

# One rectangle per bin: consecutive edges give the left/right sides.
p1.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
        fill_color="blue", line_color="#033649", alpha=0.5)

show(p1)


93.78

In [8]:
# Split the total runtimes at a cutoff of 140 and compare the mean of each group.
cutoff = 140
total_runtimes = parsed['total']
la = list(filter(lambda runtime: runtime < cutoff, total_runtimes))
lb = list(filter(lambda runtime: runtime >= cutoff, total_runtimes))
print(np.mean(la), np.mean(lb))


(95.9322033898305, 177.8780487804878)

In [9]:
# Raw data for the batched runs.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
# The pickled object is not a plain container (it exposes `.data`), so its
# class presumably has to be importable here -- TODO confirm.
with open('2018-04-11-10-26-48-900902_spfy_class_11.p', 'rb') as fh:
    raws_batch = pickle.load(fh)
data_batch = raws_batch.data
type(data_batch)


Out[9]:
list

In [10]:
# Inspect the raw timing records stored at index 1 of data_batch.
# Keys have the form '<fasta path>|<job name>', plus one closing 'total' entry.
data_batch[1]


Out[10]:
[{u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_turtle': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:18 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_amr_dict': [u'Wed, 11 Apr 2018 14:32:09 GMT',
   u'Wed, 11 Apr 2018 14:32:09 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_id': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:18 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_ectyper_vf': [u'Wed, 11 Apr 2018 14:30:18 GMT',
   u'Wed, 11 Apr 2018 14:30:21 GMT',
   3.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_amr_beautify': [u'Wed, 11 Apr 2018 14:32:09 GMT',
   u'Wed, 11 Apr 2018 14:32:10 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_ectyper_beautify_serotype': [u'Wed, 11 Apr 2018 14:30:24 GMT',
   u'Wed, 11 Apr 2018 14:30:24 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_ectyper_beautify_vf': [u'Wed, 11 Apr 2018 14:30:21 GMT',
   u'Wed, 11 Apr 2018 14:30:21 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_amr': [u'Wed, 11 Apr 2018 14:30:18 GMT',
   u'Wed, 11 Apr 2018 14:32:09 GMT',
   111.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_ectyper_datastruct_vf': [u'Wed, 11 Apr 2018 14:30:21 GMT',
   u'Wed, 11 Apr 2018 14:30:21 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_ectyper_datastruct_serotype': [u'Wed, 11 Apr 2018 14:30:24 GMT',
   u'Wed, 11 Apr 2018 14:30:24 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_ectyper_serotype': [u'Wed, 11 Apr 2018 14:30:18 GMT',
   u'Wed, 11 Apr 2018 14:30:24 GMT',
   6.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_amr_datastruct': [u'Wed, 11 Apr 2018 14:32:09 GMT',
   u'Wed, 11 Apr 2018 14:32:09 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6411AA_AS.fasta|job_qc': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:17 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_turtle': [u'Wed, 11 Apr 2018 14:30:18 GMT',
   u'Wed, 11 Apr 2018 14:30:19 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_amr_dict': [u'Wed, 11 Apr 2018 14:39:20 GMT',
   u'Wed, 11 Apr 2018 14:39:20 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_id': [u'Wed, 11 Apr 2018 14:30:36 GMT',
   u'Wed, 11 Apr 2018 14:30:37 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_ectyper_vf': [u'Wed, 11 Apr 2018 14:30:44 GMT',
   u'Wed, 11 Apr 2018 14:30:48 GMT',
   4.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_amr_beautify': [u'Wed, 11 Apr 2018 14:39:20 GMT',
   u'Wed, 11 Apr 2018 14:39:20 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_ectyper_beautify_serotype': [u'Wed, 11 Apr 2018 14:30:43 GMT',
   u'Wed, 11 Apr 2018 14:30:43 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_ectyper_beautify_vf': [u'Wed, 11 Apr 2018 14:30:48 GMT',
   u'Wed, 11 Apr 2018 14:30:49 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_amr': [u'Wed, 11 Apr 2018 14:37:33 GMT',
   u'Wed, 11 Apr 2018 14:39:20 GMT',
   107.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_ectyper_datastruct_vf': [u'Wed, 11 Apr 2018 14:30:48 GMT',
   u'Wed, 11 Apr 2018 14:30:50 GMT',
   2.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_ectyper_datastruct_serotype': [u'Wed, 11 Apr 2018 14:30:43 GMT',
   u'Wed, 11 Apr 2018 14:30:43 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_ectyper_serotype': [u'Wed, 11 Apr 2018 14:30:37 GMT',
   u'Wed, 11 Apr 2018 14:30:43 GMT',
   6.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_amr_datastruct': [u'Wed, 11 Apr 2018 14:39:20 GMT',
   u'Wed, 11 Apr 2018 14:39:20 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA2347AA_AS.fasta|job_qc': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:18 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_turtle': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:18 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_amr_dict': [u'Wed, 11 Apr 2018 14:35:42 GMT',
   u'Wed, 11 Apr 2018 14:35:42 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_id': [u'Wed, 11 Apr 2018 14:30:24 GMT',
   u'Wed, 11 Apr 2018 14:30:26 GMT',
   2.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_ectyper_vf': [u'Wed, 11 Apr 2018 14:30:29 GMT',
   u'Wed, 11 Apr 2018 14:30:32 GMT',
   3.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_amr_beautify': [u'Wed, 11 Apr 2018 14:35:42 GMT',
   u'Wed, 11 Apr 2018 14:35:43 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_ectyper_beautify_serotype': [u'Wed, 11 Apr 2018 14:30:32 GMT',
   u'Wed, 11 Apr 2018 14:30:33 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_ectyper_beautify_vf': [u'Wed, 11 Apr 2018 14:30:32 GMT',
   u'Wed, 11 Apr 2018 14:30:33 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_amr': [u'Wed, 11 Apr 2018 14:33:56 GMT',
   u'Wed, 11 Apr 2018 14:35:42 GMT',
   106.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_ectyper_datastruct_vf': [u'Wed, 11 Apr 2018 14:30:32 GMT',
   u'Wed, 11 Apr 2018 14:30:34 GMT',
   2.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_ectyper_datastruct_serotype': [u'Wed, 11 Apr 2018 14:30:32 GMT',
   u'Wed, 11 Apr 2018 14:30:32 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_ectyper_serotype': [u'Wed, 11 Apr 2018 14:30:26 GMT',
   u'Wed, 11 Apr 2018 14:30:32 GMT',
   6.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_amr_datastruct': [u'Wed, 11 Apr 2018 14:35:42 GMT',
   u'Wed, 11 Apr 2018 14:35:42 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1983AA_AS.fasta|job_qc': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:17 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_turtle': [u'Wed, 11 Apr 2018 14:30:18 GMT',
   u'Wed, 11 Apr 2018 14:30:18 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_amr_dict': [u'Wed, 11 Apr 2018 14:37:33 GMT',
   u'Wed, 11 Apr 2018 14:37:33 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_id': [u'Wed, 11 Apr 2018 14:30:29 GMT',
   u'Wed, 11 Apr 2018 14:30:33 GMT',
   4.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_ectyper_vf': [u'Wed, 11 Apr 2018 14:30:38 GMT',
   u'Wed, 11 Apr 2018 14:30:41 GMT',
   3.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_amr_beautify': [u'Wed, 11 Apr 2018 14:37:33 GMT',
   u'Wed, 11 Apr 2018 14:37:34 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_ectyper_beautify_serotype': [u'Wed, 11 Apr 2018 14:30:40 GMT',
   u'Wed, 11 Apr 2018 14:30:40 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_ectyper_beautify_vf': [u'Wed, 11 Apr 2018 14:30:41 GMT',
   u'Wed, 11 Apr 2018 14:30:42 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_amr': [u'Wed, 11 Apr 2018 14:35:45 GMT',
   u'Wed, 11 Apr 2018 14:37:33 GMT',
   108.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_ectyper_datastruct_vf': [u'Wed, 11 Apr 2018 14:30:41 GMT',
   u'Wed, 11 Apr 2018 14:30:42 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_ectyper_datastruct_serotype': [u'Wed, 11 Apr 2018 14:30:40 GMT',
   u'Wed, 11 Apr 2018 14:30:40 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_ectyper_serotype': [u'Wed, 11 Apr 2018 14:30:33 GMT',
   u'Wed, 11 Apr 2018 14:30:40 GMT',
   7.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_amr_datastruct': [u'Wed, 11 Apr 2018 14:37:33 GMT',
   u'Wed, 11 Apr 2018 14:37:33 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA6386AA_AS.fasta|job_qc': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:17 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_turtle': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:18 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_amr_dict': [u'Wed, 11 Apr 2018 14:32:11 GMT',
   u'Wed, 11 Apr 2018 14:32:11 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_id': [u'Wed, 11 Apr 2018 14:30:18 GMT',
   u'Wed, 11 Apr 2018 14:30:19 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_ectyper_vf': [u'Wed, 11 Apr 2018 14:30:21 GMT',
   u'Wed, 11 Apr 2018 14:30:23 GMT',
   2.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_amr_beautify': [u'Wed, 11 Apr 2018 14:32:11 GMT',
   u'Wed, 11 Apr 2018 14:32:12 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_ectyper_beautify_serotype': [u'Wed, 11 Apr 2018 14:30:24 GMT',
   u'Wed, 11 Apr 2018 14:30:25 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_ectyper_beautify_vf': [u'Wed, 11 Apr 2018 14:30:23 GMT',
   u'Wed, 11 Apr 2018 14:30:23 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_amr': [u'Wed, 11 Apr 2018 14:30:19 GMT',
   u'Wed, 11 Apr 2018 14:32:11 GMT',
   112.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_ectyper_datastruct_vf': [u'Wed, 11 Apr 2018 14:30:23 GMT',
   u'Wed, 11 Apr 2018 14:30:23 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_ectyper_datastruct_serotype': [u'Wed, 11 Apr 2018 14:30:24 GMT',
   u'Wed, 11 Apr 2018 14:30:25 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_ectyper_serotype': [u'Wed, 11 Apr 2018 14:30:19 GMT',
   u'Wed, 11 Apr 2018 14:30:24 GMT',
   5.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_amr_datastruct': [u'Wed, 11 Apr 2018 14:32:11 GMT',
   u'Wed, 11 Apr 2018 14:32:11 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA0435AA_AS.fasta|job_qc': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:17 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_turtle': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:18 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_amr_dict': [u'Wed, 11 Apr 2018 14:33:56 GMT',
   u'Wed, 11 Apr 2018 14:33:56 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_id': [u'Wed, 11 Apr 2018 14:30:24 GMT',
   u'Wed, 11 Apr 2018 14:30:24 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_ectyper_vf': [u'Wed, 11 Apr 2018 14:30:27 GMT',
   u'Wed, 11 Apr 2018 14:30:29 GMT',
   2.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_amr_beautify': [u'Wed, 11 Apr 2018 14:33:56 GMT',
   u'Wed, 11 Apr 2018 14:33:57 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_ectyper_beautify_serotype': [u'Wed, 11 Apr 2018 14:30:30 GMT',
   u'Wed, 11 Apr 2018 14:30:31 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_ectyper_beautify_vf': [u'Wed, 11 Apr 2018 14:30:29 GMT',
   u'Wed, 11 Apr 2018 14:30:30 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_amr': [u'Wed, 11 Apr 2018 14:32:11 GMT',
   u'Wed, 11 Apr 2018 14:33:56 GMT',
   105.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_ectyper_datastruct_vf': [u'Wed, 11 Apr 2018 14:30:29 GMT',
   u'Wed, 11 Apr 2018 14:30:31 GMT',
   2.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_ectyper_datastruct_serotype': [u'Wed, 11 Apr 2018 14:30:30 GMT',
   u'Wed, 11 Apr 2018 14:30:31 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_ectyper_serotype': [u'Wed, 11 Apr 2018 14:30:24 GMT',
   u'Wed, 11 Apr 2018 14:30:30 GMT',
   6.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_amr_datastruct': [u'Wed, 11 Apr 2018 14:33:56 GMT',
   u'Wed, 11 Apr 2018 14:33:56 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA6110AA_AS.fasta|job_qc': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:17 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_turtle': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:18 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_amr_dict': [u'Wed, 11 Apr 2018 14:33:56 GMT',
   u'Wed, 11 Apr 2018 14:33:56 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_id': [u'Wed, 11 Apr 2018 14:30:19 GMT',
   u'Wed, 11 Apr 2018 14:30:24 GMT',
   5.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_ectyper_vf': [u'Wed, 11 Apr 2018 14:30:24 GMT',
   u'Wed, 11 Apr 2018 14:30:27 GMT',
   3.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_amr_beautify': [u'Wed, 11 Apr 2018 14:33:56 GMT',
   u'Wed, 11 Apr 2018 14:33:56 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_ectyper_beautify_serotype': [u'Wed, 11 Apr 2018 14:30:30 GMT',
   u'Wed, 11 Apr 2018 14:30:30 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_ectyper_beautify_vf': [u'Wed, 11 Apr 2018 14:30:27 GMT',
   u'Wed, 11 Apr 2018 14:30:27 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_amr': [u'Wed, 11 Apr 2018 14:32:09 GMT',
   u'Wed, 11 Apr 2018 14:33:56 GMT',
   107.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_ectyper_datastruct_vf': [u'Wed, 11 Apr 2018 14:30:27 GMT',
   u'Wed, 11 Apr 2018 14:30:28 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_ectyper_datastruct_serotype': [u'Wed, 11 Apr 2018 14:30:30 GMT',
   u'Wed, 11 Apr 2018 14:30:30 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_ectyper_serotype': [u'Wed, 11 Apr 2018 14:30:24 GMT',
   u'Wed, 11 Apr 2018 14:30:30 GMT',
   6.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_amr_datastruct': [u'Wed, 11 Apr 2018 14:33:56 GMT',
   u'Wed, 11 Apr 2018 14:33:56 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA9983AA_AS.fasta|job_qc': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:17 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_turtle': [u'Wed, 11 Apr 2018 14:30:18 GMT',
   u'Wed, 11 Apr 2018 14:30:18 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_amr_dict': [u'Wed, 11 Apr 2018 14:37:31 GMT',
   u'Wed, 11 Apr 2018 14:37:31 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_id': [u'Wed, 11 Apr 2018 14:30:28 GMT',
   u'Wed, 11 Apr 2018 14:30:29 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_ectyper_vf': [u'Wed, 11 Apr 2018 14:30:35 GMT',
   u'Wed, 11 Apr 2018 14:30:38 GMT',
   3.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_amr_beautify': [u'Wed, 11 Apr 2018 14:37:31 GMT',
   u'Wed, 11 Apr 2018 14:37:31 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_ectyper_beautify_serotype': [u'Wed, 11 Apr 2018 14:30:35 GMT',
   u'Wed, 11 Apr 2018 14:30:36 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_ectyper_beautify_vf': [u'Wed, 11 Apr 2018 14:30:38 GMT',
   u'Wed, 11 Apr 2018 14:30:38 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_amr': [u'Wed, 11 Apr 2018 14:35:42 GMT',
   u'Wed, 11 Apr 2018 14:37:31 GMT',
   109.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_ectyper_datastruct_vf': [u'Wed, 11 Apr 2018 14:30:38 GMT',
   u'Wed, 11 Apr 2018 14:30:39 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_ectyper_datastruct_serotype': [u'Wed, 11 Apr 2018 14:30:35 GMT',
   u'Wed, 11 Apr 2018 14:30:36 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_ectyper_serotype': [u'Wed, 11 Apr 2018 14:30:29 GMT',
   u'Wed, 11 Apr 2018 14:30:35 GMT',
   6.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_amr_datastruct': [u'Wed, 11 Apr 2018 14:37:31 GMT',
   u'Wed, 11 Apr 2018 14:37:31 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA3586AA_AS.fasta|job_qc': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:17 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_turtle': [u'Wed, 11 Apr 2018 14:30:18 GMT',
   u'Wed, 11 Apr 2018 14:30:19 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_amr_dict': [u'Wed, 11 Apr 2018 14:39:17 GMT',
   u'Wed, 11 Apr 2018 14:39:17 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_id': [u'Wed, 11 Apr 2018 14:30:33 GMT',
   u'Wed, 11 Apr 2018 14:30:36 GMT',
   3.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_ectyper_vf': [u'Wed, 11 Apr 2018 14:30:41 GMT',
   u'Wed, 11 Apr 2018 14:30:44 GMT',
   3.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_amr_beautify': [u'Wed, 11 Apr 2018 14:39:17 GMT',
   u'Wed, 11 Apr 2018 14:39:17 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_ectyper_beautify_serotype': [u'Wed, 11 Apr 2018 14:30:42 GMT',
   u'Wed, 11 Apr 2018 14:30:42 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_ectyper_beautify_vf': [u'Wed, 11 Apr 2018 14:30:44 GMT',
   u'Wed, 11 Apr 2018 14:30:45 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_amr': [u'Wed, 11 Apr 2018 14:37:31 GMT',
   u'Wed, 11 Apr 2018 14:39:17 GMT',
   106.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_ectyper_datastruct_vf': [u'Wed, 11 Apr 2018 14:30:44 GMT',
   u'Wed, 11 Apr 2018 14:30:45 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_ectyper_datastruct_serotype': [u'Wed, 11 Apr 2018 14:30:42 GMT',
   u'Wed, 11 Apr 2018 14:30:42 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_ectyper_serotype': [u'Wed, 11 Apr 2018 14:30:36 GMT',
   u'Wed, 11 Apr 2018 14:30:42 GMT',
   6.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_amr_datastruct': [u'Wed, 11 Apr 2018 14:39:17 GMT',
   u'Wed, 11 Apr 2018 14:39:17 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_CA1477AA_AS.fasta|job_qc': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:18 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_turtle': [u'Wed, 11 Apr 2018 14:30:18 GMT',
   u'Wed, 11 Apr 2018 14:30:18 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_amr_dict': [u'Wed, 11 Apr 2018 14:35:45 GMT',
   u'Wed, 11 Apr 2018 14:35:45 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_id': [u'Wed, 11 Apr 2018 14:30:26 GMT',
   u'Wed, 11 Apr 2018 14:30:28 GMT',
   2.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_ectyper_vf': [u'Wed, 11 Apr 2018 14:30:32 GMT',
   u'Wed, 11 Apr 2018 14:30:35 GMT',
   3.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_amr_beautify': [u'Wed, 11 Apr 2018 14:35:45 GMT',
   u'Wed, 11 Apr 2018 14:35:45 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_ectyper_beautify_serotype': [u'Wed, 11 Apr 2018 14:30:34 GMT',
   u'Wed, 11 Apr 2018 14:30:35 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_ectyper_beautify_vf': [u'Wed, 11 Apr 2018 14:30:35 GMT',
   u'Wed, 11 Apr 2018 14:30:35 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_amr': [u'Wed, 11 Apr 2018 14:33:56 GMT',
   u'Wed, 11 Apr 2018 14:35:45 GMT',
   109.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_ectyper_datastruct_vf': [u'Wed, 11 Apr 2018 14:30:35 GMT',
   u'Wed, 11 Apr 2018 14:30:36 GMT',
   1.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_ectyper_datastruct_serotype': [u'Wed, 11 Apr 2018 14:30:34 GMT',
   u'Wed, 11 Apr 2018 14:30:34 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_ectyper_serotype': [u'Wed, 11 Apr 2018 14:30:28 GMT',
   u'Wed, 11 Apr 2018 14:30:34 GMT',
   6.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_amr_datastruct': [u'Wed, 11 Apr 2018 14:35:45 GMT',
   u'Wed, 11 Apr 2018 14:35:45 GMT',
   0.0]},
 {u'/datastore/2018-04-11-14-30-16-888218/2018-04-11-14-30-16-888218-ESC_BA1435AA_AS.fasta|job_qc': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:30:17 GMT',
   0.0]},
 {u'total': [u'Wed, 11 Apr 2018 14:30:17 GMT',
   u'Wed, 11 Apr 2018 14:39:20 GMT',
   543.0]}]

In [11]:
import pandas as pd
def group_analyses(raws):
    '''Tabulate runtimes per module.

    Args:
        raws: list of single-entry dicts. Each key is either 'total' or
            '<filename>|<analysis>', and each value is a sequence whose
            third element (index 2) is the runtime that gets tabulated.

    Returns:
        A (total, df) tuple. total is the list of runtimes stored under the
        'total' key; df is a pandas DataFrame with one column per analysis
        name, holding that analysis' runtimes in input order.

    Raises:
        AssertionError: if raws is not a list, or an entry does not hold
            exactly one key.
        ValueError: if a non-'total' key has no '|' separator, or if the
            analyses do not all occur the same number of times (pandas
            needs equal-length columns).
        KeyError: if raws contains no 'total' entry.
    '''
    assert isinstance(raws, list)
    # "grouped" is on a per job basis: analysis name -> list of runtimes.
    grouped = {}
    for outerd in raws:
        assert len(outerd) == 1
        # next(iter(...)) works on both Python 2 and 3; indexing
        # dict.keys()/dict.values() only works on Python 2.
        key, record = next(iter(outerd.items()))
        if key == 'total':
            analysis = key
        else:
            # Split on the last '|' only, so a '|' inside the file path
            # cannot break the unpacking.
            _, analysis = key.rsplit('|', 1)
        grouped.setdefault(analysis, []).append(record[2])
    total = grouped.pop('total')
    df = pd.DataFrame(data=grouped)
    return total, df

In [40]:
# Total runtime spent in each analysis module for every batch size.
# Values are collected as recorded and then converted to minutes for plotting.
SECONDS_PER_MINUTE = 60.0
batches = raws_batch.list_sizes # list_sizes
plot_data = {
    'Batches': [str(b) for b in batches],
    'QC': [],
    'ID': [],
    'VF': [],
    'Serotype': [],
    'AMR': []
}
# Job columns (as tabulated by group_analyses) rolled up into each plotted module.
module_jobs = {
    'QC': ['job_qc'],
    'ID': ['job_id','job_turtle'],
    'VF': ['job_ectyper_beautify_vf','job_ectyper_datastruct_vf','job_ectyper_vf'],
    'Serotype': ['job_ectyper_beautify_serotype','job_ectyper_datastruct_serotype','job_ectyper_serotype'],
    'AMR': ['job_amr','job_amr_beautify','job_amr_datastruct','job_amr_dict'],
}
# Overall 'total' runtime reported for each batch.
avt = []
for batch_raws in data_batch:
    # group_analyses hands back the 'total' entries separately from the per-job table.
    total, df = group_analyses(batch_raws)
    for module, jobs in module_jobs.items():
        # Add up the module's job columns row by row, then over all rows of the batch.
        plot_data[module].append(df[jobs].sum(axis=1).sum())
    # Only the first 'total' entry of a batch is used.
    avt.append(total[0])
print(plot_data)
print(avt)
print('cat')
for name, values in plot_data.items():
    print(name,values)
    if name == 'Batches':
        # Batch labels are strings, not runtimes.
        continue
    # Convert in place so plot_data keeps the same list objects.
    values[:] = [float(v)/SECONDS_PER_MINUTE for v in values]
print(plot_data)
avt = [t/SECONDS_PER_MINUTE for t in avt]
print(avt)


{'Batches': ['1', '10', '20', '30', '40', '50', '60', '70', '80', '90', '100'], 'VF': [5.0, 45.0, 122.0, 175.0, 234.0, 309.0, 379.0, 423.0, 520.0, 548.0, 639.0], 'Serotype': [8.0, 68.0, 185.0, 295.0, 455.0, 647.0, 734.0, 904.0, 1055.0, 1132.0, 1269.0], 'QC': [6.0, 2.0, 22.0, 31.0, 44.0, 54.0, 69.0, 80.0, 103.0, 114.0, 117.0], 'AMR': [94.0, 1085.0, 2171.0, 3127.0, 4315.0, 5517.0, 6291.0, 7603.0, 8787.0, 9777.0, 10795.0], 'ID': [4.0, 27.0, 62.0, 70.0, 100.0, 117.0, 145.0, 154.0, 198.0, 172.0, 223.0]}
[104.0, 543.0, 1090.0, 1572.0, 2153.0, 2740.0, 3138.0, 3782.0, 4366.0, 4864.0, 5344.0]
cat
('Batches', ['1', '10', '20', '30', '40', '50', '60', '70', '80', '90', '100'])
('VF', [5.0, 45.0, 122.0, 175.0, 234.0, 309.0, 379.0, 423.0, 520.0, 548.0, 639.0])
('Serotype', [8.0, 68.0, 185.0, 295.0, 455.0, 647.0, 734.0, 904.0, 1055.0, 1132.0, 1269.0])
('QC', [6.0, 2.0, 22.0, 31.0, 44.0, 54.0, 69.0, 80.0, 103.0, 114.0, 117.0])
('AMR', [94.0, 1085.0, 2171.0, 3127.0, 4315.0, 5517.0, 6291.0, 7603.0, 8787.0, 9777.0, 10795.0])
('ID', [4.0, 27.0, 62.0, 70.0, 100.0, 117.0, 145.0, 154.0, 198.0, 172.0, 223.0])
{'Batches': ['1', '10', '20', '30', '40', '50', '60', '70', '80', '90', '100'], 'VF': [0.08333333333333333, 0.75, 2.033333333333333, 2.9166666666666665, 3.9, 5.15, 6.316666666666666, 7.05, 8.666666666666666, 9.133333333333333, 10.65], 'Serotype': [0.13333333333333333, 1.1333333333333333, 3.0833333333333335, 4.916666666666667, 7.583333333333333, 10.783333333333333, 12.233333333333333, 15.066666666666666, 17.583333333333332, 18.866666666666667, 21.15], 'QC': [0.1, 0.03333333333333333, 0.36666666666666664, 0.5166666666666667, 0.7333333333333333, 0.9, 1.15, 1.3333333333333333, 1.7166666666666666, 1.9, 1.95], 'AMR': [1.5666666666666667, 18.083333333333332, 36.18333333333333, 52.11666666666667, 71.91666666666667, 91.95, 104.85, 126.71666666666667, 146.45, 162.95, 179.91666666666666], 'ID': [0.06666666666666667, 0.45, 1.0333333333333334, 1.1666666666666667, 1.6666666666666667, 1.95, 2.4166666666666665, 2.566666666666667, 3.3, 2.8666666666666667, 3.716666666666667]}
[1.7333333333333334, 9.05, 18.166666666666668, 26.2, 35.88333333333333, 45.666666666666664, 52.3, 63.03333333333333, 72.76666666666667, 81.06666666666666, 89.06666666666666]

In [35]:
# Data.
# Modules drawn as stacked segments; each name is also a key of plot_data.
tasks = ["QC", "ID", "VF", "Serotype", "AMR"] # subtasks
# One colour per task, listed in the same order as tasks.
colors = [colormap[color_key]
          for color_key in ('qc', 'databaseid', 'vf', 'serotype', 'amr')]

# Wrap the per-module runtimes so the stacked bars can read them by column name.
source = ColumnDataSource(data=plot_data)

In [42]:
# Plot.
# Stacked bars: runtime of each analysis module per batch size.
# Line overlay: the overall total runtime per batch (avt).
batch_labels = [str(size) for size in batches]

p = figure(
    x_range=batch_labels,
    plot_height=350,
    title="Runtimes for Analysis Modules",
    x_axis_label="#Total Batch Size",
    y_axis_label="Total Runtime per Batch (minutes)",
)

# One stacked segment per task; value(...) makes each legend entry a literal
# label instead of a column lookup.
legend_labels = [value(task) for task in tasks]
p.vbar_stack(
    tasks,
    x='Batches',
    width=0.9,
    color=colors,
    source=source,
    legend=legend_labels,
    name=tasks,
    alpha=0.5,
)

# Axis, grid and legend cosmetics.
p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None
p.legend.location = "center_left"
p.legend.orientation = "vertical"

# The line gets its own copy of the labels so its data does not alias the
# list used for the axis factors.
p.line(x=list(batch_labels), y=avt, color="cyan", line_width=2)

show(p)



In [15]:
# Calculate the average runtime of each analysis module per row of a batch
# (the table has one row per file, going by the note inside the loop).
batches = raws_batch.list_sizes # list_sizes
# NOTE(review): pdata is filled here but no later cell visible in this notebook reads it.
pdata = {
    'Batches': [str(b) for b in batches],
    'QC': [],
    'ID': [],
    'VF': [],
    'Serotype': [],
    'AMR': []
}
# enumerate supplies the position used to look up the matching batch size.
for i, batch_raws in enumerate(data_batch):
    # group_analyses hands back the 'total' entries separately from the per-job table.
    total, df = group_analyses(batch_raws)
    # Total amount of time spent per task (column) per file (row).
    amr = df[['job_amr','job_amr_beautify','job_amr_datastruct','job_amr_dict']].sum(axis=1)
    serotype = df[['job_ectyper_beautify_serotype','job_ectyper_datastruct_serotype','job_ectyper_serotype']].sum(axis=1)
    vf = df[['job_ectyper_beautify_vf','job_ectyper_datastruct_vf','job_ectyper_vf']].sum(axis=1)
    dbid = df[['job_id','job_turtle']].sum(axis=1)
    qc = df[['job_qc']].sum(axis=1)
    # Cross-check: AMR time summed over the batch, divided by the nominal batch size.
    samr = amr.sum(axis=0)
    print(samr/batches[i])
    # Mean over the rows of the batch for each module.
    pdata['QC'].append(qc.mean())
    pdata['ID'].append(dbid.mean())
    pdata['VF'].append(vf.mean())
    pdata['Serotype'].append(serotype.mean())
    pdata['AMR'].append(amr.mean())
    print(total)


94.0
[104.0]
108.5
[543.0]
108.55
[1090.0]
104.23333333333333
[1572.0]
107.875
[2153.0]
110.34
[2740.0]
104.85
[3138.0]
108.61428571428571
[3782.0]
109.8375
[4366.0]
108.63333333333334
[4864.0]
107.95
[5344.0]

In [16]:
# Mean of the per-batch totals in avt, leaving out its first entry.
# NOTE(review): the recorded output (2959.2) matches avt while it still held
# the raw second values; the totals cell rescales avt to minutes, so a fresh
# run prints a different number - re-run to refresh.
totals_after_first = avt[1:]
print(np.mean(totals_after_first))


2959.2

In [ ]: