In [25]:
from __future__ import absolute_import
import sys
sys.path.append("../lib")
import pandas as pd
import matplotlib.pyplot as plt
from database import Database, Result
In [13]:
a = """
sdf {aval}
""".format(**{"aval":"hello"})
print(a)
In [76]:
db = Database()
def runTimes(start_time, end_time, db):
times = {
"starttime": start_time,
"endtime": end_time
}
sql = """
select
a.job_id, a.starttime as mrbayes_starttime, a.timestamp as mrbayes_timestamp,
b.starttime as clustalo_starttime, b.timestamp as clustalo_timestamp,
TIMESTAMPDIFF(SECOND, a.starttime, a.timestamp) as mrbayes_time,
TIMESTAMPDIFF(SECOND, b.starttime, b.timestamp) as clustalo_time
from (select * from results
where prog='mrbayes' and starttime > '{starttime}' and timestamp < '{endtime}'
) a
join (select * from results
where prog='clustalo' and starttime > '{starttime}' and timestamp < '{endtime}'
) b
on a.job_id = b.job_id
order by a.timestamp desc
""".format(**times)
return pd.read_sql_query(sql, db.conn)
# rq = {"order":"squamata","data.dwc:associatedSequences": {"type":"exists"}}
# limit = 30
# 4 sequences
seq_4 = runTimes("2015-12-09 00:21:00", "2015-12-09 00:28:00", db)
# rq = {"order":"squamata","data.dwc:associatedSequences": {"type":"exists"}}
# limit = 100
# 5 sequences
seq_5 = runTimes("2015-12-09 00:44:15", "2015-12-09 00:46:30", db)
# rq = {"order":"squamata","data.dwc:associatedSequences": {"type":"exists"}}
# limit = 1000
# 6 sequences
seq_6 = runTimes("2015-12-09 01:00:00", "2015-12-09 01:03:00", db)
# rq = {"order":"squamata","data.dwc:associatedSequences": {"type":"exists"}}
# limit = 1000
# 7 sequences
seq_7 = runTimes("2015-12-09 01:05:00", "2015-12-09 01:09:00", db)
# rq = {"order":"squamata","data.dwc:associatedSequences": {"type":"exists"}}
# limit = 1000
# 8 sequences
seq_8 = runTimes("2015-12-09 01:14:00", "2015-12-09 01:17:00", db)
# rq = {"order":"squamata","data.dwc:associatedSequences": {"type":"exists"}}
# limit = 1000
# 16 sequences
seq_16 = runTimes("2015-12-09 01:44:00", "2015-12-09 01:50:00", db)
print seq_16
In [77]:
# the slices above need to be averaged together in to a new df
df = pd.DataFrame()
#df["seq_4_mrbayes"]
mean_clustalo = [seq_4["clustalo_time"].mean(), seq_5["clustalo_time"].mean(),
seq_6["clustalo_time"].mean(), seq_7["clustalo_time"].mean(),
seq_8["clustalo_time"].mean(), seq_16["clustalo_time"].mean()]
mean_mrbayes = [seq_4["mrbayes_time"].mean(), seq_5["mrbayes_time"].mean(),
seq_6["mrbayes_time"].mean(), seq_7["mrbayes_time"].mean(),
seq_8["mrbayes_time"].mean(), seq_16["mrbayes_time"].mean()]
df["labels"] = ["4", "5", "6", "7", "8", "16"]
df["mean_clustalo"] = mean_clustalo
df["mean_mrbayes"] = mean_mrbayes
#print df
In [80]:
%matplotlib inline
#http://chrisalbon.com/python/matplotlib_stacked_bar_plot.html
f, ax1 = plt.subplots(1, figsize=(10,5))
bar_width = 0.75
bar_l = [i+1 for i in range(len(df['labels']))]
tick_pos = [i+(bar_width/2) for i in bar_l]
ax1.bar(bar_l,
# using the pre_score data
df['mean_clustalo'],
# set the width
width=bar_width,
# with the label pre score
label='Clustal O',
# with alpha 0.5
alpha=0.5,
# with color
color='#F4561D')
# Create a bar plot, in position bar_1
ax1.bar(bar_l,
# using the mid_score data
df['mean_mrbayes'],
# set the width
width=bar_width,
# with pre_score on the bottom
bottom=df['mean_clustalo'],
# with the label mid score
label='MrBayes',
# with alpha 0.5
alpha=0.5,
# with color
color='#F1911E')
plt.xticks(tick_pos, df['labels'])
ax1.set_ylabel("Seconds Used by Worker")
ax1.set_xlabel("Number of Sequences")
plt.legend(loc='upper left')
plt.xlim([min(tick_pos)-bar_width, max(tick_pos)+bar_width])
Out[80]:
In [ ]: