In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import glob
import numpy as np
import math
%matplotlib inline
In [13]:
# Build two empty tables that share one column layout: `df` receives the
# per-configuration means and `df_err` the matching error-bar values.
df, df_err = (
    pd.DataFrame(columns=['Producer execution time',
                          'Consumer execution time',
                          'Producer Throughput',
                          'Consumer Throughput'])
    for _ in range(2)
)
In [14]:
def get_exec(path):
    """Return the producer and consumer execution times recorded under `path`.

    Reads `<path>/producer.txt` and every file matching `<path>/consumer*.txt`.
    In each file the first two lines are parsed as `<label>: <timestamp>`; the
    first line is taken as the start time and the second as the end time.
    In consumer files, anything after the first space that follows the
    timestamp is ignored.

    Parameters
    ----------
    path : str
        Directory holding the log files of a single trial.

    Returns
    -------
    tuple of (float, float)
        `(prod_exec_time, cons_exec_time)`: the producer's end minus its start,
        and the latest consumer end minus the earliest consumer start.

    Raises
    ------
    ValueError
        If no `consumer*.txt` file exists under `path`, or a timestamp cannot
        be parsed as a float.
    """
    # Get producer execution time
    with open(path + '/producer.txt', 'r') as f:
        lines = f.readlines()
    prod_start = float(lines[0].strip().split(':')[1].strip())
    prod_end = float(lines[1].strip().split(':')[1].strip())
    prod_exec_time = prod_end - prod_start

    # Get consumer execution time
    cons = glob.glob(path + '/consumer*.txt')
    if not cons:
        # Fail with a clear message instead of an opaque `None - None` TypeError.
        raise ValueError('No consumer*.txt files found in %s' % path)

    # Collect every start/end time and reduce with min()/max(). Tracking running
    # extremes behind a truthiness check would misread a legitimate 0.0
    # timestamp as "not set yet" and make the result depend on file order.
    start_times = []
    end_times = []
    for c in cons:
        with open(c, 'r') as f:
            lines = f.readlines()
        start_times.append(float(lines[0].strip().split(':')[1].strip().split(' ')[0].strip()))
        end_times.append(float(lines[1].strip().split(':')[1].strip().split(' ')[0].strip()))

    cons_exec_time = max(end_times) - min(start_times)
    return prod_exec_time, cons_exec_time
In [25]:
# Update dataframe with data
# NOTE: num_workers, num_queues and dirs are not read by this cell; they are
# kept because other cells may rely on them being defined.
num_workers = 8
num_queues = 8
trials = 3
dirs = glob.glob('./raw_data/workers*')

# Number of tasks used to turn an execution time into a throughput
# (the same 10^6 that appears in the plot title).
num_tasks = 1000000

for w in [1, 2, 4, 8, 16]:
    for q in [1, 2, 4, 8, 16]:
        # Only configurations with at least as many workers as queues are loaded.
        if w < q:
            continue

        prod_execs = []
        cons_execs = []
        for t in range(1, trials + 1):
            prod_exec, cons_exec = get_exec(path='./raw_data/workers_%s_queues_%s_trial_%s' % (w, q, t))
            prod_execs.append(prod_exec)
            cons_execs.append(cons_exec)

        prod_mean = np.mean(prod_execs)
        cons_mean = np.mean(cons_execs)

        # Standard error of the mean execution time over the trials.
        # (np.std uses ddof=0, i.e. the population standard deviation.)
        prod_sem = np.std(prod_execs) / math.sqrt(trials)
        cons_sem = np.std(cons_execs) / math.sqrt(trials)

        prod_tput = num_tasks / prod_mean
        cons_tput = num_tasks / cons_mean

        label = '%s, %s' % (w, q)
        df.loc[label] = [prod_mean, cons_mean, prod_tput, cons_tput]

        # The throughput error must be expressed in throughput units, not in
        # seconds. With T = N / t, first-order error propagation gives
        # sigma_T = T * sigma_t / t.
        df_err.loc[label] = [prod_sem,
                             cons_sem,
                             prod_tput * prod_sem / prod_mean,
                             cons_tput * cons_sem / cons_mean]
In [28]:
# Show the aggregated table (one row per 'w, q' label) as this cell's output.
df
Out[28]:
In [41]:
# Plot dataframe
# Font size shared by the title, axis labels, tick labels and legend below.
fontsize = 20
# Bars: the two execution-time columns of df, with error bars taken from
# df_err, on a y-range fixed to 0-500.
ax = df.plot(kind='bar', ylim=(0,500),
y=['Producer execution time','Consumer execution time'],
title= 'Time taken to process 10^6 EnTK Task objects',
yerr = df_err,
fontsize=fontsize)
# Lines with circle markers: the two throughput columns, drawn over the bars
# by passing ax=ax, and placed on a secondary y-axis (secondary_y=True).
ax1 = df.plot(ax=ax,
kind='line',
y=['Producer Throughput', 'Consumer Throughput'],
yerr = df_err,
secondary_y=True,
marker='o')
ax.set_xlabel('Consumers, Queues', fontsize=fontsize)
ax.set_ylabel('Time (seconds)', fontsize=fontsize)
# Re-apply the existing title text so it picks up the shared font size.
ax.set_title(ax.get_title(), fontsize=fontsize)
ax1.set_ylabel('Throughput (tasks/sec)', fontsize=fontsize)
ax1.set_ylim(0,80000)
# Hard-coded x-range covering bar positions 0..14; presumably sized for the
# 15 rows of df -- TODO confirm/adjust if the set of configurations changes.
ax.set_xlim(-0.5, 14.5)
# plt.xticks / plt.yticks act on pyplot's *current* axes.
# NOTE(review): it is not evident here whether that is ax or ax1 -- confirm
# that the intended axis receives the tick font size.
plt.xticks(rotation=0,fontsize=fontsize)
plt.yticks(fontsize=fontsize)
# Gather the legend entries of both axes so that a single legend lists the
# bars and the lines together.
p1, l1 = ax.get_legend_handles_labels()
p2, l2 = ax1.get_legend_handles_labels()
patches = p1 + p2
labels = l1 + l2
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
# Anchor the combined legend at x=1.15, i.e. to the right of the plot area.
ax.legend(patches,labels, loc='center left', bbox_to_anchor=(1.15, 0.8),fontsize=fontsize)
Out[41]:
In [ ]: