In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import glob
import numpy as np
import math

%matplotlib inline

In [13]:
# Two empty frames sharing one column layout: `df` receives the aggregated
# values, `df_err` the matching error-bar values.

df = pd.DataFrame(columns=[
    'Producer execution time',
    'Consumer execution time',
    'Producer Throughput',
    'Consumer Throughput',
])
df_err = pd.DataFrame(columns=df.columns)

In [14]:
def get_exec(path):
    """Return the producer and consumer execution times of one trial.

    Reads ``<path>/producer.txt`` and every ``<path>/consumer*.txt``. In each
    file the first line holds the start timestamp and the second line the end
    timestamp, taken from the text that follows the first ':' on the line.

    Parameters
    ----------
    path : str
        Directory containing the trial's producer and consumer files.

    Returns
    -------
    tuple of (float, float)
        ``(prod_exec_time, cons_exec_time)`` where the producer time is its
        end minus its start, and the consumer time is the latest consumer end
        minus the earliest consumer start over all consumer files.

    Raises
    ------
    ValueError
        If no ``consumer*.txt`` file exists under ``path``.
    """

    # Get producer execution time
    with open(path + '/producer.txt', 'r') as f:
        lines = f.readlines()

    prod_start = float(lines[0].strip().split(':')[1].strip())
    prod_end = float(lines[1].strip().split(':')[1].strip())
    prod_exec_time = prod_end - prod_start

    # Get consumer execution time
    cons = glob.glob(path + '/consumer*.txt')
    if not cons:
        raise ValueError('No consumer*.txt files found in %s' % path)

    start_times = []
    end_times = []
    for c in cons:
        with open(c, 'r') as f:
            lines = f.readlines()

        # Only the first space-separated token after ':' is parsed as the
        # number; anything following it on the line is ignored.
        start_times.append(float(lines[0].strip().split(':')[1].strip().split(' ')[0].strip()))
        end_times.append(float(lines[1].strip().split(':')[1].strip().split(' ')[0].strip()))

    # Taking min/max over the collected values (rather than a running
    # "if not min" check) keeps a legitimate 0.0 timestamp from being treated
    # as "unset", and makes the result independent of glob ordering.
    cons_exec_time = max(end_times) - min(start_times)

    return prod_exec_time, cons_exec_time

In [25]:
# Update dataframe with data
#
# For every (workers, queues) configuration with workers >= queues, read all
# trials, then store the mean execution times and derived throughputs in `df`
# and the matching standard errors in `df_err`.

num_workers = 8
num_queues = 8
trials = 3
num_tasks = 1000000  # numerator used to turn an execution time into a throughput

dirs = glob.glob('./raw_data/workers*')

for w in [1,2,4,8,16]:
    for q in [1,2,4,8,16]:

        if w>=q:

            prod_execs = []
            cons_execs = []
            for t in range(1,trials+1):

                prod_exec, cons_exec = get_exec(path='./raw_data/workers_%s_queues_%s_trial_%s'%(w,q,t))
                prod_execs.append(prod_exec)
                cons_execs.append(cons_exec)

            # Per-trial throughputs, used only for the error estimate so the
            # throughput error bars are in throughput units rather than in
            # the units of the execution times.
            prod_tputs = float(num_tasks) / np.asarray(prod_execs, dtype=float)
            cons_tputs = float(num_tasks) / np.asarray(cons_execs, dtype=float)

            row = '%s, %s'%(w,q)

            df.loc[row] = [np.mean(prod_execs),
                           np.mean(cons_execs),
                           num_tasks/np.mean(prod_execs),
                           num_tasks/np.mean(cons_execs)]

            # Standard error of the mean: std / sqrt(number of trials).
            df_err.loc[row] = [np.std(prod_execs)/math.sqrt(trials),
                               np.std(cons_execs)/math.sqrt(trials),
                               np.std(prod_tputs)/math.sqrt(trials),
                               np.std(cons_tputs)/math.sqrt(trials)]

In [28]:
# Display the aggregated results, one row per 'workers, queues' configuration
df


Out[28]:
Producer execution time Consumer execution time Producer Throughput Consumer Throughput
1, 1 32.536667 301.803333 30734.555844 3313.416021
2, 1 32.343333 165.673333 30918.272632 6035.974405
2, 2 31.573333 159.233333 31672.297288 6280.092107
4, 1 32.123333 114.750000 31130.019676 8714.596944
4, 2 32.420000 104.330000 30845.157313 9584.970766
4, 4 34.543333 101.546667 28949.145968 9847.689072
8, 1 33.820000 92.746667 29568.302768 10782.058663
8, 2 33.790000 63.970000 29594.554566 15632.327647
8, 4 35.853333 62.250000 27891.409461 16064.257028
8, 8 36.550000 45.210000 27359.781157 22119.000241
16, 1 33.650000 94.280000 29717.682007 10606.703440
16, 2 31.730000 132.273333 31515.915518 7560.102814
16, 4 35.186667 88.833333 28419.856021 11257.035637
16, 8 35.976667 29.856667 27795.793553 33493.357265
16, 16 39.116667 14.796667 25564.550459 67582.789120

In [41]:
# Plot dataframe
#
# Bars (left y axis): mean producer/consumer execution time per configuration.
# Lines (right y axis): the corresponding throughputs.
# Error bars for both come from the matching columns of `df_err`.

fontsize = 20

ax = df.plot(kind='bar', ylim=(0,500), 
             y=['Producer execution time','Consumer execution time'], 
             title= 'Time taken to process 10^6 EnTK Task objects',
             yerr = df_err,
             fontsize=fontsize)

ax1 = df.plot(ax=ax, 
              kind='line', 
              y=['Producer Throughput', 'Consumer Throughput'],
              yerr = df_err, 
              secondary_y=True, 
              marker='o')

ax.set_xlabel('Consumers, Queues', fontsize=fontsize)
ax.set_ylabel('Time (seconds)', fontsize=fontsize)
ax.set_title(ax.get_title(), fontsize=fontsize)
ax1.set_ylabel('Throughput (tasks/sec)', fontsize=fontsize)
ax1.set_ylim(0,80000)

# Half a bar-width of padding on each side, derived from the number of rows so
# the limits stay right if configurations are added or removed.
ax.set_xlim(-0.5, len(df) - 0.5)

# Style ticks on the explicit axes objects; pyplot's xticks/yticks act on the
# "current axes", which is ambiguous once a secondary y axis exists.
plt.setp(ax.get_xticklabels(), rotation=0, fontsize=fontsize)
ax.tick_params(axis='y', labelsize=fontsize)
ax1.tick_params(axis='y', labelsize=fontsize)

# Merge the legend entries of both y axes into a single legend.
p1, l1 = ax.get_legend_handles_labels()
p2, l2 = ax1.get_legend_handles_labels()

patches = p1 + p2
labels = l1 + l2

fig = ax.get_figure()
fig.set_size_inches(18.5, 10.5)

# Anchor the legend outside the plot area, to the right of the secondary axis.
ax.legend(patches,labels, loc='center left', bbox_to_anchor=(1.15, 0.8),fontsize=fontsize)


Out[41]:
<matplotlib.legend.Legend at 0x7f628ac23750>

In [ ]: