Imports

Import deps


In [1]:
import os
import pandas as pd

import utils

import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)


Import data


In [2]:
%%bash
ls -l data/ | grep csv


-rw-r--r-- 1 1000 1000 1426122219 Jun 16  2016 emails.csv
-rw-rw-r-- 1 1000 1000        133 Mar 12 17:13 metadata_client_UDP_r1.csv
-rw-rw-r-- 1 1000 1000        133 Mar 12 17:13 metadata_client_UDP_r2.csv
-rw-rw-r-- 1 1000 1000        133 Mar 12 17:13 metadata_client_UDP_r3.csv
-rw-rw-r-- 1 1000 1000        136 Mar 12 17:18 metadata_client_ZeroMQ_r1.csv
-rw-rw-r-- 1 1000 1000        136 Mar 12 17:18 metadata_client_ZeroMQ_r2.csv
-rw-rw-r-- 1 1000 1000        136 Mar 12 17:18 metadata_client_ZeroMQ_r3.csv
-rw-rw-r-- 1 1000 1000        133 Mar 12 17:13 metadata_server_UDP_r1.csv
-rw-rw-r-- 1 1000 1000        133 Mar 12 17:13 metadata_server_UDP_r2.csv
-rw-rw-r-- 1 1000 1000        133 Mar 12 17:13 metadata_server_UDP_r3.csv
-rw-rw-r-- 1 1000 1000        136 Mar 12 17:18 metadata_server_ZeroMQ_r1.csv
-rw-rw-r-- 1 1000 1000        136 Mar 12 17:18 metadata_server_ZeroMQ_r2.csv
-rw-rw-r-- 1 1000 1000        136 Mar 12 17:18 metadata_server_ZeroMQ_r3.csv

In [3]:
root, dirs, files = os.walk('data/').__next__()

metadata_files = [file for file in files if file.startswith('metadata')]
print(metadata_files)


['metadata_client_UDP_r2.csv', 'metadata_server_UDP_r2.csv', 'metadata_client_ZeroMQ_r2.csv', 'metadata_client_UDP_r1.csv', 'metadata_client_ZeroMQ_r1.csv', 'metadata_client_ZeroMQ_r3.csv', 'metadata_server_ZeroMQ_r1.csv', 'metadata_client_UDP_r3.csv', 'metadata_server_UDP_r3.csv', 'metadata_server_ZeroMQ_r2.csv', 'metadata_server_ZeroMQ_r3.csv', 'metadata_server_UDP_r1.csv']

In [4]:
lst_df = []

for file in metadata_files:
    lst_df.append(pd.read_csv(root + file))

df = pd.concat(lst_df)
df.reset_index(inplace=True)

df.Time = pd.to_timedelta(df.Time)
df['SizeBytes'] = df['Size(MB)'].apply(utils.parseSize)

df = df[['Protocol', 'Type', 'Location',
         'Setup', 'Mode', 'Rounds', 'Time',
         'Count', 'SizeBytes', 'Size(MB)']]

df = df.sort_values('Rounds').sort_values('Protocol')
df.reset_index(inplace=True, drop=True)

df.info()
df.head(df.shape[0])


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 10 columns):
Protocol     12 non-null object
Type         12 non-null object
Location     12 non-null object
Setup        12 non-null object
Mode         12 non-null object
Rounds       12 non-null int64
Time         12 non-null timedelta64[ns]
Count        12 non-null int64
SizeBytes    12 non-null int64
Size(MB)     12 non-null object
dtypes: int64(3), object(6), timedelta64[ns](1)
memory usage: 1.0+ KB
Out[4]:
Protocol Type Location Setup Mode Rounds Time Count SizeBytes Size(MB)
0 UDP client localhost single single node 1 00:00:07.183246 273151 9540000 9.54 MB
1 UDP server localhost single single node 1 00:00:12.844457 235536 8220000 8.22 MB
2 UDP client localhost single single node 2 00:00:07.008885 273151 9540000 9.54 MB
3 UDP server localhost single single node 2 00:00:07.009099 232755 8130000 8.13 MB
4 UDP client localhost single single node 3 00:00:07.137223 273151 9540000 9.54 MB
5 UDP server localhost single single node 3 00:00:07.136707 233548 8170000 8.17 MB
6 ZeroMQ client localhost single single node 1 00:00:09.543791 273151 9540000 9.54 MB
7 ZeroMQ server localhost single single node 1 00:00:11.574812 271364 9480000 9.48 MB
8 ZeroMQ client localhost single single node 2 00:00:09.478591 273151 9540000 9.54 MB
9 ZeroMQ server localhost single single node 2 00:00:09.477659 273067 9530000 9.53 MB
10 ZeroMQ client localhost single single node 3 00:00:09.367065 273151 9540000 9.54 MB
11 ZeroMQ server localhost single single node 3 00:00:09.366702 273113 9540000 9.54 MB

Deltas

Delta UDP


In [5]:
delta_udp_time = []
delta_udp_count = []
delta_udp_bytes = []
delta_udp_mb = []

In [6]:
delta_udp = df[df.Protocol == 'UDP']

delta_udp


Out[6]:
Protocol Type Location Setup Mode Rounds Time Count SizeBytes Size(MB)
0 UDP client localhost single single node 1 00:00:07.183246 273151 9540000 9.54 MB
1 UDP server localhost single single node 1 00:00:12.844457 235536 8220000 8.22 MB
2 UDP client localhost single single node 2 00:00:07.008885 273151 9540000 9.54 MB
3 UDP server localhost single single node 2 00:00:07.009099 232755 8130000 8.13 MB
4 UDP client localhost single single node 3 00:00:07.137223 273151 9540000 9.54 MB
5 UDP server localhost single single node 3 00:00:07.136707 233548 8170000 8.17 MB

In [7]:
(delta_udp.iloc[1].Time - delta_udp.iloc[0].Time)# .seconds


Out[7]:
Timedelta('0 days 00:00:05.661211')

In [8]:
# delta_udp_time = delta_udp[delta_udp.Type == 'server'].Time - delta_udp[delta_udp.Type == 'client'].Time

delta_udp_time.append((delta_udp.iloc[1].Time - delta_udp.iloc[0].Time))
delta_udp_time.append((delta_udp.iloc[3].Time - delta_udp.iloc[2].Time))
delta_udp_time.append((delta_udp.iloc[4].Time - delta_udp.iloc[5].Time)) # diff

delta_udp_time


Out[8]:
[Timedelta('0 days 00:00:05.661211'),
 Timedelta('0 days 00:00:00.000214'),
 Timedelta('0 days 00:00:00.000516')]

In [9]:
y = delta_udp_time

trace = [go.Box(
    y=y,
    jitter=0.3,
    pointpos=-1.8,
    boxpoints = 'suspectedoutliers',
    marker = dict(
        color = 'rgb(8,81,156)',
        outliercolor = 'rgba(219, 64, 82, 0.6)',
        line = dict(
            outliercolor = 'rgba(219, 64, 82, 0.6)',
            outlierwidth = 2)
    ),
#     name = name + ' - suspected outliers'
)]

iplot(trace)



In [10]:
# delta_udp_size = delta_udp[delta_udp.Type == 'client'].SizeBytes - delta_udp[delta_udp.Type == 'server'].SizeBytes

delta_udp_bytes.append((delta_udp.iloc[0].SizeBytes - delta_udp.iloc[1].SizeBytes))
delta_udp_bytes.append((delta_udp.iloc[2].SizeBytes - delta_udp.iloc[3].SizeBytes))
delta_udp_bytes.append((delta_udp.iloc[4].SizeBytes - delta_udp.iloc[5].SizeBytes))

print(delta_udp_bytes)

delta_udp_mb.append(utils.convert_size(delta_udp_bytes[0]))
delta_udp_mb.append(utils.convert_size(delta_udp_bytes[1]))
delta_udp_mb.append(utils.convert_size(delta_udp_bytes[2]))

print(delta_udp_mb)


[1320000, 1410000, 1370000]
['1.26 MB', '1.34 MB', '1.31 MB']

In [11]:
y = delta_udp_bytes

trace = [go.Box(
    y=y,
    jitter=0.3,
    pointpos=-1.8,
    boxpoints = 'suspectedoutliers',
    marker = dict(
        color = 'rgb(8,81,156)',
        outliercolor = 'rgba(219, 64, 82, 0.6)',
        line = dict(
            outliercolor = 'rgba(219, 64, 82, 0.6)',
            outlierwidth = 2)
    ),
#     name = name + ' - suspected outliers'
)]

iplot(trace)



In [12]:
delta_udp_count.append((delta_udp.iloc[0].Count - delta_udp.iloc[1].Count))
delta_udp_count.append((delta_udp.iloc[2].Count - delta_udp.iloc[3].Count))
delta_udp_count.append((delta_udp.iloc[4].Count - delta_udp.iloc[5].Count))

print(delta_udp_count)


[37615, 40396, 39603]

In [13]:
y = delta_udp_count

trace = [go.Box(
    y=y,
    jitter=0.3,
    pointpos=-1.8,
    boxpoints = 'suspectedoutliers',
    marker = dict(
        color = 'rgb(8,81,156)',
        outliercolor = 'rgba(219, 64, 82, 0.6)',
        line = dict(
            outliercolor = 'rgba(219, 64, 82, 0.6)',
            outlierwidth = 2)
    ),
#     name = name + ' - suspected outliers'
)]

iplot(trace)


Delta ZeroMQ


In [14]:
delta_zmq_time = []
delta_zmq_count = []
delta_zmq_bytes = []
delta_zmq_mb = []

In [15]:
delta_zmq = df[df.Protocol == 'ZeroMQ']

delta_zmq


Out[15]:
Protocol Type Location Setup Mode Rounds Time Count SizeBytes Size(MB)
6 ZeroMQ client localhost single single node 1 00:00:09.543791 273151 9540000 9.54 MB
7 ZeroMQ server localhost single single node 1 00:00:11.574812 271364 9480000 9.48 MB
8 ZeroMQ client localhost single single node 2 00:00:09.478591 273151 9540000 9.54 MB
9 ZeroMQ server localhost single single node 2 00:00:09.477659 273067 9530000 9.53 MB
10 ZeroMQ client localhost single single node 3 00:00:09.367065 273151 9540000 9.54 MB
11 ZeroMQ server localhost single single node 3 00:00:09.366702 273113 9540000 9.54 MB

In [16]:
# delta_zmq_time = delta_zmq[delta_zmq.Type == 'server'].Time - delta_zmq[delta_zmq.Type == 'client'].Time

delta_zmq_time.append((delta_zmq.iloc[1].Time - delta_zmq.iloc[0].Time))
delta_zmq_time.append((delta_zmq.iloc[2].Time - delta_zmq.iloc[3].Time)) # diff
delta_zmq_time.append((delta_zmq.iloc[4].Time - delta_zmq.iloc[5].Time))

delta_zmq_time


Out[16]:
[Timedelta('0 days 00:00:02.031021'),
 Timedelta('0 days 00:00:00.000932'),
 Timedelta('0 days 00:00:00.000363')]

In [17]:
# delta_zmq_size = delta_udp[delta_udp.Type == 'client'].SizeBytes - delta_udp[delta_udp.Type == 'server'].SizeBytes

delta_zmq_bytes.append((delta_zmq.iloc[0].SizeBytes - delta_zmq.iloc[1].SizeBytes))
delta_zmq_bytes.append((delta_zmq.iloc[2].SizeBytes - delta_zmq.iloc[3].SizeBytes))
delta_zmq_bytes.append((delta_zmq.iloc[4].SizeBytes - delta_zmq.iloc[5].SizeBytes))

print(delta_zmq_bytes)

delta_zmq_mb.append(utils.convert_size(delta_zmq_bytes[0]))
delta_zmq_mb.append(utils.convert_size(delta_zmq_bytes[1]))
delta_zmq_mb.append(utils.convert_size(delta_zmq_bytes[2]))

print(delta_udp_mb)


[60000, 10000, 0]
['1.26 MB', '1.34 MB', '1.31 MB']

In [18]:
delta_zmq_count.append((delta_zmq.iloc[0].Count - delta_zmq.iloc[1].Count))
delta_zmq_count.append((delta_zmq.iloc[2].Count - delta_zmq.iloc[3].Count))
delta_zmq_count.append((delta_zmq.iloc[4].Count - delta_zmq.iloc[5].Count))

print(delta_udp_count)


[37615, 40396, 39603]

In [ ]:


In [ ]:


In [ ]: