In [70]:
%matplotlib inline
import os
import re
import csv
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Global seaborn theme for every figure below: 'poster' context scaling and
# the 'whitegrid' axes style.
sns.set_context('poster')
sns.set_style('whitegrid')
In [18]:
def parse_server_log(path, host=None):
    """Parse a server log file and yield one dict per throughput report line.

    Three kinds of lines are recognised; anything else is ignored:

    * ``<n> remote clients - <count> <name> <count> <name>`` sets the current
      client total and records a ``<name>: <count>`` entry for each pair.
    * ``[honu] <timestamp> created <kind> consistency storage`` sets the
      current consistency label to ``<kind>``.
    * ``[honu] <timestamp> <n> accesses (<r> reads, <w> writes) in <secs>s --
      <rate> accesses/second`` produces a row from the most recent state.

    Parameters
    ----------
    path : str
        Log file to read.
    host : str, optional
        Value stored under the ``"host"`` key of every yielded row.

    Yields
    ------
    dict
        Keys ``host``, ``role`` (always ``"server"``), ``clients`` (int),
        ``consistency``, ``accesses`` (int), ``seconds`` (float) and
        ``throughput (accesses/sec)`` (float), plus one int-valued key per
        name captured from the client-count lines seen so far. ``clients``
        and ``consistency`` are ``None`` until a matching line has been read.
    """
    clipat = re.compile(r'^(\d+) remote clients - (\d+) (\w+) (\d+) (\w+)$')
    conpat = re.compile(r'^\[honu\] ([\d\.:]+) created (\w+) consistency storage$')
    thrpat = re.compile(r'^\[honu\] ([\d\.:]+) (\d+) accesses \(\d+ reads, \d+ writes\) in ([\d\.]+)s -- ([\d\.]+) accesses/second$')

    clients = None
    servers = {}
    consistency = None

    with open(path, 'r') as f:
        for line in f:
            line = line.strip()

            # The three patterns cannot match the same line (one starts with a
            # digit, the other two differ after the timestamp), so each branch
            # can move straight on to the next line.
            match = clipat.match(line)
            if match is not None:
                total, count_a, name_a, count_b, name_b = match.groups()
                # Stored as int so the value has the same type as the
                # "clients" field produced by parse_client_log.
                clients = int(total)
                # Entries accumulate: a name seen on an earlier line stays in
                # the mapping until a later line overwrites it.
                servers[name_a] = int(count_a)
                servers[name_b] = int(count_b)
                continue

            match = conpat.match(line)
            if match is not None:
                consistency = match.group(2)
                continue

            match = thrpat.match(line)
            if match is not None:
                row = {
                    "host": host,
                    "role": "server",
                    "clients": clients,
                    "consistency": consistency,
                    "accesses": int(match.group(2)),
                    "seconds": float(match.group(3)),
                    "throughput (accesses/sec)": float(match.group(4)),
                }
                row.update(servers)
                yield row
In [20]:
# Write every throughput row parsed from the nevis server log to CSV.
#
# newline='' hands line-ending control to the csv module; without it the
# writer's '\r\n' terminator is translated a second time on Windows and blank
# rows appear between records.
#
# The trailing "hyperion" / "lagoon" columns must cover every name captured
# from the log's client-count lines: DictWriter raises ValueError on a row
# key that is not listed in fieldnames.
with open('server-throughput.csv', 'w', newline='') as out:
    writer = csv.DictWriter(out, fieldnames=("host", "role", "clients", "consistency", "accesses", "seconds", "throughput (accesses/sec)", "hyperion", "lagoon"))
    writer.writeheader()
    writer.writerows(parse_server_log('throughput-server-nevis.txt', 'nevis'))
In [22]:
def parse_client_log(path, host=None):
    """Yield one dict for each throughput line found in a client log.

    A ``<a> local <b> remote clients`` line updates the current local and
    remote counts (and their sum). Each ``<n> messages sent in <secs>s
    (<rate> msg/sec[, <x> ns/msg])`` line then yields a row carrying those
    counts; the optional ns/msg figure is matched but not stored. Lines that
    fit neither pattern are skipped.

    Parameters
    ----------
    path : str
        Log file to read.
    host : str, optional
        Value stored under the ``"host"`` key of every yielded row.

    Yields
    ------
    dict
        Keys ``host``, ``role`` (always ``"client"``), ``clients``, ``local``,
        ``remote``, ``accesses`` (int), ``seconds`` (float) and
        ``throughput (accesses/sec)`` (float). The three count fields are
        ``None`` until a client-count line has been read.
    """
    count_re = re.compile(r'^(\d+) local (\d+) remote clients$')
    rate_re = re.compile(r'^(\d+) messages sent in ([\d\.]+)s \(([\d\.]+) msg\/sec(, ([\d\.]+) ns\/msg)?\)$')

    n_total = n_local = n_remote = None

    with open(path, 'r') as handle:
        for raw in handle:
            text = raw.strip()

            counted = count_re.match(text)
            if counted is not None:
                n_local = int(counted.group(1))
                n_remote = int(counted.group(2))
                n_total = n_local + n_remote

            rated = rate_re.match(text)
            if rated is None:
                continue

            sent, elapsed, per_second = rated.group(1, 2, 3)
            yield {
                "host": host,
                "role": "client",
                "clients": n_total,
                "local": n_local,
                "remote": n_remote,
                "accesses": int(sent),
                "seconds": float(elapsed),
                "throughput (accesses/sec)": float(per_second),
            }
In [23]:
# Combine the throughput rows from both client logs into a single CSV.
#
# newline='' hands line-ending control to the csv module; without it the
# writer's '\r\n' terminator is translated a second time on Windows and blank
# rows appear between records.
with open('clients-throughput.csv', 'w', newline='') as out:
    writer = csv.DictWriter(out, fieldnames=("host", "role", "clients", "accesses", "seconds", "throughput (accesses/sec)", "local", "remote"))
    writer.writeheader()
    for name in ('hyperion', 'lagoon'):
        # Each client host has its own log; the host name tags its rows.
        path = "throughput-client-{}.txt".format(name)
        writer.writerows(parse_client_log(path, name))
In [25]:
# Load the two CSV exports back in as DataFrames for the analysis below.
server = pd.read_csv('server-throughput.csv')
clients = pd.read_csv('clients-throughput.csv')
In [26]:
# Column dtypes pandas inferred for the server table.
server.dtypes
Out[26]:
In [27]:
# Column dtypes pandas inferred for the clients table.
clients.dtypes
Out[27]:
In [28]:
# Summary statistics for the numeric columns of the server table.
server.describe()
Out[28]:
In [29]:
# Summary statistics for the numeric columns of the clients table.
clients.describe()
Out[29]:
In [103]:
# Server throughput against client count with a second-order polynomial fit.
# x_jitter spreads points that share the same client count so they do not
# overplot. Line and scatter colours are both forced to red.
#
# `height` replaces the `size` keyword: seaborn 0.9 renamed it and current
# releases no longer accept `size`.
g = sns.lmplot(
    x="clients",
    y="throughput (accesses/sec)",
    data=server,
    x_jitter=1,
    order=2,
    height=8,
    aspect=2,
    hue="host",
    line_kws={'color': 'r'},
    scatter_kws={'color': 'r'},
)
plt.title("Stand Alone Server-Side Throughput")
Out[103]:
In [104]:
# Unused alternative: aggregate to per-(clients, host) means before plotting.
# df = pd.DataFrame(clients.groupby(['clients', 'host']).mean().to_records())

# Client-side throughput against client count, one colour per host, with a
# fourth-order polynomial fit. x_jitter spreads points that share the same
# client count so they do not overplot.
#
# `height` replaces the `size` keyword: seaborn 0.9 renamed it and current
# releases no longer accept `size`.
g = sns.lmplot(
    x="clients",
    y="throughput (accesses/sec)",
    x_jitter=1,
    hue='host',
    data=clients,
    order=4,
    height=8,
    aspect=2,
)
plt.title("Client-Side Per-Process Throughput")
Out[104]: