In [1]:
%load_ext memory_profiler
%matplotlib inline
import os
import sys
import json
import math
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import graph_tool.all as gt
import matplotlib.pyplot as plt
from operator import itemgetter
from itertools import groupby, chain
from collections import defaultdict, Counter
# Modify the Notebook path
sys.path.append(os.path.join(os.getcwd(), ".."))
from cloudscope.colors import ColorMap
from cloudscope.results import Results
from cloudscope.results.graph import extract_graph
from cloudscope.results.analysis import create_per_replica_dataframe as create_replica_dataframe
from cloudscope.results.analysis import create_per_experiment_dataframe as create_dataframe
In [2]:
# Global seaborn theme for every figure below: white grid background,
# poster-scale fonts, and a nine-color qualitative palette (Set1).
sns.set_style('whitegrid')
sns.set_context('poster')
sns.set_palette('Set1', n_colors=9)
In [3]:
# Specify a path to a results file
# If None, will attempt to look one up
FIXTURES = os.path.join("..", "fixtures", "results")
FIGURES = os.path.join("..", "fixtures", "figures", "scaling-systems")
RESULTS = os.path.join(FIXTURES, "scaling-homogenous-topologies-20161020.json")

def get_results_data(path=RESULTS):
    """
    Generate one Results object per line of the JSON-lines results file
    at the given path (defaults to the RESULTS fixture).
    """
    with open(path, 'r') as fobj:
        for record in fobj:
            yield Results.load(record)
In [4]:
%%memit
# Profile peak memory while folding every experiment's results into a
# single per-experiment DataFrame.
df = create_dataframe(get_results_data())
In [5]:
# Uncomment below if you need to see the columns
# print("\n".join(df.columns))

# Label each experiment by its size and topology (e.g. "25 raft nodes"),
# then title-case the topology type so plot legends read nicely.
df['ename'] = df['users'].apply(str) + " " + df['type'] + " nodes"
df['type'] = df['type'].str.title()
In [6]:
%%memit
def get_message_rows(df):
    """
    Yield one flat dict per experiment, combining the per-type message
    counts from the 'message types' column with the experiment name
    under the 'experiment' key.

    Each counts dict is copied before the key is added: the original
    code mutated the dict objects stored inside the DataFrame itself,
    so re-running the cell injected an 'experiment' entry into the
    source data. Copying keeps the cell idempotent.
    """
    for row in df[['message types', 'ename']].itertuples():
        # row[0] is the index; row[1] the counts dict; row[2] the ename
        item = dict(row[1])
        item['experiment'] = row[2]
        yield item
# Build the message-count frame, ordered by experiment name so the bars
# appear in a stable, sorted order.
msgs = pd.DataFrame(sorted(get_message_rows(df), key=itemgetter('experiment')))

# One horizontal stacked bar per experiment, one segment per message type
fig = plt.figure(figsize=(14, 16))
ax = fig.add_subplot(111)
mpl.rcParams.update({'font.size': 22})
g = msgs.plot(
    x='experiment', kind='barh', stacked=True, ax=ax,
    title="Message Counts by Type", color=sns.color_palette()
)

# Label the count axis and drop the horizontal grid lines
ax.set_xlabel("message count")
ax.yaxis.grid(False)

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'message_counts.png'))
In [7]:
# Shared x-axis upper bound for all scatter plots below: a little
# headroom past the largest cluster size in the results.
ulim = df.users.max() + 5
In [8]:
# Forked Writes (two keys: "inconsistent writes" and "forked writes")
df['% forked writes'] = (df['forked writes'] / df['writes']) * 100

# Scatter the percentage against cluster size, one marker per system type
g = sns.lmplot(
    x="users", y="% forked writes", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title, limits, and labels on the single facet
g.ax.set_title("Percent of Writes that are Forked")
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'forked_writes.png'))
In [9]:
# Stale Writes
df['% stale writes'] = (df['stale writes'] / df['writes']) * 100

# Scatter the percentage against cluster size, one marker per system type
g = sns.lmplot(
    x="users", y="% stale writes", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title, limits, and labels on the single facet
g.ax.set_title("Percent of Writes that are Stale")
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'stale_writes.png'))
In [10]:
# Inconsistent Writes (header previously mislabeled "Forked Writes")
df['% inconsistent writes'] = (df['inconsistent writes'] / df['writes']) * 100

# Scatter the percentage against cluster size, one marker per system type
g = sns.lmplot(
    x="users", y="% inconsistent writes", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title, limits, and labels on the single facet
g.ax.set_title("Percent of Writes that are Inconsistent")
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'inconsistent_writes.png'))
In [11]:
# Dropped Writes
df['% dropped writes'] = (df['dropped writes'] / df['writes']) * 100

# Scatter the percentage against cluster size, one marker per system type
g = sns.lmplot(
    x="users", y="% dropped writes", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title, limits, and labels on the single facet
g.ax.set_title("Percent of Writes that are Dropped")
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'dropped_writes.png'))
In [12]:
# Stale Reads (note: percentage of READS, not writes)
df['% stale reads'] = (df['stale reads'] / df['reads']) * 100

# Scatter the percentage against cluster size, one marker per system type
g = sns.lmplot(
    x="users", y="% stale reads", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title, limits, and labels on the single facet
g.ax.set_title("Percent of Reads that are Stale")
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'stale_reads.png'))
In [13]:
# Visible Writes
df['% visible writes'] = (df['visible writes'] / df['writes']) * 100

# Scatter the percentage against cluster size, one marker per system type
g = sns.lmplot(
    x="users", y="% visible writes", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title, limits, and labels on the single facet
g.ax.set_title("Percent of Writes that are Fully Visible")
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'visible_writes.png'))
In [14]:
# Average visibility of writes (fraction of replicas a write reaches)
g = sns.lmplot(
    x="users", y="mean visibility", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title, limits, and labels on the single facet
g.ax.set_title("Average Fraction of Replicas Writes are Visible On")
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'mean_visibility.png'))
In [15]:
# Committed Writes (percentage of all writes that were committed)
df['% committed writes'] = (df['committed writes'] / df['writes']) * 100

# Scatter the percentage against cluster size, one marker per system type
g = sns.lmplot(
    x="users", y="% committed writes", hue='type',
    data=df, fit_reg=False, size=6, aspect=1.4, markers=["x","o"],
    scatter_kws={'s': 64}
)

# Set the title (typo "Committted" in the rendered title is fixed here)
g.ax.set_title("Percent of Writes that are Committed")

# Modify the axis limits on the single facet
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'committed_writes.png'))
In [16]:
# Number of Messages
g = sns.lmplot(
    x="users", y="sent messages", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title, limits, and labels on the single facet
g.ax.set_title("Total Sent Messages")
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'messages_sent.png'))
In [17]:
# Read latency (ms delay before read)
g = sns.lmplot(
    x="users", y="mean read latency (ms)", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title and x-axis cosmetics on the single facet
# (no y-limit here, unlike the other latency plots)
g.ax.set_title("Average Read Latency")
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'read_latency.png'))
In [18]:
# Write Latency (ms delay before write)
g = sns.lmplot(
    x="users", y="mean write latency (ms)", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title, limits, and labels on the single facet
g.ax.set_title("Average Write Latency")
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'write_latency.png'))
In [19]:
# Also plot the expected convergence times
# Eventual expected is AE * log3(N) where N is number of nodes and AE is
# the anti-entropy delay; Raft expected is just the heartbeat interval for now.
x = np.linspace(1, ulim)
# Materialize as lists: in Python 3, map() returns a one-shot iterator,
# so ep/rp would be exhausted by the first plot call (In[27]) and the
# second plot (In[28]) would silently draw empty curves.
ep = [df['anti-entropy delay (ms)'].max() * math.log(n, 3) for n in x]
rp = [df['heartbeat interval (ms)'].max() for n in x]
In [27]:
# Replication Latency (Visibility Latency)
g = sns.lmplot(
    x="users", y="mean visibility latency (ms)", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title the single facet
g.ax.set_title("Average Full Visibility Latency")

# Overlay the expected convergence curves computed above
# (palette color 0 = eventual, color 1 = raft)
g.ax.plot(x, ep, '--', c=sns.color_palette()[0], lw=2)
g.ax.plot(x, rp, '--', c=sns.color_palette()[1], lw=2)

# Pin the axis limits and label the x axis
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'visibility_latency.png'))
In [28]:
# Partial Replication Latency (Visibility Latency)
g = sns.lmplot(
    x="users", y="mean partial visibility latency (ms)", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title the single facet
g.ax.set_title("Average Partial Visibility Latency")

# Overlay the expected convergence curves computed above
# (palette color 0 = eventual, color 1 = raft)
g.ax.plot(x, ep, '--', c=sns.color_palette()[0], lw=2)
g.ax.plot(x, rp, '--', c=sns.color_palette()[1], lw=2)

# Pin the axis limits and label the x axis
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'partial_visibility_latency.png'))
In [22]:
# Commit Latency
g = sns.lmplot(
    x="users", y="mean commit latency (ms)", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title, limits, and labels on the single facet
g.ax.set_title("Average Commit Latency")
g.ax.set_ylim(0,)
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'commit_latency.png'))
In [23]:
# Simulation Time (wall-clock seconds per experiment, log scale)
g = sns.lmplot(
    x="users", y="simulation time (secs)", hue='type', data=df,
    fit_reg=False, size=6, aspect=1.4,
    markers=["x", "o"], scatter_kws={'s': 64}
)

# Title the single facet and switch to a log y scale
g.ax.set_title("Elapsed Real Simulation Time")
g.set(yscale="log")
g.set(ylabel="simulation time (secs - log scale)")

# Pin the x limits and label the x axis
g.ax.set_xlim(0, ulim)
g.ax.set_xlabel("number of nodes")

# Save the figure to disk
plt.savefig(os.path.join(FIGURES, 'simulation_time.png'))
In [24]:
def find_results(etype='raft', users=None):
    """
    Scan the results file for the first experiment whose topology type
    matches etype and, when users is given, whose user count matches.

    Returns a (result, name) pair where name is a figure filename built
    from the MATCHED experiment's user count, or (None, None) when no
    experiment matches.
    """
    for result in get_results_data():
        if result.settings['type'] != etype:
            continue
        found = result.settings['users']
        # Compare against None explicitly: the old truthiness test
        # (`users and ...`) would have skipped a legitimate users=0 query.
        if users is None or users == found:
            # Use the matched count, not the argument, so a users=None
            # query no longer produces a name like "raft-Nonenodes.png".
            name = "{}-{}nodes.png".format(etype, found)
            return result, name
    return None, None
In [25]:
# Draw the messages graph
# Find the desired results
result, name = find_results('raft', 25)
if result is None: raise ValueError("Could not find results!")
# Extract the Graph Tool graph
# (by_message_type=True: one edge per message type between replicas)
G = extract_graph(result, by_message_type=True)
# Draw the graph
vlabel = G.vp['id']
vsize = G.vp['writes']
# Scale vertex sizes by write count into the [20, 60] point range
vsize = gt.prop_to_size(vsize, ma=60, mi=20)
# Set the vertex color
vcolor = G.new_vertex_property('string')
vcmap = ColorMap('flatui', shuffle=False)
# NOTE(review): calling the map on 'raft'/'eventual' up front presumably
# pins each consistency level to a stable color regardless of vertex
# iteration order — confirm against cloudscope.colors.ColorMap.
vcmap('raft')
vcmap('eventual')
for vertex in G.vertices():
    vcolor[vertex] = vcmap(G.vp['consistency'][vertex])
# Set the edge color
ecolor = G.new_edge_property('string')
ecmap = ColorMap('paired', shuffle=False)
for edge in G.edges():
    ecolor[edge] = ecmap(G.ep['label'][edge])
elabel = G.ep['label']
esize = G.ep['norm']
# Edge pen widths in the [2, 5] range from the normalized message volume
esize = gt.prop_to_size(esize, mi=2, ma=5)
# Create the layout with the edge weights.
# pos = gt.arf_layout(G, weight=G.ep['weight'])
pos = gt.sfdp_layout(G, eweight=G.ep['weight'], vweight=vsize)
# pos = gt.fruchterman_reingold_layout(G, weight=G.ep['weight'])
# Render directly to disk; `name` comes from find_results above
gt.graph_draw(
    G, pos=pos, output_size=(1200,1200), output=os.path.join(FIGURES, name),
    vertex_text=vlabel, vertex_size=vsize, vertex_font_weight=1,
    vertex_pen_width=1.3, vertex_fill_color=vcolor,
    edge_pen_width=esize, edge_color=ecolor, edge_text=elabel
)
Out[25]:
In [26]:
# Draw the message latency graph
# Find the desired results
result, name = find_results('eventual', 25)
if result is None: raise ValueError("Could not find results!")
# Extract the Graph Tool graph
# (by_message_type=False: a single aggregated edge between replicas)
G = extract_graph(result, by_message_type=False)
# Draw the graph
vlabel = G.vp['id']
vsize = G.vp['sent']
# Scale vertex sizes by sent-message count into the [20, 60] point range
vsize = gt.prop_to_size(vsize, ma=60, mi=20)
# Set the vertex color
vcolor = G.new_vertex_property('string')
vcmap = ColorMap('flatui', shuffle=False)
# NOTE(review): priming with 'raft'/'eventual' presumably pins each
# consistency level to a stable color — confirm against ColorMap.
vcmap('raft')
vcmap('eventual')
for vertex in G.vertices():
    vcolor[vertex] = vcmap(G.vp['consistency'][vertex])
# Make the edge weights the inverse of the latency mean (lower latency is higher weight)
esize = G.new_edge_property('float')
for edge in G.edges():
    # NOTE(review): assumes every edge has a nonzero mean latency; a
    # zero mean would raise ZeroDivisionError here — confirm upstream.
    esize[edge] = 1.0 / G.ep['mean'][edge]
esize = gt.prop_to_size(esize, mi=1, ma=5)
# Create the layout with the edge weights.
# pos = gt.arf_layout(G, weight=esize)
# pos = gt.sfdp_layout(G, eweight=esize, vweight=vsize)
pos = gt.fruchterman_reingold_layout(G, weight=esize)
# Render directly to disk; `name` comes from find_results above
gt.graph_draw(
    G, pos=pos, output_size=(1200,1200), output=os.path.join(FIGURES, name),
    vertex_text=vlabel, vertex_size=vsize, vertex_font_weight=1,
    vertex_pen_width=1.3, vertex_fill_color=vcolor,
    edge_pen_width=esize,
)
Out[26]: