In [1]:
%load_ext memory_profiler
%matplotlib inline
import os
import sys
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import graph_tool.all as gt
import matplotlib.pyplot as plt
# Modify the Notebook path
sys.path.append(os.path.join(os.getcwd(), ".."))
from cloudscope.colors import ColorMap
from cloudscope.results import Results
from cloudscope.viz import draw_topology
from cloudscope.viz import plot_workload
from cloudscope.viz import plot_message_traffic
from cloudscope.results.graph import extract_graph
from cloudscope.results.analysis import create_per_replica_dataframe as create_dataframe
from cloudscope.results.analysis import create_messages_dataframe
In [2]:
# Notebook-wide seaborn settings: the 'whitegrid' style, the 'notebook'
# scaling context, and the 'Set1' color palette.
sns.set_style('whitegrid')
sns.set_context('notebook')
sns.set_palette('Set1')
In [5]:
# Where the results file is read from and where figures are written to,
# both given relative to the notebook's working directory.
NAME = "eventually-consistent-large-group-20160923.json"
FIXTURES = os.path.join("..", "fixtures", "results")
FIGURES = os.path.join("..", "fixtures", "figures")
RESULTS = os.path.join(FIXTURES, NAME)


def get_results_data(path=RESULTS):
    """
    Open the results file at ``path`` for reading and return whatever
    ``Results.load`` builds from the open file object.
    """
    with open(path, 'r') as results_file:
        loaded = Results.load(results_file)
    return loaded
In [6]:
%%memit
# Load the results from the default RESULTS path. The %%memit cell magic on
# this cell comes from the memory_profiler extension loaded in the first cell.
results = get_results_data()
In [ ]:
# Build the graph-tool graph from the loaded results
G = extract_graph(results, by_message_type=True)

# Vertex appearance: text from the 'id' property, size derived from the
# 'writes' property and mapped into the range 20..60
vlabel = G.vp['id']
vsize = gt.prop_to_size(G.vp['writes'], mi=20, ma=60)

# Vertex fill color: one color string per vertex, looked up from the
# vertex's 'consistency' property value
vcmap = ColorMap('flatui', shuffle=False)
vcolor = G.new_vertex_property('string')
for vertex in G.vertices():
    vcolor[vertex] = vcmap(G.vp['consistency'][vertex])

# Edge appearance: text and color both come from the 'label' property
elabel = G.ep['label']
ecmap = ColorMap('set1', shuffle=False)
ecolor = G.new_edge_property('string')
for edge in G.edges():
    ecolor[edge] = ecmap(elabel[edge])

# Edge pen width derived from the 'norm' property, mapped into 0.1..3
esize = gt.prop_to_size(G.ep['norm'], mi=.1, ma=3)

# Kept for later inspection; not passed to graph_draw below
eweight = G.ep['weight']

# The image is written into FIGURES, named after the results file
name = "{}.png".format(os.path.splitext(NAME)[0])
output = os.path.join(FIGURES, name)

gt.graph_draw(
    G,
    output=output,
    output_size=(1200, 1200),
    vertex_text=vlabel,
    vertex_size=vsize,
    vertex_font_weight=1,
    vertex_pen_width=1.3,
    vertex_fill_color=vcolor,
    edge_text=elabel,
    edge_color=ecolor,
    edge_pen_width=esize
)
In [7]:
%%memit
# Create the data frame for further analysis. create_dataframe is the alias
# under which create_per_replica_dataframe was imported in the first cell.
df = create_dataframe(results)
In [8]:
# Show the data frame as this cell's output
df
Out[8]:
In [9]:
# Print what results.print_details() returns. The parenthesised call form
# prints the same text on Python 2 and also parses on Python 3, where the
# bare print statement is a syntax error.
# NOTE(review): assumes print_details() returns the text rather than printing
# it itself -- confirm against cloudscope.results.
print(results.print_details())
In [ ]:
# Create the workload timeline plot from the loaded results.
# NOTE(review): series='objects' presumably selects one series per accessed
# object -- confirm against cloudscope.viz.plot_workload.
plot_workload(results, series='objects')
In [ ]:
# Create the sent messages timeline
# plot_message_traffic(results.results['sent'])
In [ ]:
%%memit
def get_message_rows(df):
    """
    Yield one record per row of ``df`` for the message count chart.

    Each record is a copy of the row's 'message types' mapping with an extra
    'replica' key formatted as "<label> (<consistency>)". The mapping stored
    in ``df`` is copied instead of being updated in place, so the data frame
    is left unchanged and the cell can be re-run without side effects.

    NOTE(review): assumes every 'message types' value is a dict-like mapping
    -- confirm against create_per_replica_dataframe.
    """
    columns = ['message types', 'label', 'consistency']
    for row in df[columns].itertuples():
        # row[0] is the index; row[1:] follow the column order given above
        item = dict(row[1])
        item['replica'] = "{} ({})".format(row[2], row[3])
        yield item
# One record per row of df, ordered by the formatted replica string
msgs = pd.DataFrame(
    sorted(get_message_rows(df), key=lambda record: record['replica'])
)

# A single tall axes to hold the horizontal bars
fig, ax = plt.subplots(figsize=(14, 20))

# Font size is changed after the axes exists, same order as before
mpl.rcParams['font.size'] = 14

# Stacked horizontal bars, one bar per replica string
g = msgs.plot(
    kind='barh', x='replica', stacked=True, ax=ax,
    color=sns.color_palette(), title="Message Counts by Type"
)

# Drop the grid lines on the y axis and label the count axis
ax.yaxis.grid(False)
ax.set_xlabel("message count")

# Write the current figure into the FIGURES directory
plt.savefig(os.path.join(FIGURES, 'message_counts.png'))
In [ ]:
# ax = sns.barplot(y='sent', x='replica', hue='type', data=msgs, estimator=count)
In [10]:
# Extract the read and write counts into their own frame for plotting
accesses = pd.DataFrame(df, columns=['reads', 'writes'])

# Text summary of totals and means over the rows of df. The template is
# formatted inside the print(...) call: formatting the result of print(...)
# only works with the Python 2 print statement and raises AttributeError on
# Python 3, where print() returns None. The output on Python 2 is unchanged.
print(
    (
        "Accesses\n"
        "========\n"
        "{} Total Reads, {} Total Writes \n"
        "{:0.1f} average reads per replica\n"
        "{:0.1f} average writes per replica\n"
    ).format(
        df.reads.sum(), df.writes.sum(), df.reads.mean(), df.writes.mean()
    )
)

# Stacked bar chart of reads and writes, one bar per row
accesses.plot(kind='bar', stacked=True)
Out[10]:
In [11]:
# Stacked bar chart of reads alongside missed, empty, and stale reads,
# one bar per row of df.
# NOTE(review): the name `errors` is rebound by the following two cells.
errors = pd.DataFrame(df, columns=['reads', 'missed reads', 'empty reads', 'stale reads'])
errors.plot(kind='bar', stacked=True)
Out[11]:
In [12]:
# Stacked bar chart of dropped and forked writes, one bar per row of df
errors = pd.DataFrame(df, columns=['dropped writes', 'forked writes'])
errors.plot(kind='bar', stacked=True)
Out[12]:
In [13]:
# Stacked bar chart of writes alongside partially replicated and visible
# writes, one bar per row of df
errors = pd.DataFrame(df, columns=['writes', 'partially replicated writes', 'visible writes'])
errors.plot(kind='bar', stacked=True)
Out[13]:
In [14]:
# Bar chart of the mean read, write, visibility, and commit latencies (ms),
# with the 'replica' column on the x axis
data=pd.DataFrame(df, columns=['replica', 'mean read latency (ms)', 'mean write latency (ms)', 'mean visibility latency (ms)', 'mean commit latency (ms)'])
data.plot(kind='bar', x='replica')
Out[14]:
In [19]:
# Seaborn bar plot of 'mean read time staleness (ms)' for each 'replica' value
sns.barplot(x='replica', y='mean read time staleness (ms)', data=df)
Out[19]: