In [91]:
from __future__ import division
%matplotlib inline
import os
import re
import sys
import pandas as pd
import seaborn as sns
from collections import deque
from operator import itemgetter
# Modify the Notebook path
sys.path.append(os.path.join(os.getcwd(), ".."))
from cloudscope.results import Results
from cloudscope.utils.tree import Tree
In [3]:
FIXTURES = os.path.join("..", "fixtures")
RESULTS = os.path.join(FIXTURES, "results")
# Results Files with log traces
FEDERATED = os.path.join(RESULTS, "single", "federated-consistency-20160923.json")
RAFT = os.path.join(RESULTS, "single", "large-consensus-group-20160923.json")
EVENTUAL = os.path.join(RESULTS, "single", "eventually-consistent-large-group-20160923.json")
In [4]:
def load_data(path=EVENTUAL):
with open(path) as f:
return Results.load(f)
# Load the logs into memory
logs = [log['log'] for log in load_data().consistency['logs'].values()]
In this section, we will explore a variety of definitions of "branchiness" to see if we can come up with a metric for how critical forks in a log actually are - that is, how far do they deviate from a single linear chain. Example trees are as follows:
Obviously these examples can be generalized further, but they should demonstrate different types of trees that we want to compare from the best case (chain) to the worst case (fan). Each of these trees will have the same exact size.
In [85]:
# Generator Functions
def generate_chain(size):
root = Tree("0")
tree = root
for idx in xrange(1, size):
tree = tree.append(str(idx))
assert root.size() == size
return root
def generate_prong(size, min_handle=3):
# Get the number of nodes in the handle
# such that the size is evenly divided by two
handle = min_handle
if (size - handle) % 2 != 0:
handle += 1
# Get the prong length
plength = (size - handle) // 2
# Create the handle
root = Tree("0")
tree = root
for idx in xrange(1, handle):
tree = tree.append(str(idx))
# Create the prongs
for _ in xrange(2):
branch = tree
for jdx in xrange(0, plength):
idx += 1
branch = branch.append(str(idx))
assert root.size() == size
return root
def generate_trident(size, handle=4):
splen = (size - handle) // 3
lplen = (size - handle - splen) // 2
# Create the handle
root = Tree("0")
tree = root
for idx in xrange(1, handle):
tree = tree.append(str(idx))
# Create the prongs
for bdx in xrange(3):
branch = tree
plength = splen if bdx == 1 else lplen
for jdx in xrange(0, plength):
idx += 1
branch = branch.append(str(idx))
assert root.size() == size
return root
def generate_thorns(size, tlen=1):
# Also generates cactus by supplying tlen=2
root = Tree("0")
tree = root
label = 0
for idx in xrange(0, size-1):
if idx % 3 == 0:
branch = tree
for _ in xrange(tlen):
if label >= (size-1):
break
label += 1
branch = branch.append(str(label))
else:
if label >= (size-1):
break
label += 1
tree = tree.append(str(label))
assert root.size() == size
return root
def generate_btree(size):
root = Tree("0")
idx = 0
children = deque([root])
while root.size() < size:
child = children.popleft()
for _ in xrange(2):
idx += 1
if idx >= (size): break
children.append(child.append(str(idx)))
assert root.size() == size
return root
def generate_fan(size, handle=0):
root = Tree("0")
# Create the handle
tree = root
for idx in xrange(1, handle+1):
tree = tree.append(str(idx))
for idx in xrange(1, size-handle):
tree.append(str(idx))
assert root.size() == size
return root
In [89]:
SIZE = 15
trees = {
'chain': generate_chain(SIZE),
'prong': generate_prong(SIZE),
'trident': generate_trident(SIZE),
'thorns': generate_thorns(SIZE, 1),
'cactus': generate_thorns(SIZE, 2),
'btree': generate_btree(SIZE),
'fan': generate_fan(SIZE),
}
In [90]:
for name, tree in trees.items():
print name
print tree.pprint()
print
In [92]:
def normalize(data):
"""
Computes the z-index of each point in the list of data
for visual normalization to the space between 0 and 1.
"""
minx = min(data)
rngx = float(max(data) - minx)
return [
float(x - minx) / rngx
for x in data
]
def branchiness(tree):
"""
Computes branchiness as the average shortest distance to
each leaf node normalized by the greatest depth.
"""
n = float(tree.size())
d = [float(leaf.depth()) for leaf in tree.leaves()]
return (sum(d) / float(len(d))) / n if d else 1.0
In [99]:
df = pd.DataFrame([
{
'name': name,
'forks': tree.forks(),
'height': tree.height(),
'branchiness': branchiness(tree),
} for name, tree in trees.items()
])
df.forks = normalize(df.forks)
df.height = normalize(df.height)
In [109]:
df.plot(kind='bar', x='name', figsize=(16,9), fontsize=14)
Out[109]: