In [1]:

    
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

Coordinator Load With Leader Death

We run a full etcd cluster with continuous checkpointing. There are 500 clients, each with a unique query, spread across 10 brokers. All 500 clients are started and the system is allowed to stabilize. Each client subscribes to 10% of the publishers in the system. We then begin adding publishers: each broker submits a new publisher after a delay drawn uniformly from [50, 500] ms, until there are 1000 total publishers in the system. We kill the leader and allow the system to fail over. Latency is measured from the time the publish message is sent until the time the broker receives the subscription diff message.



In [51]:

    
# Etcd run with leader death ("30-300" data file): scatter of per-publish
# latency against publish time, with 95th/99th percentile guide lines and a
# vertical marker at the recorded republish-switch timestamp.
datafile = "data/brokerLatencyManyPublishersChangeMD-long-etcd-leaderDeath-30-300.csv"
df = pd.read_csv(datafile, header=None)
df.columns = ['start_time', 'latency']
df['latency'] /= 1e6  # convert to milliseconds
df['start_time'] /= 1e9  # convert to seconds

# Drop extreme outliers (>= 1e5 ms). Take an explicit copy so the column
# assignment below writes to an independent frame rather than a slice of the
# unfiltered one (avoids pandas' SettingWithCopyWarning).
df = df[df['latency'] < 1e5].copy()

# Re-base the time axis so the first retained publish is at t = 0.
min_start_time = df['start_time'].min()
df['start_time'] = df['start_time'] - min_start_time

latency_p95 = df['latency'].quantile(q=0.95)
latency_p99 = df['latency'].quantile(q=0.99)

ax = df.plot(kind='scatter', x='start_time', y='latency', figsize=(15,8), s=1, c='black')
# The y axis is linear in this cell (lower limit pinned at 0), so the label
# must not claim a log scale.
ax.set_ylabel("Latency (ms)")
ax.set_xlabel("Time of Publish Message (s)")
ax.set_xlim(left=0)
ax.set_ylim(bottom=0)
ax.grid(axis='y', which='major')

# Label each guide line directly so the legend does not depend on the order
# in which matplotlib enumerates the artists on the axes.
ax.axhline(y=latency_p95, c="blue", linewidth=0.5, label='95% Quantile')
ax.axhline(y=latency_p99, linestyle='dashed', linewidth=0.5, label='99% Quantile')

# Absolute timestamp (same raw unit as start_time, scaled to seconds) shifted
# onto the re-based time axis. It is left unlabelled, so it stays out of the
# legend, as before.
republish_switch_time = 1462437292458576882/1e9
ax.axvline(x=republish_switch_time-min_start_time, linewidth=0.5)
ax.legend()

print(latency_p99)
df.describe()









    



21816.2967446






    Out[51]:






  
    
      
      start_time
      latency
    
  
  
    
      count
      88727.000000
      88727.000000
    
    
      mean
      159.140746
      2269.057745
    
    
      std
      91.421960
      5282.308758
    
    
      min
      0.000000
      24.848007
    
    
      25%
      78.883592
      47.879855
    
    
      50%
      158.400362
      75.915225
    
    
      75%
      239.996590
      248.011644
    
    
      max
      298.082556
      27774.533369



In [67]:

    
# Leader-death comparison ("50-500" data files): overlay per-publish latency
# for the etcd run (green) and the no-etcd run (red) on a log-scaled y axis.
# The percentile guide lines are computed from the etcd run only.
datafile = "data/brokerLatencyManyPublishersChangeMD-long-etcd-leaderDeath-50-500.csv"
noetcd_datafile = "data/brokerLatencyManyPublishersChangeMD-long-noetcd-leaderDeath-50-500.csv"
df = pd.read_csv(datafile, header=None)
noetcd_df = pd.read_csv(noetcd_datafile, header=None)
df.columns = ['start_time', 'latency']
noetcd_df.columns = ['start_time', 'latency']

# --- no-etcd run -----------------------------------------------------------
noetcd_df['latency'] /= 1e6  # convert to milliseconds
noetcd_df['start_time'] /= 1e9  # convert to seconds
# Explicit copy: the filtered frame is modified below, and assigning into a
# slice of the unfiltered frame raises SettingWithCopyWarning.
noetcd_df = noetcd_df[noetcd_df['latency'] < 1e5].copy()
noetcd_min_start_time = noetcd_df['start_time'].min()
noetcd_df['start_time'] = noetcd_df['start_time'] - noetcd_min_start_time

# --- etcd run --------------------------------------------------------------
df['latency'] /= 1e6  # convert to milliseconds
df['start_time'] /= 1e9  # convert to seconds
df = df[df['latency'] < 1e5].copy()
min_start_time = df['start_time'].min()
df['start_time'] = df['start_time'] - min_start_time

latency_p95 = df['latency'].quantile(q=0.95)
latency_p99 = df['latency'].quantile(q=0.99)

ax = df.plot(kind='scatter', x='start_time', y='latency', figsize=(15,8), s=2, c='green', edgecolors='none', label='Etcd')
ax.scatter(noetcd_df['start_time'], noetcd_df['latency'], s=2, c='red', edgecolors='none', label='No etcd')
ax.set_yscale('log')
ax.set_ylabel("Latency (ms) - log scale")
ax.set_xlabel("Time of Publish Message (s)")
ax.set_xlim(left=0, right=450)
ax.set_ylim(bottom=1)
ax.grid(axis='y', which='major')

# Every artist carries its own label, so the legend distinguishes the two
# runs and does not depend on matplotlib's artist enumeration order.
ax.axhline(y=latency_p95, c="blue", linewidth=0.5, label='Etcd 95% Quantile')
ax.axhline(y=latency_p99, linestyle='dashed', linewidth=0.5, label='Etcd 99% Quantile')

# Absolute timestamp (scaled to seconds) shifted onto the etcd run's re-based
# time axis; unlabelled, so it is not listed in the legend.
republish_switch_time = 1462438418753251298/1e9
ax.axvline(x=republish_switch_time-min_start_time, linewidth=1)
ax.legend()

print(latency_p99)
df.describe()









    



447.91644953






    Out[67]:






  
    
      
      start_time
      latency
    
  
  
    
      count
      89830.000000
      89830.000000
    
    
      mean
      257.775392
      95.478344
    
    
      std
      140.701009
      273.348483
    
    
      min
      0.000000
      25.785787
    
    
      25%
      132.372227
      42.413666
    
    
      50%
      268.785404
      60.342734
    
    
      75%
      391.215763
      90.700581
    
    
      max
      460.926421
      7066.640025



In [31]:

    
# Etcd run with leader death: scatter of diff latency against time since the
# first publish (both axes in milliseconds, latency on a log scale), with a
# guide line at the 99th percentile.
datafile = "data/brokerLatencyManyPublishers-long-etcd-leaderDeath.csv"
df = pd.read_csv(datafile, header=None)
df.columns = ['start_time', 'latency']
df['latency'] /= 1e6  # convert to milliseconds
df['start_time'] /= 1e6  # convert to milliseconds

# Drop extreme outliers (>= 1e6 ms). The explicit copy keeps the assignment
# below from targeting a slice of the unfiltered frame
# (SettingWithCopyWarning).
df = df[df['latency'] < 1e6].copy()

# Re-base the time axis so the first retained publish is at t = 0.
min_start_time = df['start_time'].min()
df['start_time'] = df['start_time'] - min_start_time

latency_p99 = df['latency'].quantile(q=0.99)

ax = df.plot(kind='scatter', x='start_time', y='latency', figsize=(15,8))
ax.set_title("Diff Latency")
ax.set_yscale('log')
ax.set_ylabel("Latency (ms) - log scale")
ax.set_xlabel("Time Since Start (ms)")
ax.grid(axis='y', which='major')
ax.axhline(y=latency_p99, c="blue", linewidth=0.5)

print(latency_p99)
df.describe()









    



431.33213661






    Out[31]:






  
    
      
      start_time
      latency
    
  
  
    
      count
      49300.000000
      49300.000000
    
    
      mean
      116865.604348
      89.610937
    
    
      std
      56082.534270
      287.105839
    
    
      min
      0.000000
      23.195549
    
    
      25%
      66918.826660
      35.336152
    
    
      50%
      126402.573975
      46.678953
    
    
      75%
      161944.818115
      68.602758
    
    
      max
      207092.906494
      6107.644440



In [ ]:

	start_time	latency
count	88727.000000	88727.000000
mean	159.140746	2269.057745
std	91.421960	5282.308758
min	0.000000	24.848007
25%	78.883592	47.879855
50%	158.400362	75.915225
75%	239.996590	248.011644
max	298.082556	27774.533369

	start_time	latency
count	89830.000000	89830.000000
mean	257.775392	95.478344
std	140.701009	273.348483
min	0.000000	25.785787
25%	132.372227	42.413666
50%	268.785404	60.342734
75%	391.215763	90.700581
max	460.926421	7066.640025

	start_time	latency
count	49300.000000	49300.000000
mean	116865.604348	89.610937
std	56082.534270	287.105839
min	0.000000	23.195549
25%	66918.826660	35.336152
50%	126402.573975	46.678953
75%	161944.818115	68.602758
max	207092.906494	6107.644440