In [1]:
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline
We run a full etcd cluster with continuous checkpointing. There are 500 clients, each with a unique query, spread across 10 brokers. All 500 clients are started and the system is allowed to stabilize. Each client subscribes to 10% of the publishers in the system. We then begin adding publishers to the system: each broker submits a new publisher with a delay drawn uniformly from [50, 500] ms until there are 1000 total publishers in the system. We then kill the leader and allow failover to take place. Latency is measured from the time the publish message is sent until the time the broker receives the subscription diff message.
In [51]:
# Scatter plot of per-publish latency for the etcd leader-death run
# (30-300 data set), with the 95% / 99% latency quantiles overlaid.
datafile = "data/brokerLatencyManyPublishersChangeMD-long-etcd-leaderDeath-30-300.csv"
df = pd.read_csv(datafile, header=None)
df.columns = ['start_time', 'latency']
df['latency'] = df['latency'] / float(1e6)  # convert to milliseconds
df['start_time'] = df['start_time'] / float(1e9)  # convert to seconds

# Discard samples with latency of 1e5 ms or more. The .copy() gives us an
# independent frame, so the column assignment below does not operate on a
# slice of the unfiltered data (avoids pandas' SettingWithCopyWarning).
df = df[df['latency'] < 1e5].copy()

# Re-base timestamps so the earliest retained publish is at t = 0.
min_start_time = df['start_time'].min()
df['start_time'] = df['start_time'] - min_start_time

ax = df.plot(kind='scatter', x='start_time', y='latency', figsize=(15,8), s=1, c='black')
# The y axis is linear in this cell (lower limit 0), so the label must not
# claim a log scale.
ax.set_ylabel("Latency (ms)")
ax.set_xlabel("Time of Publish Message (s)")
ax.set_xlim(xmin=0)
ax.set_ylim(ymin=0)
ax.grid(axis='y', which='major')

latency_p95 = df['latency'].quantile(q=0.95)
latency_p99 = df['latency'].quantile(q=0.99)
p95_line = ax.axhline(y=latency_p95, c="blue", linewidth=0.5)
p99_line = ax.axhline(y=latency_p99, linestyle='dashed', linewidth=0.5)

# Absolute event timestamp divided by 1e9, shifted onto the plot's re-based
# time axis.
# NOTE(review): presumably this marks the leader kill / failover - confirm.
republish_switch_time = 1462437292458576882/1e9
ax.axvline(x=republish_switch_time-min_start_time, linewidth=0.5)

# Pass the handles explicitly: a labels-only legend() call pairs labels with
# artists in an order that depends on the matplotlib version and can attach
# '95% Quantile' to the scatter points instead of the line.
ax.legend([p95_line, p99_line], ['95% Quantile', '99% Quantile'])

print(latency_p99)
df.describe()
Out[51]:
In [67]:
# Compare per-publish latency with and without etcd for the leader-death run
# (50-500 data set) on a log-scale scatter plot.
datafile = "data/brokerLatencyManyPublishersChangeMD-long-etcd-leaderDeath-50-500.csv"
noetcd_datafile = "data/brokerLatencyManyPublishersChangeMD-long-noetcd-leaderDeath-50-500.csv"


def _load_latency_frame(path):
    """Read a two-column (start_time, latency) CSV and prepare it for plotting.

    Latency is divided by 1e6 (to milliseconds) and start_time by 1e9 (to
    seconds); rows with latency of 1e5 ms or more are dropped; start_time is
    then re-based so the earliest retained row is at 0.

    Returns:
        (frame, first_start): the prepared DataFrame and the minimum
        start_time (in seconds, before re-basing) of the retained rows.
    """
    frame = pd.read_csv(path, header=None)
    frame.columns = ['start_time', 'latency']
    frame['latency'] = frame['latency'] / float(1e6)  # convert to milliseconds
    frame['start_time'] = frame['start_time'] / float(1e9)  # convert to seconds
    # .copy() so the assignment below targets an independent frame rather
    # than a slice of the unfiltered data (avoids SettingWithCopyWarning).
    frame = frame[frame['latency'] < 1e5].copy()
    first_start = frame['start_time'].min()
    frame['start_time'] = frame['start_time'] - first_start
    return frame, first_start


noetcd_df, noetcd_min_start_time = _load_latency_frame(noetcd_datafile)
df, min_start_time = _load_latency_frame(datafile)

ax = df.plot(kind='scatter', x='start_time', y='latency', figsize=(15,8), s=2, c='green', edgecolors='none', label='Etcd')
ax.scatter(noetcd_df['start_time'], noetcd_df['latency'], s=2, c='red', edgecolors='none', label='No etcd')
ax.set_yscale('log')
ax.set_ylabel("Latency (ms) - log scale")
ax.set_xlabel("Time of Publish Message (s)")
ax.set_xlim(xmin=0, xmax=450)
ax.set_ylim(ymin=1)
ax.grid(axis='y', which='major')

# Quantile lines are computed from the etcd run only.
latency_p95 = df['latency'].quantile(q=0.95)
latency_p99 = df['latency'].quantile(q=0.99)
ax.axhline(y=latency_p95, c="blue", linewidth=0.5, label='95% Quantile')
ax.axhline(y=latency_p99, linestyle='dashed', linewidth=0.5, label='99% Quantile')

# Absolute event timestamp divided by 1e9, shifted onto the etcd run's
# re-based time axis.
# NOTE(review): presumably this marks the leader kill / failover - confirm.
republish_switch_time = 1462438418753251298/1e9
ax.axvline(x=republish_switch_time-min_start_time, linewidth=1)

# Every artist carries its own label, so the legend identifies both scatter
# series as well as the quantile lines regardless of matplotlib's internal
# artist ordering. markerscale enlarges the tiny s=2 markers in the legend.
ax.legend(markerscale=4)

print(latency_p99)
df.describe()
Out[67]:
In [31]:
# Log-scale scatter plot of diff latency over time for the etcd leader-death
# run, with the 99% latency quantile overlaid. Both axes are in milliseconds.
datafile = "data/brokerLatencyManyPublishers-long-etcd-leaderDeath.csv"
df = pd.read_csv(datafile, header=None)
df.columns = ['start_time', 'latency']
df['latency'] = df['latency'] / float(1e6)  # convert to milliseconds
df['start_time'] = df['start_time'] / float(1e6)  # convert to milliseconds

# Discard samples with latency of 1e6 ms or more. The .copy() gives us an
# independent frame, so the column assignment below does not operate on a
# slice of the unfiltered data (avoids pandas' SettingWithCopyWarning).
df = df[df['latency'] < 1e6].copy()

# Re-base timestamps so the earliest retained sample is at t = 0.
min_start_time = df['start_time'].min()
df['start_time'] = df['start_time'] - min_start_time

ax = df.plot(kind='scatter', x='start_time', y='latency', figsize=(15,8))
ax.set_title("Diff Latency")
ax.set_yscale('log')
ax.set_ylabel("Latency (ms) - log scale")
ax.set_xlabel("Time Since Start (ms)")
ax.grid(axis='y', which='major')

# Compute the quantile once and reuse it for both the line and the printout.
latency_p99 = df['latency'].quantile(q=0.99)
ax.axhline(y=latency_p99, c="blue", linewidth=0.5)

print(latency_p99)
df.describe()
Out[31]:
In [ ]: