In [65]:
%pylab inline
In [66]:
from matplotlib import cycler, rc

# Global Matplotlib styling applied to every figure created after this cell.
fig_font = {
    'family': 'sans-serif',
    'sans-serif': ['Helvetica'],
    'serif': ['Helvetica'],
    'size': 14,
}
rc('font', **fig_font)
rc('legend', fontsize=14, handletextpad=0.5)
rc('text', usetex=True)  # text is rendered through LaTeX
rc('figure', figsize=(3.33, 2.22))
# rc('figure.subplot', left=0.10, top=0.90, bottom=0.12, right=0.95)
# The line colours are configured through 'axes.prop_cycle' (a Cycler
# object); the older 'axes.color_cycle' key is not accepted by current
# Matplotlib releases.
rc('axes', linewidth=0.5,
   prop_cycle=cycler('color', ['#496ee2', '#8e053b', 'm', '#ef9708', 'g', 'c']))
rc('lines', linewidth=1)
def fig_to_file(fig, filename, ext):
    """Save ``fig`` as ``graphs/<filename>.<ext>`` with a tight bounding box.

    Parameters
    ----------
    fig : object
        Anything exposing ``savefig(path, format=..., bbox_inches=...)``.
    filename : str
        Base name of the output file, without extension.
    ext : str
        File extension; it is also passed to ``savefig`` as the format.

    The ``graphs`` directory is created if it does not exist yet, so the
    first save in a fresh checkout does not fail.
    """
    import os

    os.makedirs("graphs", exist_ok=True)
    fig.savefig("graphs/%s.%s" % (filename, ext), format=ext, bbox_inches='tight')
In [67]:
# Two separate dictionaries: one for latency samples, one for their timestamps.
latency, time = {}, {}
# Number of servers; loops in this notebook iterate ids 1..servers inclusive.
servers = 5
def time_to_int(t_str):
    """Convert a colon-separated ``H:M:S`` string into a total number of seconds.

    Only the first three fields are used; each one is parsed with ``int``.
    """
    fields = [int(part) for part in t_str.split(':')]
    hours, minutes, seconds = fields[0], fields[1], fields[2]
    return hours * 3600 + minutes * 60 + seconds
# Give every (src, dst) pair its own empty latency list and timestamp list.
for src in range(1, servers + 1):
    latency[src], time[src] = {}, {}
    for dst in range(1, servers + 1):
        latency[src][dst], time[src][dst] = [], []
# Measurement window, as seconds of the day. finish_time is smaller than
# start_time, i.e. the window runs past midnight into the next day.
start_time = time_to_int('19:00:00')
finish_time = time_to_int('08:50:00')
# Last clock second of a day. It has to be a real time of day because
# diff_time() uses it to measure the distance from start_time to midnight.
rollover = time_to_int('23:59:59')
print(start_time)
print(finish_time)
print(rollover)


def valid_time(time_pt):
    """Return True when ``time_pt`` (seconds of the day) lies in the window.

    A point is valid either on the first day (``start_time`` up to
    ``rollover``) or on the following day (0 up to, but excluding,
    ``finish_time``).
    """
    on_first_day = start_time <= time_pt <= rollover
    on_next_day = 0 <= time_pt < finish_time
    return on_first_day or on_next_day


def diff_time(time_pt):
    """Return the seconds elapsed from ``start_time`` to ``time_pt``.

    Points at or after ``start_time`` are treated as same-day; anything
    earlier is treated as belonging to the following day.
    """
    if start_time <= time_pt <= rollover:
        # same day
        return time_pt - start_time
    # next day: seconds from start_time to midnight, then seconds since
    # midnight. The +1 is the step from 23:59:59 to 00:00:00.
    return (rollover - start_time + 1) + time_pt


min_time = 0
max_time = diff_time(finish_time)
print(max_time)
In [68]:
def int_to_time(t_int):
    """Format a number of seconds as a zero-padded ``HH:MM:SS`` string.

    Parameters
    ----------
    t_int : int
        Number of seconds (non-negative).

    Returns
    -------
    str
        Hours, minutes and seconds joined by ':'; every field is padded to
        at least two digits so that e.g. 7200 becomes '02:00:00'.
    """
    hours, remainder = divmod(t_int, 3600)
    minutes, seconds = divmod(remainder, 60)
    return "%02d:%02d:%02d" % (hours, minutes, seconds)
In [69]:
import csv

# Empty the existing per-pair lists in place so that running this cell again
# reloads the file instead of appending a second copy of every sample.
for store in (latency, time):
    for per_dst in store.values():
        for samples in per_dst.values():
            samples.clear()

with open('results/latency-results.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line
            continue
        # column 0 holds the clock time of the measurement
        time_pt = time_to_int(row[0])
        if not valid_time(time_pt):
            continue
        # columns 1 and 2 hold the source and destination ids,
        # column 4 the latency value
        src = int(row[1])
        dst = int(row[2])
        latency[src][dst].append(float(row[4]))
        time[src][dst].append(diff_time(time_pt))
In [92]:
# averages for each set
import numpy as np

# Disabled draft that would print the same table as LaTeX:
# print("\\begin{tabular}[| c c c c c c c |]")
# print("src & dst & min % 25th % 50th % 75th & max \\\\")
# sti = " % ".join(map(str,res))+" \\\\"
# print("\end{tabular}")

# One comma-separated line per (src, dst) pair, in this column order:
# src, dst, mean, std, min, 25th, 50th, 75th percentile, max.
# Mean, std and the 25th percentile are rounded to one decimal; the other
# values are printed unrounded.
for src in range(1, servers + 1):
    for dst in range(1, servers + 1):
        data = latency[src][dst]
        res = [
            src,
            dst,
            "{:.1f}".format(np.mean(data)),
            "{:.1f}".format(np.std(data)),
            min(data),
            "{:.1f}".format(np.percentile(data, 25)),
            np.percentile(data, 50),
            np.percentile(data, 75),
            max(data),
        ]
        sti = ", ".join(str(field) for field in res)
        print(sti)
# 2 to 5 only
# Upper-tail percentiles (80/90/95/98/99) for every pair that does not
# involve server 1, one comma-separated line per pair, one decimal each.
for src in range(2, servers + 1):
    for dst in range(2, servers + 1):
        data = latency[src][dst]
        res = [src, dst] + [
            "{:.1f}".format(np.percentile(data, pct))
            for pct in (80, 90, 95, 98, 99)
        ]
        sti = ", ".join(str(field) for field in res)
        print(sti)
In [96]:
import matplotlib.pyplot as plt

# One figure per source server: latency to each destination plotted against
# the time elapsed since the start of the window. Each figure is handed to
# fig_to_file under the name 'fig4.<src>'.
for src in range(1, servers + 1):
    fig = plt.figure(figsize=[12, 4])
    axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
    axes.set(
        xlabel='Time of measurement (relative to start time)',
        ylabel='Ping latency (ms)',
        title='Ping latency between machines in Azure',
    )
    # one marker style per destination, looked up by dst - 1
    dots = ["g.", "b.", "k.", "y.", "r."]
    for dst in range(1, servers + 1):
        axes.plot(time[src][dst], latency[src][dst], dots[dst - 1])
    axes.set_ylim([0, 12])
    # a tick every two hours, plus one at the very end of the window
    x_marked = list(range(min_time, max_time, 2 * 3600)) + [max_time]
    axes.set_xticks(x_marked)
    x_labels = list(map(int_to_time, x_marked))
    axes.set_xticklabels(x_labels)
    axes.set_xlim([min_time, max_time])
    axes.legend(range(1, servers + 1), loc=1, frameon=True)
    fig_to_file(fig, 'fig4.{}'.format(src), 'png')
In [72]:
# total CDF
# Pool the samples of every (src, dst) pair into one ascending list.
latency_all = []
for src in range(1, servers + 1):
    for dst in range(1, servers + 1):
        latency_all.extend(latency[src][dst])
latency_all.sort()

# Empirical CDF in percent: the k-th smallest sample maps to k * 100 / size.
size = len(latency_all)
tcdf_y = [rank * 100.0 / size for rank in range(1, size + 1)]

fig = plt.figure(figsize=[12, 4])
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
axes.set_xlabel('Ping latency (ms)')
axes.set_ylabel('Cumulative Probability')
axes.set_title('Cumulative distribution function of RTTs in Azure')
axes.set_xlim(0, 12)
axes.set_ylim([0, 100])
axes.plot(latency_all, tcdf_y)
fig_to_file(fig, 'fig1', 'png')
# Summary of the pooled samples as (label, value) pairs. Everything except
# the count and the minimum is rendered as a one-decimal string.
basic_data = [
    ("number of measurements", len(latency_all)),
    ("min RTT", min(latency_all)),
    ("max RTT", "{:.1f}".format(max(latency_all))),
    ("mean", "{:.1f}".format(np.mean(latency_all))),
    ("std", "{:.1f}".format(np.std(latency_all))),
]
# Same layout for a fixed set of percentiles.
basic_data2 = [
    ("%dth percentile" % pct, "{:.1f}".format(np.percentile(latency_all, pct)))
    for pct in (25, 50, 75, 90, 95, 99)
]
print(basic_data)
print(basic_data2)
# latency without machine 1
# Same pooled-CDF data as for all machines, restricted to pairs whose source
# and destination ids both start at 2.
latency_m1 = []
for src in range(2, servers + 1):
    for dst in range(2, servers + 1):
        latency_m1 += latency[src][dst]
latency_m1.sort()

size = len(latency_m1)
m1cdf_y = []
for y in range(1, size + 1):
    m1cdf_y.append(100.0 * y / size)
# axes.plot(latency_m1, m1cdf_y)
In [95]:
# CDF's
# Per-pair empirical CDFs: cdf_x[src][dst] holds the sorted samples and
# cdf_y[src][dst] the matching cumulative percentages.
cdf_x = {}
cdf_y = {}
for src in range(1, servers + 1):
    cdf_x[src] = {}
    cdf_y[src] = {}
    for dst in range(1, servers + 1):
        # np.sort returns a sorted copy; latency[src][dst] keeps its order.
        # Calling it through np avoids relying on a bare `sort` name being
        # present in the interactive namespace.
        cdf_x[src][dst] = np.sort(latency[src][dst])
        size = len(cdf_x[src][dst])
        cdf_y[src][dst] = [rank * 100.0 / size for rank in range(1, size + 1)]
# One CDF figure per source server with one curve per destination; each
# figure is handed to fig_to_file under the name 'fig3.<src>'.
for src in range(1, servers + 1):
    fig = plt.figure(figsize=[12, 4])
    axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
    axes.set_xlabel('Ping latency (ms)')
    axes.set_ylabel('Cumulative Probability')
    axes.set_title('Cumulative distribution function of RTTs from VM ' + str(src))
    axes.set_xlim([0, 8])
    axes.set_ylim([0, 100])
    # No format string is passed, so the curves take their colours from the
    # axes' colour cycle.
    for dst in range(1, servers + 1):
        axes.plot(cdf_x[src][dst], cdf_y[src][dst])
    axes.legend(range(1, servers + 1), loc=1, frameon=True)
    fig_to_file(fig, 'fig3.' + str(src), 'png')
In [98]:
# moving average
def mov_average(interval, window_s):
    """Smooth ``interval`` with an equal-weight window.

    The kernel has ``int(window_s)`` entries, each equal to
    ``1 / float(window_s)``, and is applied with ``np.convolve`` in 'same'
    mode, so values near both ends are computed from a partially filled
    window.
    """
    kernel = np.ones(int(window_s))
    kernel = kernel / float(window_s)
    return np.convolve(interval, kernel, mode='same')
# Smoothed (window of 5 samples) and raw latency for the pair 2 -> 3.
mov_ag = mov_average(latency[2][3], 5)

fig = plt.figure(figsize=[12, 4])
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
axes.set(
    xlabel='Time (sec)',
    ylabel='Ping latency (ms)',
    title='RTT between VMs 2 and 3 over time',
)
# a tick every two hours, plus one at the very end of the window
x_marked = list(range(min_time, max_time, 2 * 3600)) + [max_time]
axes.set_xticks(x_marked)
x_labels = list(map(int_to_time, x_marked))
axes.set_xticklabels(x_labels)
axes.set_xlim([min_time, max_time])
axes.plot(time[2][3], mov_ag, 'r-')
axes.plot(time[2][3], latency[2][3], 'k.')
Out[98]:
In [75]:
# 15ms max?
def too_big(v):
    """Return True when ``v`` is strictly greater than 15."""
    threshold = 15
    return v > threshold
# Count the samples flagged by too_big (n) next to the number of all
# samples (total), summed over every (src, dst) pair.
n = 0
total = 0
for src in range(1, servers + 1):
    for dst in range(1, servers + 1):
        large_latency = [v for v in latency[src][dst] if too_big(v)]
        n += len(large_latency)
        total += len(latency[src][dst])
print(n, total)
In [82]:
# histo
fig = plt.figure(figsize=[12, 4])
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
axes.set_xlabel('Ping latency (ms)')
axes.set_ylabel('Probability')
axes.set_title('Frequency distribution of RTT')
# density=True scales the bars so that the histogram integrates to 1; it is
# the supported spelling of the former `normed` keyword.
n, bins, patches = axes.hist(latency_all, 101, range=[0, 10], facecolor='green',
                             density=True, alpha=0.2)
#n, bins, patches = plt.hist(latency_m1, 101,range=[0,10],facecolor='blue', density=True, alpha=0.2)
# Reference normal curve with fixed parameters (literals, not fitted in this cell).
mu = 3.6
sigma = 1
# Normal probability density evaluated at the bin edges, written out with
# numpy so that no matplotlib.mlab helper is required.
y = np.exp(-0.5 * ((bins - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
axes.plot(bins, y, 'r--')
fig_to_file(fig, 'fig2', 'png')
print(mu, sigma)
In [76]:
In [76]:
In [76]: