notebook.community

Edit and run



In [65]:

    
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib






    



WARNING: pylab import has clobbered these variables: ['rc', 'size', 'axes']
`%matplotlib` prevents importing * from pylab and numpy



In [66]:

    
from matplotlib import rc

# Global matplotlib defaults for every figure produced by this notebook:
# Helvetica sans-serif text at 14 pt, LaTeX text rendering, thin axes and a
# custom six-colour line cycle.
#
# The colour cycle is set through `axes.prop_cycle`: the older
# `axes.color_cycle` rcParam was deprecated in matplotlib 1.5 and later
# removed, so passing `color_cycle=` to rc() fails on current releases.
from matplotlib import cycler

fig_font = {'family': 'sans-serif', 'sans-serif': ['Helvetica'],
            'serif': ['Helvetica'], 'size': 14}
rc('font', **fig_font)
rc('legend', fontsize=14, handletextpad=0.5)
# NOTE(review): usetex=True presumably requires a working LaTeX install on
# the machine that runs the notebook -- confirm for new environments.
rc('text', usetex=True)
# Default figure size; the plotting cells below pass their own figsize.
rc('figure', figsize=(3.33, 2.22))
#  rc('figure.subplot', left=0.10, top=0.90, bottom=0.12, right=0.95)
rc('axes', linewidth=0.5,
   prop_cycle=cycler(color=['#496ee2', '#8e053b', 'm', '#ef9708', 'g', 'c']))
rc('lines', linewidth=1)

def fig_to_file(fig, filename, ext):
    """Save ``fig`` as ``graphs/<filename>.<ext>`` with a tight bounding box.

    Parameters
    ----------
    fig : object exposing ``savefig`` (a matplotlib Figure in this notebook)
    filename : str
        Base name of the output file, without extension.
    ext : str
        File extension; also passed to ``savefig`` as the output ``format``.
    """
    import os

    # The output directory may not exist on a fresh checkout; create it up
    # front so the save does not fail on a missing folder.
    os.makedirs("graphs", exist_ok=True)
    fig.savefig("graphs/%s.%s" % (filename, ext), format=ext, bbox_inches='tight')



In [67]:

    
# Number of machines in the experiment; they are addressed as 1..servers.
servers = 5

# Two parallel nested tables, both indexed as table[src][dst]:
#   latency -> latency samples parsed from the results file
#   time    -> offset of each sample from the start of the run, in seconds
latency = dict()
time = dict()

def time_to_int(t_str):
    """Convert a colon-separated ``hours:minutes:seconds`` string to seconds.

    Every field is parsed with ``int``; only the first three fields are
    used.  Raises ``ValueError`` for a non-integer field and ``IndexError``
    when fewer than three fields are present.
    """
    fields = [int(part) for part in t_str.split(':')]
    hours, minutes, seconds = fields[0], fields[1], fields[2]
    return hours * 3600 + minutes * 60 + seconds

# Give every ordered (src, dst) pair its own empty sample list in both tables.
for src in range(1, servers+1):
    latency[src] = {}
    time[src] = {}
    for dst in range(1, servers+1):
        latency[src][dst] = []
        time[src][dst] = []

# Measurement window, as seconds since midnight.  finish_time is smaller than
# start_time, i.e. the run crosses midnight and ends on the following day.
start_time = time_to_int('19:00:00')
finish_time = time_to_int('08:50:00')
# Length of a day in seconds (86400).  diff_time() adds (rollover - start_time)
# to next-day timestamps, so this must be exactly 24 h: any larger value
# shifts every post-midnight sample to the right and opens an artificial gap
# in the time axis, and makes max_time longer than the real 13 h 50 min run.
rollover = time_to_int('24:00:00')

print(start_time)
print(finish_time)
print(rollover)

def valid_time(time_pt):
    """Return True when ``time_pt`` lies inside the measurement window.

    A timestamp is accepted when it falls in ``[start_time, rollover]`` or
    in ``[0, finish_time)``; the bounds are module-level globals.
    """
    if start_time <= time_pt <= rollover:
        return True
    return 0 <= time_pt < finish_time
    
def diff_time(time_pt):
    """Return the offset of ``time_pt`` from ``start_time``, in seconds.

    Timestamps in ``[start_time, rollover]`` are treated as same-day and
    yield ``time_pt - start_time``.  Anything else is treated as belonging
    to the next day and yields ``(rollover - start_time) + time_pt``.
    ``start_time`` and ``rollover`` are module-level globals.
    """
    same_day = start_time <= time_pt <= rollover
    if not same_day:
        # next day: time left until rollover plus time elapsed after it
        return (rollover - start_time) + time_pt
    return time_pt - start_time
        
# Extent of the relative time axis used by the plots: from the first
# possible offset (0) to the offset that corresponds to finish_time.
min_time, max_time = 0, diff_time(finish_time)

print(max_time)



In [68]:

    
def int_to_time(t_int):
    """Format a number of seconds as a zero-padded ``HH:MM:SS`` string.

    This is the inverse of ``time_to_int`` and uses the same zero-padded
    layout as the timestamps passed to it (e.g. ``'08:50:00'``), so axis
    tick labels read ``02:00:00`` rather than ``2:0:0``.  Hours are not
    wrapped at 24.
    """
    hours, remainder = divmod(t_int, 3600)
    minutes, seconds = divmod(remainder, 60)
    return "%02d:%02d:%02d" % (hours, minutes, seconds)



In [69]:

    
import csv
# Load the ping log into the latency/time tables.
#
# Columns read from each row: [0] timestamp "H:M:S", [1] source id,
# [2] destination id, [4] latency value.  Column 3 is not used here.

# Empty the per-pair lists in place first so that re-running this cell does
# not append the whole file a second time and silently double every sample.
for table in (latency, time):
    for per_dst in table.values():
        for samples in per_dst.values():
            del samples[:]

with open('results/latency-results.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # indexing into it would raise IndexError.
            continue
        time_pt = time_to_int(row[0])
        if not valid_time(time_pt):
            # sample taken outside the measurement window
            continue
        src = int(row[1])
        dst = int(row[2])
        latency[src][dst].append(float(row[4]))
        time[src][dst].append(diff_time(time_pt))



In [92]:

    
#averages for each set
import numpy as np

# print("\\begin{tabular}[| c c c c c c c |]") 
# print("src & dst & min % 25th % 50th % 75th & max \\\\")
# sti = " % ".join(map(str,res))+" \\\\"
# print("\end{tabular}")

# One comma-separated line per (src, dst) pair with the columns:
#   src, dst, mean, std, min, 25th, 50th, 75th, max
# Every statistic is printed with one decimal so the columns are uniform
# (previously only mean, std and the 25th percentile were rounded).
for src in range(1, servers+1):
    for dst in range(1, servers+1):
        data = latency[src][dst]
        if not data:
            # min()/max() raise ValueError on an empty list; report the gap
            # instead of aborting the whole table.
            print(", ".join(map(str, [src, dst, "no data"])))
            continue
        stats = [np.mean(data),
                 np.std(data),
                 min(data),
                 np.percentile(data, 25),
                 np.percentile(data, 50),
                 np.percentile(data, 75),
                 max(data)]
        res = [src, dst] + [format(value, ".1f") for value in stats]
        sti = ", ".join(map(str, res))
        print(sti)


# Tail percentiles (80/90/95/98/99) for the pairs among machines 2..servers
# only; machine 1 is left out of this table.
tail_percentiles = (80, 90, 95, 98, 99)
for src in range(2, servers+1):
    for dst in range(2, servers+1):
        data = latency[src][dst]
        res = [src, dst]
        res.extend(format(np.percentile(data, pct), ".1f")
                   for pct in tail_percentiles)
        sti = ", ".join(str(item) for item in res)
        print(sti)









    



1, 1, 51.0, 65.1, 1.7, 4.3, 65.25, 71.6, 1004.0
1, 2, 12.1, 89.7, 1.7, 2.9, 3.6, 4.3, 1004.7
1, 3, 9.5, 66.5, 1.6, 3.4, 4.1, 4.8, 1004.7
1, 4, 7.3, 52.3, 1.3, 3.0, 3.5, 4.0, 1004.1
1, 5, 11.8, 84.9, 1.6, 3.0, 3.6, 4.4, 1242.2
2, 1, 6.2, 12.7, 1.6, 3.0, 3.7, 4.425, 88.4
2, 2, 16.7, 65.8, 1.1, 3.0, 3.7, 4.6, 1002.9
2, 3, 15.0, 95.6, 1.4, 3.0, 3.7, 4.4, 1004.9
2, 4, 9.6, 62.0, 1.4, 2.9, 3.5, 4.3, 1003.6
2, 5, 15.3, 80.0, 1.3, 2.9, 3.5, 4.3, 1002.5
3, 1, 48.5, 44.9, 2.0, 4.5, 64.2, 70.6, 754.9
3, 2, 7.0, 48.9, 1.4, 2.9, 3.6, 4.4, 1005.0
3, 3, 3.9, 4.2, 1.6, 3.1, 3.8, 4.6, 124.9
3, 4, 5.8, 32.0, 1.3, 2.9, 3.4, 4.0, 628.6
3, 5, 6.7, 52.5, 1.5, 2.9, 3.8, 4.4, 1229.3
4, 1, 48.8, 48.3, 1.9, 4.7, 62.1, 67.3, 1003.0
4, 2, 7.7, 59.5, 1.3, 2.8, 3.6, 4.4, 1003.8
4, 3, 9.0, 60.1, 1.6, 3.3, 4.1, 4.8, 1004.0
4, 4, 5.2, 29.3, 1.2, 2.7, 3.3, 3.8, 754.6
4, 5, 15.6, 104.7, 1.5, 2.8, 3.6, 4.3, 1035.0
5, 1, 50.9, 69.8, 2.0, 4.5, 63.8, 69.6, 1005.3
5, 2, 9.4, 70.9, 1.3, 2.9, 3.7, 4.3, 1003.8
5, 3, 8.6, 57.6, 2.0, 3.3, 4.1, 4.8, 1003.7
5, 4, 6.5, 47.7, 1.3, 2.7, 3.4, 4.0, 1003.0
5, 5, 6.8, 49.2, 1.2, 2.5, 3.2, 4.1, 1023.0
2, 2, 5.1, 41.8, 47.2, 124.6, 150.7
2, 3, 4.6, 5.0, 5.5, 34.5, 429.5
2, 4, 4.5, 4.9, 5.5, 47.4, 78.5
2, 5, 4.5, 5.4, 60.4, 91.8, 207.8
3, 2, 4.5, 4.8, 5.0, 5.5, 8.9
3, 3, 4.7, 5.0, 5.1, 5.4, 5.8
3, 4, 4.2, 4.5, 4.8, 5.2, 6.1
3, 5, 4.6, 4.9, 5.2, 5.9, 6.6
4, 2, 4.5, 4.8, 5.0, 5.2, 5.8
4, 3, 5.0, 5.2, 5.4, 5.7, 6.4
4, 4, 4.0, 4.3, 4.7, 5.1, 5.8
4, 5, 4.5, 4.8, 5.2, 6.1, 762.9
5, 2, 4.5, 4.8, 4.9, 5.1, 6.1
5, 3, 4.9, 5.2, 5.4, 5.6, 6.8
5, 4, 4.1, 4.5, 4.9, 5.3, 6.3
5, 5, 4.3, 4.6, 4.8, 5.8, 7.2



In [96]:

    
import matplotlib.pyplot as plt

# One scatter figure per source machine: latency towards every destination
# against time since the start of the run, saved as graphs/fig4.<src>.png.

# Marker style per destination (index dst-1).
dots = ["g.", "b.", "k.", "y.", "r."]

# A tick every two hours plus one at the very end of the run; identical for
# every figure, so it is computed once.
x_marked = list(range(min_time, max_time, (2*3600))) + [max_time]
x_labels = list(map(int_to_time, x_marked))

for src in range(1, servers+1):
    fig = plt.figure(figsize=[12, 4])
    axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])

    axes.set_xlabel('Time of measurement (relative to start time)')
    axes.set_ylabel('Ping latency (ms)')
    axes.set_title('Ping latency between machines in Azure')

    for dst in range(1, servers+1):
        axes.plot(time[src][dst], latency[src][dst], dots[dst-1])

    # Fixed y range of 0-12; samples above it are not shown.
    axes.set_ylim([0, 12])

    axes.set_xticks(x_marked)
    axes.set_xticklabels(x_labels)
    axes.set_xlim([min_time, max_time])

    # Legend entries are the destination ids, in plotting order.
    axes.legend(range(1, servers+1), loc=1, frameon=True)
    fig_to_file(fig, 'fig4.{}'.format(src), 'png')



In [72]:

    
# total CDF

# Pool the samples of every (src, dst) pair and sort them: the sorted values
# are the x coordinates of the empirical CDF.
latency_all = []
for src in range(1, servers+1):
    for dst in range(1, servers+1):
        latency_all += latency[src][dst]
latency_all.sort()

# y coordinate of the k-th smallest sample is k/size, expressed in percent.
size = len(latency_all)
tcdf_y = [rank*100.0/size for rank in range(1, size+1)]
    
# Empirical CDF of all pooled samples, saved as graphs/fig1.png.
fig = plt.figure(figsize=[12,4])
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])

axes.set_xlabel('Ping latency (ms)')
# Label spelling corrected (was 'Cumlative Propability').
axes.set_ylabel('Cumulative Probability')
axes.set_title('Cumulative distribution function of RTTs in Azure')

# Only the 0-12 range of the x axis is shown; the y axis is in percent.
axes.set_xlim(0,12)
axes.set_ylim([0,100])
axes.plot(latency_all, tcdf_y)
fig_to_file(fig,'fig1','png')


# Headline statistics over all pooled samples, as (label, value) pairs.
# The sample count and the minimum are kept as raw numbers; the rest are
# strings rounded to one decimal.
basic_data = [
    ("number of measurements", len(latency_all)),
    ("min RTT", min(latency_all)),
    ("max RTT", '{:.1f}'.format(max(latency_all))),
    ("mean", '{:.1f}'.format(np.mean(latency_all))),
    ("std", '{:.1f}'.format(np.std(latency_all))),
]

# Selected percentiles of the pooled samples, same (label, value) layout.
basic_data2 = [
    ("%dth percentile" % pct, '{:.1f}'.format(np.percentile(latency_all, pct)))
    for pct in (25, 50, 75, 90, 95, 99)
]


print(basic_data)
print(basic_data2)



# latency without machine 1
# Same pooling as for the overall CDF, restricted to pairs where both ends
# are in 2..servers (machine 1 excluded as source and as destination).
latency_m1 = []
for src in range(2, servers+1):
    for dst in range(2, servers+1):
        latency_m1 += latency[src][dst]
latency_m1.sort()

# Cumulative percentage for each sorted sample.
size = len(latency_m1)
m1cdf_y = [rank*100.0/size for rank in range(1, size+1)]
# axes.plot(latency_m1, m1cdf_y)









    



[('number of measurements', 22332), ('min RTT', 1.1), ('max RTT', '1242.2'), ('mean', '15.9'), ('std', '66.5')]
[('25th percentile', '3.0'), ('50th percentile', '3.8'), ('75th percentile', '4.7'), ('90th percentile', '61.4'), ('95th percentile', '69.7'), ('99th percentile', '87.3')]



In [95]:

    
# CDF's

# Empirical CDF coordinates for every (src, dst) pair:
#   cdf_x[src][dst] -> samples sorted ascending
#   cdf_y[src][dst] -> cumulative percentage of each sorted sample
cdf_x = {}
cdf_y = {}

for src in range(1, servers+1):
    cdf_x[src] = {}
    cdf_y[src] = {}
    for dst in range(1, servers+1):
        # np.sort is spelled out explicitly: a bare `sort` only resolves
        # when the namespace has been star-imported via %pylab.
        cdf_x[src][dst] = np.sort(latency[src][dst])
        size = len(cdf_x[src][dst])
        cdf_y[src][dst] = [y*100.0/size for y in range(1, size+1)]
        
# One figure per source machine with the CDF towards every destination,
# saved as graphs/fig3.<src>.png.  Line colours come from the default cycle.
for src in range(1, servers+1):
    fig = plt.figure(figsize=[12,4])
    axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])

    axes.set_xlabel('Ping latency (ms)')
    # Label spelling corrected (was 'Cumlative Propability').
    axes.set_ylabel('Cumulative Probability')
    axes.set_title('Cumulative distribution function of RTTs from VM '+str(src))

    # Only the 0-8 range of the x axis is shown; the y axis is in percent.
    axes.set_xlim([0,8])
    axes.set_ylim([0,100])
    for dst in range(1, servers+1):
        axes.plot(cdf_x[src][dst], cdf_y[src][dst])

    # Legend entries are the destination ids, in plotting order.
    axes.legend(range(1,servers+1),loc=1,frameon=True)

    fig_to_file(fig,'fig3.'+str(src),'png')



In [98]:

    
# moving average

def mov_average(interval, window_s):
    """Smooth ``interval`` with a flat moving-average window.

    The window has ``int(window_s)`` taps, each weighted ``1/window_s``, and
    is applied with ``np.convolve(..., 'same')``, so the result has as many
    points as the longer of the two inputs.  Values near both ends are
    computed from fewer real samples (boundary effect of the convolution).
    """
    taps = int(window_s)
    kernel = np.ones(taps) / float(window_s)
    smoothed = np.convolve(interval, kernel, mode='same')
    return smoothed

# RTT from VM 2 to VM 3 over the run: raw samples as black dots with a
# 5-sample moving average drawn on top in red.
mov_ag = mov_average(latency[2][3],5)

fig = plt.figure(figsize=[12,4])
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])

axes.set_xlabel('Time (sec)')
axes.set_ylabel('Ping latency (ms)')
axes.set_title('RTT between VMs 2 and 3 over time')

# A tick every two hours plus one at the very end of the run.
x_marked = list(range(min_time, max_time, (2*3600)))
x_marked.append(max_time)
axes.set_xticks(x_marked)
x_labels = [int_to_time(x) for x in x_marked]
axes.set_xticklabels(x_labels)
axes.set_xlim([min_time,max_time])

axes.plot(time[2][3], mov_ag,'r-')
# Trailing ';' keeps the list of Line2D objects returned by plot() from
# being echoed as the cell's Out[...] value.
axes.plot(time[2][3], latency[2][3],'k.');









    Out[98]:





[<matplotlib.lines.Line2D at 0x10b0cba58>]



In [75]:

    
# 15ms max?

def too_big(v, threshold=15):
    """Return True when ``v`` is strictly greater than ``threshold``.

    ``threshold`` defaults to 15, the cut-off used by this notebook's
    outlier count, so existing one-argument calls (including
    ``filter(too_big, ...)``) behave exactly as before.
    """
    return v > threshold

# Count how many samples, over all (src, dst) pairs, are flagged by
# too_big(), and how many samples there are in total.
n = 0
total = 0
for src in range(1, servers+1):
    for dst in range(1, servers+1):
        samples = latency[src][dst]
        large_latency = [value for value in samples if too_big(value)]
        n += len(large_latency)
        total += len(samples)
print(n, total)



In [82]:

    
# histo

# Normalised histogram of all pooled samples in the 0-10 range (101 bins)
# with a hand-picked normal density overlaid, saved as graphs/fig2.png.
fig = plt.figure(figsize=[12,4])
axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])

axes.set_xlabel('Ping latency (ms)')
axes.set_ylabel('Probability')
axes.set_title('Frequency distribution of RTT')

# `density=True` replaces the `normed=1` keyword, which current matplotlib
# releases no longer accept.
n, bins, patches = axes.hist(latency_all, 101, range=[0,10], facecolor='green', density=True, alpha=0.2)
#n, bins, patches = plt.hist(latency_m1, 101,range=[0,10],facecolor='blue', normed=1, alpha=0.2)

# Parameters of the reference normal curve; these are fixed by hand, not
# fitted to the data in this cell.
mu=3.6
sigma=1
# Normal pdf evaluated at the bin edges.  Written out with numpy because
# mlab.normpdf is gone from current matplotlib and `mlab` itself is only in
# scope through the %pylab star import.
y = np.exp(-0.5 * ((bins - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
axes.plot(bins, y, 'r--')

fig_to_file(fig,'fig2','png')
print(mu,sigma)



In [76]:



In [76]:



In [76]: