In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
#from scipy import stats as st
import time
import glob
import ntpath

In [14]:
sys.path.append('/work/eng/eliavb/all_distance_sketch/src/proto/')

In [15]:
import cover_pb2
import ranking_pb2

In [16]:
def get_node_ranks_from_gpb(ranking):
    node_ranks = []
    for node_rank in ranking.node_ranks:
        node_ranks.append((node_rank.node_id, node_rank.node_rank))
    return node_ranks

def compare_ranks_node_id(x, y):
    node_id_x, node_rank_x = x
    node_id_y, node_rank_y = y
    if (node_rank_x < node_rank_y):
        return -1
    if (node_rank_x > node_rank_y):
        return 1
    return -1

def path_leaf(path):
    head, tail = ntpath.split(path)
    return tail or ntpath.basename(head)

In [17]:
base_dir = "/work/eng/eliavb/experiments/youtube/distribution/"

In [18]:
def get_cum_dist(tup):
    id_, base_dir = tup
    f_name_d = base_dir + id_ + "_d"
    f_name_r = base_dir + id_ + "_r"
    data_d = open(f_name_d, "rb").read()
    data_r = open(f_name_r, "rb").read()
    ranking_d = ranking_pb2.NodeRanksGpb()
    ranking_r = ranking_pb2.NodeRanksGpb()
    ranking_d.ParseFromString(data_d)
    ranking_r.ParseFromString(data_r)
    node_rank_d = sorted(get_node_ranks_from_gpb(ranking_d), cmp = compare_ranks_node_id)
    node_rank_r = sorted(get_node_ranks_from_gpb(ranking_r), cmp = compare_ranks_node_id)
    ids_d = [r[1] for r in node_rank_d]
    ids_r = [r[1] for r in node_rank_r]
    return ids_d, ids_r

In [23]:
reverse_signals = {}
distance_signals = {}
nodes = ["480", "368749", "3394", "2711"]
for n_ in nodes:
    b = int(time.time()) * 1000
    distance_sig, reverse_rank_sig = get_cum_dist((n_, base_dir))
    reverse_signals[n_] = reverse_rank_sig
    distance_signals[n_] = distance_sig
    a = int(time.time()) * 1000
    print a - b


43000
42000
46000
43000

In [55]:
max_x = 0
max_y = 0
for node in reverse_signals:
    max_y = max(max(reverse_signals[node]), max_y)
    max_x = max(len(reverse_signals[node]), max_x)
    plt.plot(reverse_signals[node], label=node)
m_ = max(max_x, max_y)
plt.legend(loc="upper left")
plt.plot(range(m_))
plt.xlim(0, m_)
plt.ylim(0, m_)
fig = plt.gcf()
plt.xlabel("number of nodes", fontsize="large")
plt.ylabel("reverse rank", fontsize="large")
fig.set_size_inches(20.5, 10.5)



In [56]:
max_x = 0
max_y = 0
for node in reverse_signals:
    max_y = max(max(reverse_signals[node]), max_y)
    max_x = max(len(reverse_signals[node]), max_x)
    plt.plot(reverse_signals[node], label=node)
m_ = max(max_x, max_y)
plt.legend(loc="upper left")
plt.plot(range(m_))
plt.yscale('log')
plt.xscale('log')
plt.xlim(0, m_)
plt.ylim(0, m_)
fig = plt.gcf()
plt.xlabel("number of nodes", fontsize="large")
plt.ylabel("reverse rank", fontsize="large")
fig.set_size_inches(20.5, 10.5)



In [57]:
max_x = 0
max_y = 0
for node in distance_signals:
    max_y = max(max(distance_signals[node]), max_y)
    max_x = max(len(distance_signals[node]), max_x)
    plt.plot(distance_signals[node], label=node)
#m_ = max(max_x, max_y)
plt.legend(loc="upper left")
#plt.xlim(0, max_x+1)
#plt.ylim(0, max_y+1)
plt.xlabel("number of nodes", fontsize="large")
plt.ylabel("distance", fontsize="large")
fig = plt.gcf()
fig.set_size_inches(20.5, 10.5)



In [ ]: