In [ ]:
import pandas as pd
import networkx as nx # pip install --user networkx
In [ ]:
nodes = pd.read_pickle("cachenodes.pkl")
edges = pd.read_pickle("edges.pkl")
comp_nodes = pd.read_pickle("comp_nodes.pkl")
In [ ]:
def build_topology(nodes, edges):
topology = nx.Graph()
# add all nodes
for index, row in nodes.iterrows():
node_name = row["name"]
node_attributes = row.drop(["name"]).to_dict()
topology.add_node(node_name, **node_attributes)
# add all edges
for index, row in edges.iterrows():
node1_name = row["node1"]
node2_name = row["node2"]
edge_attributes = row.drop(["node1", "node2"]).to_dict()
topology.add_edge(node1_name, node2_name, **edge_attributes)
return topology
In [ ]:
topology = build_topology(nodes, edges)
In [ ]:
from libcrap.core import calcsave_or_load
from functools import partial
In [ ]:
pd_diskcache = partial(calcsave_or_load, load_func=pd.read_pickle, save_func=pd.to_pickle)
In [ ]:
import itertools
In [ ]:
@pd_diskcache("paths.pkl")
def find_comp_to_comp_shortest_paths(topology, comp_nodes):
paths_ugly = dict(nx.all_pairs_shortest_path(topology))
# calculates shortest paths and stores them in a dict of dicts
# build a table with all computational node pairs
# they are not duplicated
# if there is ("n48001", "n49419") then there is no ("n49419", "n48001") pair
comp_node_pairs = pd.DataFrame.from_records(
itertools.chain.from_iterable(
[(node1, node2) for node2 in comp_nodes.iloc[index:]]
for (index, node1) in comp_nodes.iteritems()
),
columns=["node1", "node2"]
)
# write shortest paths to this table
comp_node_pairs["shortest_path"] = comp_node_pairs.apply(
lambda row: paths_ugly[row.loc["node1"]][row.loc["node2"]],
axis=1
)
return comp_node_pairs
In [ ]:
# shortest paths between all computational nodes
paths = find_comp_to_comp_shortest_paths(topology, comp_nodes)
In [ ]:
def interleave(it1, it2):
"""
>>> list(interleave([1, 2, 3, 4], ["a", "b", "c"]))
[1, 'a', 2, 'b', 3, 'c', 4]
"""
return (
item for item
in itertools.chain.from_iterable(itertools.zip_longest(it1, it2))
if item is not None)
In [ ]:
def get_node_features(topology, node):
"""Returns node features as a tuple of tuples.
>>> topology = nx.Graph()
>>> topology.add_node("kek", a=1, b="lol")
>>> sorted(get_node_features(topology, "kek"), key=lambda pair: pair[0])
[('a', 1), ('b', 'lol')]
"""
return tuple(topology.node[node].items())
In [ ]:
def get_edge_features(topology, node1, node2):
"""Returns features of an edge as tuple of tuples.
>>> topology = nx.Graph()
>>> topology.add_node("a1")
>>> topology.add_node("b1")
>>> topology.add_edge("a1", "b1", foo="bar", shim="sham")
>>> get_edge_features(topology, "a1", "b1")
(('foo', 'bar'), ('shim', 'sham'))
"""
return tuple(topology.edges[node1, node2].items())
In [ ]:
def maybe_reverse(l):
"""
Takes list or tuple and reverses it, or not.
Using maybe_reverse on some list and on its reversed version will
yield the same result.
>>> maybe_reverse([1, 2, 3])
[1, 2, 3]
>>> maybe_reverse([3, 2, 1])
[1, 2, 3]
>>> maybe_reverse(('a', 'b', 'c'))
('a', 'b', 'c')
>>> maybe_reverse(('c', 'b', 'a'))
('a', 'b', 'c')
"""
if type(l) == list:
constructor = list
elif type(l) == tuple:
constructor = tuple
else:
raise TypeError("can only take list or tuple arguments")
reversed_l = constructor(reversed(l))
if str(l) <= str(reversed_l):
return l
return reversed_l
In [ ]:
def get_features_of_path(topology, path):
"""Returns features of path as a tuple of tuples of tuples.
The list of features will be normalized, so that
this function returns the same features in the same order for
path (A, B, C, D) and for path (D, C, B, A)"""
nodes_features = (get_node_features(topology, node) for node in path)
edges_features = (get_edge_features(topology, node1, node2)
for (node1, node2) in zip(path[:-1], path[1:]))
return maybe_reverse(tuple(interleave(nodes_features, edges_features)))
In [ ]:
def df_loc_by_sequence(df, sequence):
"""
Use this instead of `df.loc[sequence]`.
Pandas df gets confused by tuples and possibly by other
sequences. If you do `df.loc[(1, 2)]`, it will look for 1
or 2 in df's index instead of looking for the tuple itself.
You can use df.xs to overcome this problem. Or use this
function which hides the ugliness.
Also see
[stackoverflow question](https://goo.gl/emtjB8)
for better description of the problem."""
return df.xs(sequence)
In [ ]:
import doctest
In [ ]:
def test_get_node_features():
doctest.run_docstring_examples(get_node_features, globals())
assert get_node_features(topology, "КГК.48.0.3") == (("type_", "switch"),)
In [ ]:
def test_get_edge_features():
doctest.run_docstring_examples(get_edge_features, globals())
correct_result = (("connection_type", "backplane"),)
result1 = get_edge_features(topology, "КГК.48.0.3", "n48022")
result2 = get_edge_features(topology, "n48022", "КГК.48.0.3")
assert result1 == correct_result == result2
In [ ]:
doctest.run_docstring_examples(interleave, globals())
test_get_node_features()
test_get_edge_features()
doctest.run_docstring_examples(maybe_reverse, globals())
In [ ]:
@pd_diskcache("classes.pkl")
def list_path_classes(topology, paths):
unique_features_classes = frozenset(
get_features_of_path(topology, path)
for path in paths["shortest_path"]
)
return pd.DataFrame.from_records(
([features] for features in sorted(unique_features_classes)),
columns=["features"]
)
In [ ]:
@pd_diskcache("paths_with_classes.pkl")
def add_class_id_col(paths, classes):
"""Adds class_id column to paths table"""
# create pandas table for quick getting index by value of features list
classes_reverse_lookup = classes.reset_index().set_index("features", verify_integrity=True)
def get_class_id_by_path(path):
return df_loc_by_sequence(classes_reverse_lookup, get_features_of_path(topology, path))["index"]
return paths.assign(class_=paths["shortest_path"].apply(get_class_id_by_path))
In [ ]:
classes = list_path_classes(topology, paths)
paths_with_classes = add_class_id_col(paths, classes)