We are going to load the dataset https://snap.stanford.edu/data/wiki-Vote.html and transform it by adding labels to some edges to represent emulated topics.
The topological structure of the original graph is kept unchanged.
In [121]:
import os
import random
from zipfile import ZipFile
from StringIO import StringIO  # Python 2; on Python 3 use io.BytesIO / io.StringIO

# Read the raw edge list out of the zipped SNAP dataset
dataset_path = os.path.join('..', 'datasets', 'wiki.txt.zip')
wiki_file = StringIO(ZipFile(dataset_path).read('wiki-Vote.txt'))
In [122]:
def id_generator(seed=0):
    "Generator to produce sequential IDs"
    while True:
        yield seed
        seed += 1

def convert_line(line):
    "Ignore comments (#) and strip values from columns"
    if not line.strip() or line.startswith('#'):
        return None, None
    from_node, to_node = line.split()
    return from_node.strip(), to_node.strip()

def invert_dict(d):
    "Invert dict d[k]=v to be p[v]=[k1,k2,...kn]"
    p = {}
    for k, v in d.items():
        try:
            p[v].append(k)
        except KeyError:
            p[v] = [k]
    return p
In [123]:
template_structure ="""<?xml version="1.0" encoding="UTF-8"?>
<graphml xmlns="http://graphml.graphdrawing.org/xmlns">
<key attr.name="topic" attr.type="string" for="edge" id="topic" />
<graph edgedefault="directed">
{NODES}
{EDGES}
</graph>
</graphml>
"""
template_node = '<node id="{node_id}"/>'
template_edge = '<edge id="{edge_id}" directed="true" source="{from_node}" target="{to_node}"> <data key="topic">{topic}</data> </edge>'
In [124]:
topics = ['Athletics', 'Cultural Center', 'Dorms', 'Food', 'Groceries']
In [125]:
output_file = os.path.join('..', 'datasets', 'wiki.graphml')
nodes = set()
edges = []
edge_id_gen = id_generator()
for line in wiki_file.readlines():
    from_node, to_node = convert_line(line)
    if from_node is None:
        continue
    nodes.add(from_node)
    nodes.add(to_node)
    edges.append((from_node, to_node))
node_lines = '\n'.join(template_node.format(node_id=n) for n in nodes)
edge_lines = '\n'.join(template_edge.format(from_node=f,
                                            to_node=t,
                                            edge_id=edge_id_gen.next(),
                                            topic=random.choice(topics)) for f, t in edges)
with open(output_file, 'wb') as out_fd:
    out_fd.write(template_structure.format(NODES=node_lines, EDGES=edge_lines))
In [126]:
import networkx as nx
from nxpd import draw
G = nx.read_graphml(output_file)
print("{0} nodes {1} edges".format(len(G.nodes()), len(G.edges())))
In [127]:
subnodes = G.nodes()[:300]
sG = G.subgraph(subnodes)
nx.write_graphml(sG, os.path.join('..', 'datasets', 'small_wiki.graphml'))
print("{0} nodes {1} edges".format(len(sG.nodes()), len(sG.edges())))
In [128]:
import matplotlib.pyplot as plt
%matplotlib inline
draw(sG, show='ipynb')
Out[128]:
Selecting a committee based on the delegation graph

To select N people for a topic T from a set where K people have expressed their expertise on T, with K > N, the following simple algorithm can be used (a toy sketch follows this list; the full implementation comes in the cells below):

1. Compute each node's delegation power: the number of people with a delegation path to it (including itself).
2. Elect the node with the highest power. On a tie, pick at random among independent candidates; if the tied candidates delegate to each other, discount their mutual delegations and re-rank.
3. Remove the elected member together with its support base and repeat until N members are elected.

When the results are published, each committee member is shown with the number of people who delegated to them. This allows committees to give delegates voting rights proportional to the size of their support base.

There are many other algorithms for computing committees in which the delegation count is not relevant, but they are not explored here since we do have the delegation counts.
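A minimal sketch of that loop on a toy delegation graph, assuming networkx's nx.ancestors (one reverse traversal per node). The toy_elect helper below is hypothetical, ignores tie handling, and is not the notebook's implementation, which is defined further down:

import networkx as nx

def toy_elect(G, n):
    "Hypothetical helper: greedy committee selection without tie handling."
    committee = {}
    H = G.copy()
    while len(committee) < n and H.nodes():
        # delegation power = voters with a directed path to the node, plus the node itself
        power = {v: len(nx.ancestors(H, v)) + 1 for v in H.nodes()}
        winner = max(power, key=power.get)
        base = set(nx.ancestors(H, winner)) | {winner}
        committee[winner] = len(base)  # proportionate voting weight for this member
        H.remove_nodes_from(base)
    return committee

# 1 -> 2 -> 3 and 4 -> 3: node 3 is reachable from every other voter, so it is
# elected first with weight 4 and no voters remain to fill a second seat.
print(toy_elect(nx.DiGraph([(1, 2), (2, 3), (4, 3)]), 2))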
In [132]:
filter_topic = topics[-1]  # Groceries
# identify the edges tagged with filter_topic
topic_E = []
for e in sG.edges_iter():
    edge_attrs = sG.get_edge_data(*e)
    if edge_attrs['topic'] == filter_topic:
        topic_E.append(e)
# create the induced subgraph from the set of edges
topic_G = nx.DiGraph(topic_E)
print("{0} nodes {1} edges".format(len(topic_G.nodes()), len(topic_G.edges())))
In [147]:
import itertools
import random
random.seed(123)

def compute_powers(G):
    """
    Computes two dictionaries:
      * powers - key==node, value==delegation power of the node
      * support_base - key==node, value==all nodes represented by the key node.
    Complexity: |V|**2 * complexity(has_path).
    has_path could be implemented to be O(V+E), using DFS with adjacency lists.
    """
    support_base = {}
    V = G.nodes()
    powers = dict(zip(V, [0]*len(V)))
    for dst in V:
        for src in V:
            if nx.has_path(G, src, dst):
                powers[dst] += 1
                try:
                    support_base[dst].append(src)
                except KeyError:
                    support_base[dst] = [src]
    return powers, support_base

def select_top_k(score_dict, k):
    "Return the k highest-scoring (score, node) pairs."
    return list(itertools.islice(reversed(sorted((score, node) for node, score in score_dict.items())), 0, k))
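As the docstring notes, the |V|**2 has_path loop can be avoided. Below is a minimal sketch of the same computation, assuming nx.ancestors (which collects every node with a directed path to a given node in one reverse traversal); the results should match compute_powers up to list ordering, since has_path(G, dst, dst) is True and each node is therefore part of its own base:

def compute_powers_fast(G):
    "Sketch: same powers/support_base as compute_powers, one reverse traversal per node."
    powers, support_base = {}, {}
    for dst in G.nodes():
        base = set(nx.ancestors(G, dst)) | {dst}
        powers[dst] = len(base)
        support_base[dst] = sorted(base)
    return powers, support_base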
In [135]:
# Force a tie condition with dependent delegation power
dependent_nodes = ['2685', '7214', '2510', '255', '271']
topic_G.add_edge('2685', '7214')
topic_G.add_edge('7214', '2685')
topic_G.add_edge('2510', '7214')
topic_G.add_edge('7214', '2510')
topic_G.add_edge('255', '271')
topic_G.add_edge('271', '255')
In [136]:
draw(topic_G, show='ipynb')
Out[136]:
In [152]:
nx.write_graphml(topic_G, "../datasets/case3_ties.graphml")
In [141]:
def all_pairwise_combinations(node_list):
    "All ordered pairs (i, j) with i != j drawn from node_list."
    forward = list(itertools.combinations(node_list, 2))
    backward = [(j, i) for (i, j) in forward]
    return forward + backward

def independent_nodes(G, node_list):
    "True if no node in node_list can reach any other node in node_list."
    return not any(nx.has_path(G, i, j) for i, j in all_pairwise_combinations(node_list))

def candidate_from_dependent_tie(G, dependent_candidates):
    "Break a tie between dependent candidates by discounting the edges between them."
    aux_G = G.copy()
    dependent_edges = all_pairwise_combinations(dependent_candidates)
    for v, u in dependent_edges:
        try:
            aux_G.remove_edge(v, u)
        except nx.NetworkXError:
            pass
    all_candidates, support_base = compute_powers(aux_G)
    return select_candidate(aux_G, all_candidates)

def select_candidate(G, candidate_dict):
    "Pick the candidate with the highest delegation power, resolving ties."
    power_dict = invert_dict(candidate_dict)
    top_voting = max(power_dict.keys())
    top_candidates = power_dict[top_voting]
    if len(top_candidates) == 1:
        candidate = top_candidates[0]
    else:
        candidate = (random.choice(top_candidates)
                     if independent_nodes(G, top_candidates)
                     else candidate_from_dependent_tie(G, top_candidates))
    return candidate

def elect_committee(original_G, limit, max_iter=100):
    "Elect `limit` members, removing each winner's support base before the next round."
    iteration = 0
    committee = {}
    G = original_G.copy()
    while len(committee) < limit:
        iteration += 1
        all_candidates, support_base = compute_powers(G)
        candidate = select_candidate(G, all_candidates)
        base = support_base[candidate]
        committee[candidate] = base
        G.remove_nodes_from(base)
        if iteration > max_iter:
            raise Exception('Maximum iteration limit reached while electing committee.')
    return committee

committee = elect_committee(topic_G, 5)
print({k: len(v) for k, v in committee.items()})
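To see the dependent-tie path in isolation, here is a toy three-node graph (hypothetical labels 'a', 'b', 'c') where 'a' and 'b' delegate to each other: both reach three voters, they are not independent, so candidate_from_dependent_tie discounts the mutual edges on a copy and re-ranks, and 'a' wins on the strength of 'c'.

toy = nx.DiGraph([('c', 'a'), ('a', 'b'), ('b', 'a')])
toy_powers, _ = compute_powers(toy)
print(toy_powers)                          # 'a' and 'b' tie at 3, 'c' has 1
print(select_candidate(toy, toy_powers))   # -> 'a'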
In [151]:
colorG = topic_G.copy()
represented_base = list(itertools.chain.from_iterable(committee.values()))
dependent_node_colors = {k:'orange' for k in dependent_nodes}
committee_node_colors = {k:'red' for k, _ in committee.items()}
base_node_colors = {k:'green' for k in represented_base}
nx.set_node_attributes(colorG, 'color', base_node_colors)
nx.set_node_attributes(colorG, 'color', dependent_node_colors)
nx.set_node_attributes(colorG, 'color', committee_node_colors)
draw(colorG, show='ipynb')
Out[151]:
In [154]:
committee
Out[154]:
In [90]:
print(select_top_k(nx.pagerank(topic_G), 10))
In [91]:
print(select_top_k(nx.betweenness_centrality(topic_G, normalized=False), 10))
In [129]:
usG = sG.to_undirected()
In [130]:
cc = nx.connected_components(usG)
cc_sizes = list(reversed(sorted([len(i) for i in cc])))
try:
    idx = cc_sizes.index(1)
    isolated = len(cc_sizes[idx:])
except ValueError:
    idx = len(cc_sizes)  # no singleton components
    isolated = 0
print("{0} non-trivial connected components and {1} isolated nodes".format(len(cc_sizes[:idx]), isolated))
In [131]:
cluster_dict = nx.clustering(usG)