In [3]:
def pagerank(graph, damping=0.85, epsilon=1.0e-8):
""" Pure Python implementation of the PageRank algorithm
Paramters
---------
graph : iterable containing edges of the graph
damping : float, describing the decay factor after visiting
a node, in range 0...1
epsilon : float, convergence threshold of the algorithm
Reference
---------
Taken from http://dpk.io/pagerank
Note:
----
The PageRank algorithm is subject to a patent held by Stanford University under
exclusive licence to Google, Inc. This page presents a description of the algorithm
for educational purposes. I am not responsible for your use of this information.
"""
inlink_map = {}
outlink_counts = {}
def new_node(node):
if node not in inlink_map: inlink_map[node] = set()
if node not in outlink_counts: outlink_counts[node] = 0
for tail_node, head_node in graph:
new_node(tail_node)
new_node(head_node)
if tail_node == head_node: continue
if tail_node not in inlink_map[head_node]:
inlink_map[head_node].add(tail_node)
outlink_counts[tail_node] += 1
all_nodes = set(inlink_map.keys())
for node, outlink_count in outlink_counts.items():
if outlink_count == 0:
outlink_counts[node] = len(all_nodes)
for l_node in all_nodes: inlink_map[l_node].add(node)
initial_value = 1 / len(all_nodes)
ranks = {}
for node in inlink_map.keys(): ranks[node] = initial_value
new_ranks = {}
delta = 1.0
n_iterations = 0
rank_history = []
while delta > epsilon:
new_ranks = {}
for node, inlinks in inlink_map.items():
new_ranks[node] = ((1 - damping) / len(all_nodes)) + (damping * sum(ranks[inlink] / outlink_counts[inlink] for inlink in inlinks))
delta = sum(abs(new_ranks[node] - ranks[node]) for node in new_ranks.keys())
ranks, new_ranks = new_ranks, ranks
n_iterations += 1
rank_history.append(ranks)
return ranks, n_iterations, rank_history
In [4]:
import graphviz as gv
import numpy as np
from IPython.display import Image
from IPython.display import display
In [5]:
graph = []
nb_nodes = 10
probs = 0.2 * np.ones((nb_nodes, nb_nodes)) + 0.1 * np.eye(nb_nodes)
for i in range(nb_nodes):
for j in range(nb_nodes):
if np.random.uniform(0,1) < probs[i,j]:
graph.append((i,j))
ranks, _, history = pagerank(graph)
In [8]:
for t in range(len(history)):
g1 = gv.Digraph(format='svg')
for x,y in graph:
g1.edge(str(x), str(y), label="foo")
for i in ranks.keys():
g1.node(str(i), label="{} : {:.3f}".format(i, history[t][i]))
display(g1)
break
In [26]:
g1 = gv.Digraph(format='eps')
for i in list("bca"):
g1.node(i, label=i)
for nodes, probs in zip(("ab", "ac", "bc", "cb"),("2/3", "1/3", "1", "1")):
n,m = nodes
g1.edge(n,m, label=(probs))
#display(g1)
g1.render("markov_graph.eps")
Out[26]:
In [22]:
help(g1.render)