In [2]:
RTOL = 0.
ATOL = 1e-6
PageRank is an algorithm used by Google Search to rank websites in their search engine results. PageRank was named after Larry Page, one of the founders of Google. PageRank is a way of measuring the importance of website pages. According to Google:
PageRank works by counting the number and quality of links to a page to determine a rough estimate of how important the website is. The underlying assumption is that more important websites are likely to receive more links from other websites.
It is not the only algorithm used by Google to order search engine results, but it is the first algorithm that was used by the company, and it is the best-known.
All the graph examples in this notebook taked from the link below
http://www.cs.princeton.edu/~chazelle/courses/BIB/pagerank.htm
In [3]:
def normalize(G):
i = 0
n_to_i = {}
i_to_n = G.nodes()
for node in G.nodes():
n_to_i[node] = i
i += 1
return (n_to_i, i_to_n)
In [4]:
def pagerank(G, d=0.85, limit=100, debug=False):
n_to_i, i_to_n = normalize(G)
sz = len(G)
M = np.zeros((sz, sz))
for i in range(sz):
neighs = [n_to_i[node] for node in G.neighbors(i_to_n[i])]
if (len(neighs) > 0):
M[neighs, i] = 1.0 / len(neighs)
if debug:
print(M)
pr = np.ones(sz)
old_pr = pr
k = 0
while k < limit:
pr = (1 - d) + d * M.dot(pr)
if debug:
print(pr)
if np.allclose(pr, old_pr, atol=ATOL, rtol=RTOL):
break
old_pr = pr
k += 1
return dict(zip(G.nodes(), pr)), k
In [5]:
def plot(G):
nx.draw(G, pos=nx.fruchterman_reingold_layout(G), labels=dict(zip(G.nodes(), G.nodes())))
In [6]:
G1 = nx.DiGraph([('A', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'A'), ('D', 'C')])
In [7]:
plot(G1)
In [8]:
pagerank(G1)
Out[8]:
In [9]:
G2 = nx.DiGraph(
[('A', 'B'), ('B', 'A'), ('A', 'C'), ('C', 'A'), ('A', 'D'), ('D', 'A'), ('D', 'E'), ('D', 'F'), ('D', 'G'),
('D', 'H')]
)
In [10]:
plot(G2)
In [11]:
pagerank(G2)
Out[11]:
In [12]:
G3 = nx.DiGraph(
[('A', 'B'), ('B', 'A'), ('A', 'C'), ('C', 'A'), ('A', 'D'), ('D', 'A'), ('D', 'E'), ('D', 'F'), ('D', 'G'),
('D', 'H'), ('E', 'A'), ('F', 'A'), ('G', 'A'), ('H', 'A')]
)
In [13]:
plot(G3)
In [14]:
pagerank(G3)
Out[14]:
In [15]:
G4 = nx.DiGraph(
[('A', 'B'), ('B', 'A'), ('A', 'C'), ('C', 'A'), ('A', 'D'), ('D', 'A')]
)
In [16]:
plot(G4)
In [17]:
pagerank(G4)
Out[17]:
In [18]:
G5 = nx.DiGraph(
[('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'A')]
)
In [19]:
plot(G5)
In [20]:
pagerank(G5)
Out[20]:
In [21]:
G6 = nx.DiGraph(
[('A', 'B'), ('A', 'C'), ('A', 'D'),
('B', 'A'), ('B', 'C'), ('B', 'D'),
('C', 'A'), ('C', 'B'), ('C', 'D'),
('D', 'A'), ('D', 'B'), ('D', 'C')]
)
In [22]:
plot(G6)
In [23]:
pagerank(G6)
Out[23]:
In [24]:
G7 = nx.DiGraph(
[('A', 'B'), ('B', 'A'), ('A', 'C'), ('C', 'A'), ('A', 'D'), ('D', 'A'), ('E', 'A'), ('C', 'F')]
)
In [25]:
plot(G7)
In [26]:
pagerank(G7)
Out[26]:
In [27]:
G8 = nx.DiGraph(
[('A', 'B'), ('B', 'C'), ('C', 'D'), ('D', 'A'), ('E', 'A'), ('D', 'F')]
)
In [28]:
plot(G8)
In [29]:
pagerank(G8)
Out[29]:
In [30]:
G9 = nx.DiGraph(
[('A', 'B'), ('A', 'C'), ('A', 'D'),
('B', 'A'), ('B', 'C'), ('B', 'D'),
('C', 'A'), ('C', 'B'), ('C', 'D'),
('D', 'A'), ('D', 'B'), ('D', 'C'),
('E', 'A'), ('D', 'F')]
)
In [31]:
plot(G9)
In [32]:
pagerank(G9)
Out[32]:
In [33]:
G10 = nx.DiGraph()
G10.add_edge(0, 1)
for i in range(2, 10):
G10.add_edges_from([(1, i), (i, 0)])
In [34]:
plot(G10)
In [35]:
pagerank(G10)
Out[35]: