In [60]:
import numpy
from collections import defaultdict
def simple_page_rank(pages, alpha=0.85, iterations=5):
d = defaultdict(float)
for i in range(iterations):
for p in pages:
links = p.get_links()
count = len(links)
contrib = 0.0
if count != 0:
contrib = p.get_score() / count
for l in links:
d[l.get_name()] += contrib / (iterations * 1.5)
for p in pages:
s = (alpha * d[p.get_name()]) + ((1 - alpha) * (1 / len(pages)))
p.update_score(s)
class Page:
def __init__(self, name=''):
self.links = []
self.score = 1.0
self.name = name
def update_score(self, new_score):
self.score = new_score
def link_to(self, link):
self.links.append(link)
def get_links(self):
return self.links
def get_score(self):
return self.score
def get_name(self):
return self.name
def eigenvector_page_rank(A):
n = A.shape[1]
w, v = linalg.eig(A)
return abs(real(v[:n,0]) / linalg.norm(v[:n,0],1))
print 'Simple calculation'
print '******************'
a = Page('a')
b = Page('b')
c = Page('c')
d = Page('d')
e = Page('e')
f = Page('f')
a.link_to(b)
a.link_to(f)
b.link_to(c)
b.link_to(d)
c.link_to(d)
c.link_to(e)
c.link_to(f)
d.link_to(a)
e.link_to(f)
f.link_to(a)
pages = [a, b, c, d, e, f]
simple_page_rank(pages)
tot = 0.0
for p in pages:
print p.get_name() + ' : ' + str(round(p.get_score(), 3))
tot += p.get_score()
print 'Total : ' + str(tot)
print
print 'EigenVector calculation'
print '***********************'
A = array([ [0, 0, 0, 1, 0, 1],
[1/2.0, 0, 0, 0, 0, 0],
[0, 1/2.0, 0, 0, 0, 0],
[0, 1/2.0, 1/3.0, 0, 0, 0],
[0, 0, 1/3.0, 0, 0, 0],
[1/2.0, 0, 1/3.0, 0, 1, 0 ] ])
n = A.shape[1]
S = ones([n, n]) / n
M = ((1 - 0.15) * A) + (0.15 * S)
ev = eigenvector_page_rank(A)
labels = [l.get_name() for l in pages]
zipped = zip(labels, ev)
tot = 0.0
for (k, v) in zipped:
print k + ' : ' + str(round(v, 3))
tot += v
print 'Total : ' + str(tot)
In [ ]: