Page Rank computation



In [60]:
import numpy
from collections import defaultdict

def simple_page_rank(pages, alpha=0.85, iterations=5):
    d = defaultdict(float)
    for i in range(iterations):
        for p in pages:
            links = p.get_links()
            count = len(links)
            contrib = 0.0
            if count != 0:
                contrib = p.get_score() / count
            for l in links:
                d[l.get_name()] += contrib / (iterations * 1.5)
    
        for p in pages:
            s = (alpha * d[p.get_name()]) + ((1 - alpha) * (1 / len(pages)))
            p.update_score(s)

class Page:
    def __init__(self, name=''):
        self.links = []
        self.score = 1.0
        self.name = name
    
    def update_score(self, new_score):
        self.score = new_score
    
    def link_to(self, link):
        self.links.append(link)
    
    def get_links(self):
        return self.links
        
    def get_score(self):
        return self.score
    
    def get_name(self):
        return self.name
    
def eigenvector_page_rank(A):
    n = A.shape[1]
    w, v = linalg.eig(A)
    return abs(real(v[:n,0]) / linalg.norm(v[:n,0],1))
        
print 'Simple calculation'
print '******************'
a = Page('a')
b = Page('b')
c = Page('c')
d = Page('d')
e = Page('e')
f = Page('f')

a.link_to(b)
a.link_to(f)
b.link_to(c)
b.link_to(d)
c.link_to(d)
c.link_to(e)
c.link_to(f)
d.link_to(a)
e.link_to(f)
f.link_to(a)

pages = [a, b, c, d, e, f]
simple_page_rank(pages)
tot = 0.0
for p in pages:
    print p.get_name() + ' : ' + str(round(p.get_score(), 3))
    tot += p.get_score()
print 'Total : ' + str(tot)
print
print 'EigenVector calculation'
print '***********************'

A = array([ [0,     0,     0,     1, 0, 1],
            [1/2.0, 0,     0,     0, 0, 0],
            [0,     1/2.0, 0,     0, 0, 0],
            [0,     1/2.0, 1/3.0, 0, 0, 0],
            [0,     0,     1/3.0, 0, 0, 0],
            [1/2.0, 0,     1/3.0, 0, 1, 0 ] ])

n = A.shape[1]
S = ones([n, n]) / n
M = ((1 - 0.15) * A) + (0.15 * S)
ev = eigenvector_page_rank(A)
labels = [l.get_name() for l in pages]
zipped = zip(labels, ev)
tot = 0.0
for (k, v) in zipped:
    print k + ' : ' + str(round(v, 3))
    tot += v
print 'Total : ' + str(tot)


Simple calculation
******************
a : 0.382
b : 0.12
c : 0.074
d : 0.122
e : 0.047
f : 0.299
Total : 1.04474370031

EigenVector calculation
***********************
a : 0.353
b : 0.176
c : 0.088
d : 0.118
e : 0.029
f : 0.235
Total : 1.0

In [ ]: