In [1]:
import copy
import pandas as pd
import numpy as np
In [2]:
data = pd.read_csv("scores.csv")
In [41]:
class PageRank(object):
'''A PageRank object to generate PR'''
def __init__(self):
self.nodes = []
self.unweightedPref = {}
self.weightedPref = {}
self.unweightedPR = {}
self.weightedPR = {}
def PrefGenerator(self, data):
'''Convert dataframe into arrows between nodes'''
self.nodes = data.columns.values.tolist()
for node in self.nodes:
self.unweightedPref[node] = {}
self.weightedPref[node] = {}
for other in list(set(self.nodes) - set([node])):
pref = data[node] - data[other]
self.unweightedPref[node][other] = max([1 if x>0 else 0 for x in pref])
self.weightedPref[node][other] = sum([x for x in pref if x>0])
return
def traverseNodes(self):
'''Traverse all nodes and calculate total outlinks for each node'''
self.uwol = {} # Unweighted outlinks
self.wol = {} # Weighted outlinks
for node in self.nodes:
for key in self.unweightedPref[node]:
try:
self.uwol[key] += self.unweightedPref[node][key]
except KeyError:
self.uwol[key] = self.unweightedPref[node][key]
for key in self.weightedPref[node]:
try:
self.wol[key] += self.weightedPref[node][key]
except KeyError:
self.wol[key] = self.weightedPref[node][key]
return
def updatePR(self, PR, outlinks, Pref, d):
'''update PageRank'''
tmp = {}
for node in self.nodes:
tmp[node] = 1.0*(1-d)/len(self.nodes) + d*1.0*\
sum([PR[ref]*1.0/outlinks[ref]*Pref[node][ref] for ref in Pref[node]])
return tmp
def PRGenerator(self, d=0.85, maxiter=100, threshold=0.001):
'''d: damping factor
maxiter: max iteration #
threshold: stable iterative fluctuation of PR
Check out this guy's implementation:
https://github.com/ashkonf/PageRank'''
self.uwtmp = {} # Temporary variable to store iteration results
self.wtmp = {}
self.traverseNodes() # Calculate all outlinks
for node in self.nodes: # Initialize all PR to 1/N
self.unweightedPR[node] = 1.0/len(self.nodes)
self.weightedPR[node] = 1.0/len(self.nodes)
for i in range(maxiter):
self.flag = 1 # A flag to end iteration
self.uwtmp = self.updatePR(self.unweightedPR, self.uwol, self.unweightedPref, d)
self.wtmp = self.updatePR(self.weightedPR, self.wol, self.weightedPref, d)
for node in self.nodes:
if abs(self.unweightedPR[node] - self.uwtmp[node]) > threshold or\ # Stop when values don't change much
abs(self.weightedPR[node] - self.wtmp[node]) > threshold:
self.flag = 0
self.unweightedPR, self.weightedPR = copy.deepcopy(self.uwtmp), copy.deepcopy(self.wtmp)
if self.flag == 1:
print "Converge after {} iterations".format(i)
return
return
In [51]:
pr = PageRank()
pr.PrefGenerator(data)
for d in [0.5, 0.6, 0.7, 0.8, 0.85, 0.9]:
pr.PRGenerator(d=d)
print sorted(list(pr.unweightedPR), key=lambda tup: tup[1])
In [52]:
for d in [0.5, 0.6, 0.7, 0.8, 0.85, 0.9]:
pr.PRGenerator(d=d)
print sorted(list(pr.weightedPR), key=lambda tup: tup[1])
In [1]:
sales = {
"A6": 20,
"A8": 12,
"3series": 220,
"5series": 60,
"7series": 14,
"XJ": 6.6,
"ES": 135,
"LS": 30,
"RX": 120,
"Sclass": 25
}