In [9]:
import math, random, re
from collections import defaultdict, Counter, deque
from functools import partial
users = [
{ "id": 0, "name": "Hero" },
{ "id": 1, "name": "Dunn" },
{ "id": 2, "name": "Sue" },
{ "id": 3, "name": "Chi" },
{ "id": 4, "name": "Thor" },
{ "id": 5, "name": "Clive" },
{ "id": 6, "name": "Hicks" },
{ "id": 7, "name": "Devin" },
{ "id": 8, "name": "Kate" },
{ "id": 9, "name": "Klein" }
]
friendships = [(0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4),
(4, 5), (5, 6), (5, 7), (6, 8), (7, 8), (8, 9)]
for user in users:
user["friends"] = []
for i, j in friendships:
users[i]["friends"].append(users[j])
users[j]["friends"].append(users[i])
print(users[0]) # What a mess.
In [3]:
""" some 2D linear algebra stuff from chapter 4 """
from math import sqrt
def vector_add(v, w):
'''adds corresponding elements of vectors'''
return [v_i + w_i for v_i,w_i in zip(v, w)]
def vector_subtract(v, w):
'''subtract corresponding elements of vectors'''
return [v_i - w_i for v_i,w_i in zip(v, w)]
def vector_sum(vectors):
'''sum all corresponding elements'''
result = vectors[0]
for vector in vectors[1:]:
result = vector_add(result, vector)
return result
def vector_sum_fast(vectors):
'''sum all corresponding elements'''
return reduce(vector_add, vectors)
def scalar_multiply(c, vector):
'''c is the scalar'''
return [c * v_i for v_i in vector]
def vector_mean(vectors):
'''The ith element of the result is the mean of the ith element
of all the input vectors.'''
n = len(vectors)
return scalar_multiply(1/n, vector_sum(vectors))
def dot(v, w):
'''the sum of the product of the matching elements
of the input vectors'''
return sum(v_i * w_i for v_i,w_i in zip(v, w))
def sum_of_squares(v):
'''add the square of each element'''
return dot(v, v)
def magnitude(v):
'''Find the length of a vector in cartesian space'''
return sqrt(sum_of_squares(v))
def distance(v, w):
'''Find the distance between two vectors'''
return magnitude(vector_subtract(v, w))
def shape(A):
num_rows = len(A)
num_cols = len(A[0] if A else 0)
return num_rows, num_cols
def get_row(A, i):
return A[i]
def get_col(A, i):
return [A_i[j] for A_i in A]
def make_matrix(num_rows, num_cols, entry_fn):
return [[entry_fn(i, j) for j in range(num_cols)] for i in range(num_rows)]
def is_diagonal(i, j):
return 1 if i == j else 0
In [15]:
def shortest_paths_from(from_user):
""" generate the shortest path from one spot to
everywhere else in the graph
"""
shortest_paths_to = {from_user['id']: [[]]}
frontier = deque((from_user, friend) for friend in from_user['friends'])
while frontier:
prev_user, user = frontier.popleft()
user_id = user['id']
paths_to_prev_user = shortest_paths_to[prev_user['id']]
new_paths_to_user = [path + [user_id] for path in paths_to_prev_user]
old_paths_to_user = shortest_paths_to.get(user_id, [])
if old_paths_to_user:
min_path_length = len(old_paths_to_user[0])
else:
min_path_length = float('inf')
# only keep short paths that are really new
new_paths_to_user = [path for path in new_paths_to_user
if len(path) <= min_path_length
and path not in old_paths_to_user]
shortest_paths_to[user_id] = old_paths_to_user + new_paths_to_user
# and new neighbors to the frontier
frontier.extend((user, friend) for friend in user['friends']
if friend['id'] not in shortest_paths_to)
return shortest_paths_to
In [17]:
for user in users:
user['shortest_paths'] = shortest_paths_from(user)
In [19]:
for user in users:
user['betweeness_centrality'] = 0.0
In [23]:
for source in users:
source_id = source['id']
for target_id, paths in source['shortest_paths'].items():
if source_id < target_id:
num_paths = len(paths)
contrib = 1.0 / num_paths
for path in paths:
for id in path:
if id not in [source_id, target_id]:
users[id]['betweeness_centrality'] += contrib
In [28]:
def farness(user):
""" the sum of the lengths of the shortest paths to each other user """
return sum(len(paths[0])
for paths in user['shortest_paths'].values())
for user in users:
user['closeness_centrality'] = 1.0 / farness(user)
In [47]:
""" Page Rank - Toy Model """
def page_rank(users, damping=0.85, num_iters=100):
""" A fun little toy model of the Page Rank algorithm """
# start with an even distribution
num_users = len(users)
pr = {user['id']: 1.0 / num_users for user in users}
# the small fraction that each node gets every iteration
base_pr = (1.0 - damping) / num_users
for _ in range(num_iters):
next_pr = {user['id']: base_pr for user in users}
for user in users:
# distribute page rank to outgoing links
links_pr = pr[user['id']] * damping
for endorsee in user['endorses']:
next_pr[endorsee['id']] += links_pr / len(user['endorses'])
pr = next_pr
return pr
In [53]:
users = [
{"id": 0, "name": "Hero", "endorses": []},
{"id": 1, "name": "Dunn", "endorses": []},
{"id": 2, "name": "Sue", "endorses": []},
{"id": 3, "name": "Chi", "endorses": []},
{"id": 4, "name": "Thor", "endorses": []},
{"id": 5, "name": "Clive", "endorses": []},
{"id": 6, "name": "Hicks", "endorses": []},
{"id": 7, "name": "Devin", "endorses": []},
{"id": 8, "name": "Kate", "endorses": []},
{"id": 9, "name": "Klein", "endorses": []}
]
endorsements = [(0, 1), (1, 0), (0, 2), (2, 0), (1, 2), (2, 1), (1, 3), (0, 4),
(2, 3), (3, 4), (5, 4), (5, 6), (7, 5), (6, 8), (8, 7), (8, 9)]
for end in endorsements:
users[end[0]]['endorses'].append(users[end[1]])
page_rank(users)
Out[53]:
In [ ]: