Necessary imports
In [ ]:
import networkx as nx
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
In [ ]:
simple_network = nx.Graph()
nodes = [1,2,3,4,5,6,7,8]
edges = [(1,2),(2,3),(1,3),(4,5),(2,7),(1,9),(3,4),(4,5),(4,9),(5,6),(7,8),(8,9)]
simple_network.add_nodes_from(nodes)
simple_network.add_edges_from(edges)
nx.draw(simple_network)
In [ ]:
pos=nx.spring_layout(simple_network) # positions for all nodes
# nodes
nx.draw_networkx_nodes(simple_network,pos,
node_color='r',
node_size=500,
alpha=0.8)
# edges
#nx.draw_networkx_edges(sub_graph,pos,width=1.0,alpha=0.5)
nx.draw_networkx_edges(simple_network,pos,
edgelist=edges,
width=8,alpha=0.5,edge_color='b')
node_name={}
for node in simple_network.nodes():
node_name[node]=str(node)
nx.draw_networkx_labels(simple_network,pos,node_name,font_size=16)
plt.axis('off')
plt.show() # display
In [ ]:
simple_network.has_edge(2,9)
#simple_network.has_node(2)
#simple_network.number_of_edges()
#simple_network.number_of_nodes()
#simple_network.order()
#len(simple_network)
In [ ]:
for n in simple_network.nodes_iter():
print(n)
In [ ]:
for a in simple_network.adjacency_iter():
print(a)
In [ ]:
for e in simple_network.edges_iter():
print(e)
In [ ]:
for d in simple_network.degree_iter():
print(d)
In [ ]:
G = nx.Graph() #Undirected simple graph
d = nx . DiGraph () #directed simple graph
m = nx . MultiGraph () #undirected with parallel edges
h = nx . MultiDiGraph () #directed with parallel edges
In [ ]:
print(nx.shortest_path(simple_network,6,8))
print(nx.shortest_path_length(simple_network,6,8))
In [ ]:
#Our geocoding data getter is useful here!
def get_json_data(response,country,types):
data = response.json()
result_list = list()
for result in data['results']:
if not country == 'ALL':
if not country in [x['long_name'] for x in result['address_components'] if 'country' in x['types']]:
continue
address = result['formatted_address']
lat = result['geometry']['location']['lat']
lng = result['geometry']['location']['lng']
if types:
result_list.append((address,lat,lng,result['types']))
else:
result_list.append((address,lat,lng))
return result_list
def get_geolocation_data(address_string,format="JSON",country="ALL",types=False):
format = format.lower()
address = '_'.join(address_string.split())
url = 'https://maps.googleapis.com/maps/api/geocode/%s?address=%s' %(format,address)
try:
import requests
response=requests.get(url)
if not response.status_code == 200: return None
func='get_'+format+'_data'
return globals()[func](response,country,types)
except:
return None
def get_lat_lon(address):
data = get_geolocation_data(address,format='JSON')
return str(data[0][1]) + ',' + str(data[0][2])
In [ ]:
get_lat_lon('New York, NY')
In [ ]:
addresses = [
"Columbia University, New York, NY",
"Amity Hall Uptown, Amsterdam Avenue, New York, NY",
"Ellington in the Park, Riverside Drive, New York, NY",
'Chaiwali, Lenox Avenue, New York, NY',
"Grant's Tomb, West 122nd Street, New York, NY",
'Pisticci, La Salle Street, New York, NY',
'Nicholas Roerich Museum, West 107th Street, New York, NY',
'Audubon Terrace, Broadway, New York, NY',
'Apollo Theater, New York, NY'
]
In [ ]:
latlons=''
for address in addresses:
latlon=get_lat_lon(address)
latlons += latlon + '|'
print(latlons)
In [ ]:
distance_url = 'https://maps.googleapis.com/maps/api/distancematrix/json?origins='
distance_url+=latlons
distance_url+='&destinations='
distance_url+=latlons
#Set the mode walking, driving, cycling
mode='walking'
distance_url+='&mode='+mode
print(distance_url)
In [ ]:
import requests
data=requests.get(distance_url).json()
all_rows = data['rows']
address_graph=nx.Graph()
address_graph.add_nodes_from(addresses)
for i in range(len(all_rows)):
origin = addresses[i]
for j in range(len(all_rows[i]['elements'])):
duration = all_rows[i]['elements'][j]['duration']['value']
destination = addresses[j]
address_graph.add_edge(origin,destination,d=duration)
#print(origin,destination,duration)
nx.draw(address_graph)
In [ ]:
nx.draw(address_graph)
In [ ]:
def get_route_graph(address_list,mode='walking'):
latlons=''
for address in addresses:
latlon=get_lat_lon(address)
latlons += latlon + '|'
distance_url = 'https://maps.googleapis.com/maps/api/distancematrix/json?origins='
distance_url+=latlons
distance_url+='&destinations='
distance_url+=latlons
#Set the mode walking, driving, cycling
mode='driving'
distance_url+='&mode='+mode
import requests
data=requests.get(distance_url).json()
all_rows = data['rows']
address_graph = nx.Graph()
address_graph.add_nodes_from(addresses)
for i in range(len(all_rows)):
origin = addresses[i]
for j in range(len(all_rows[i]['elements'])):
if i==j:
continue
duration = all_rows[i]['elements'][j]['duration']['value']
destination = addresses[j]
address_graph.add_edge(origin,destination,d=duration)
return address_graph
address_graph = get_route_graph(addresses)
In [ ]:
for edge in address_graph.edges():
print(edge,address_graph.get_edge_data(*edge))
In [ ]:
for n in address_graph.edges_iter():
print(n)
In [ ]:
address_graph = get_route_graph(addresses)
pos=nx.circular_layout(address_graph) # positions for all nodes
# nodes
nx.draw_networkx_nodes(address_graph,pos,
node_color='r',
node_size=2000,
alpha=0.001)
# edges
nx.draw_networkx_edges(address_graph,pos,edgelist=address_graph.edges(),width=8,alpha=0.5,edge_color='b')
nx.draw_networkx_edge_labels(address_graph,pos,font_size=10)
node_name={}
for node in address_graph.nodes():
node_name[node]=str(node)
nx.draw_networkx_labels(address_graph,pos,node_name,font_size=16)
plt.axis('off')
plt.show() # display
In [ ]:
for edge in address_graph.edges():
print(edge,address_graph.get_edge_data(*edge))
In [ ]:
for edge in address_graph.edges():
duration = address_graph.get_edge_data(*edge)['d']
address_graph.get_edge_data(*edge)['d'] = int(duration/60)
print(address_graph.get_edge_data(*edge))
In [ ]:
pos=nx.circular_layout(address_graph) # positions for all nodes
fig=plt.figure(1,figsize=(12,12)) #Let's draw a big graph so that it is clearer
# nodes
nx.draw_networkx_nodes(address_graph,pos,
node_color='r',
node_size=2000,
alpha=0.001)
# edges
nx.draw_networkx_edges(address_graph,pos,edgelist=address_graph.edges(),width=8,alpha=0.5,edge_color='b')
nx.draw_networkx_edge_labels(address_graph,pos,font_size=10)
node_name={}
for node in address_graph.nodes():
node_name[node]=str(node)
nx.draw_networkx_labels(address_graph,pos,node_name,font_size=16)
#fig.axis('off')
fig.show() # display
In [ ]:
def get_route_graph(address_list,mode='walking'):
latlons=''
for address in addresses:
latlon=get_lat_lon(address)
latlons += latlon + '|'
distance_url = 'https://maps.googleapis.com/maps/api/distancematrix/json?origins='
distance_url+=latlons
distance_url+='&destinations='
distance_url+=latlons
#Set the mode walking, driving, cycling
mode='driving'
distance_url+='&mode='+mode
import requests
data=requests.get(distance_url).json()
all_rows = data['rows']
address_graph = nx.Graph()
address_graph.add_nodes_from(addresses)
for i in range(len(all_rows)):
origin = addresses[i]
for j in range(len(all_rows[i]['elements'])):
if i==j:
continue
duration = all_rows[i]['elements'][j]['duration']['value']
destination = addresses[j]
address_graph.add_edge(origin,destination,d=int(duration/60))
return address_graph
address_graph = get_route_graph(addresses)
In [ ]:
for edge in address_graph.edges():
import random
r = random.random()
if r <0.75: #get rid of 60% of the edges
address_graph.remove_edge(*edge)
In [ ]:
pos=nx.circular_layout(address_graph) # positions for all nodes
plt.figure(1,figsize=(12,12)) #Let's draw a big graph so that it is clearer
# nodes
nx.draw_networkx_nodes(address_graph,pos,
node_color='r',
node_size=2000,
alpha=0.001)
# edges
nx.draw_networkx_edges(address_graph,pos,edgelist=address_graph.edges(),width=8,alpha=0.5,edge_color='b')
nx.draw_networkx_edge_labels(address_graph,pos,font_size=7)
node_name={}
for node in address_graph.nodes():
node_name[node]=str(node)
nx.draw_networkx_labels(address_graph,pos,node_name,font_size=16)
#fig.axis('off')
fig.show() # display
In [ ]:
print(addresses)
In [ ]:
print(nx.shortest_path(address_graph,'Amity Hall Uptown, Amsterdam Avenue, New York, NY', 'Chaiwali, Lenox Avenue, New York, NY'))
print(nx.dijkstra_path(address_graph,'Amity Hall Uptown, Amsterdam Avenue, New York, NY', 'Chaiwali, Lenox Avenue, New York, NY'))
print(nx.dijkstra_path_length (address_graph,'Amity Hall Uptown, Amsterdam Avenue, New York, NY', 'Chaiwali, Lenox Avenue, New York, NY',weight='d'))
In [ ]:
#[print(n1,n2,nx.shortest_path_length(n1,n2),nx.dijkstra_path_length(n1,n2,weight='d')) for n1 in address_graph.nodes() for n2 in address_graph.nodes()]
[print(n1,n2,
nx.shortest_path_length(address_graph,n1,n2),
nx.dijkstra_path_length(address_graph,n1,n2,weight='d'),
) for n1 in address_graph.nodes() for n2 in address_graph.nodes() if not n1 == n2]
In [ ]:
for edge in address_graph.edges():
print(edge,address_graph.get_edge_data(*edge))
Differnetiating edges by weight
In [ ]:
#Divide edges into two groups based on weight
#Easily extendable to n-groups
elarge=[(u,v) for (u,v,d) in address_graph.edges(data=True) if d['d'] >5]
esmall=[(u,v) for (u,v,d) in address_graph.edges(data=True) if d['d'] <=5]
pos=nx.spring_layout(address_graph) # positions for all nodes
plt.figure(1,figsize=(12,12)) #Let's draw a big graph so that it is clearer
# nodes
nx.draw_networkx_nodes(address_graph,pos,node_size=700)
# edges. draw the larger weight edges in solid lines and smaller weight edges in dashed lines
nx.draw_networkx_edges(address_graph,pos,edgelist=elarge,
width=6)
nx.draw_networkx_edges(address_graph,pos,edgelist=esmall,
width=6,alpha=0.5,edge_color='b',style='dashed')
# labels
nx.draw_networkx_labels(address_graph,pos,font_size=20,font_family='sans-serif')
nx.draw_networkx_edge_labels(address_graph,pos,font_size=7)
plt.axis('off')
#plt.savefig("address_graph.png") # save as png if you need to use it in a report or web app
fig.show() # display
In [ ]:
origin = 'Amity Hall Uptown, Amsterdam Avenue, New York, NY'
destination = 'Chaiwali, Lenox Avenue, New York, NY'
shortest_path = nx.dijkstra_path(address_graph,origin,destination)
shortest_path_edges = list()
for i in range(len(shortest_path)-1):
shortest_path_edges.append((shortest_path[i],shortest_path[i+1]))
shortest_path_edges.append((shortest_path[i+1],shortest_path[i]))
path_edges=list()
other_edges=list()
node_label_list = dict()
node_label_list = {n:'' for n in address_graph.nodes()}
for edge in address_graph.edges():
if edge in shortest_path_edges:
path_edges.append(edge)
node_label_list[edge[0]] = edge[0]
node_label_list[edge[1]] = edge[1]
else:
other_edges.append(edge)
pos=nx.spring_layout(address_graph) # positions for all nodes
fig=plt.figure(1,figsize=(12,12))
# nodes
nx.draw_networkx_nodes(address_graph,pos,node_size=700)
# edges. draw the larger weight edges in solid lines and smaller weight edges in dashed lines
nx.draw_networkx_edges(address_graph,pos,edgelist=path_edges,
width=6)
nx.draw_networkx_edges(address_graph,pos,edgelist=other_edges,
width=6,alpha=0.5,edge_color='b',style='dashed')
# labels
nx.draw_networkx_labels(address_graph,pos,font_size=20,font_family='sans-serif',labels=node_label_list)
nx.draw_networkx_edge_labels(address_graph,pos,font_size=7)
plt.axis('off')
#plt.savefig("address_graph.png") # save as png if you need to use it in a report or web app
plt.show() # display
Question How would you remove edge labels from all but the shortest path?
Given an address, generate a sorted by distance list of all other addresses
In [ ]:
location = 'Amity Hall Uptown, Amsterdam Avenue, New York, NY'
distance_list = list()
for node in address_graph.nodes():
if node == location:
continue
distance = nx.dijkstra_path_length(address_graph,location,node)
distance_list.append((node,distance))
from operator import itemgetter
print(sorted(distance_list,key=itemgetter(1)))
Get all paths from one location to another
In [ ]:
list(nx.all_simple_paths(address_graph,'Amity Hall Uptown, Amsterdam Avenue, New York, NY','Chaiwali, Lenox Avenue, New York, NY'))
In [ ]:
nx.all_simple_paths(address_graph,
'Amity Hall Uptown, Amsterdam Avenue, New York, NY',
'Chaiwali, Lenox Avenue, New York, NY')
In [ ]:
import json
import datetime
datafile='yelp_academic_dataset_user.json'
user_id_count = 1
user_id_dict = dict()
with open(datafile,'r') as f:
for line in f:
data = json.loads(line)
user_id = data.get('user_id')
friends = data.get('friends')
try:
user_id_dict[user_id]
except:
user_id_dict[user_id] = user_id_count
user_id_count+=1
user_data=list()
friends_data=list()
with open(datafile,'r') as f:
count=0
for line in f:
data=json.loads(line)
user_id=user_id_dict[data.get('user_id')]
name=data.get('name')
review_count=data.get('review_count')
average_stars=data.get('average_stars')
yelping_since=datetime.datetime.strptime(data.get('yelping_since'),"%Y-%m").date()
fans=data.get('fans')
user_friends=data.get('friends')
for i in range(len(user_friends)):
user_friends[i] = user_id_dict[user_friends[i]]
user_data.append([user_id,name,review_count,average_stars,yelping_since,fans])
friends_data.append([user_id,user_friends])
count+=1
print(count)
In [ ]:
friends_data[0:10]
In [ ]:
#Select a random(ish) list of nodes
friends_of_list = [1,5,15,100,2200,3700,13500,23800,45901,78643,112112,198034,267123,298078,301200,353216]
node_super_set = set(friends_of_list)
#Get a superset of these nodes - the friends they are connected to
for n in friends_of_list:
friends = friends_data[n-1][1]
node_super_set = node_super_set.union({f for f in friends})
node_super_list = list(node_super_set)
#Collect node data and edges for these nodes
node_data = dict()
edge_list = list()
for node in node_super_list:
node_data[node]=user_data[node-1]
friends = friends_data[node-1][1]
edges = [(node,e) for e in friends if e in node_super_list]
edge_list.extend(edges)
In [ ]:
print(len(edge_list),len(node_super_list),len(node_data))
In [ ]:
for e in edge_list:
if e[0] in node_super_list:
continue
if e[1] in node_super_list:
continue
print(e[0],e[1])
In [ ]:
import networkx as nx
In [ ]:
friend_graph=nx.Graph()
friend_graph.add_nodes_from(node_super_list)
friend_graph.add_edges_from(edge_list)
print(friend_graph.number_of_nodes(),friend_graph.number_of_edges())
In [ ]:
#Querying the graph
len(friend_graph.neighbors(1))
In [ ]:
nx.draw(friend_graph)
In [ ]:
count = 0
for n in friend_graph.nodes_iter():
if friend_graph.degree(n) == 1:
print(n)
In [ ]:
nodes = friend_graph.nodes()
for node in nodes:
if friend_graph.degree(node) == 0:
friend_graph.remove_node(node)
In [ ]:
pos=nx.spring_layout(friend_graph) # positions for all nodes
fig = plt.figure(1,figsize=(12,12))
#pos
# nodes
nx.draw_networkx_nodes(friend_graph,pos,
node_color='r',
node_size=500,
alpha=0.8)
# edges
nx.draw_networkx_edges(friend_graph,pos,width=1.0,alpha=0.5)
nx.draw_networkx_edges(friend_graph,pos,
width=8,alpha=0.5,edge_color='b')
node_name={}
for node in friend_graph.nodes():
node_name[node]=str(node)
nx.draw_networkx_labels(friend_graph,pos,node_name,font_size=16)
fig.show()
In [ ]:
nx.shortest_path(friend_graph,100219,19671)
In [ ]:
nx.shortest_path_length(friend_graph,167099,47622)
In [ ]:
print(len(list(nx.connected_components(friend_graph))))
In [ ]:
for comp in nx.connected_components(friend_graph):
print(comp)
In [ ]:
largest_size=0
largest_graph = None
for g in nx.connected_component_subgraphs(friend_graph):
if len(g) > largest_size:
largest_size = len(g)
largest_graph = g
nx.draw(largest_graph)
In [ ]:
smallest_size=100000
smallest_graph = None
for g in nx.connected_component_subgraphs(friend_graph):
if len(g) < smallest_size:
smallest_size = len(g)
smallest_graph = g
nx.draw(smallest_graph)
In [ ]:
#Find out node degrees in the graph
nx.degree(friend_graph)
In [ ]:
#Highest degree
print(max(nx.degree(friend_graph).values()))
#Node with highest degree value
degrees = nx.degree(friend_graph)
print(max(degrees,key=degrees.get))
In [ ]:
pos=nx.spring_layout(friend_graph) # positions for all nodes
fig = plt.figure(1,figsize=(12,12))
#pos
# nodes
nx.draw_networkx_nodes(friend_graph,pos,
node_color='r',
node_size=500,
alpha=0.8)
# edges
nx.draw_networkx_edges(friend_graph,pos,width=1.0,alpha=0.5)
nx.draw_networkx_edges(friend_graph,pos,
edgelist=edges,
width=8,alpha=0.5,edge_color='b')
node_name={}
for node in friend_graph.nodes():
node_name[node]=str(node)
nx.draw_networkx_labels(friend_graph,pos,node_name,font_size=16)
fig.show()
In [ ]:
nx.clustering(friend_graph)
In [ ]:
nx.average_clustering(friend_graph)
In [ ]:
G=nx.complete_graph(4)
nx.draw(G)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
nx.clustering(G)
In [ ]:
G.remove_edge(1,2)
In [ ]:
pos=nx.spring_layout(G) # positions for all nodes
# nodes
nx.draw_networkx_nodes(G,pos,
node_color='r',
node_size=500,
alpha=0.8)
# edges
#nx.draw_networkx_edges(sub_graph,pos,width=1.0,alpha=0.5)
nx.draw_networkx_edges(G,pos,
edgelist=G.edges(),
width=8,alpha=0.5,edge_color='b')
node_name={}
for node in G.nodes():
node_name[node]=str(node)
nx.draw_networkx_labels(G,pos,node_name,font_size=16)
plt.axis('off')
plt.show() # display
In [ ]:
nx.clustering(G)
Communicability measures how easy it is to send a message from node i to node j
In [ ]:
from networkx.algorithms.centrality import closeness_centrality, communicability
In [ ]:
type(closeness_centrality(friend_graph))
In [ ]:
from collections import OrderedDict
cc = OrderedDict(sorted(
closeness_centrality(friend_graph).items(),
key = lambda x: x[1],
reverse = True))
cc
In [ ]:
G=nx.complete_graph(4)
nx.closeness_centrality(G)
In [ ]:
G.remove_edge(1,2)
In [ ]:
pos=nx.spring_layout(G) # positions for all nodes
# nodes
nx.draw_networkx_nodes(G,pos,
node_color='r',
node_size=500,
alpha=0.8)
# edges
#nx.draw_networkx_edges(sub_graph,pos,width=1.0,alpha=0.5)
nx.draw_networkx_edges(G,pos,
edgelist=G.edges(),
width=8,alpha=0.5,edge_color='b')
node_name={}
for node in G.nodes():
node_name[node]=str(node)
nx.draw_networkx_labels(G,pos,node_name,font_size=16)
plt.axis('off')
plt.show() # display
In [ ]:
nx.closeness_centrality(G)
Takes into account all paths between pairs of nodes
The more paths, the higher the communicability
In [ ]:
G = nx.Graph([(0,1),(1,2),(1,5),(5,4),(2,4),(2,3),(4,3),(3,6)])
nx.communicability(G)
In [ ]:
#Define a layout for the graph
pos=nx.spring_layout(G) # positions for all nodes
# draw the nodes: red, sized, transperancy
nx.draw_networkx_nodes(G,pos,
node_color='r',
node_size=500,
alpha=1)
# draw the edges
nx.draw_networkx_edges(G,pos,
width=8,alpha=0.5,edge_color='b')
node_name={}
for node in G.nodes():
node_name[node]=str(node)
nx.draw_networkx_labels(G,pos,node_name,font_size=16)
plt.axis('off')
plt.show() # display
In [ ]:
# communicability is the sum of closed walks of different lengths between nodes.
#communicability(friend_graph) #Costly operation, we won't do this. Try it at home!
In [ ]:
G=nx.complete_graph(4)
nx.betweenness_centrality(G)
In [ ]:
G.remove_edge(1,2)
nx.betweenness_centrality(G)
In [ ]:
#Define a layout for the graph
pos=nx.spring_layout(G) # positions for all nodes
# draw the nodes: red, sized, transperancy
nx.draw_networkx_nodes(G,pos,
node_color='r',
node_size=500,
alpha=1)
# draw the edges
nx.draw_networkx_edges(G,pos,
width=8,alpha=0.5,edge_color='b')
node_name={}
for node in G.nodes():
node_name[node]=str(node)
nx.draw_networkx_labels(G,pos,node_name,font_size=16)
plt.axis('off')
plt.show() # display
In [ ]:
nx.all_pairs_shortest_path(G)
In [ ]:
nx.betweenness_centrality(friend_graph)
In [ ]:
G = nx.complete_graph(4)
nx.eccentricity(G)
In [ ]:
G.remove_edge(1,2)
nx.eccentricity(G)
In [ ]:
nx.diameter(G)
In [ ]:
nx.periphery(G)
In [ ]:
nx.diameter(friend_graph)
In [ ]:
nx.periphery(griend_graph)
In [ ]:
G = nx.complete_graph(4)
print(nx.diameter(G))
print(nx.periphery(G))
In [ ]:
G.remove_edge(1,2)
print(nx.diameter(G))
print(nx.periphery(G))
In [ ]:
from networkx.algorithms.clique import find_cliques, cliques_containing_node
In [ ]:
for clique in find_cliques(friend_graph):
print(clique)
In [ ]:
cliques_containing_node(friend_graph,2)
In [ ]:
#nx.draw(nx.make_max_clique_graph(friend_graph))
In [ ]:
from networkx.algorithms.distance_measures import center
center(largest_graph)
In [ ]: