Network Analysis with Python

  • Networks are connected bi-directional graphs
  • Nodes mark the entities in a network
  • Edges mark the relationships in a network
  • Examples of networks

  • Facebook friends
  • Other social networks
  • transportation networks
  • Power grids
  • Internet routers
  • Activity networks
  • Many others
  • Questions we're interested in

  • Shortest path between two nodes
  • Connectedness
  • Centrality
  • Clustering
  • Communicability
  • networkx

  • Python package for networks
  • Nodes and edges can contain data
  • Nodes can be (hashable!) python objects
  • Constructing a simple network

    Necessary imports

    
    
    In [ ]:
    import networkx as nx
    %matplotlib inline
    import numpy as np
    import matplotlib.pyplot as plt
    
    
    
    In [ ]:
    simple_network = nx.Graph()
    nodes = [1,2,3,4,5,6,7,8]
    edges = [(1,2),(2,3),(1,3),(4,5),(2,7),(1,9),(3,4),(4,5),(4,9),(5,6),(7,8),(8,9)]
    simple_network.add_nodes_from(nodes)
    simple_network.add_edges_from(edges)
    nx.draw(simple_network)
    

    Add labels to the nodes

    
    
    In [ ]:
    pos=nx.spring_layout(simple_network) # positions for all nodes
    
    # nodes
    nx.draw_networkx_nodes(simple_network,pos,
                           node_color='r',
                           node_size=500,
                          alpha=0.8)
    
    # edges
    #nx.draw_networkx_edges(sub_graph,pos,width=1.0,alpha=0.5)
    nx.draw_networkx_edges(simple_network,pos,
                           edgelist=edges,
                           width=8,alpha=0.5,edge_color='b')
    
    
    node_name={}
    for node in simple_network.nodes():
        node_name[node]=str(node)
    
    
    nx.draw_networkx_labels(simple_network,pos,node_name,font_size=16)
    
    plt.axis('off')
    plt.show() # display
    

    Simple queries on the network

    
    
    In [ ]:
    simple_network.has_edge(2,9)
    #simple_network.has_node(2)
    #simple_network.number_of_edges()
    #simple_network.number_of_nodes()
    #simple_network.order()
    #len(simple_network)
    

    Iterating over a network

    
    
    In [ ]:
    for n in simple_network.nodes_iter():
        print(n)
    
    
    
    In [ ]:
    for a in simple_network.adjacency_iter():
        print(a)
    
    
    
    In [ ]:
    for e in simple_network.edges_iter():
        print(e)
    
    
    
    In [ ]:
    for d in simple_network.degree_iter():
        print(d)
    

    Types of graph

    
    
    In [ ]:
    G = nx.Graph() #Undirected simple graph
    d = nx . DiGraph () #directed simple graph
    m = nx . MultiGraph () #undirected with parallel edges
    h = nx . MultiDiGraph () #directed with parallel edges
    

    Shortest path

    
    
    In [ ]:
    print(nx.shortest_path(simple_network,6,8))
    print(nx.shortest_path_length(simple_network,6,8))
    

    Weighted Edges

  • Example: A network of travel times between locations
  • We can use Google Distance Matrix API to get travel times

  • Uses addresses to construct a distance matrix
  • Free version uses latitudes and longitudes
  • We can find latitudes and longitudes using the function we wrote as homework
  • We'll add a get_lat_lon function to our geocoding function to return lat,lon in google's required format

    
    
    In [ ]:
    #Our geocoding data getter is useful here!
    
    def get_json_data(response,country,types):
        data = response.json()
        result_list = list()
        for result in data['results']:
            if not country == 'ALL':
                if not country in [x['long_name'] for x in result['address_components'] if 'country' in x['types']]:
                    continue
            address = result['formatted_address']
            lat = result['geometry']['location']['lat']
            lng = result['geometry']['location']['lng']
            if types:
                result_list.append((address,lat,lng,result['types']))
            else:
                result_list.append((address,lat,lng))
        return result_list
                
        
    def get_geolocation_data(address_string,format="JSON",country="ALL",types=False):
        format = format.lower()
        address = '_'.join(address_string.split())
        url = 'https://maps.googleapis.com/maps/api/geocode/%s?address=%s' %(format,address)
        try:
            import requests
            response=requests.get(url)
            if not response.status_code == 200: return None
            func='get_'+format+'_data'
            return globals()[func](response,country,types)
        except:
            return None
        
    def get_lat_lon(address):
        data = get_geolocation_data(address,format='JSON')
        return str(data[0][1]) + ',' + str(data[0][2])
    
    
    
    In [ ]:
    get_lat_lon('New York, NY')
    

    Now we can construct the distance matrix api url

    
    
    In [ ]:
    addresses = [
        "Columbia University, New York, NY",
        "Amity Hall Uptown, Amsterdam Avenue, New York, NY",
        "Ellington in the Park, Riverside Drive, New York, NY",
        'Chaiwali, Lenox Avenue, New York, NY',
        "Grant's Tomb, West 122nd Street, New York, NY",
        'Pisticci, La Salle Street, New York, NY',
        'Nicholas Roerich Museum, West 107th Street, New York, NY',
        'Audubon Terrace, Broadway, New York, NY',
        'Apollo Theater, New York, NY'
    ]
    
    
    
    In [ ]:
    latlons=''
    for address in addresses:
        latlon=get_lat_lon(address)
        latlons += latlon + '|'
    print(latlons)
    
    
    
    In [ ]:
    distance_url = 'https://maps.googleapis.com/maps/api/distancematrix/json?origins='
    distance_url+=latlons
    distance_url+='&destinations='
    distance_url+=latlons
    #Set the mode walking, driving, cycling
    mode='walking'
    distance_url+='&mode='+mode
    print(distance_url)
    

    Then let's get the distances and construct a graph

    
    
    In [ ]:
    import requests
    data=requests.get(distance_url).json()
    all_rows = data['rows']
    address_graph=nx.Graph()
    address_graph.add_nodes_from(addresses)
    for i in range(len(all_rows)):
        origin = addresses[i]
        for j in range(len(all_rows[i]['elements'])):
            duration = all_rows[i]['elements'][j]['duration']['value']
            destination = addresses[j]
            address_graph.add_edge(origin,destination,d=duration)
            #print(origin,destination,duration)
    nx.draw(address_graph)
    
    
    
    In [ ]:
    nx.draw(address_graph)
    

    Functionalize this for reuse

    
    
    In [ ]:
    def get_route_graph(address_list,mode='walking'):
        latlons=''
        for address in addresses:
            latlon=get_lat_lon(address)
            latlons += latlon + '|'
        distance_url = 'https://maps.googleapis.com/maps/api/distancematrix/json?origins='
        distance_url+=latlons
        distance_url+='&destinations='
        distance_url+=latlons
        #Set the mode walking, driving, cycling
        mode='driving'
        distance_url+='&mode='+mode
        import requests
        data=requests.get(distance_url).json()
        all_rows = data['rows']
        address_graph = nx.Graph()
        address_graph.add_nodes_from(addresses)
        for i in range(len(all_rows)):
            origin = addresses[i]
            for j in range(len(all_rows[i]['elements'])):
                if i==j:
                    continue
                duration = all_rows[i]['elements'][j]['duration']['value']
                destination = addresses[j]
                address_graph.add_edge(origin,destination,d=duration)
        return address_graph
    address_graph = get_route_graph(addresses)
    

    Test the function by drawing it with node and edge labels

    
    
    In [ ]:
    for edge in address_graph.edges():
        print(edge,address_graph.get_edge_data(*edge))
    
    
    
    In [ ]:
    for n in address_graph.edges_iter():
        print(n)
    
    
    
    In [ ]:
    address_graph = get_route_graph(addresses)
    pos=nx.circular_layout(address_graph) # positions for all nodes
    
    # nodes
    nx.draw_networkx_nodes(address_graph,pos,
                           node_color='r',
                           node_size=2000,
                          alpha=0.001)
    
    # edges
    
    nx.draw_networkx_edges(address_graph,pos,edgelist=address_graph.edges(),width=8,alpha=0.5,edge_color='b')
    
    nx.draw_networkx_edge_labels(address_graph,pos,font_size=10)
    node_name={}
    for node in address_graph.nodes():
        node_name[node]=str(node)
    
    
    nx.draw_networkx_labels(address_graph,pos,node_name,font_size=16)
    
    plt.axis('off')
    plt.show() # display
    

    Yikes! Unreadable!

  • Let's see what the edge weights are
  • 
    
    In [ ]:
    for edge in address_graph.edges():
        print(edge,address_graph.get_edge_data(*edge))
    

    Let's make this readable

    
    
    In [ ]:
    for edge in address_graph.edges():
        duration = address_graph.get_edge_data(*edge)['d']
        address_graph.get_edge_data(*edge)['d'] = int(duration/60)
        print(address_graph.get_edge_data(*edge))
    

    Now let's look a the graph

    
    
    In [ ]:
    pos=nx.circular_layout(address_graph) # positions for all nodes
    fig=plt.figure(1,figsize=(12,12)) #Let's draw a big graph so that it is clearer
    # nodes
    nx.draw_networkx_nodes(address_graph,pos,
                           node_color='r',
                           node_size=2000,
                          alpha=0.001)
    
    # edges
    
    nx.draw_networkx_edges(address_graph,pos,edgelist=address_graph.edges(),width=8,alpha=0.5,edge_color='b')
    
    nx.draw_networkx_edge_labels(address_graph,pos,font_size=10)
    node_name={}
    for node in address_graph.nodes():
        node_name[node]=str(node)
    
    
    nx.draw_networkx_labels(address_graph,pos,node_name,font_size=16)
    #fig.axis('off')
    fig.show() # display
    
    
    
    In [ ]:
    def get_route_graph(address_list,mode='walking'):
        latlons=''
        for address in addresses:
            latlon=get_lat_lon(address)
            latlons += latlon + '|'
        distance_url = 'https://maps.googleapis.com/maps/api/distancematrix/json?origins='
        distance_url+=latlons
        distance_url+='&destinations='
        distance_url+=latlons
        #Set the mode walking, driving, cycling
        mode='driving'
        distance_url+='&mode='+mode
        import requests
        data=requests.get(distance_url).json()
        all_rows = data['rows']
        address_graph = nx.Graph()
        address_graph.add_nodes_from(addresses)
        for i in range(len(all_rows)):
            origin = addresses[i]
            for j in range(len(all_rows[i]['elements'])):
                if i==j:
                    continue
                duration = all_rows[i]['elements'][j]['duration']['value']
                destination = addresses[j]
                address_graph.add_edge(origin,destination,d=int(duration/60))
        return address_graph
    address_graph = get_route_graph(addresses)
    

    Let's remove a few edges (randomly)

    
    
    In [ ]:
    for edge in address_graph.edges():
        import random
        r = random.random()
        if r <0.75: #get rid of 60% of the edges
            address_graph.remove_edge(*edge)
    

    And draw it again

    
    
    In [ ]:
    pos=nx.circular_layout(address_graph) # positions for all nodes
    plt.figure(1,figsize=(12,12)) #Let's draw a big graph so that it is clearer
    # nodes
    nx.draw_networkx_nodes(address_graph,pos,
                           node_color='r',
                           node_size=2000,
                          alpha=0.001)
    
    # edges
    
    nx.draw_networkx_edges(address_graph,pos,edgelist=address_graph.edges(),width=8,alpha=0.5,edge_color='b')
    
    nx.draw_networkx_edge_labels(address_graph,pos,font_size=7)
    node_name={}
    for node in address_graph.nodes():
        node_name[node]=str(node)
    
    
    nx.draw_networkx_labels(address_graph,pos,node_name,font_size=16)
    #fig.axis('off')
    fig.show() # display
    
    
    
    In [ ]:
    print(addresses)
    

    Shortest path and shortest duration

    
    
    In [ ]:
    print(nx.shortest_path(address_graph,'Amity Hall Uptown, Amsterdam Avenue, New York, NY', 'Chaiwali, Lenox Avenue, New York, NY'))
    print(nx.dijkstra_path(address_graph,'Amity Hall Uptown, Amsterdam Avenue, New York, NY', 'Chaiwali, Lenox Avenue, New York, NY'))
    print(nx.dijkstra_path_length (address_graph,'Amity Hall Uptown, Amsterdam Avenue, New York, NY', 'Chaiwali, Lenox Avenue, New York, NY',weight='d'))
    
    
    
    In [ ]:
    #[print(n1,n2,nx.shortest_path_length(n1,n2),nx.dijkstra_path_length(n1,n2,weight='d')) for n1 in address_graph.nodes() for n2 in address_graph.nodes()]
    [print(n1,n2,
           nx.shortest_path_length(address_graph,n1,n2),
           nx.dijkstra_path_length(address_graph,n1,n2,weight='d'),
          ) for n1 in address_graph.nodes() for n2 in address_graph.nodes() if not n1 == n2]
    
    
    
    In [ ]:
    for edge in address_graph.edges():
        print(edge,address_graph.get_edge_data(*edge))
    

    Graph drawing options

  • nltk uses matplotlib to draw graphs
  • limited, but useful, functionalities
  • Let's take a look!

    Differnetiating edges by weight

    
    
    In [ ]:
    #Divide edges into two groups based on weight
    #Easily extendable to n-groups
    
    elarge=[(u,v) for (u,v,d) in address_graph.edges(data=True) if d['d'] >5]
    esmall=[(u,v) for (u,v,d) in address_graph.edges(data=True) if d['d'] <=5]
    
    pos=nx.spring_layout(address_graph) # positions for all nodes
    plt.figure(1,figsize=(12,12)) #Let's draw a big graph so that it is clearer
    
    # nodes
    nx.draw_networkx_nodes(address_graph,pos,node_size=700)
    
    # edges. draw the larger weight edges in solid lines and smaller weight edges in dashed lines
    nx.draw_networkx_edges(address_graph,pos,edgelist=elarge,
                        width=6)
    nx.draw_networkx_edges(address_graph,pos,edgelist=esmall,
                        width=6,alpha=0.5,edge_color='b',style='dashed')
    
    # labels
    nx.draw_networkx_labels(address_graph,pos,font_size=20,font_family='sans-serif')
    
    nx.draw_networkx_edge_labels(address_graph,pos,font_size=7)
    
    plt.axis('off')
    #plt.savefig("address_graph.png") # save as png if you need to use it in a report or web app
    fig.show() # display
    

    highlight the shortest path

    
    
    In [ ]:
    origin = 'Amity Hall Uptown, Amsterdam Avenue, New York, NY'
    destination = 'Chaiwali, Lenox Avenue, New York, NY'
    shortest_path = nx.dijkstra_path(address_graph,origin,destination)
    shortest_path_edges = list()
    for i in range(len(shortest_path)-1):
        shortest_path_edges.append((shortest_path[i],shortest_path[i+1]))
        shortest_path_edges.append((shortest_path[i+1],shortest_path[i]))
        
    path_edges=list()
    other_edges=list()
    node_label_list = dict()
    node_label_list = {n:'' for n in address_graph.nodes()}
    for edge in address_graph.edges():
        if edge in shortest_path_edges:
            path_edges.append(edge)
            node_label_list[edge[0]] = edge[0]
            node_label_list[edge[1]] = edge[1]
        else:
            other_edges.append(edge)
    
    pos=nx.spring_layout(address_graph) # positions for all nodes
    fig=plt.figure(1,figsize=(12,12))
    # nodes
    nx.draw_networkx_nodes(address_graph,pos,node_size=700)
    
    # edges. draw the larger weight edges in solid lines and smaller weight edges in dashed lines
    nx.draw_networkx_edges(address_graph,pos,edgelist=path_edges,
                        width=6)
    nx.draw_networkx_edges(address_graph,pos,edgelist=other_edges,
                        width=6,alpha=0.5,edge_color='b',style='dashed')
    
    # labels
    
    nx.draw_networkx_labels(address_graph,pos,font_size=20,font_family='sans-serif',labels=node_label_list)
    nx.draw_networkx_edge_labels(address_graph,pos,font_size=7)
    
    plt.axis('off')
    #plt.savefig("address_graph.png") # save as png if you need to use it in a report or web app
    plt.show() # display
    

    Question How would you remove edge labels from all but the shortest path?

    Working with a network

    Given an address, generate a sorted by distance list of all other addresses

    
    
    In [ ]:
    location = 'Amity Hall Uptown, Amsterdam Avenue, New York, NY'
    distance_list = list()
    for node in address_graph.nodes():
        if node == location:
            continue
        distance = nx.dijkstra_path_length(address_graph,location,node)
        distance_list.append((node,distance))
    from operator import itemgetter
    print(sorted(distance_list,key=itemgetter(1)))
    

    Get all paths from one location to another

    
    
    In [ ]:
    list(nx.all_simple_paths(address_graph,'Amity Hall Uptown, Amsterdam Avenue, New York, NY','Chaiwali, Lenox Avenue, New York, NY'))
    
    
    
    In [ ]:
    nx.all_simple_paths(address_graph,
                        'Amity Hall Uptown, Amsterdam Avenue, New York, NY',
                        'Chaiwali, Lenox Avenue, New York, NY')
    

    Social networks


    We will use the Yelp database challenge
    Data on: users, businesses, reviews, tips (try the mushroom burger!), check-in (special offers from yelp)

    We're use the data in the users file (yelp_academic_dataset_user.json)

    { 'type': 'user', 'user_id': (encrypted user id), 'name': (first name), 'review_count': (review count), 'average_stars': (floating point average, like 4.31), 'votes': {(vote type): (count)}, 'friends': [(friend user_ids)], 'elite': [(years_elite)], 'yelping_since': (date, formatted like '2012-03'), 'compliments': { (compliment_type): (num_compliments_of_this_type), ... }, 'fans': (num_fans), }

    Important note!

    The data on the yelp site has changed. If you want to follow along with the class video, do the following:

  • Download the file "friends_graph" from the Files unit at the top of this week's class material
  • Scroll down to the cell that says "G = nx.read_gpickle('friend_graph')" and run the rest of the notebook from that point onward
  • If you want to use the new file, then do all the following cells EXCEPT for the "G = nx.read_gpickle('friend_graph')" cell
  • Read the data from the data file and create several list variables to hold the data

  • You could also use objects to store the data
  • 
    
    In [ ]:
    import json
    import datetime
    datafile='yelp_academic_dataset_user.json'
    user_id_count = 1
    user_id_dict = dict()
    with open(datafile,'r') as f:
        for line in f:
            data = json.loads(line)
            user_id = data.get('user_id')
            friends = data.get('friends')
            try:
                user_id_dict[user_id]
            except:
                user_id_dict[user_id] = user_id_count
                user_id_count+=1
    print(len(user_id_dict))
    user_data=list()
    friends_data=list()
    with open(datafile,'r') as f:
        count=0
        for line in f:
            data=json.loads(line)
            user_id=user_id_dict[data.get('user_id')]
            name=data.get('name')
            review_count=data.get('review_count')
            average_stars=data.get('average_stars') 
            
            try:
                yelping_since=datetime.datetime.strptime(data.get('yelping_since').strip(),"%Y-%m").date()
            except:
                yelping_since=datetime.datetime.now()
            
            fans=data.get('fans')
            user_friends=data.get('friends')
            user_friends_list = list()
            for i in range(len(user_friends)):
                try:
                    user_friends_list.append(user_id_dict[user_friends[i]])
                except:
                    continue
            user_data.append([user_id,name,review_count,yelping_since,average_stars,fans])
            friends_data.append([user_id,user_friends_list])
            count+=1
    print(count)
    
    
    
    In [ ]:
    friends_data[0:10]
    

    Too much data for this class so let's cut it down

    
    
    In [ ]:
    #Select a random(ish) list of nodes 
    friends_of_list = [1,5,15,100,2200,3700,13500,23800,45901,78643,112112,198034,267123,298078,301200,353216]
    node_super_set = set(friends_of_list)
    #Get a superset of these nodes - the friends they are connected to
    for n in friends_of_list:
        friends = friends_data[n-1][1]
        node_super_set = node_super_set.union({f for f in friends})
    node_super_list = list(node_super_set)
    #Collect node data and edges for these nodes
    node_data = dict()
    edge_list = list()
    for node in node_super_list:
        node_data[node]=user_data[node-1]
        friends = friends_data[node-1][1]
        edges = [(node,e) for e in friends if e in node_super_list]
        edge_list.extend(edges)
    
    
    
    In [ ]:
    print(len(edge_list),len(node_super_list),len(node_data))
    
    
    
    In [ ]:
    for e in edge_list:
        if e[0] in node_super_list:
            continue
        if e[1] in node_super_list:
            continue
        print(e[0],e[1])
    

    Make the graph

    
    
    In [ ]:
    import networkx as nx
    
    
    
    In [ ]:
    friend_graph=nx.Graph()
    friend_graph.add_nodes_from(node_super_list)
    friend_graph.add_edges_from(edge_list)
    print(friend_graph.number_of_nodes(),friend_graph.number_of_edges())
    
    
    
    In [ ]:
    nx.write_gpickle(friend_graph,'./friend_graph')
    
    
    
    In [ ]:
    G = nx.read_gpickle('friend_graph')
    len(G.neighbors(1))
    
    
    
    In [ ]:
    #Querying the graph
    len(friend_graph.neighbors(1))
    
    
    
    In [ ]:
    %matplotlib inline
    nx.draw(friend_graph)
    

    Let's remove disconnected nodes

    
    
    In [ ]:
    count = 0
    for n in friend_graph.nodes_iter():
        if friend_graph.degree(n) == 1:
            print(n)
    
    
    
    In [ ]:
    nodes = friend_graph.nodes()
    for node in nodes:
        if friend_graph.degree(node) == 0:
            friend_graph.remove_node(node)
    
    
    
    In [ ]:
    pos=nx.spring_layout(friend_graph) # positions for all nodes
    import matplotlib.pyplot as plt
    fig = plt.figure(1,figsize=(12,12))
    #pos
    # nodes
    nx.draw_networkx_nodes(friend_graph,pos,
                           node_color='r',
                           node_size=500,
                           alpha=0.8)
    
    # edges
    nx.draw_networkx_edges(friend_graph,pos,width=1.0,alpha=0.5)
    nx.draw_networkx_edges(friend_graph,pos,
                           width=8,alpha=0.5,edge_color='b')
    
    node_name={}
    for node in friend_graph.nodes():
        node_name[node]=str(node)
    
    nx.draw_networkx_labels(friend_graph,pos,node_name,font_size=16)
    
    fig.show()
    

    Start looking at different aspects of the graph

    
    
    In [ ]:
    nx.shortest_path(friend_graph,420079,3700)
    #If using the file friend_graph, comment the line above and uncomment the line below this one
    #nx.shortest_path(friend_graph,100219,19671)
    
    
    
    In [ ]:
    nx.shortest_path_length(friend_graph,420079,928795)
    #If using the file friend_graph, comment the line above and uncomment the line below this one
    #nx.shortest_path_length(friend_graph,167099,47622)
    

    Graph components

  • Let's see the number of connected components
  • And then each connected component
  • 
    
    In [ ]:
    print(len(list(nx.connected_components(friend_graph))))
    
    
    
    In [ ]:
    for comp in nx.connected_components(friend_graph):
        print(comp)
    

    Largest connected component subgraph

    
    
    In [ ]:
    largest_size=0
    largest_graph = None
    for g in nx.connected_component_subgraphs(friend_graph):
        if len(g) > largest_size:
            largest_size = len(g)
            largest_graph = g
    nx.draw(largest_graph)
    

    Smallest connected component

    
    
    In [ ]:
    smallest_size=100000
    smallest_graph = None
    for g in nx.connected_component_subgraphs(friend_graph):
        if len(g) < smallest_size:
            smallest_size = len(g)
            smallest_graph = g
    nx.draw(smallest_graph)
    
    
    
    In [ ]:
    #Find out node degrees in the graph
    nx.degree(friend_graph)
    

    Max degree. The yelp user with the most friends

    
    
    In [ ]:
    #Highest degree
    print(max(nx.degree(friend_graph).values()))
    
    
    #Node with highest degree value
    degrees = nx.degree(friend_graph)
    print(max(degrees,key=degrees.get))
    

    Clustering

    Clustering is a measure of how closely knit the nodes in a graph are. We can measure the degree to which a node belongs to a cluster and the degree to which the graph is clustered

    • Node clustering coefficient: A measure that shows the degree to which a node belongs to a cluster
    • Graph clustering coefficient: A measure that shows the degree to which a graph is clustered
    
    
    In [ ]:
    pos=nx.spring_layout(friend_graph) # positions for all nodes
    fig = plt.figure(1,figsize=(12,12))
    #pos
    # nodes
    nx.draw_networkx_nodes(friend_graph,pos,
                           node_color='r',
                           node_size=500,
                           alpha=0.8)
    
    # edges
    nx.draw_networkx_edges(friend_graph,pos,width=1.0,alpha=0.5)
    nx.draw_networkx_edges(friend_graph,pos,
                           edgelist=edges,
                           width=8,alpha=0.5,edge_color='b')
    
    node_name={}
    for node in friend_graph.nodes():
        node_name[node]=str(node)
    
    nx.draw_networkx_labels(friend_graph,pos,node_name,font_size=16)
    
    fig.show()
    
    
    
    In [ ]:
    nx.clustering(friend_graph)
    
    
    
    In [ ]:
    nx.average_clustering(friend_graph)
    
    
    
    In [ ]:
    G=nx.complete_graph(4)
    nx.draw(G)
    
    
    
    In [ ]:
    
    
    
    
    In [ ]:
    
    
    
    
    In [ ]:
    
    
    
    
    In [ ]:
    nx.clustering(G)
    
    
    
    In [ ]:
    G.remove_edge(1,2)
    
    
    
    In [ ]:
    pos=nx.spring_layout(G) # positions for all nodes
    
    # nodes
    nx.draw_networkx_nodes(G,pos,
                           node_color='r',
                           node_size=500,
                          alpha=0.8)
    
    # edges
    #nx.draw_networkx_edges(sub_graph,pos,width=1.0,alpha=0.5)
    nx.draw_networkx_edges(G,pos,
                           edgelist=G.edges(),
                           width=8,alpha=0.5,edge_color='b')
    
    
    node_name={}
    for node in G.nodes():
        node_name[node]=str(node)
    
    
    nx.draw_networkx_labels(G,pos,node_name,font_size=16)
    
    plt.axis('off')
    plt.show() # display
    
    
    
    In [ ]:
    nx.clustering(G)
    

    Node 0 has two neighbors: 1 and 2. Of the three possible edges, only two are actually present. So, its clustering coefficient is 2/3 or 0.667

    Centrality and communicability

    Centrality deals with identifying the most important nodes in a graph

    Communicability measures how easy it is to send a message from node i to node j

  • closeness_centrality: (n-1)/sum(shortest path to all other nodes)
  • betweenness_centrality: fraction of pair shortest paths that pass through node n
  • degree centrality: fraction of nodes that n is connected to
  • communicability: the sum of all walks from one node to every other node
  • 
    
    In [ ]:
    from networkx.algorithms.centrality import closeness_centrality, communicability
    

    Closeness centrality is a measure of how near a node is to every other node in a network

    The higher the closeness centrality, the more central a node is

    Roughly, because it can get to more nodes in shorter jumps

    
    
    In [ ]:
    type(closeness_centrality(friend_graph))
    
    
    
    In [ ]:
    from collections import OrderedDict
    cc = OrderedDict(sorted(
                        closeness_centrality(friend_graph).items(),
                        key = lambda x: x[1],
                        reverse = True))
    cc
    

    Understanding closeness centrality

    
    
    In [ ]:
    G=nx.complete_graph(4)
    nx.closeness_centrality(G)
    
    
    
    In [ ]:
    G.remove_edge(1,2)
    
    
    
    In [ ]:
    pos=nx.spring_layout(G) # positions for all nodes
    
    # nodes
    nx.draw_networkx_nodes(G,pos,
                           node_color='r',
                           node_size=500,
                          alpha=0.8)
    
    # edges
    #nx.draw_networkx_edges(sub_graph,pos,width=1.0,alpha=0.5)
    nx.draw_networkx_edges(G,pos,
                           edgelist=G.edges(),
                           width=8,alpha=0.5,edge_color='b')
    
    
    node_name={}
    for node in G.nodes():
        node_name[node]=str(node)
    
    
    nx.draw_networkx_labels(G,pos,node_name,font_size=16)
    
    plt.axis('off')
    plt.show() # display
    
    
    
    In [ ]:
    nx.closeness_centrality(G)
    

  • n=4
  • shortest paths from 2 (2-0:1, 2-3:1, 2-1:2)
  • (n-1)/sum = 3/4 = 0.75
  • Communicability

    A measure of the degree to which one node can communicate with another

    Takes into account all paths between pairs of nodes

    The more paths, the higher the communicability

    
    
    In [ ]:
    G = nx.Graph([(0,1),(1,2),(1,5),(5,4),(2,4),(2,3),(4,3),(3,6)])
    nx.communicability(G)
    
    
    
    In [ ]:
    #Define a layout for the graph
    pos=nx.spring_layout(G) # positions for all nodes
    
    # draw the nodes: red, sized, transperancy
    nx.draw_networkx_nodes(G,pos,
                           node_color='r',
                           node_size=500,
                          alpha=1)
    
    # draw the edges
    nx.draw_networkx_edges(G,pos,
                           width=8,alpha=0.5,edge_color='b')
    
    
    node_name={}
    for node in G.nodes():
        node_name[node]=str(node)
    
    
    nx.draw_networkx_labels(G,pos,node_name,font_size=16)
    
    plt.axis('off')
    plt.show() # display
    
    
    
    In [ ]:
    # communicability is the sum of closed walks of different lengths between nodes.
    #communicability(friend_graph) #Costly operation, we won't do this. Try it at home!
    

    Betweenness centrality

    measures of the extent to which a node is connected to other nodes that are not connected to each other.

    It’s a measure of the degree to which a node serves as a connector

    Example: a traffic bottleneck

    The number of shortest paths that go through node n/total number of shortest paths

    
    
    In [ ]:
    G=nx.complete_graph(4)
    nx.betweenness_centrality(G)
    

    When the graph is fully connected, no shortest paths go through the node. So the numerator is zero

    
    
    In [ ]:
    G.remove_edge(1,2)
    nx.betweenness_centrality(G)
    
    
    
    In [ ]:
    #Define a layout for the graph
    pos=nx.spring_layout(G) # positions for all nodes
    
    # draw the nodes: red, sized, transperancy
    nx.draw_networkx_nodes(G,pos,
                           node_color='r',
                           node_size=500,
                          alpha=1)
    
    # draw the edges
    nx.draw_networkx_edges(G,pos,
                           width=8,alpha=0.5,edge_color='b')
    
    
    node_name={}
    for node in G.nodes():
        node_name[node]=str(node)
    
    
    nx.draw_networkx_labels(G,pos,node_name,font_size=16)
    
    plt.axis('off')
    plt.show() # display
    
    
    
    In [ ]:
    nx.all_pairs_shortest_path(G)
    

    There are 12 shortest paths in total

    Two go through 0 (1, 0, 2) and (2, 0, 1)

    Betweeness centrality: 2/12

    
    
    In [ ]:
    nx.betweenness_centrality(friend_graph)
    

    Dispersion in fully connected graphs

  • Eccentricity: the max distance from one node to all other nodes (least eccentric is more central)
  • diameter: the max eccentricity of all nodes in a graph (the longest shortest path)
  • periphery: the set of nodes with eccentricity = diameter
  • 
    
    In [ ]:
    G = nx.complete_graph(4)
    nx.eccentricity(G)
    
    
    
    In [ ]:
    G.remove_edge(1,2)
    nx.eccentricity(G)
    

    Diameter

    The longest shortest path in the graph

    Periphery

    The nodes with the longest shortest paths (the peripheral nodes)

    
    
    In [ ]:
    nx.diameter(G)
    
    
    
    In [ ]:
    nx.periphery(G)
    
    
    
    In [ ]:
    nx.diameter(friend_graph)
    
    
    
    In [ ]:
    nx.periphery(friend_graph)
    
    
    
    In [ ]:
    G = nx.complete_graph(4)
    print(nx.diameter(G))
    print(nx.periphery(G))
    
    
    
    In [ ]:
    G.remove_edge(1,2)
    print(nx.diameter(G))
    print(nx.periphery(G))
    

    Cliques

    A clique is a subgraph in which every node is connected to every other node

    
    
    In [ ]:
    from networkx.algorithms.clique import find_cliques, cliques_containing_node
    
    
    
    In [ ]:
    for clique in find_cliques(friend_graph):
        print(clique)
    
    
    
    In [ ]:
    cliques_containing_node(friend_graph,2)
    
    
    
    In [ ]:
    #nx.draw(nx.make_max_clique_graph(friend_graph))
    

    Center: The set of nodes that are the most central (they have the smallest distance to any other node)

    Graph must be fully connected

    
    
    In [ ]:
    from networkx.algorithms.distance_measures import center
    center(largest_graph)
    
    
    
    In [ ]: