positive network


In [1]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from glob import glob

# Alternative input kept for reference: glob every per-article GML file.
#gml_files = glob('../output/network/*/*.gml')

# Load the joined article network from its GML file.
graph = nx.read_gml('../output_join/article_pos1.gml')
# Undirected view of the whole network (every edge, direction dropped).
ugraph = graph.to_undirected()
# Undirected graph built with reciprocal=True; per the networkx docs this
# keeps only edges that exist in both directions in the original graph.
U = graph.to_undirected(reciprocal=True)
e = U.edges()
# Add the reciprocal edges to ugraph a second time.
# NOTE(review): presumably this restores the parallel edge that is merged when
# a u->v / v->u pair is collapsed by to_undirected() -- confirm the intent.
ugraph.add_edges_from(e)

def highest_centrality(cent_dict):
    """Returns a tuple (node,value) with the node
    with largest value from centrality dictionary.

    cent_dict -- mapping of node -> centrality value.

    When several nodes share the largest value, the node that compares
    greatest is returned (the same tie-break as sorting (value, node)
    pairs in descending order).

    Raises IndexError if cent_dict is empty.
    """
    if not cent_dict:
        raise IndexError("highest_centrality() needs a non-empty centrality dictionary")
    # dict.items() exists on Python 2 and 3 (iteritems() is Python 2 only).
    # max() picks the winner in a single pass instead of sorting every entry.
    node, value = max(cent_dict.items(), key=lambda item: (item[1], item[0]))
    return (node, value)

In [2]:
# start here
#ugraph = nx.read_gml('positive_uall.gml')

# Summary statistics for the directed graph, then for its undirected version.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(nx.info(graph))
print(nx.info(ugraph))


Name: 
Type: MultiDiGraph
Number of nodes: 853
Number of edges: 1127
Average in degree:   1.3212
Average out degree:   1.3212
Name: 
Type: MultiGraph
Number of nodes: 853
Number of edges: 1127
Average degree:   2.6424

In [3]:
def drawIt(graph, what = 'graph'):
    """Print the node count of `graph`, then draw it with a spring layout.

    graph -- networkx graph to draw.
    what  -- label used in the printed message (default 'graph').

    Graphs with more than 20 nodes get a 10x10 inch figure. Graphs with more
    than 40 nodes are also drawn with node_size=70 and font_size=12 so that
    the labels stay legible. Node labels are always shown.
    """
    nsize = graph.number_of_nodes()
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("Drawing %s of size %s:" % (what, nsize))

    if nsize > 20:
        plt.figure(figsize=(10, 10))

    # One draw call; only the options vary with the size of the graph.
    draw_options = {'with_labels': True}
    if nsize > 40:
        draw_options.update(node_size=70, font_size=12)
    nx.draw_spring(graph, **draw_options)
    plt.show()

def describeGraph(graph):
    """Print node, edge and component counts, then draw the graph and each component.

    graph -- undirected networkx graph (nx.connected_components is called on it).

    The whole graph is drawn first, followed by every connected component
    from largest to smallest.
    """
    # Largest component first, so the drawings appear in decreasing size.
    components = sorted(nx.connected_components(graph), key = len, reverse = True)
    # Order must match the placeholders below: nodes, edges, components.
    params = (graph.number_of_nodes(), graph.number_of_edges(), len(components))
    print("Graph has %s nodes, %s edges, %s connected components\n" % params)
    drawIt(graph)
    for sub in components:
        drawIt(graph.subgraph(sub), what = 'component')

In [ ]:
#describeGraph(ugraph)

Components


In [4]:
# sizes of the connected components (number of nodes in each), largest first
print(sorted((len(c) for c in nx.connected_components(ugraph)), reverse=True))


[676, 15, 7, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]

In [5]:
# generate connected components as subgraphs
subgraphs = list(nx.connected_component_subgraphs(ugraph))

# Gc = largest connected component (a subgraph, not a number); it is picked
# from the list built above instead of recomputing every component subgraph.
Gc = max(subgraphs, key=len)
# number of nodes in the largest component
len(Gc)


Out[5]:
676

Connectivity

A k-component is a maximal subgraph of a graph G that has node connectivity of at least k: at least k nodes must be removed to break it into more components. k-components have an inherent hierarchical structure because they are nested in terms of connectivity: a connected graph can contain several 2-components, each of which can contain one or more 3-components, and so forth. `k_components` returns a dictionary with every connectivity level k found in the input graph as keys and, as values, a list of the sets of nodes that form a k-component at that level. The algorithm is based on finding all minimum-size node cut-sets of a graph and proceeds as follows: 1. Compute the node connectivity, k, of the input graph G. 2. Identify all k-cutsets at the current level of connectivity using Kanevsky’s algorithm. 3. Generate new graph components based on the removal of these cutsets; nodes in a cutset belong to both sides of the induced cut. 4. If the graph is neither complete nor trivial, return to step 1; otherwise end. Note: MultiGraph and MultiDiGraph types are not supported.

Degree


In [6]:
# degree histogram: returns a list of frequencies of degrees
# (position d in the list holds the number of nodes whose degree is d)
print(nx.degree_histogram(graph))


[0, 488, 153, 73, 40, 29, 19, 7, 8, 8, 4, 3, 0, 1, 1, 1, 2, 1, 1, 0, 1, 0, 2, 1, 0, 0, 1, 1, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1]

In [7]:
# degree rank plot (undirected)

# node degrees of the undirected graph, highest first
degree_sequence = sorted(nx.degree(ugraph).values(), reverse=True)
#print "Degree sequence", degree_sequence
dmax = max(degree_sequence)

# main panel: degree against rank on log-log axes
fig, rank_ax = plt.subplots()
rank_ax.loglog(degree_sequence, 'b-', marker='o')
rank_ax.set_title("Degree rank plot")
rank_ax.set_ylabel("degree")
rank_ax.set_xlabel("rank")

# inset panel: spring layout of the largest connected component
inset_ax = fig.add_axes([0.45, 0.45, 0.45, 0.45])
Gcc = sorted(nx.connected_component_subgraphs(ugraph), key=len, reverse=True)[0]
pos = nx.spring_layout(Gcc)
inset_ax.axis('off')
nx.draw_networkx_nodes(Gcc, pos, node_size=20, ax=inset_ax)
nx.draw_networkx_edges(Gcc, pos, alpha=0.4, ax=inset_ax)

plt.show()


Centrality


In [8]:
# degree centrality
a = nx.degree_centrality(graph)
# one row per node, ordered from the lowest to the highest score
dfIn = (
    pd.DataFrame.from_dict(a, orient='index')
    .rename(columns={0: 'degree centrality'})
    .sort_values(by='degree centrality')
)
dfIn


Out[8]:
degree centrality
neighbors 0.001174
free vaccine 0.001174
testing 0.001174
sex 0.001174
vaccine opponents 0.001174
officials 0.001174
elite list 0.001174
arm 0.001174
Dutch Bible belt 0.001174
efficacious 0.001174
diarrhea deaths 0.001174
written down 0.001174
vaccine refusal rates 0.001174
unconscionable 0.001174
Department of Public Health Immunization Program 0.001174
polio vaccination effort 0.001174
vaccine refusing 0.001174
medical law 0.001174
Afghanistan 0.001174
home-schooled children 0.001174
children with affected older sibling and who had received MMR vaccine 0.001174
psychiatrist 0.001174
fraud 0.001174
vaccines cause neurological problems 0.001174
unethical 0.001174
public schools 0.001174
random cases 0.001174
genes 0.001174
immunization programs 0.001174
Federal Circuit 0.001174
... ...
polio vaccine opposition 0.010563
immune system 0.010563
genital warts 0.011737
vaccinations 0.011737
immunization 0.011737
infection 0.011737
Tdap vaccine 0.012911
states 0.012911
cervical dysplasia 0.012911
Gardasil 0.015258
anti-vaccination website 0.016432
side effects 0.017606
herd immunity 0.018779
Jain study 0.018779
religious groups 0.019953
meningococcal vaccine 0.021127
autism risk 0.023474
SB 277 0.025822
anti-vaccination 0.025822
measles vaccine 0.026995
MMR vaccine 0.030516
vaccination 0.031690
children 0.032864
HPV vaccine 0.032864
meningococcal disease 0.032864
vaccine-autism link 0.037559
autism 0.044601
measles 0.058685
parents 0.059859
vaccines 0.070423

853 rows × 1 columns


In [ ]:
# betweenness centrality
a = nx.betweenness_centrality(graph)
# one row per node, ordered from the lowest to the highest score
dfIn = (
    pd.DataFrame.from_dict(a, orient='index')
    .rename(columns={0: 'betweenness centrality'})
    .sort_values(by='betweenness centrality')
)
dfIn

In [ ]:
# closeness centrality
a = nx.closeness_centrality(graph)
# one row per node, ordered from the lowest to the highest score
dfIn = (
    pd.DataFrame.from_dict(a, orient='index')
    .rename(columns={0: 'closeness centrality'})
    .sort_values(by='closeness centrality')
)
dfIn

In [ ]:
# in degree centrality
a = nx.in_degree_centrality(graph)
# one row per node, ordered from the lowest to the highest score
dfIn = (
    pd.DataFrame.from_dict(a, orient='index')
    .rename(columns={0: 'in deg centrality'})
    .sort_values(by='in deg centrality')
)
dfIn

In [ ]:
# out degree centrality
b = nx.out_degree_centrality(graph)
# one row per node, ordered from the lowest to the highest score
dfIn = (
    pd.DataFrame.from_dict(b, orient='index')
    .rename(columns={0: 'out deg centrality'})
    .sort_values(by='out deg centrality')
)
dfIn


In [ ]:
# current-flow betweenness centrality (graph must be connected; run for largest component)
#nx.current_flow_betweenness_centrality(graph)

# eigenvector centrality

# degree assortativity coefficient
# average neighbor degree; average degree connectivity (k nearest neighbors)

#nx.edge_connectivity(graph)
#nx.node_connectivity(graph)

# clustering coefficient (cannot be multigraph)
# nx.average_clustering(graph)