positive network



In [1]:

    
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from glob import glob

#gml_files = glob('../output/network/*/*.gml')

# Load the network from GML; the path is relative to the notebook's working directory.
graph = nx.read_gml('../output_join/article_pos1.gml')
# Undirected version of the full network; the component and degree-rank cells below use it.
ugraph = graph.to_undirected()
# Second undirected conversion with reciprocal=True. Per the networkx docs this
# keeps only edges that appear in both directions in the directed graph.
U = graph.to_undirected(reciprocal=True)
e = U.edges()
# NOTE(review): these edges are added on top of whatever ugraph already holds.
# On a multigraph this creates parallel edges -- presumably to restore the
# multiplicity of reciprocal pairs that to_undirected() merged; confirm intent.
ugraph.add_edges_from(e)

def highest_centrality(cent_dict):
    """Return the ``(node, value)`` pair with the largest centrality value.

    Parameters
    ----------
    cent_dict : dict
        Mapping of node -> centrality score.

    Returns
    -------
    tuple
        ``(node, value)`` for the highest-scoring node. When several nodes
        share the top score, the node that compares largest is returned
        (the same tie-break a descending sort of ``(value, node)`` gives).

    Raises
    ------
    IndexError
        If ``cent_dict`` is empty.
    """
    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only and raises AttributeError on Python 3.
    items = list(cent_dict.items())
    if not items:
        raise IndexError("cent_dict is empty: no node has a centrality value")
    # A single max() pass replaces sort + reverse + [0]; ranking on
    # (value, node) keeps the original tie-breaking order.
    node, value = max(items, key=lambda item: (item[1], item[0]))
    return (node, value)



In [2]:

    
# start here
#ugraph = nx.read_gml('positive_uall.gml')

# Summaries of the directed graph and its undirected counterpart.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(nx.info(graph))
print(nx.info(ugraph))









    



Name: 
Type: MultiDiGraph
Number of nodes: 853
Number of edges: 1127
Average in degree:   1.3212
Average out degree:   1.3212
Name: 
Type: MultiGraph
Number of nodes: 853
Number of edges: 1127
Average degree:   2.6424



In [3]:

    
def drawIt(graph, what = 'graph'):
    """Draw ``graph`` with a spring layout and display the figure.

    Parameters
    ----------
    graph : networkx graph
        Graph to draw; only ``number_of_nodes()`` is read from it before it is
        handed to ``nx.draw_spring``.
    what : str
        Label used in the progress message printed before drawing.

    Graphs with more than 20 nodes get a 10x10 inch figure; graphs with more
    than 40 nodes are additionally drawn with smaller nodes (size 70) and a
    font size of 12.
    """
    nsize = graph.number_of_nodes()
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print("Drawing %s of size %s:" % (what, nsize))

    draw_options = {'with_labels': True}
    if nsize > 20:
        plt.figure(figsize=(10, 10))
    if nsize > 40:
        # nsize > 40 implies nsize > 20, so the large figure above is already open.
        draw_options.update(node_size = 70, font_size = 12)
    nx.draw_spring(graph, **draw_options)
    plt.show()

def describeGraph(graph):
    """Print a size summary of ``graph`` and draw it and each of its components.

    Parameters
    ----------
    graph : networkx graph
        Graph passed to ``nx.connected_components``; its node and edge counts
        are reported and it is drawn via ``drawIt``.

    The whole graph is drawn first, then every connected component, largest
    first.
    """
    components = sorted(nx.connected_components(graph), key = len, reverse = True)
    # Order must match the format string: nodes first, then edges.
    # (The counts were previously passed as (edges, nodes) and printed swapped.)
    params = (graph.number_of_nodes(), graph.number_of_edges(), len(components))
    print("Graph has %s nodes, %s edges, %s connected components\n" % params)
    drawIt(graph)
    for sub in components:
        drawIt(graph.subgraph(sub), what = 'component')



In [ ]:

    
#describeGraph(ugraph)

Components



In [4]:

    
# list of connected components (sets of nodes), starting with largest
# Single-argument print(...) is valid on both Python 2 and Python 3.
print([len(c) for c in sorted(nx.connected_components(ugraph), key=len, reverse=True)])









    



[676, 15, 7, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]



In [5]:

    
# generate connected components as subgraphs (independent copies); Gc is largest component
# G.subgraph(c).copy() is the documented replacement for the deprecated
# nx.connected_component_subgraphs(G) and also works on networkx 1.x.
subgraphs = [ugraph.subgraph(c).copy() for c in nx.connected_components(ugraph)]

# Gc = largest connected component (a subgraph, not a size); picked from the
# list above so the components are not computed a second time
Gc = max(subgraphs, key=len)
len(Gc)









    Out[5]:





676

Connectivity

A k-component is a maximal subgraph of a graph G that has node connectivity of at least k: at least k nodes must be removed to break it into more components. k-components have an inherent hierarchical structure because they are nested in terms of connectivity: a connected graph can contain several 2-components, each of which can contain one or more 3-components, and so forth. `k_components` returns a dictionary with all connectivity levels k in the input graph as keys and, as values, a list of sets of nodes that form a k-component of level k. The algorithm is based on finding all minimum-size node cut-sets of a graph: 1. Compute the node connectivity, k, of the input graph G. 2. Identify all k-cutsets at the current level of connectivity using Kanevsky’s algorithm. 3. Generate new graph components based on the removal of these cutsets; nodes in a cutset belong to both sides of the induced cut. 4. If the graph is neither complete nor trivial, return to step 1; otherwise, end. Note: MultiGraph and MultiDiGraph types are not supported.

Degree



In [6]:

    
# degree histogram: returns a list of frequencies of degrees
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(nx.degree_histogram(graph))









    



[0, 488, 153, 73, 40, 29, 19, 7, 8, 8, 4, 3, 0, 1, 1, 1, 2, 1, 1, 0, 1, 0, 2, 1, 0, 0, 1, 1, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1]



In [7]:

    
# degree rank plot (undirected)

# dict(...) accepts both the plain dict returned by nx.degree on networkx 1.x
# and the DegreeView returned on networkx 2.x+, which has no .values().
degree_sequence=sorted(dict(nx.degree(ugraph)).values(),reverse=True) # degree sequence
#print "Degree sequence", degree_sequence
dmax=max(degree_sequence)

plt.loglog(degree_sequence,'b-',marker='o')
plt.title("Degree rank plot")
plt.ylabel("degree")
plt.xlabel("rank")

# draw graph in inset
plt.axes([0.45,0.45,0.45,0.45])
# Pick the largest component's node set first and build only that subgraph,
# instead of materialising and sorting every component subgraph.
Gcc=ugraph.subgraph(max(nx.connected_components(ugraph), key=len)).copy()
pos=nx.spring_layout(Gcc)
plt.axis('off')
nx.draw_networkx_nodes(Gcc,pos,node_size=20)
nx.draw_networkx_edges(Gcc,pos,alpha=0.4)

plt.show()

Centrality



In [8]:

    
# Degree centrality of every node in `graph`, shown as a one-column DataFrame
# indexed by node and sorted ascending (most central nodes at the bottom).
a = nx.degree_centrality(graph)
# orient='index' makes each dict key a row label
dfIn=pd.DataFrame.from_dict(a,orient='index')
dfIn.columns = ['degree centrality']
dfIn = dfIn.sort_values(by=['degree centrality'])
# bare expression so the notebook renders the frame as the cell output
dfIn









    Out[8]:






  
    
      
      degree centrality
    
  
  
    
      neighbors
      0.001174
    
    
      free vaccine
      0.001174
    
    
      testing
      0.001174
    
    
      sex
      0.001174
    
    
      vaccine opponents
      0.001174
    
    
      officials
      0.001174
    
    
      elite list
      0.001174
    
    
      arm
      0.001174
    
    
      Dutch Bible belt
      0.001174
    
    
      efficacious
      0.001174
    
    
      diarrhea deaths
      0.001174
    
    
      written down
      0.001174
    
    
      vaccine refusal rates
      0.001174
    
    
      unconscionable
      0.001174
    
    
      Department of Public Health Immunization Program
      0.001174
    
    
      polio vaccination effort
      0.001174
    
    
      vaccine refusing
      0.001174
    
    
      medical law
      0.001174
    
    
      Afghanistan
      0.001174
    
    
      home-schooled children
      0.001174
    
    
      children with affected older sibling and who had received MMR vaccine
      0.001174
    
    
      psychiatrist
      0.001174
    
    
      fraud
      0.001174
    
    
      vaccines cause neurological problems
      0.001174
    
    
      unethical
      0.001174
    
    
      public schools
      0.001174
    
    
      random cases
      0.001174
    
    
      genes
      0.001174
    
    
      immunization programs
      0.001174
    
    
      Federal Circuit
      0.001174
    
    
      ...
      ...
    
    
      polio vaccine opposition
      0.010563
    
    
      immune system
      0.010563
    
    
      genital warts
      0.011737
    
    
      vaccinations
      0.011737
    
    
      immunization
      0.011737
    
    
      infection
      0.011737
    
    
      Tdap vaccine
      0.012911
    
    
      states
      0.012911
    
    
      cervical dysplasia
      0.012911
    
    
      Gardasil
      0.015258
    
    
      anti-vaccination website
      0.016432
    
    
      side effects
      0.017606
    
    
      herd immunity
      0.018779
    
    
      Jain study
      0.018779
    
    
      religious groups
      0.019953
    
    
      meningococcal vaccine
      0.021127
    
    
      autism risk
      0.023474
    
    
      SB 277
      0.025822
    
    
      anti-vaccination
      0.025822
    
    
      measles vaccine
      0.026995
    
    
      MMR vaccine
      0.030516
    
    
      vaccination
      0.031690
    
    
      children
      0.032864
    
    
      HPV vaccine
      0.032864
    
    
      meningococcal disease
      0.032864
    
    
      vaccine-autism link
      0.037559
    
    
      autism
      0.044601
    
    
      measles
      0.058685
    
    
      parents
      0.059859
    
    
      vaccines
      0.070423
    
  

853 rows × 1 columns



In [ ]:

    
# Betweenness centrality of every node in `graph`, shown as a one-column
# DataFrame indexed by node and sorted ascending (highest values last).
# NOTE: `a` and `dfIn` are rebound here, replacing the values left by the
# previously run centrality cell.
a = nx.betweenness_centrality(graph)
# orient='index' makes each dict key a row label
dfIn=pd.DataFrame.from_dict(a,orient='index')
dfIn.columns = ['betweenness centrality']
dfIn = dfIn.sort_values(by=['betweenness centrality'])
# bare expression so the notebook renders the frame as the cell output
dfIn



In [ ]:

    
# Closeness centrality of every node in `graph`, shown as a one-column
# DataFrame indexed by node and sorted ascending (highest values last).
# NOTE: `a` and `dfIn` are rebound here, replacing the values left by the
# previously run centrality cell.
a = nx.closeness_centrality(graph)
# orient='index' makes each dict key a row label
dfIn=pd.DataFrame.from_dict(a,orient='index')
dfIn.columns = ['closeness centrality']
dfIn = dfIn.sort_values(by=['closeness centrality'])
# bare expression so the notebook renders the frame as the cell output
dfIn



In [ ]:

    
# In-degree centrality of every node in `graph`, shown as a one-column
# DataFrame indexed by node and sorted ascending (highest values last).
# NOTE: `a` and `dfIn` are rebound here, replacing the values left by the
# previously run centrality cell.
a = nx.in_degree_centrality(graph)
# orient='index' makes each dict key a row label
dfIn=pd.DataFrame.from_dict(a,orient='index')
dfIn.columns = ['in deg centrality']
dfIn = dfIn.sort_values(by=['in deg centrality'])
# bare expression so the notebook renders the frame as the cell output
dfIn



In [ ]:

    
# Out-degree centrality of every node in `graph`, shown as a one-column
# DataFrame indexed by node and sorted ascending (highest values last).
# NOTE: despite its name, `dfIn` holds the out-degree table after this cell;
# the raw scores are kept in `b` rather than `a`.
b = nx.out_degree_centrality(graph)
# orient='index' makes each dict key a row label
dfIn=pd.DataFrame.from_dict(b,orient='index')
dfIn.columns = ['out deg centrality']
dfIn = dfIn.sort_values(by=['out deg centrality'])
# bare expression so the notebook renders the frame as the cell output
dfIn



In [ ]:

    
# current-flow betweenness centrality (graph must be connected; run for largest component)
#nx.current_flow_betweenness_centrality(graph)

# eigenvector centrality

# degree assortativity coefficient
# average neighbor degree; average degree connectivity (k nearest neighbors)

#nx.edge_connectivity(graph)
#nx.node_connectivity(graph)

# clustering coefficient (cannot be multigraph)
# nx.average_clustering(graph)

	degree centrality
neighbors	0.001174
free vaccine	0.001174
testing	0.001174
sex	0.001174
vaccine opponents	0.001174
officials	0.001174
elite list	0.001174
arm	0.001174
Dutch Bible belt	0.001174
efficacious	0.001174
diarrhea deaths	0.001174
written down	0.001174
vaccine refusal rates	0.001174
unconscionable	0.001174
Department of Public Health Immunization Program	0.001174
polio vaccination effort	0.001174
vaccine refusing	0.001174
medical law	0.001174
Afghanistan	0.001174
home-schooled children	0.001174
children with affected older sibling and who had received MMR vaccine	0.001174
psychiatrist	0.001174
fraud	0.001174
vaccines cause neurological problems	0.001174
unethical	0.001174
public schools	0.001174
random cases	0.001174
genes	0.001174
immunization programs	0.001174
Federal Circuit	0.001174
...	...
polio vaccine opposition	0.010563
immune system	0.010563
genital warts	0.011737
vaccinations	0.011737
immunization	0.011737
infection	0.011737
Tdap vaccine	0.012911
states	0.012911
cervical dysplasia	0.012911
Gardasil	0.015258
anti-vaccination website	0.016432
side effects	0.017606
herd immunity	0.018779
Jain study	0.018779
religious groups	0.019953
meningococcal vaccine	0.021127
autism risk	0.023474
SB 277	0.025822
anti-vaccination	0.025822
measles vaccine	0.026995
MMR vaccine	0.030516
vaccination	0.031690
children	0.032864
HPV vaccine	0.032864
meningococcal disease	0.032864
vaccine-autism link	0.037559
autism	0.044601
measles	0.058685
parents	0.059859
vaccines	0.070423