negative network



In [1]:

    
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from glob import glob

#gml_files = glob('../output/network/*/*.gml')

# Load the network from its GML file.
graph = nx.read_gml('../output_join/article_neg1.gml')
# Undirected copy of the loaded graph.
ugraph = graph.to_undirected()
# Second undirected conversion, this time with reciprocal=True.
# NOTE(review): per the networkx docs this keeps only edges that are present in
# both directions of a directed graph -- confirm against the installed version.
U = graph.to_undirected(reciprocal=True)
e = U.edges()
# Add the edges of U to ugraph.
# NOTE(review): if ugraph is a multigraph these presumably become extra parallel
# edges rather than being merged with the existing ones -- confirm this is intended.
ugraph.add_edges_from(e)

def highest_centrality(cent_dict):
    """Return the (node, value) pair with the largest centrality value.

    Parameters
    ----------
    cent_dict : dict
        Mapping of node -> centrality value.

    Returns
    -------
    tuple
        ``(node, value)`` for the entry with the largest value. When several
        nodes share the largest value, the one with the largest node key wins,
        because entries are ranked as ``(value, node)`` pairs.

    Raises
    ------
    IndexError
        If ``cent_dict`` is empty.
    """
    # .items() exists on both Python 2 and Python 3 dicts (iteritems() is
    # Python 2 only). Ranking (value, node) pairs in descending order puts the
    # highest-valued entry first.
    ranked = sorted(((value, node) for node, value in cent_dict.items()),
                    reverse=True)
    top_value, top_node = ranked[0]
    return (top_node, top_value)



In [2]:

    
# start here
#ugraph = nx.read_gml('positive_uall.gml')

# Summary of the directed graph, then of its undirected counterpart.
print(nx.info(graph))
print(nx.info(ugraph))









    



Name: 
Type: MultiDiGraph
Number of nodes: 1257
Number of edges: 1898
Average in degree:   1.5099
Average out degree:   1.5099
Name: 
Type: MultiGraph
Number of nodes: 1257
Number of edges: 1898
Average degree:   3.0199



In [3]:

    
def drawIt(graph, what = 'graph'):
    """Draw `graph` with a spring layout and show the figure.

    `what` is only used in the progress message. Graphs with more than 20
    nodes get a 10x10 figure; graphs with more than 40 nodes are additionally
    drawn with node_size=70 and font_size=12.
    """
    node_count = graph.number_of_nodes()
    print("Drawing %s of size %s:" % (what, node_count))

    if node_count > 20:
        plt.figure(figsize=(10, 10))

    # Labels are always drawn; only the largest graphs override the sizes.
    draw_options = {'with_labels': True}
    if node_count > 40:
        draw_options['node_size'] = 70
        draw_options['font_size'] = 12

    nx.draw_spring(graph, **draw_options)
    plt.show()

def describeGraph(graph):
    """Print a one-line summary of `graph`, then draw it and each component.

    The summary reports the number of nodes, edges and connected components.
    The whole graph is drawn first, followed by every connected component
    from largest to smallest.
    """
    components = sorted(nx.connected_components(graph), key = len, reverse = True)
    # Values are listed in the same order as the placeholders in the message:
    # nodes first, then edges, then the component count.
    params = (graph.number_of_nodes(), graph.number_of_edges(), len(components))
    print("Graph has %s nodes, %s edges, %s connected components\n" % params)
    drawIt(graph)
    for sub in components:
        drawIt(graph.subgraph(sub), what = 'component')



In [5]:

    
#describeGraph(ugraph)

Components



In [6]:

    
# Sizes (node counts) of the connected components, largest first
print(sorted(map(len, nx.connected_components(ugraph)), reverse=True))









    



[1140, 7, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]



In [7]:

    
# Connected components as independent subgraphs. Built from
# connected_components() + subgraph() because
# nx.connected_component_subgraphs() no longer exists in networkx >= 2.4.
subgraphs = [ugraph.subgraph(c).copy() for c in nx.connected_components(ugraph)]

# Gc = the largest connected component (a graph, not a number); taken from the
# list above so the components are not computed a second time.
Gc = max(subgraphs, key=len)
len(Gc)  # number of nodes in the largest component









    Out[7]:





1140

Connectivity

A k-component is a maximal subgraph of a graph G that has node connectivity at least k: at least k nodes must be removed to break it into more components. k-components have an inherent hierarchical structure because they are nested in terms of connectivity: a connected graph can contain several 2-components, each of which can contain one or more 3-components, and so forth. `k_components` returns a dictionary with every connectivity level k in the input graph as keys and, as values, a list of the sets of nodes that form a k-component of level k.

The algorithm relies on finding all minimum-size node cut-sets of a graph:

1. Compute the node connectivity, k, of the input graph G.
2. Identify all k-cutsets at the current level of connectivity using Kanevsky’s algorithm.
3. Generate new graph components based on the removal of these cutsets. Nodes in a cutset belong to both sides of the induced cut.
4. If the graph is neither complete nor trivial, return to 1; else end.

Note: MultiGraph and MultiDiGraph types are not supported.

Degree



In [9]:

    
# Degree histogram: a list of degree frequencies, indexed by degree value
print(nx.degree_histogram(graph))









    



[0, 692, 265, 86, 54, 34, 23, 18, 11, 7, 8, 8, 5, 6, 2, 0, 5, 4, 2, 2, 3, 3, 3, 2, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]



In [10]:

    
# degree rank plot (undirected)

# dict() accepts both the plain dict returned by nx.degree() in networkx 1.x
# and the view of (node, degree) pairs returned by networkx 2.x and later.
degree_sequence=sorted(dict(nx.degree(ugraph)).values(),reverse=True) # degree sequence
#print "Degree sequence", degree_sequence
dmax=max(degree_sequence)

plt.loglog(degree_sequence,'b-',marker='o')
plt.title("Degree rank plot")
plt.ylabel("degree")
plt.xlabel("rank")

# draw graph in inset
plt.axes([0.45,0.45,0.45,0.45])
# Largest connected component, obtained without
# nx.connected_component_subgraphs() (removed in networkx 2.4).
Gcc=ugraph.subgraph(max(nx.connected_components(ugraph), key=len))
pos=nx.spring_layout(Gcc)
plt.axis('off')
nx.draw_networkx_nodes(Gcc,pos,node_size=20)
nx.draw_networkx_edges(Gcc,pos,alpha=0.4)

plt.show()

Centrality



In [11]:

    
# Degree centrality of every node, as a one-column table sorted ascending
a = nx.degree_centrality(graph)
dfIn = (
    pd.DataFrame.from_dict(a, orient='index')
    .rename(columns={0: 'degree centrality'})
    .sort_values(by='degree centrality')
)
dfIn









    Out[11]:






  
    
      
      degree centrality
    
  
  
    
      financial collusion
      0.000796
    
    
      toxic heavy metal
      0.000796
    
    
      syncope
      0.000796
    
    
      labor and delivery floor
      0.000796
    
    
      human carcinogen
      0.000796
    
    
      dissenters
      0.000796
    
    
      generous
      0.000796
    
    
      trustworthy
      0.000796
    
    
      human muscle tissue
      0.000796
    
    
      parental right
      0.000796
    
    
      human right
      0.000796
    
    
      flu season
      0.000796
    
    
      target
      0.000796
    
    
      anaphylactic shock
      0.000796
    
    
      solid marks
      0.000796
    
    
      government healthcare reform
      0.000796
    
    
      outrageous
      0.000796
    
    
      more severe autism
      0.000796
    
    
      compensation
      0.000796
    
    
      New York
      0.000796
    
    
      cellular degeneration
      0.000796
    
    
      guilt
      0.000796
    
    
      nobody
      0.000796
    
    
      CDC scientific fraud
      0.000796
    
    
      intimidation
      0.000796
    
    
      variant genotypes
      0.000796
    
    
      surveys
      0.000796
    
    
      political weapon
      0.000796
    
    
      demand for justice
      0.000796
    
    
      special education services
      0.000796
    
    
      ...
      ...
    
    
      measles cases
      0.013535
    
    
      scientific fraud
      0.014331
    
    
      Nichole Rolfe
      0.014331
    
    
      vaccine-autism link
      0.015127
    
    
      SV40
      0.015127
    
    
      adverse effects
      0.015924
    
    
      parents
      0.015924
    
    
      hepatitis B vaccine
      0.015924
    
    
      Merck
      0.016720
    
    
      vaccine ingredients
      0.016720
    
    
      informed consent
      0.016720
    
    
      pandemic H1N1 swine flu vaccine
      0.017516
    
    
      people
      0.017516
    
    
      measles mortality
      0.017516
    
    
      United States
      0.018312
    
    
      measles
      0.018312
    
    
      SB 277
      0.020701
    
    
      vaccination
      0.021497
    
    
      mandatory vaccines
      0.023089
    
    
      pharmaceutical companies
      0.024682
    
    
      flu shots
      0.026274
    
    
      doctors
      0.031847
    
    
      mainstream media
      0.032643
    
    
      mercury
      0.032643
    
    
      autism
      0.037420
    
    
      CDC
      0.049363
    
    
      vaccine industry
      0.051752
    
    
      thimerosal
      0.057325
    
    
      children
      0.060510
    
    
      vaccines
      0.106688
    
  

1257 rows × 1 columns



In [ ]:

    
# Betweenness centrality of every node, as a one-column table sorted ascending
a = nx.betweenness_centrality(graph)
dfIn = (
    pd.DataFrame.from_dict(a, orient='index')
    .rename(columns={0: 'betweenness centrality'})
    .sort_values(by='betweenness centrality')
)
dfIn



In [ ]:

    
# Closeness centrality of every node, as a one-column table sorted ascending
a = nx.closeness_centrality(graph)
dfIn = (
    pd.DataFrame.from_dict(a, orient='index')
    .rename(columns={0: 'closeness centrality'})
    .sort_values(by='closeness centrality')
)
dfIn



In [ ]:

    
# In-degree centrality of every node, as a one-column table sorted ascending
a = nx.in_degree_centrality(graph)
dfIn = (
    pd.DataFrame.from_dict(a, orient='index')
    .rename(columns={0: 'in deg centrality'})
    .sort_values(by='in deg centrality')
)
dfIn



In [ ]:

    
# Out-degree centrality of every node, as a one-column table sorted ascending
b = nx.out_degree_centrality(graph)
dfIn = (
    pd.DataFrame.from_dict(b, orient='index')
    .rename(columns={0: 'out deg centrality'})
    .sort_values(by='out deg centrality')
)
dfIn



In [ ]:

    
# current-flow betweenness centrality (graph must be connected; run for largest component)
#nx.current_flow_betweenness_centrality(graph)

# eigenvector centrality

# degree assortativity coefficient
# average neighbor degree; average degree connectivity (k nearest neighbors)

#nx.edge_connectivity(graph)
#nx.node_connectivity(graph)

# clustering coefficient (cannot be multigraph)
# nx.average_clustering(graph)

	degree centrality
financial collusion	0.000796
toxic heavy metal	0.000796
syncope	0.000796
labor and delivery floor	0.000796
human carcinogen	0.000796
dissenters	0.000796
generous	0.000796
trustworthy	0.000796
human muscle tissue	0.000796
parental right	0.000796
human right	0.000796
flu season	0.000796
target	0.000796
anaphylactic shock	0.000796
solid marks	0.000796
government healthcare reform	0.000796
outrageous	0.000796
more severe autism	0.000796
compensation	0.000796
New York	0.000796
cellular degeneration	0.000796
guilt	0.000796
nobody	0.000796
CDC scientific fraud	0.000796
intimidation	0.000796
variant genotypes	0.000796
surveys	0.000796
political weapon	0.000796
demand for justice	0.000796
special education services	0.000796
...	...
measles cases	0.013535
scientific fraud	0.014331
Nichole Rolfe	0.014331
vaccine-autism link	0.015127
SV40	0.015127
adverse effects	0.015924
parents	0.015924
hepatitis B vaccine	0.015924
Merck	0.016720
vaccine ingredients	0.016720
informed consent	0.016720
pandemic H1N1 swine flu vaccine	0.017516
people	0.017516
measles mortality	0.017516
United States	0.018312
measles	0.018312
SB 277	0.020701
vaccination	0.021497
mandatory vaccines	0.023089
pharmaceutical companies	0.024682
flu shots	0.026274
doctors	0.031847
mainstream media	0.032643
mercury	0.032643
autism	0.037420
CDC	0.049363
vaccine industry	0.051752
thimerosal	0.057325
children	0.060510
vaccines	0.106688