neutral graph

1. Setup: imports and input network file


In [1]:
# 1_network_df

import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from glob import glob

# Use the ggplot style for every figure drawn in this notebook.
plt.style.use('ggplot')
# Widen the pandas text display so the wide summary tables are not wrapped.
pd.set_option('display.width', 5000) 
pd.set_option('display.max_columns', 60)

# glob() returns a list of matching paths (empty if the file is missing);
# the loading loop below iterates over this list.
gml_files = glob('../output/network/article_neu1.gml')

In [2]:
def calculate_graph_inf(graph, name=None):
    """Label ``graph`` and print its NetworkX summary.

    Parameters
    ----------
    graph : networkx graph
        Graph to describe. Its ``name`` attribute is overwritten.
    name : str, optional
        Label stored in ``graph.name``. When omitted, the notebook-level
        global ``filename`` is used, which is the original behaviour; that
        global is assigned by the loading loops, so it must exist before
        this function is called without ``name``.
    """
    if name is None:
        # Legacy path: existing callers rely on the global `filename`.
        name = filename
    graph.name = name
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(nx.info(graph))

def highest_centrality(cent_dict):
    """Return the ``(node, value)`` pair with the largest centrality value.

    Parameters
    ----------
    cent_dict : dict
        Mapping of node -> centrality value.

    Returns
    -------
    tuple
        ``(node, value)`` for the highest value. When several nodes share the
        highest value, the one with the largest node key is returned.

    Raises
    ------
    ValueError
        If ``cent_dict`` is empty.
    """
    if not cent_dict:
        raise ValueError("cent_dict is empty; there is no highest-centrality node")
    # Compare on (value, node) so ties on the value are broken by the node key.
    # .items() works on both Python 2 and Python 3, unlike .iteritems().
    node, value = max(cent_dict.items(), key=lambda item: (item[1], item[0]))
    return (node, value)

2. convert to undirected


In [3]:
# Load each GML file as a directed graph and derive an undirected copy of it.
# After the loop, `dgraph`, `ugraph` and `filename` stay bound to the last file;
# `dgraph` is reused by the directed-graph cells further down.
for graph_num, gml_graph in enumerate(gml_files):
    dgraph = nx.read_gml(gml_graph)
    ugraph = dgraph.to_undirected() # to undirected graph
    # Unused alternative conversion, kept for reference:
    #U = dgraph.to_undirected(reciprocal=True)
    #e = U.edges()
    #ugraph.add_edges_from(e)
    # Must be assigned before calculate_graph_inf() runs: that helper reads the global `filename`.
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 10)
    print(gml_graph)
    calculate_graph_inf(dgraph)
    calculate_graph_inf(ugraph)


----------
../output/network/article_neu1.gml
Name: article_neu1.gml
Type: MultiDiGraph
Number of nodes: 201
Number of edges: 241
Average in degree:   1.1990
Average out degree:   1.1990
Name: article_neu1.gml
Type: MultiGraph
Number of nodes: 201
Number of edges: 236
Average degree:   2.3483

In [4]:
# save undirected gml
#nx.write_gml(ugraph, "../output/network/article_u_neu.gml")

`dgraph` = directed graph; `ugraph` = undirected graph

undirected graph


In [5]:
# Load the saved undirected graph. This path matches the (commented-out)
# nx.write_gml call in the previous cell, so the file must already exist on disk;
# glob() returns an empty list if it does not.
gml_files = glob('../output/network/article_u_neu.gml')

In [6]:
# Read the undirected graph back from disk and print its summary.
# `ugraph` and `filename` stay bound after the loop and are used by the node-table cell below.
# ugraph = undirected; dgraph = directed
for graph_num, gml_graph in enumerate(gml_files):
    ugraph = nx.read_gml(gml_graph)
    # Must be assigned before calculate_graph_inf() runs: that helper reads the global `filename`.
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 10)
    print(gml_graph)
    calculate_graph_inf(ugraph)


----------
../output/network/article_u_neu.gml
Name: article_u_neu.gml
Type: MultiGraph
Number of nodes: 201
Number of edges: 236
Average degree:   2.3483

3. all nodes table


In [7]:
# 2_node_df: list all nodes and centrality
# `data` holds one (name, sentiment) row per graph; `combined_df` starts empty
# and receives the per-node rows in the next cell.
data_columns = ['name', 'sentiment']
data = pd.DataFrame(columns=data_columns)
combined_df = pd.DataFrame()

In [8]:
# Build the per-node table (degree plus three centralities) for the full undirected graph.
# Uses `ugraph` and `filename` left bound by the loading loop in the cell above.
sent = "neutral"
graph_values = {'name':filename,
                'sentiment':sent
                }
# DataFrame.append was removed in pandas 2.0; pd.concat works on every pandas version.
data = pd.concat([data, pd.DataFrame([graph_values], columns = ['name', 'sentiment'])],
                 ignore_index=True)

# dict() keeps this working whether nx.degree returns a dict or an
# iterable view of (node, degree) pairs.
degree = dict(nx.degree(ugraph))
deg_df = pd.DataFrame.from_dict(degree, orient = 'index')
deg_df.columns = ['degree']

# Each centrality is computed exactly once (the original computed all three twice).

# degree centrality
deg_cent = nx.degree_centrality(ugraph)
dc_df = pd.DataFrame.from_dict(deg_cent, orient = 'index')
dc_df.columns = ['deg cent']

# betweenness centrality
bet_cent = nx.betweenness_centrality(ugraph)
bc_df = pd.DataFrame.from_dict(bet_cent, orient = 'index')
bc_df.columns = ['bet cent']

# closeness centrality
clo_cent = nx.closeness_centrality(ugraph)
cc_df = pd.DataFrame.from_dict(clo_cent, orient = 'index')
cc_df.columns = ['clo cent']

# concat node frames into node_df (aligned on the node index)
frames = [deg_df, dc_df, bc_df, cc_df]
node_df = pd.concat(frames, axis = 1)
node_df.index.name = 'node'
node_df = node_df.reset_index()

values = pd.DataFrame(graph_values, columns = ('name', 'sentiment'), index = [0])

# df = graph_values merged with node_df for a single graph. `values` has one row,
# so name/sentiment are NaN below row 0 and are forward-filled down the table.
df = pd.concat([values, node_df], axis = 1)
df = df.ffill()
# NOTE: re-running this cell without re-running the previous one adds the same rows again.
combined_df = pd.concat([combined_df, df])

In [9]:
# Display the per-node table for the entire (undirected) network.
combined_df


Out[9]:
name sentiment node degree deg cent bet cent clo cent
0 article_u_neu.gml neutral 7-11 year olds 2 0.010 0.015611 0.147149
1 article_u_neu.gml neutral Alfred and Lisa Claire Dwoskin 1 0.005 0.000000 0.138278
2 article_u_neu.gml neutral Americans 1 0.005 0.000000 0.130533
3 article_u_neu.gml neutral Andrew Wakefield 5 0.025 0.101658 0.133796
4 article_u_neu.gml neutral Australia 1 0.005 0.000000 0.131723
5 article_u_neu.gml neutral Barry Segal 1 0.005 0.000000 0.093953
6 article_u_neu.gml neutral Ben Allen 4 0.020 0.000034 0.132569
7 article_u_neu.gml neutral CDC 4 0.020 0.070193 0.160556
8 article_u_neu.gml neutral California 1 0.005 0.000000 0.132084
9 article_u_neu.gml neutral Chairwoman Carol Liu 2 0.010 0.008492 0.132326
10 article_u_neu.gml neutral Children's Hospital of Philadelphia 1 0.005 0.000000 0.130533
11 article_u_neu.gml neutral Chris Christie 3 0.015 0.008518 0.118540
12 article_u_neu.gml neutral Connie Leyva 3 0.015 0.000067 0.132569
13 article_u_neu.gml neutral Disneyland measles outbreak 1 0.005 0.000000 0.114865
14 article_u_neu.gml neutral Dr. Anna Acosta 2 0.010 0.025176 0.135936
15 article_u_neu.gml neutral Dr. Paul Offit 3 0.015 0.012663 0.154051
16 article_u_neu.gml neutral Drew Downing 1 0.005 0.000000 0.005000
17 article_u_neu.gml neutral Dwoskin Family Foundation 8 0.040 0.295402 0.164954
18 article_u_neu.gml neutral Focus for Health 4 0.020 0.073568 0.105551
19 article_u_neu.gml neutral Gambhir study 3 0.015 0.000151 0.015000
20 article_u_neu.gml neutral Generation Rescue 4 0.020 0.088618 0.118637
21 article_u_neu.gml neutral Immunization Action Coalition 1 0.005 0.000000 0.135173
22 article_u_neu.gml neutral Jenny McCarthy 3 0.015 0.024020 0.119619
23 article_u_neu.gml neutral Lisa and J.B. Handley 1 0.005 0.000000 0.104182
24 article_u_neu.gml neutral MMR vaccine 1 0.005 0.000000 0.085251
25 article_u_neu.gml neutral National Vaccine Information Center 2 0.010 0.008492 0.138543
26 article_u_neu.gml neutral PLOS Computational Biology 1 0.005 0.000000 0.143211
27 article_u_neu.gml neutral Pez dispensers 1 0.005 0.000000 0.161093
28 article_u_neu.gml neutral Rand Paul 3 0.015 0.033392 0.162542
29 article_u_neu.gml neutral Renee Gentry 1 0.005 0.000000 0.005000
... ... ... ... ... ... ... ...
171 article_u_neu.gml neutral teens 1 0.005 0.000000 0.131723
172 article_u_neu.gml neutral the sake of being anti-vaccine 1 0.005 0.000000 0.116626
173 article_u_neu.gml neutral upward trend 1 0.005 0.000000 0.130298
174 article_u_neu.gml neutral vaccinated 1 0.005 0.000000 0.110053
175 article_u_neu.gml neutral vaccinated children 2 0.010 0.008492 0.132326
176 article_u_neu.gml neutral vaccination 5 0.025 0.038961 0.149431
177 article_u_neu.gml neutral vaccination exemption 3 0.015 0.008492 0.132326
178 article_u_neu.gml neutral vaccination schedule 1 0.005 0.000000 0.009000
179 article_u_neu.gml neutral vaccine allergy 1 0.005 0.000000 0.005000
180 article_u_neu.gml neutral vaccine choice 1 0.005 0.000000 0.104107
181 article_u_neu.gml neutral vaccine concerns 1 0.005 0.000000 0.119224
182 article_u_neu.gml neutral vaccine debate 4 0.020 0.017035 0.104786
183 article_u_neu.gml neutral vaccine duration 2 0.010 0.014639 0.160734
184 article_u_neu.gml neutral vaccine efficacy 3 0.015 0.008492 0.153397
185 article_u_neu.gml neutral vaccine refusal 3 0.015 0.000000 0.110053
186 article_u_neu.gml neutral vaccine risk 3 0.015 0.008492 0.116909
187 article_u_neu.gml neutral vaccine safety 2 0.010 0.003857 0.146701
188 article_u_neu.gml neutral vaccine-autism link 4 0.020 0.261307 0.149896
189 article_u_neu.gml neutral vaccine-injured children 3 0.015 0.008342 0.117099
190 article_u_neu.gml neutral vaccine-preventable diseases 1 0.005 0.000000 0.102847
191 article_u_neu.gml neutral vaccines 19 0.095 0.414983 0.198489
192 article_u_neu.gml neutral vaccines cause childhood illnesses 1 0.005 0.000000 0.138278
193 article_u_neu.gml neutral voluntary 1 0.005 0.000000 0.161093
194 article_u_neu.gml neutral vomiting 1 0.005 0.000000 0.131723
195 article_u_neu.gml neutral waning effectiveness 2 0.010 0.000101 0.131006
196 article_u_neu.gml neutral waning immunity 1 0.005 0.000000 0.153397
197 article_u_neu.gml neutral wealthy family foundations 1 0.005 0.000000 0.093346
198 article_u_neu.gml neutral whole-cell vaccine 5 0.025 0.108918 0.181305
199 article_u_neu.gml neutral whooping cough 1 0.005 0.000000 0.131723
200 article_u_neu.gml neutral young people 1 0.005 0.000000 0.114774

201 rows × 7 columns


In [10]:
# save
#combined_df.to_csv('../output/df/article_u_neu.csv')

4. Draw undirected and directed network


In [11]:
# 7_graph_calculation
def drawIt(graph, what = 'graph'):
    """Draw ``graph`` with a spring layout and announce its size.

    Parameters
    ----------
    graph : networkx graph
        Graph (or component subgraph) to draw.
    what : str
        Label used in the printed message, e.g. 'graph' or 'component'.

    Graphs with more than 20 nodes get a 10x10 inch figure; graphs with more
    than 40 nodes are additionally drawn with small nodes (node_size 70,
    font_size 12).
    """
    nsize = graph.number_of_nodes()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Drawing %s of size %s:" % (what, nsize))

    draw_kwargs = {'with_labels': True}
    if nsize > 20:
        plt.figure(figsize=(10, 10))
    if nsize > 40:
        draw_kwargs.update(node_size = 70, font_size = 12)
    nx.draw_spring(graph, **draw_kwargs)
    plt.show()

# for undirected graphs
def describeGraph(graph):
    """Print edge/node/component counts for an undirected graph, then draw it.

    The whole graph is drawn first, followed by each connected component
    from largest to smallest.
    """
    components = sorted(nx.connected_components(graph), key = len, reverse = True)
    # The original also built an unused list of component subgraphs via
    # nx.connected_component_subgraphs; it is dropped because nothing read it.
    params = (graph.number_of_edges(), graph.number_of_nodes(), len(components))
    print("Graph has %s edges, %s nodes, %s connected components\n" % params)
    drawIt(graph)
    for sub in components:
        drawIt(graph.subgraph(sub), what = 'component')

# for directed graphs
def describeGraph_d(graph):
    """Print edge/node/component counts for a directed graph, then draw it.

    Components are the weakly connected components. The whole graph is drawn
    first, followed by each component from largest to smallest.
    """
    components = sorted(nx.weakly_connected_components(graph), key = len, reverse = True)
    # The original also built an unused list of component subgraphs via
    # nx.weakly_connected_component_subgraphs; it is dropped because nothing read it.
    params = (graph.number_of_edges(), graph.number_of_nodes(), len(components))
    print("Graph has %s edges, %s nodes, %s connected components\n" % params)
    drawIt(graph)
    for sub in components:
        drawIt(graph.subgraph(sub), what = 'component')

In [12]:
# UNDIRECTED network graph: print the summary line, then draw the full graph and each connected component.
describeGraph(ugraph)


Graph has 236 edges, 201 nodes, 12 connected components

Drawing graph of size 201:
Drawing component of size 171:
Drawing component of size 4:
Drawing component of size 4:
Drawing component of size 4:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:

In [13]:
# DIRECTED network graph: print the summary line, then draw the full graph and each weakly connected component.
describeGraph_d(dgraph)


Graph has 241 edges, 201 nodes, 12 connected components

Drawing graph of size 201:
Drawing component of size 171:
Drawing component of size 4:
Drawing component of size 4:
Drawing component of size 4:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:

Undirected graph components


In [14]:
# list of connected components by size, largest first (undirected graph)
connected_components = [len(c) for c in sorted(nx.connected_components(ugraph), key=len, reverse=True)]

# One independent subgraph per connected component (undirected graph).
# G.subgraph(nodes).copy() replaces nx.connected_component_subgraphs,
# which was removed in NetworkX 2.4.
subgraphs = [ugraph.subgraph(c).copy() for c in nx.connected_components(ugraph)]

# greatest component (undirected); built separately from `subgraphs` so that
# renaming it does not touch the entries of that list
u_Gc = ugraph.subgraph(max(nx.connected_components(ugraph), key=len)).copy()
u_Gc.name = "undirected Gc"

In [15]:
# Single-argument print(...) gives the same output on Python 2 and Python 3;
# the two spaces reproduce the spacing of the original comma-separated print statement.
print("connected components =  %s" % (connected_components,))
print(nx.info(u_Gc))


connected components =  [171, 4, 4, 4, 3, 3, 2, 2, 2, 2, 2, 2]
Name: undirected Gc
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263

Directed graph components


In [16]:
# use directed dgraph: weakly connected components, largest first
components = sorted(nx.weakly_connected_components(dgraph), key = len, reverse = True)
cc = [len(c) for c in components]

# One independent subgraph per weakly connected component.
# G.subgraph(nodes).copy() replaces nx.weakly_connected_component_subgraphs,
# which was removed in NetworkX 2.4.
subgraphs = [dgraph.subgraph(c).copy() for c in nx.weakly_connected_components(dgraph)]

# greatest component; built separately from `subgraphs` so that renaming it
# does not touch the entries of that list
d_Gc = dgraph.subgraph(max(nx.weakly_connected_components(dgraph), key=len)).copy()
d_Gc.name = "directed Gc"

In [17]:
# Single-argument print(...) gives the same output on Python 2 and Python 3;
# the two spaces reproduce the spacing of the original comma-separated print statement.
print("connected components =  %s" % (cc,))
print(nx.info(d_Gc))


connected components =  [171, 4, 4, 4, 3, 3, 2, 2, 2, 2, 2, 2]
Name: directed Gc
Type: MultiDiGraph
Number of nodes: 171
Number of edges: 221
Average in degree:   1.2924
Average out degree:   1.2924

5. Greatest component graph


In [18]:
# finally, greatest components for undirected and directed graphs
# (single-argument print(...) behaves the same on Python 2 and Python 3)
print(nx.info(u_Gc))
print(nx.info(d_Gc))


Name: undirected Gc
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263
Name: directed Gc
Type: MultiDiGraph
Number of nodes: 171
Number of edges: 221
Average in degree:   1.2924
Average out degree:   1.2924

In [19]:
# save Gc
#nx.write_gml(u_Gc, "../output/network/u_Gc_neutral2.gml")
#nx.write_gml(d_Gc, "../output/network/d_Gc_neutral2.gml")

6. network stats for DIRECTED GC


In [20]:
# load directed Gc; the path matches the (commented-out) nx.write_gml call in
# the cell above, so the file must already exist on disk
Gc_files = glob('../output/network/d_Gc_neutral2.gml')

# Column layout of the one-row-per-graph summary table
# ('avg deg' is currently disabled).
network_data_columns = [
    'name', 'sentiment',
    '# nodes', '# edges',
    'density', 'deg assort coef',
    'avg deg cent', 'avg bet cent', 'avg clo cent',
    'high deg cent', 'high bet cent', 'high clo cent',
    'avg node conn', '# conn comp', 'gc size',
]
network_data = pd.DataFrame(columns=network_data_columns)

In [21]:
# Gc_files: compute one row of network-level statistics per directed greatest-component file.
records = []
for graph_num, gml_graph in enumerate(Gc_files):
    graph = nx.read_gml(gml_graph)
    # Must be assigned before calculate_graph_inf() runs: that helper reads the global `filename`.
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 10)
    print(gml_graph)
    calculate_graph_inf(graph)

    # calculate variables
    sent = "neu"
    nodes = nx.number_of_nodes(graph)
    edges = nx.number_of_edges(graph)
    density = float("{0:.4f}".format(nx.density(graph)))
    # Each centrality is computed once and reused for both the mean and the maximum.
    deg_cen = nx.degree_centrality(graph)
    bet_cen = nx.betweenness_centrality(graph)
    clo_cen = nx.closeness_centrality(graph)
    # list(...) is required on Python 3, where dict.values() is a view and
    # np.array(view) does not produce a numeric array.
    avg_deg_cen = np.mean(list(deg_cen.values()))
    avg_bet_cen = np.mean(list(bet_cen.values()))
    avg_clo_cen = np.mean(list(clo_cen.values()))
    avg_node_con = float("{0:.4f}".format((nx.average_node_connectivity(graph))))
    deg_assort_coeff = float("{0:.4f}".format((nx.degree_assortativity_coefficient(graph))))
    conn_comp = nx.number_weakly_connected_components(graph)
    highest_deg_cen = highest_centrality(deg_cen)
    highest_bet_cen = highest_centrality(bet_cen)
    highest_clo_cen = highest_centrality(clo_cen)
    # Size of the largest weakly connected component. The node set has the same
    # length as the subgraph, so the removed *_subgraphs API is not needed.
    Gc = len(max(nx.weakly_connected_components(graph), key=len))

    # save variables into a record ('avg deg' is currently disabled)
    graph_values = {'name':filename,
                    'sentiment':sent,
                    '# nodes':nodes,
                    '# edges':edges,
                    'density':density,
                    'deg assort coef':deg_assort_coeff,
                    'avg deg cent':"%.4f" % avg_deg_cen,
                    'avg bet cent':"%.4f" % avg_bet_cen,
                    'avg clo cent':"%.4f" % avg_clo_cen,
                    'high deg cent':highest_deg_cen,
                    'high bet cent':highest_bet_cen,
                    'high clo cent':highest_clo_cen,
                    'avg node conn':avg_node_con,
                    '# conn comp':conn_comp,
                    'gc size':Gc
                    }
    records.append(graph_values)

# Build the table once from the collected records instead of growing it with
# DataFrame.append (removed in pandas 2.0). Reusing network_data.columns keeps the
# column order chosen in the previous cell. Integer counts now stay integers
# instead of being upcast to floats, and re-running this cell no longer duplicates rows.
network_data = pd.DataFrame(records, columns = network_data.columns)


----------
../output/network/d_Gc_neutral2.gml
Name: d_Gc_neutral2.gml
Type: MultiDiGraph
Number of nodes: 171
Number of edges: 221
Average in degree:   1.2924
Average out degree:   1.2924

In [22]:
# Display the network-level statistics for the directed greatest component.
network_data


Out[22]:
name sentiment # nodes # edges density deg assort coef avg deg cent avg bet cent avg clo cent high deg cent high bet cent high clo cent avg node conn # conn comp gc size
0 d_Gc_neutral2.gml neu 171.0 221.0 0.0076 -0.194 0.0152 0.0011 0.0183 (SB 277, 0.194117647059) (children, 0.0226940480334) (children, 0.133013640239) 0.0614 1.0 171.0

In [23]:
# save
#network_data.to_csv('../output/df/d_Gc_neu2.csv')

7. network stats for UNDIRECTED GC


In [24]:
# load UNdirected Gc
Gc_files = glob('../output/network/u_Gc_neutral2.gml')

# Column layout of the one-row-per-graph summary table
# ('avg deg', '# conn comp' and 'gc size' are currently disabled).
network_data_columns = [
    'name', 'sentiment',
    '# nodes', '# edges',
    'density', 'deg assort coef',
    'avg deg cent', 'avg bet cent', 'avg clo cent',
    'high deg cent', 'high bet cent', 'high clo cent',
    'avg node conn',
]
network_data = pd.DataFrame(columns=network_data_columns)

In [25]:
# Gc_files: compute one row of network-level statistics per undirected greatest-component file.
records = []
for graph_num, gml_graph in enumerate(Gc_files):
    graph = nx.read_gml(gml_graph)
    # Must be assigned before calculate_graph_inf() runs: that helper reads the global `filename`.
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 10)
    print(gml_graph)
    calculate_graph_inf(graph)

    # calculate variables
    sent = "neu"
    nodes = nx.number_of_nodes(graph)
    edges = nx.number_of_edges(graph)
    density = float("{0:.4f}".format(nx.density(graph)))
    # Each centrality is computed once and reused for both the mean and the maximum.
    deg_cen = nx.degree_centrality(graph)
    bet_cen = nx.betweenness_centrality(graph)
    clo_cen = nx.closeness_centrality(graph)
    # list(...) is required on Python 3, where dict.values() is a view and
    # np.array(view) does not produce a numeric array.
    avg_deg_cen = np.mean(list(deg_cen.values()))
    avg_bet_cen = np.mean(list(bet_cen.values()))
    avg_clo_cen = np.mean(list(clo_cen.values()))
    avg_node_con = float("{0:.4f}".format((nx.average_node_connectivity(graph))))
    deg_assort_coeff = float("{0:.4f}".format((nx.degree_assortativity_coefficient(graph))))
    highest_deg_cen = highest_centrality(deg_cen)
    highest_bet_cen = highest_centrality(bet_cen)
    highest_clo_cen = highest_centrality(clo_cen)

    # save variables into a record
    # ('avg deg', '# conn comp' and 'gc size' are currently disabled)
    graph_values = {'name':filename,
                    'sentiment':sent,
                    '# nodes':nodes,
                    '# edges':edges,
                    'density':density,
                    'deg assort coef':deg_assort_coeff,
                    'avg deg cent':"%.4f" % avg_deg_cen,
                    'avg bet cent':"%.4f" % avg_bet_cen,
                    'avg clo cent':"%.4f" % avg_clo_cen,
                    'high deg cent':highest_deg_cen,
                    'high bet cent':highest_bet_cen,
                    'high clo cent':highest_clo_cen,
                    'avg node conn':avg_node_con
                    }
    records.append(graph_values)

# Build the table once from the collected records instead of growing it with
# DataFrame.append (removed in pandas 2.0). Reusing network_data.columns keeps the
# column order chosen in the previous cell. Integer counts now stay integers
# instead of being upcast to floats, and re-running this cell no longer duplicates rows.
network_data = pd.DataFrame(records, columns = network_data.columns)


----------
../output/network/u_Gc_neutral2.gml
Name: u_Gc_neutral2.gml
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263

In [26]:
# Display the network-level statistics for the undirected greatest component.
network_data


Out[26]:
name sentiment # nodes # edges density deg assort coef avg deg cent avg bet cent avg clo cent high deg cent high bet cent high clo cent avg node conn
0 u_Gc_neutral2.gml neu 171.0 216.0 0.0149 -0.3055 0.0149 0.0342 0.1533 (SB 277, 0.182352941176) (vaccines, 0.574881076691) (vaccines, 0.233516483516) 1.035

In [27]:
# save
#network_data.to_csv('../output/df/u_Gc_neu2.csv')

Gc nodes table (directed & undirected)


In [42]:
# Choose which greatest-component file feeds the node table below:
# the directed one is commented out, the undirected one is active.
#gml_files = glob('../output/network/d_Gc_neutral2.gml')
gml_files = glob('../output/network/u_Gc_neutral2.gml')

In [43]:
# 2_node_df: list all nodes and centrality
# `data` holds one (name, sentiment) row per graph (combined_df is not reset here).
data_columns = ['name', 'sentiment']
data = pd.DataFrame(columns=data_columns)

In [44]:
# Build the per-node table (degree plus three centralities) for each selected file.
# `df` is overwritten on every iteration, so only the last file's table survives the loop.
for graph_num, gml_graph in enumerate(gml_files):
    graph = nx.read_gml(gml_graph)
    # Must be assigned before calculate_graph_inf() runs: that helper reads the global `filename`.
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 10)
    print(gml_graph)
    calculate_graph_inf(graph)

    sent = "neu"
    graph_values = {'name':filename,
                    'sentiment':sent
                    }
    # DataFrame.append was removed in pandas 2.0; pd.concat works on every pandas version.
    data = pd.concat([data, pd.DataFrame([graph_values], columns = ['name', 'sentiment'])],
                     ignore_index=True)

    # dict() keeps this working whether nx.degree returns a dict or an
    # iterable view of (node, degree) pairs.
    degree = dict(nx.degree(graph))
    deg_df = pd.DataFrame.from_dict(degree, orient = 'index')
    deg_df.columns = ['degree']
    # Each centrality is computed exactly once (the original computed all three twice).
    # degree centrality
    deg_cent = nx.degree_centrality(graph)
    dc_df = pd.DataFrame.from_dict(deg_cent, orient = 'index')
    dc_df.columns = ['deg cent']
    # betweenness centrality
    bet_cent = nx.betweenness_centrality(graph)
    bc_df = pd.DataFrame.from_dict(bet_cent, orient = 'index')
    bc_df.columns = ['bet cent']
    # closeness centrality
    clo_cent = nx.closeness_centrality(graph)
    cc_df = pd.DataFrame.from_dict(clo_cent, orient = 'index')
    cc_df.columns = ['clo cent']
    # concat node frames into node_df (aligned on the node index)
    frames = [deg_df, dc_df, bc_df, cc_df]
    node_df = pd.concat(frames, axis = 1)
    node_df.index.name = 'node'
    node_df = node_df.reset_index()

    values = pd.DataFrame(graph_values, columns = ('name', 'sentiment'), index = [0])

    # df = graph_values merged with node_df for a single graph. `values` has one row,
    # so name/sentiment are NaN below row 0 and are forward-filled down the table.
    df = pd.concat([values, node_df], axis = 1)
    df = df.ffill()


----------
../output/network/u_Gc_neutral2.gml
Name: u_Gc_neutral2.gml
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263

In [45]:
# Display the per-node table for the neutral greatest component loaded in the loop above.
df


Out[45]:
name sentiment node degree deg cent bet cent clo cent
0 u_Gc_neutral2.gml neu 7-11 year olds 2 0.011765 0.021627 0.173116
1 u_Gc_neutral2.gml neu Alfred and Lisa Claire Dwoskin 1 0.005882 0.000000 0.162679
2 u_Gc_neutral2.gml neu Americans 1 0.005882 0.000000 0.153568
3 u_Gc_neutral2.gml neu Andrew Wakefield 5 0.029412 0.140828 0.157407
4 u_Gc_neutral2.gml neu Australia 1 0.005882 0.000000 0.154968
5 u_Gc_neutral2.gml neu Barry Segal 1 0.005882 0.000000 0.110533
6 u_Gc_neutral2.gml neu Ben Allen 4 0.023529 0.000046 0.155963
7 u_Gc_neutral2.gml neu CDC 4 0.023529 0.097239 0.188889
8 u_Gc_neutral2.gml neu California 1 0.005882 0.000000 0.155393
9 u_Gc_neutral2.gml neu Chairwoman Carol Liu 2 0.011765 0.011765 0.155678
10 u_Gc_neutral2.gml neu Children's Hospital of Philadelphia 1 0.005882 0.000000 0.153568
11 u_Gc_neutral2.gml neu Chris Christie 3 0.017647 0.011800 0.139459
12 u_Gc_neutral2.gml neu Connie Leyva 3 0.017647 0.000093 0.155963
13 u_Gc_neutral2.gml neu Disneyland measles outbreak 1 0.005882 0.000000 0.135135
14 u_Gc_neutral2.gml neu Dr. Anna Acosta 2 0.011765 0.034876 0.159925
15 u_Gc_neutral2.gml neu Dr. Paul Offit 3 0.017647 0.017543 0.181237
16 u_Gc_neutral2.gml neu Dwoskin Family Foundation 8 0.047059 0.409224 0.194064
17 u_Gc_neutral2.gml neu Focus for Health 4 0.023529 0.101914 0.124178
18 u_Gc_neutral2.gml neu Generation Rescue 4 0.023529 0.122764 0.139573
19 u_Gc_neutral2.gml neu Immunization Action Coalition 1 0.005882 0.000000 0.159027
20 u_Gc_neutral2.gml neu Jenny McCarthy 3 0.017647 0.033275 0.140728
21 u_Gc_neutral2.gml neu Lisa and J.B. Handley 1 0.005882 0.000000 0.122567
22 u_Gc_neutral2.gml neu MMR vaccine 1 0.005882 0.000000 0.100295
23 u_Gc_neutral2.gml neu National Vaccine Information Center 2 0.011765 0.011765 0.162991
24 u_Gc_neutral2.gml neu PLOS Computational Biology 1 0.005882 0.000000 0.168484
25 u_Gc_neutral2.gml neu Pez dispensers 1 0.005882 0.000000 0.189521
26 u_Gc_neutral2.gml neu Rand Paul 3 0.017647 0.046258 0.191226
27 u_Gc_neutral2.gml neu Republican 2 0.011765 0.011625 0.161290
28 u_Gc_neutral2.gml neu Richard Pan 2 0.011765 0.000046 0.135674
29 u_Gc_neutral2.gml neu SB 277 31 0.182353 0.329352 0.183784
... ... ... ... ... ... ... ...
141 u_Gc_neutral2.gml neu state-required vaccinations 1 0.005882 0.000000 0.171371
142 u_Gc_neutral2.gml neu students 1 0.005882 0.000000 0.134814
143 u_Gc_neutral2.gml neu teens 1 0.005882 0.000000 0.154968
144 u_Gc_neutral2.gml neu the sake of being anti-vaccine 1 0.005882 0.000000 0.137207
145 u_Gc_neutral2.gml neu upward trend 1 0.005882 0.000000 0.153291
146 u_Gc_neutral2.gml neu vaccinated 1 0.005882 0.000000 0.129474
147 u_Gc_neutral2.gml neu vaccinated children 2 0.011765 0.011765 0.155678
148 u_Gc_neutral2.gml neu vaccination 5 0.029412 0.053974 0.175801
149 u_Gc_neutral2.gml neu vaccination exemption 3 0.017647 0.011765 0.155678
150 u_Gc_neutral2.gml neu vaccine choice 1 0.005882 0.000000 0.122478
151 u_Gc_neutral2.gml neu vaccine concerns 1 0.005882 0.000000 0.140264
152 u_Gc_neutral2.gml neu vaccine debate 4 0.023529 0.023599 0.123278
153 u_Gc_neutral2.gml neu vaccine duration 2 0.011765 0.020279 0.189099
154 u_Gc_neutral2.gml neu vaccine efficacy 3 0.017647 0.011765 0.180467
155 u_Gc_neutral2.gml neu vaccine refusal 3 0.017647 0.000000 0.129474
156 u_Gc_neutral2.gml neu vaccine risk 3 0.017647 0.011765 0.137540
157 u_Gc_neutral2.gml neu vaccine safety 2 0.011765 0.005344 0.172589
158 u_Gc_neutral2.gml neu vaccine-autism link 4 0.023529 0.361991 0.176349
159 u_Gc_neutral2.gml neu vaccine-injured children 3 0.017647 0.011556 0.137763
160 u_Gc_neutral2.gml neu vaccine-preventable diseases 1 0.005882 0.000000 0.120996
161 u_Gc_neutral2.gml neu vaccines 19 0.111765 0.574881 0.233516
162 u_Gc_neutral2.gml neu vaccines cause childhood illnesses 1 0.005882 0.000000 0.162679
163 u_Gc_neutral2.gml neu voluntary 1 0.005882 0.000000 0.189521
164 u_Gc_neutral2.gml neu vomiting 1 0.005882 0.000000 0.154968
165 u_Gc_neutral2.gml neu waning effectiveness 2 0.011765 0.000139 0.154125
166 u_Gc_neutral2.gml neu waning immunity 1 0.005882 0.000000 0.180467
167 u_Gc_neutral2.gml neu wealthy family foundations 1 0.005882 0.000000 0.109819
168 u_Gc_neutral2.gml neu whole-cell vaccine 5 0.029412 0.150886 0.213300
169 u_Gc_neutral2.gml neu whooping cough 1 0.005882 0.000000 0.154968
170 u_Gc_neutral2.gml neu young people 1 0.005882 0.000000 0.135028

171 rows × 7 columns


In [37]:
# save
#df.to_csv('../output/df/d_Gc_nodes_neu2.csv')
#df.to_csv('../output/df/u_Gc_nodes_neu2.csv')

full network node centrality (directed & undirected)


In [46]:
# make sure you're using the right graph
# Single-argument print(...) gives the same output on Python 2 and Python 3;
# the two spaces reproduce the spacing of the original comma-separated print statements.
print("gml_files =  %s" % (gml_files,))
print("gml_graph =  %s" % (gml_graph,))


gml_files =  ['../output/network/u_Gc_neutral2.gml']
gml_graph =  ../output/network/u_Gc_neutral2.gml

In [47]:
# Choose which full network the centrality tables below are computed from.
# FULL DIRECTED
#graph = nx.read_gml('../output/network/article_neu1.gml')  

# FULL UNDIRECTED
graph = nx.read_gml('../output/network/article_u_neu.gml')

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(nx.info(graph))


Name: article_neu1.gml
Type: MultiGraph
Number of nodes: 201
Number of edges: 236
Average degree:   2.3483

In [48]:
# One single-column frame per centrality measure, indexed by node and
# sorted ascending by the centrality value.

# degree centrality
dc = nx.degree_centrality(graph)
dc_df = (pd.DataFrame.from_dict(dc, orient = 'index')
         .rename(columns = {0: 'degree cent'})
         .sort_values(by = 'degree cent'))

# betweenness centrality
bc = nx.betweenness_centrality(graph)
bc_df = (pd.DataFrame.from_dict(bc, orient = 'index')
         .rename(columns = {0: 'betweenness cent'})
         .sort_values(by = 'betweenness cent'))

# closeness centrality
cc = nx.closeness_centrality(graph)
cc_df = (pd.DataFrame.from_dict(cc, orient = 'index')
         .rename(columns = {0: 'closeness cent'})
         .sort_values(by = 'closeness cent'))

In [49]:
# Display degree centrality for the full network (sorted ascending, so the most central nodes are last).
dc_df


Out[49]:
degree cent
lawyer 0.005
diagnosis 0.005
pertussis in teens 0.005
misrepresentation 0.005
alzheimer's 0.005
medical belief 0.005
vomiting 0.005
maternal inoculation 0.005
severe disease 0.005
childhood illnesses 0.005
parental choice 0.005
minimal 0.005
Pez dispensers 0.005
politicians 0.005
teens 0.005
ages 65-84 0.005
Lisa and J.B. Handley 0.005
whooping cough 0.005
research 0.005
waning immunity 0.005
Americans 0.005
voluntary 0.005
bacteria 0.005
immunity 0.005
imperfect 0.005
acelluar vaccine 0.005
protested 0.005
diagnoses 0.005
vaccine choice 0.005
the sake of being anti-vaccine 0.005
... ...
public education 0.015
United States 0.015
medical science 0.015
vaccine-injured children 0.015
Gambhir study 0.015
vaccine-autism link 0.020
CDC 0.020
parents with vaccine-injured children 0.020
proposed restrictions 0.020
Focus for Health 0.020
elderly 0.020
Generation Rescue 0.020
Ben Allen 0.020
vaccine debate 0.020
disease 0.025
vaccination 0.025
autism 0.025
home-school 0.025
whole-cell vaccine 0.025
Andrew Wakefield 0.025
children 0.035
parents 0.040
Dwoskin Family Foundation 0.040
acellular pertussis vaccine 0.040
high-dose flu vaccine 0.055
pertussis 0.070
pertussis vaccine 0.070
anti-vaccination 0.090
vaccines 0.095
SB 277 0.155

201 rows × 1 columns


In [50]:
# Display betweenness centrality for the full network (sorted ascending, so the most central nodes are last).
bc_df


Out[50]:
betweenness cent
influenza 0.000000
infectious disease 0.000000
pharmaceuticals 0.000000
people 0.000000
California 0.000000
Saron Runner 0.000000
Senate committee 0.000000
vaccine concerns 0.000000
preteens 0.000000
rubella vaccination is safe 0.000000
severe coughing 0.000000
required vaccinations 0.000000
mixed findings 0.000000
vaccine-preventable diseases 0.000000
age 85 and older 0.000000
imperfect 0.000000
immunization waivers 0.000000
Australia 0.000000
wealthy family foundations 0.000000
severe disease 0.000000
vomiting 0.000000
United States 0.000000
pertussis in teens 0.000000
diagnoses 0.000000
the sake of being anti-vaccine 0.000000
Vaccine Injured Petitioners Bar Association 0.000000
Drew Downing 0.000000
measles vaccine is safe 0.000000
options 0.000000
miracles 0.000000
... ...
Jenny McCarthy 0.024020
Dr. Anna Acosta 0.025176
Rand Paul 0.033392
vaccination 0.038961
autism 0.050050
effective 0.053189
changes 0.068981
CDC 0.070193
Focus for Health 0.073568
infants 0.078987
high-dose flu vaccine 0.082462
protection 0.087236
immune response 0.087889
Generation Rescue 0.088618
immune disorders 0.095260
inflammatory disorders 0.095260
cognitive disorders 0.095260
Andrew Wakefield 0.101658
pertussis 0.103752
whole-cell vaccine 0.108918
artificial vaccine 0.118643
acellular pertussis vaccine 0.149934
side effects 0.169408
anti-vaccination 0.184372
children 0.190810
SB 277 0.237746
vaccine-autism link 0.261307
pertussis vaccine 0.284884
Dwoskin Family Foundation 0.295402
vaccines 0.414983

201 rows × 1 columns


In [51]:
# Display closeness centrality for the full network (sorted ascending, so the most central nodes are last).
cc_df


Out[51]:
closeness cent
Vaccine Injured Petitioners Bar Association 0.005000
findings 0.005000
lawyer 0.005000
Drew Downing 0.005000
rise 0.005000
Renee Gentry 0.005000
research 0.005000
people 0.005000
aluminum 0.005000
clusters 0.005000
vaccine allergy 0.005000
neurological disorders 0.005000
pertussis in adults 0.006667
religious exemption 0.006667
pertussis in teens 0.006667
committee members 0.006667
pertussis vaccination 0.009000
vaccination schedule 0.009000
death 0.009000
severe disease 0.009000
booster 0.009000
required vaccinations 0.009000
high-risk 0.009000
amendment 0.010000
risk 0.010000
immunization waivers 0.011250
kindergarteners 0.011250
pertussis in infants 0.015000
personal belief 0.015000
Gambhir study 0.015000
... ...
pertussis 0.155711
SB 277 0.156216
adolescents 0.157923
CDC 0.160556
vaccine duration 0.160734
miracles 0.161093
Pez dispensers 0.161093
necessary 0.161093
man-made 0.161093
for-profit 0.161093
imperfect 0.161093
magic 0.161093
childhood illnesses 0.161093
pharmaceuticals 0.161093
voluntary 0.161093
Rand Paul 0.162542
Dwoskin Family Foundation 0.164954
changes 0.165143
infants 0.168808
acellular pertussis vaccine 0.172024
children 0.175577
protection 0.177518
cognitive disorders 0.179950
inflammatory disorders 0.179950
immune disorders 0.179950
whole-cell vaccine 0.181305
effective 0.181305
pertussis vaccine 0.186934
side effects 0.187662
vaccines 0.198489

201 rows × 1 columns

Gc node centrality (directed & undirected)


In [52]:
# Choose which greatest component the centrality tables below are computed from.
# Gc directed
#graph = nx.read_gml('../output/network/d_Gc_neutral2.gml')

# Gc undirected
graph = nx.read_gml('../output/network/u_Gc_neutral2.gml')  

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(nx.info(graph))


Name: undirected Gc
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263

In [53]:
def _centrality_frame(scores, column):
    """Turn a node -> score mapping into a one-column DataFrame sorted ascending.

    scores -- dict keyed by node with a numeric centrality value per node
    column -- label for the single value column (also the sort key)

    Returns a DataFrame indexed by node, lowest score first, so the most
    central nodes appear at the bottom of the table.
    """
    frame = pd.DataFrame.from_dict(scores, orient='index')
    frame.columns = [column]
    return frame.sort_values(by=[column])


# degree centrality
dc = nx.degree_centrality(graph)
dc_df = _centrality_frame(dc, 'degree cent')

# betweenness centrality
bc = nx.betweenness_centrality(graph)
bc_df = _centrality_frame(bc, 'betweenness cent')

# closeness centrality
cc = nx.closeness_centrality(graph)
cc_df = _centrality_frame(cc, 'closeness cent')

In [54]:
# Show degree centrality per node, sorted ascending (most central nodes last).
dc_df


Out[54]:
degree cent
Barry Segal 0.005882
vaccine-preventable diseases 0.005882
mixed findings 0.005882
Saron Runner 0.005882
severe coughing 0.005882
rubella vaccination is safe 0.005882
imperfect 0.005882
preteens 0.005882
California 0.005882
Alfred and Lisa Claire Dwoskin 0.005882
vaccine concerns 0.005882
Australia 0.005882
age 85 and older 0.005882
wealthy family foundations 0.005882
committee hearing room 0.005882
miracles 0.005882
options 0.005882
measles vaccine is safe 0.005882
the sake of being anti-vaccine 0.005882
diagnoses 0.005882
vomiting 0.005882
childhood illnesses 0.005882
minimal 0.005882
Pez dispensers 0.005882
teens 0.005882
standard vaccine 0.005882
Senate Education Committee 0.005882
measles outbreak 0.005882
adults 0.005882
diagnosis 0.005882
... ...
Jenny McCarthy 0.017647
open conversations 0.017647
vaccine refusal 0.017647
public education 0.017647
Dr. Paul Offit 0.017647
Ben Allen 0.023529
Generation Rescue 0.023529
elderly 0.023529
CDC 0.023529
Focus for Health 0.023529
parents with vaccine-injured children 0.023529
proposed restrictions 0.023529
vaccine-autism link 0.023529
vaccine debate 0.023529
autism 0.029412
vaccination 0.029412
whole-cell vaccine 0.029412
Andrew Wakefield 0.029412
disease 0.029412
home-school 0.029412
children 0.041176
Dwoskin Family Foundation 0.047059
parents 0.047059
acellular pertussis vaccine 0.047059
high-dose flu vaccine 0.064706
pertussis 0.082353
pertussis vaccine 0.082353
anti-vaccination 0.105882
vaccines 0.111765
SB 277 0.182353

171 rows × 1 columns


In [55]:
# Show betweenness centrality per node, sorted ascending (most central nodes last).
bc_df


Out[55]:
betweenness cent
influenza 0.000000
severe coughing 0.000000
rubella vaccination is safe 0.000000
imperfect 0.000000
preteens 0.000000
Senate committee 0.000000
California 0.000000
pharmaceuticals 0.000000
Alfred and Lisa Claire Dwoskin 0.000000
infectious disease 0.000000
vaccine concerns 0.000000
Australia 0.000000
wealthy family foundations 0.000000
Saron Runner 0.000000
standard vaccine 0.000000
misrepresentation 0.000000
miracles 0.000000
options 0.000000
measles vaccine is safe 0.000000
for-profit 0.000000
the sake of being anti-vaccine 0.000000
age 85 and older 0.000000
diagnoses 0.000000
diagnosis 0.000000
United States 0.000000
vomiting 0.000000
childhood illnesses 0.000000
committee hearing room 0.000000
mixed findings 0.000000
vaccine-preventable diseases 0.000000
... ...
Jenny McCarthy 0.033275
Dr. Anna Acosta 0.034876
Rand Paul 0.046258
vaccination 0.053974
autism 0.069335
effective 0.073683
changes 0.095560
CDC 0.097239
Focus for Health 0.101914
infants 0.109421
high-dose flu vaccine 0.114236
protection 0.120849
immune response 0.121754
Generation Rescue 0.122764
cognitive disorders 0.131964
immune disorders 0.131964
inflammatory disorders 0.131964
Andrew Wakefield 0.140828
pertussis 0.143729
whole-cell vaccine 0.150886
artificial vaccine 0.164358
acellular pertussis vaccine 0.207706
side effects 0.234683
anti-vaccination 0.255412
children 0.264331
SB 277 0.329352
vaccine-autism link 0.361991
pertussis vaccine 0.394653
Dwoskin Family Foundation 0.409224
vaccines 0.574881

171 rows × 1 columns


In [56]:
# Show closeness centrality per node, sorted ascending (most central nodes last).
cc_df


Out[56]:
closeness cent
alzheimer's 0.091398
asthma 0.091398
ages 65-84 0.095721
MMR vaccine 0.100295
diagnosis 0.100295
diagnoses 0.100295
increased 0.100532
immunity 0.105525
standard flu vaccine 0.105787
friends and family 0.108557
preteens 0.108696
wealthy family foundations 0.109819
nonprofit organizations 0.109819
chronic illness 0.110533
Barry Segal 0.110533
side of right and good 0.110605
autism 0.111402
standard vaccine 0.117729
influenza 0.117729
major flu impacts 0.117729
age 85 and older 0.117729
mixed findings 0.117729
elderly 0.117892
age 65 and older 0.118056
vaccine-preventable diseases 0.120996
crazy people 0.121515
parents with vaccine-injured children 0.121689
pertussis booster shot 0.121864
vaccine choice 0.122478
Lisa and J.B. Handley 0.122567
... ...
pertussis 0.183190
SB 277 0.183784
adolescents 0.185792
CDC 0.188889
vaccine duration 0.189099
miracles 0.189521
imperfect 0.189521
childhood illnesses 0.189521
necessary 0.189521
for-profit 0.189521
man-made 0.189521
pharmaceuticals 0.189521
magic 0.189521
voluntary 0.189521
Pez dispensers 0.189521
Rand Paul 0.191226
Dwoskin Family Foundation 0.194064
changes 0.194286
infants 0.198598
acellular pertussis vaccine 0.202381
children 0.206561
protection 0.208845
inflammatory disorders 0.211706
immune disorders 0.211706
cognitive disorders 0.211706
whole-cell vaccine 0.213300
effective 0.213300
pertussis vaccine 0.219922
side effects 0.220779
vaccines 0.233516

171 rows × 1 columns


Cutsets (directed & undirected)


In [62]:
# Reload the undirected giant component (Gc) for the cutset analysis.
# To analyse the directed variant instead, read
# '../output/network/d_Gc_neutral2.gml' here.
graph = nx.read_gml('../output/network/u_Gc_neutral2.gml')

print(nx.info(graph))


Name: undirected Gc
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263

In [63]:
# Number of nodes in the loaded component.
print("Greatest component size = %d" % len(graph))


Greatest component size = 171

In [64]:
# Minimum cuts of the loaded graph.
#
# nx.all_node_cuts would enumerate every minimum-size node cutset, i.e. each
# set of nodes (of cardinality equal to the node connectivity of G) whose
# removal breaks G into two or more connected components. It must be given an
# undirected graph and is left disabled here:
#cutsets = list(nx.all_node_cuts(graph))
#print("# of cutsets = %d" % len(cutsets))

print("Greatest component size = %d" % len(graph))

# A set of nodes / edges of minimum cardinality that disconnects the graph.
min_ncut = nx.minimum_node_cut(graph)
min_ecut = nx.minimum_edge_cut(graph)
print("Min node cut = %s" % (min_ncut,))
print("Min edge cut = %s" % (min_ecut,))

# Minimum cuts separating a given source node from a given target node.
print(nx.minimum_node_cut(graph, s='vaccines', t='autism'))
print(nx.minimum_edge_cut(graph, s='vaccines', t='autism'))


Greatest component size = 171
Min node cut = set([u'anti-vaccination'])
Min edge cut = set([(u'artificial vaccine', u'acellular pertussis vaccine')])
set([u'Focus for Health'])
set([(u'Focus for Health', u'autism')])

In [65]:
# Print the 'edge' label of every edge in a minimum edge cut of Gc.
# Change the source (s) and target (t) to inspect another pair, or use the
# commented call to get the global minimum edge cut instead.
a = nx.minimum_edge_cut(graph, s='vaccines', t='autism')
#a = nx.minimum_edge_cut(graph)

labels = nx.get_edge_attributes(graph, 'edge')

# Re-key the labels by their first two components (the node pair). When the
# keys carry a third component (e.g. a multigraph edge key) it is dropped, so
# parallel edges between the same two nodes share one entry and the label
# seen last wins.
edgelabels = {}
for e in labels:
    e1 = e[0:2]
    edgelabels[e1] = labels[e]

for e in a:
    # The cut may report an undirected edge in either orientation, so try the
    # pair as given and then reversed.
    rev_e = e[::-1]
    if e in edgelabels:
        print('%s %s' % (e, edgelabels[e]))
    elif rev_e in edgelabels:
        print('%s %s' % (rev_e, edgelabels[rev_e]))
    else:
        # Edge has no 'edge' attribute: report it instead of raising KeyError.
        print('%s %s' % (e, '(no edge label)'))


(u'Focus for Health', u'autism') is dedicated to advocacy, education, investigation and research that explores epidemic of