Gc negative graph

imports "article_neg1.gml"

  • saves "nodes_df_negative.csv"
    • node labels, degrees, and centralities for entire network
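  • saves "u_Gc_negative.gml" and "d_Gc_negative.gml" (undirected and directed greatest component; the save cell is currently commented out)

imports "d_Gc_negative.gml"

  • saves "Gc_df_neg.csv" (the save cell is currently commented out)
    • network-level statistics for greatest component
  • saves "Gc_nodes_neg.csv"
    • node labels, degrees, and centralities for greatest component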

In [123]:
# 1_network_df

import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from glob import glob

# Plot style and pandas display options used by every cell below.
plt.style.use('ggplot')
pd.set_option('display.width', 5000) 
pd.set_option('display.max_columns', 60)

# Input network file(s) for the "all nodes" section. glob() returns a list of
# matching paths, which is empty when the file does not exist.
gml_files = glob('../output/network/article_neg1.gml')

In [124]:
def calculate_graph_inf(graph, name=None):
    """Label a graph and print its networkx summary.

    Parameters
    ----------
    graph : networkx graph
        Graph to describe. Its ``name`` attribute is overwritten.
    name : str, optional
        Label stored in ``graph.name``. When omitted, the notebook-level
        variable ``filename`` is used, which is how the existing cells call
        this function.

    Raises
    ------
    NameError
        If ``name`` is omitted and no notebook-level ``filename`` exists yet.
    """
    if name is None:
        # Backward-compatible fallback: existing callers assign ``filename``
        # at notebook level immediately before calling this function.
        name = filename
    graph.name = name
    # Single-argument print() produces the same output on Python 2 and 3.
    print(nx.info(graph))

def highest_centrality(cent_dict):
    """Return the ``(node, value)`` pair with the largest centrality value.

    Parameters
    ----------
    cent_dict : dict
        Mapping of node -> centrality value.

    Returns
    -------
    tuple
        ``(node, value)`` for the highest value. When several nodes share the
        highest value, the one with the largest node label is returned, which
        matches the earlier "sort (value, node) pairs descending" behaviour.

    Raises
    ------
    ValueError
        If ``cent_dict`` is empty.
    """
    if not cent_dict:
        raise ValueError("cent_dict is empty; there is no highest centrality")
    # .items() works on Python 2 and 3 (.iteritems() is Python 2 only), and
    # max() avoids sorting the whole dictionary just to read one element.
    node, value = max(cent_dict.items(), key=lambda item: (item[1], item[0]))
    return (node, value)

all nodes table


In [125]:
# 2_node_df: list all nodes and centrality
# Start from empty tables so the loop in the next cell can be re-run cleanly:
# ``data`` receives one row per graph file, ``combined_df`` one row per node.
data_columns = ['name', 'sentiment']
combined_df = pd.DataFrame()
data = pd.DataFrame(columns=data_columns)

In [126]:
# graph = directed, ugraph = undirected
# For every input GML file: print a summary of the directed and undirected
# graphs, then build a per-node table (degree plus three centralities of the
# directed graph) and append it to combined_df.
for graph_num, gml_graph in enumerate(gml_files):
    graph = nx.read_gml(gml_graph)
    ugraph = graph.to_undirected() # to undirected graph
    # U is built with reciprocal=True (per the networkx docs: only edges that
    # exist in both directions); its edges are added on top of ugraph.
    U = graph.to_undirected(reciprocal=True)
    e = U.edges()
    ugraph.add_edges_from(e)
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 10)
    print(gml_graph)
    # calculate_graph_inf labels the graph with the notebook-level `filename`
    # assigned just above.
    calculate_graph_inf(graph)
    calculate_graph_inf(ugraph)

    ## graph-level values
    sent = "neg"
    graph_values = {'name':filename,
                    'sentiment':sent
                    }
    data = data.append(graph_values, ignore_index=True)

    ## node-level values
    # Each measure is computed exactly once; betweenness and closeness are the
    # expensive ones and were previously evaluated twice per graph.
    # dict() keeps this working whether nx.degree returns a dict or a view.
    degree = dict(nx.degree(graph))
    deg_df = pd.DataFrame.from_dict(degree, orient = 'index')
    deg_df.columns = ['degree']
    # degree centrality
    deg_cent = nx.degree_centrality(graph)
    dc_df = pd.DataFrame.from_dict(deg_cent, orient = 'index')
    dc_df.columns = ['deg cent']
    # betweenness centrality
    bet_cent = nx.betweenness_centrality(graph)
    bc_df = pd.DataFrame.from_dict(bet_cent, orient = 'index')
    bc_df.columns = ['bet cent']
    # closeness centrality
    clo_cent = nx.closeness_centrality(graph)
    cc_df = pd.DataFrame.from_dict(clo_cent, orient = 'index')
    cc_df.columns = ['clo cent']
    # concat node frames into node_df (aligned on the node label index)
    frames = [deg_df, dc_df, bc_df, cc_df]
    node_df = pd.concat(frames, axis = 1)
    node_df.index.name = 'node'
    node_df = node_df.reset_index()

    values = pd.DataFrame(graph_values, columns = ('name', 'sentiment'), index = [0])

    # df = merges graph_values with node_df for single graph and fill NaNs:
    # `values` has a single row, so name/sentiment are forward-filled down
    # to every node row.
    df = pd.concat([values, node_df], axis = 1)
    df = df.fillna(method='ffill')
    combined_df = combined_df.append(df)


----------
../output/network/article_neg1.gml
Name: article_neg1.gml
Type: MultiDiGraph
Number of nodes: 1257
Number of edges: 1898
Average in degree:   1.5099
Average out degree:   1.5099
Name: article_neg1.gml
Type: MultiGraph
Number of nodes: 1257
Number of edges: 1898
Average degree:   3.0199

In [127]:
# what the network looks like without adding back edges e = U.edges()

#for graph_num, gml_graph in enumerate(gml_files):
#    graph2 = nx.read_gml(gml_graph)
#    ugraph2 = graph.to_undirected() # to undirected graph
#    U2 = graph.to_undirected(reciprocal=True)
#    (filepath, filename) = os.path.split(gml_graph)
#    print('-' * 10)
#    print(gml_graph)
#    calculate_graph_inf(graph2)
#    calculate_graph_inf(ugraph2)

In [128]:
# print entire network
# One row per node: source file, sentiment label, node label, degree and the
# three centrality measures.
combined_df


Out[128]:
name sentiment node degree deg cent bet cent clo cent
0 article_neg1.gml neg ACIP 1 0.000796 0.000000e+00 0.077104
1 article_neg1.gml neg ACIP's rotavirus use recommendation 1 0.000796 0.000000e+00 0.000000
2 article_neg1.gml neg ADHD 1 0.000796 0.000000e+00 0.102970
3 article_neg1.gml neg AIDS 1 0.000796 0.000000e+00 0.000000
4 article_neg1.gml neg African American males 2 0.001592 0.000000e+00 0.105347
5 article_neg1.gml neg African women 3 0.002389 2.008949e-06 0.000796
6 article_neg1.gml neg African-American children 1 0.000796 0.000000e+00 0.000000
7 article_neg1.gml neg Alysia Osoff 6 0.004777 4.250514e-04 0.097385
8 article_neg1.gml neg America 2 0.001592 1.903215e-06 0.001433
9 article_neg1.gml neg American Academy of Pediatrics 2 0.001592 2.170723e-04 0.095888
10 article_neg1.gml neg American Nursing Association's Code of Ethics 2 0.001592 0.000000e+00 0.000000
11 article_neg1.gml neg Americans 7 0.005573 4.896067e-03 0.102908
12 article_neg1.gml neg Amish 2 0.001592 0.000000e+00 0.001791
13 article_neg1.gml neg Andrew Wakefield 1 0.000796 0.000000e+00 0.000000
14 article_neg1.gml neg Apartheid 1 0.000796 0.000000e+00 0.000000
15 article_neg1.gml neg Attkisson's website 2 0.001592 4.104601e-04 0.088995
16 article_neg1.gml neg Australia 1 0.000796 0.000000e+00 0.096528
17 article_neg1.gml neg Baby Boom 2 0.001592 0.000000e+00 0.000000
18 article_neg1.gml neg Baby Boomers 5 0.003981 0.000000e+00 0.104643
19 article_neg1.gml neg Baker College nursing school 3 0.002389 2.537620e-06 0.000796
20 article_neg1.gml neg Baker College nursing school instructors 6 0.004777 4.387104e-04 0.092638
21 article_neg1.gml neg Bell's Palsy 1 0.000796 0.000000e+00 0.000000
22 article_neg1.gml neg Big Pharma 13 0.010350 7.582102e-03 0.127289
23 article_neg1.gml neg Big Tobacco 2 0.001592 0.000000e+00 0.123915
24 article_neg1.gml neg Bill of Rights 1 0.000796 0.000000e+00 0.000000
25 article_neg1.gml neg Brian Hooker 1 0.000796 0.000000e+00 0.096415
26 article_neg1.gml neg Bruesewitz v. Wyeth 2 0.001592 4.066536e-04 0.121071
27 article_neg1.gml neg Bush Administration 2 0.001592 0.000000e+00 0.107416
28 article_neg1.gml neg CDC 62 0.049363 2.134902e-02 0.154337
29 article_neg1.gml neg CDC and Big Pharma 3 0.002389 0.000000e+00 0.097188
... ... ... ... ... ... ... ...
1227 article_neg1.gml neg violation of basic human rights 2 0.001592 0.000000e+00 0.000000
1228 article_neg1.gml neg violation of law 1 0.000796 0.000000e+00 0.000000
1229 article_neg1.gml neg viral replication 2 0.001592 0.000000e+00 0.000000
1230 article_neg1.gml neg vitamin A supplements 2 0.001592 0.000000e+00 0.004485
1231 article_neg1.gml neg vitamin B12 2 0.001592 3.172025e-07 0.000796
1232 article_neg1.gml neg vitamin C 2 0.001592 3.172025e-07 0.000796
1233 article_neg1.gml neg vitamin D 1 0.000796 0.000000e+00 0.001062
1234 article_neg1.gml neg vitamin D deficiency 1 0.000796 0.000000e+00 0.000796
1235 article_neg1.gml neg vitamin supplements 1 0.000796 0.000000e+00 0.000000
1236 article_neg1.gml neg vulnerable 1 0.000796 0.000000e+00 0.000000
1237 article_neg1.gml neg wander 1 0.000796 0.000000e+00 0.000000
1238 article_neg1.gml neg wander off 1 0.000796 0.000000e+00 0.000000
1239 article_neg1.gml neg war propaganda 2 0.001592 0.000000e+00 0.000000
1240 article_neg1.gml neg water 1 0.000796 0.000000e+00 0.000000
1241 article_neg1.gml neg whistle 1 0.000796 0.000000e+00 0.000000
1242 article_neg1.gml neg whistleblower 3 0.002389 4.136321e-04 0.101282
1243 article_neg1.gml neg whistleblowers 1 0.000796 0.000000e+00 0.000000
1244 article_neg1.gml neg whooping cough 1 0.000796 0.000000e+00 0.000000
1245 article_neg1.gml neg whooping cough outbreaks 1 0.000796 0.000000e+00 0.080278
1246 article_neg1.gml neg widespread 1 0.000796 0.000000e+00 0.000000
1247 article_neg1.gml neg widespread fear 2 0.001592 1.209176e-03 0.114824
1248 article_neg1.gml neg widespread health problems 2 0.001592 0.000000e+00 0.102746
1249 article_neg1.gml neg words 2 0.001592 0.000000e+00 0.114627
1250 article_neg1.gml neg world 1 0.000796 0.000000e+00 0.000000
1251 article_neg1.gml neg world's healthiest children 1 0.000796 0.000000e+00 0.000000
1252 article_neg1.gml neg wrong doing 3 0.002389 0.000000e+00 0.000000
1253 article_neg1.gml neg years 1 0.000796 0.000000e+00 0.000000
1254 article_neg1.gml neg you 10 0.007962 3.966962e-03 0.120027
1255 article_neg1.gml neg young adults 1 0.000796 0.000000e+00 0.000000
1256 article_neg1.gml neg young doctors 7 0.005573 4.091913e-04 0.092548

1257 rows × 7 columns


In [129]:
# save the all-nodes table (entire network) to CSV
combined_df.to_csv('../output/df/nodes_df_negative.csv')

Draw network


In [22]:
# 7_graph_calculation
def drawIt(graph, what = 'graph'):
    """Announce the size of ``graph`` and draw it with a spring layout.

    Graphs with more than 20 nodes get a 10x10 inch figure; graphs with more
    than 40 nodes are additionally drawn with small nodes and a fixed label
    font size. ``what`` is only used in the printed message.
    """
    node_count = graph.number_of_nodes()
    print("Drawing %s of size %s:" % (what, node_count))

    # Larger graphs need a bigger canvas.
    if node_count > 20:
        plt.figure(figsize=(10, 10))

    # Very large graphs also need smaller markers to stay readable.
    if node_count > 40:
        nx.draw_spring(graph, with_labels = True, node_size = 70, font_size = 12)
    else:
        nx.draw_spring(graph, with_labels = True)
    plt.show()

# for undirected graphs
def describeGraph(graph):
    """Print edge/node/component counts of an undirected graph and draw it.

    The whole graph is drawn first, followed by every connected component
    from largest to smallest.
    """
    # Node sets of the connected components, largest first.
    components = sorted(nx.connected_components(graph), key = len, reverse = True)
    # The former `subgraphs` list (a copy of every component) and the `cc`
    # size list were never used beyond the component count, so they are gone.
    params = (graph.number_of_edges(), graph.number_of_nodes(), len(components))
    print("Graph has %s edges, %s nodes, %s connected components\n" % params)
    drawIt(graph)
    for sub in components:
        drawIt(graph.subgraph(sub), what = 'component')

# for directed graphs
def describeGraph_d(graph):
    """Print edge/node/component counts of a directed graph and draw it.

    Components are the weakly connected components. The whole graph is drawn
    first, followed by every component from largest to smallest.
    """
    # Node sets of the weakly connected components, largest first.
    components = sorted(nx.weakly_connected_components(graph), key = len, reverse = True)
    # The former `subgraphs` list (a copy of every component) and the `cc`
    # size list were never used beyond the component count, so they are gone.
    params = (graph.number_of_edges(), graph.number_of_nodes(), len(components))
    print("Graph has %s edges, %s nodes, %s connected components\n" % params)
    drawIt(graph)
    for sub in components:
        drawIt(graph.subgraph(sub), what = 'component')

In [23]:
# UNDIRECTED network graph
# Prints edge/node/component counts, then draws the whole graph followed by
# every connected component.
describeGraph(ugraph)


Graph has 1898 edges, 1257 nodes, 49 connected components

Drawing graph of size 1257:
Drawing component of size 1140:
Drawing component of size 7:
Drawing component of size 4:
Drawing component of size 4:
Drawing component of size 4:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:

In [24]:
# DIRECTED network graph
# Same report as above, using weakly connected components of the directed graph.
describeGraph_d(graph)


Graph has 1898 edges, 1257 nodes, 49 connected components

Drawing graph of size 1257:
Drawing component of size 1140:
Drawing component of size 7:
Drawing component of size 4:
Drawing component of size 4:
Drawing component of size 4:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:

Undirected graph representation


In [40]:
# list of connected components by size, largest first (undirected graph)
connected_components = [len(c) for c in sorted(nx.connected_components(ugraph), key=len, reverse=True)]

# generate connected components as subgraphs (undirected graph)
subgraphs = list(nx.connected_component_subgraphs(ugraph))

# greatest component (undirected graph): pick it from the list built above
# instead of generating every component subgraph a second time
u_Gc = max(subgraphs, key=len)
u_Gc.name = "undirected Gc"

In [41]:
# component sizes (largest first) and summary of the undirected greatest component
print "connected components = ", connected_components
print nx.info(u_Gc)


connected components =  [1140, 7, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
Name: undirected Gc
Type: MultiGraph
Number of nodes: 1140
Number of edges: 1826
Average degree:   3.2035

Directed graph representation


In [42]:
# use directed graph: weakly connected components, largest first
components = sorted(nx.weakly_connected_components(graph), key = len, reverse = True)
cc = [len(c) for c in components]

# generate connected components as subgraphs
subgraphs = list(nx.weakly_connected_component_subgraphs(graph))

# greatest component: pick it from the list built above instead of
# generating every component subgraph a second time
d_Gc = max(subgraphs, key=len)
d_Gc.name = "directed Gc"

In [43]:
# component sizes (largest first) and summary of the directed greatest component
print "connected components = ", cc
print nx.info(d_Gc)


connected components =  [1140, 7, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
Name: directed Gc
Type: MultiDiGraph
Number of nodes: 1140
Number of edges: 1826
Average in degree:   1.6018
Average out degree:   1.6018

In [46]:
## understand how direction changes degree ##
# Compares the original directed graph, its undirected version, and the
# undirected version converted back to directed. In the saved output the
# round trip doubles the edge count (1898 -> 3796).

print nx.info(graph)  # original directed
print nx.info(ugraph)  # to undirected
temp = ugraph.to_directed()  # back to directed
print nx.info(temp)


Name: article_neg1.gml
Type: MultiDiGraph
Number of nodes: 1257
Number of edges: 1898
Average in degree:   1.5099
Average out degree:   1.5099
Name: article_neg1.gml
Type: MultiGraph
Number of nodes: 1257
Number of edges: 1898
Average degree:   3.0199
Name: article_neg1.gml
Type: MultiDiGraph
Number of nodes: 1257
Number of edges: 3796
Average in degree:   3.0199
Average out degree:   3.0199

In [47]:
# finally, greatest components for undirected and directed graphs
# (summaries of u_Gc and d_Gc built in the two sections above)
print nx.info(u_Gc)
print nx.info(d_Gc)


Name: undirected Gc
Type: MultiGraph
Number of nodes: 1140
Number of edges: 1826
Average degree:   3.2035
Name: directed Gc
Type: MultiDiGraph
Number of nodes: 1140
Number of edges: 1826
Average in degree:   1.6018
Average out degree:   1.6018

In [48]:
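# save Gc
# NOTE: the following sections read "../output/network/d_Gc_negative.gml";
# uncomment these lines to (re)generate the greatest-component files first.
#nx.write_gml(u_Gc, "../output/network/u_Gc_negative.gml")
#nx.write_gml(d_Gc, "../output/network/d_Gc_negative.gml")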

Calculate network statistics (averages) for greatest component.


In [49]:
# load directed Gc
# NOTE(review): glob() returns an empty list when this file is missing, in
# which case the loop in the next cell silently does nothing. The cell that
# would write this file ("save Gc") is commented out in this notebook.
Gc_files = glob('../output/network/d_Gc_negative.gml')

# Column layout of the one-row-per-graph summary table filled by the next cell.
network_data_columns = ['name',
                    'sentiment',
                    '# nodes',
                    '# edges',
                    #'avg deg',
                    'density',
                    'deg assort coef', 
                    'avg deg cent',
                    'avg bet cent',
                    'avg clo cent',
                    'high deg cent',
                    'high bet cent',
                    'high clo cent',
                    'avg node conn',
                    '# conn comp',
                    'gc size'
                    ]
network_data = pd.DataFrame(columns = network_data_columns)

In [63]:
# Gc_files
# For every greatest-component file: print its summary, compute network-level
# statistics and append them as one row to network_data.

for graph_num, gml_graph in enumerate(Gc_files):
    graph = nx.read_gml(gml_graph)
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 10)
    print(gml_graph)
    calculate_graph_inf(graph)

    # calculate variables
    sent = "neg"
    nodes = nx.number_of_nodes(graph)
    edges = nx.number_of_edges(graph)
    density = float("{0:.4f}".format(nx.density(graph)))

    # Each centrality is computed once and reused for both the average and
    # the highest-scoring node (previously each one was computed twice).
    deg_cen = nx.degree_centrality(graph)
    bet_cen = nx.betweenness_centrality(graph)
    clo_cen = nx.closeness_centrality(graph)
    # list(...) keeps the averages correct on Python 3, where dict.values()
    # is a view that numpy would otherwise wrap as a single object.
    avg_deg_cen = np.array(list(deg_cen.values())).mean()
    avg_bet_cen = np.array(list(bet_cen.values())).mean()
    avg_clo_cen = np.array(list(clo_cen.values())).mean()
    highest_deg_cen = highest_centrality(deg_cen)
    highest_bet_cen = highest_centrality(bet_cen)
    highest_clo_cen = highest_centrality(clo_cen)

    #avg_deg = float("{0:.4f}".format(in_deg + out_deg))
    avg_node_con = float("{0:.4f}".format((nx.average_node_connectivity(graph))))
    deg_assort_coeff = float("{0:.4f}".format((nx.degree_assortativity_coefficient(graph))))
    conn_comp = nx.number_weakly_connected_components(graph) # ugraph
    # Size of the largest weakly connected component; the node sets are enough
    # for a length, so no component subgraphs need to be built.
    Gc = len(max(nx.weakly_connected_components(graph), key=len))

    # save variables into list
    graph_values = {'name':filename,
                    'sentiment':sent,
                    '# nodes':nodes,
                    '# edges':edges,
                    #'avg deg':avg_deg,
                    'density':density,
                    'deg assort coef':deg_assort_coeff,
                    'avg deg cent':"%.4f" % avg_deg_cen,
                    'avg bet cent':"%.4f" % avg_bet_cen,
                    'avg clo cent':"%.4f" % avg_clo_cen,
                    'high deg cent':highest_deg_cen,
                    'high bet cent':highest_bet_cen,
                    'high clo cent':highest_clo_cen,
                    'avg node conn':avg_node_con,
                    '# conn comp':conn_comp,
                    'gc size':Gc
                    }
    network_data = network_data.append(graph_values, ignore_index=True)


----------
../output/network/d_Gc_negative.gml
Name: d_Gc_negative.gml
Type: MultiDiGraph
Number of nodes: 1140
Number of edges: 1826
Average in degree:   1.6018
Average out degree:   1.6018

In [64]:
# print network data for greatest component
# (one row per file listed in Gc_files)
network_data


Out[64]:
name sentiment # nodes # edges density deg assort coef avg deg cent avg bet cent avg clo cent high deg cent high bet cent high clo cent avg node conn # conn comp gc size
0 d_Gc_negative.gml neg 1140.0 1826.0 0.0014 -0.0122 0.0028 0.0007 0.0399 (vaccines, 0.117647058824) (vaccines, 0.0838129050492) (vaccine industry, 0.174485053966) 0.2135 1.0 1140.0

In [65]:
# save
#network_data.to_csv('../output/df/Gc_df_neg.csv')

Gc nodes table


In [111]:
# Re-point gml_files at the directed greatest-component file for the
# "Gc nodes table" section; this overwrites the list defined at the top.
gml_files = glob('../output/network/d_Gc_negative.gml')

In [112]:
# 2_node_df: list all nodes and centrality
# Reset both tables before the greatest-component loop in the next cell.
data_columns = ['name', 'sentiment']
combined_df = pd.DataFrame()
data = pd.DataFrame(columns=data_columns)

In [113]:
# graph = directed greatest component
# For every file in gml_files: print its summary, then build `df`, a per-node
# table with degree and three centralities.
for graph_num, gml_graph in enumerate(gml_files):
    graph = nx.read_gml(gml_graph)
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 10)
    print(gml_graph)
    # calculate_graph_inf labels the graph with the notebook-level `filename`
    # assigned just above.
    calculate_graph_inf(graph)

    ## graph-level values
    sent = "neg"
    graph_values = {'name':filename,
                    'sentiment':sent
                    }
    data = data.append(graph_values, ignore_index=True)

    ## node-level values
    # Each measure is computed exactly once; betweenness and closeness are the
    # expensive ones and were previously evaluated twice per graph.
    # dict() keeps this working whether nx.degree returns a dict or a view.
    degree = dict(nx.degree(graph))
    deg_df = pd.DataFrame.from_dict(degree, orient = 'index')
    deg_df.columns = ['degree']
    # degree centrality
    deg_cent = nx.degree_centrality(graph)
    dc_df = pd.DataFrame.from_dict(deg_cent, orient = 'index')
    dc_df.columns = ['deg cent']
    # betweenness centrality
    bet_cent = nx.betweenness_centrality(graph)
    bc_df = pd.DataFrame.from_dict(bet_cent, orient = 'index')
    bc_df.columns = ['bet cent']
    # closeness centrality
    clo_cent = nx.closeness_centrality(graph)
    cc_df = pd.DataFrame.from_dict(clo_cent, orient = 'index')
    cc_df.columns = ['clo cent']
    # concat node frames into node_df (aligned on the node label index)
    frames = [deg_df, dc_df, bc_df, cc_df]
    node_df = pd.concat(frames, axis = 1)
    node_df.index.name = 'node'
    node_df = node_df.reset_index()

    values = pd.DataFrame(graph_values, columns = ('name', 'sentiment'), index = [0])

    # df = merges graph_values with node_df for single graph and fill NaNs:
    # `values` has a single row, so name/sentiment are forward-filled down
    # to every node row.
    df = pd.concat([values, node_df], axis = 1)
    df = df.fillna(method='ffill')
    #combined_df = combined_df.append(df)


----------
../output/network/d_Gc_negative.gml
Name: d_Gc_negative.gml
Type: MultiDiGraph
Number of nodes: 1140
Number of edges: 1826
Average in degree:   1.6018
Average out degree:   1.6018

In [114]:
# print negative gc nodes
# `df` is the per-node table left over from the last iteration of the loop above.
df


Out[114]:
name sentiment node degree deg cent bet cent clo cent
0 d_Gc_negative.gml neg ACIP 1 0.000878 0.000000 0.085025
1 d_Gc_negative.gml neg ACIP's rotavirus use recommendation 1 0.000878 0.000000 0.000000
2 d_Gc_negative.gml neg ADHD 1 0.000878 0.000000 0.113547
3 d_Gc_negative.gml neg AIDS 1 0.000878 0.000000 0.000000
4 d_Gc_negative.gml neg African American males 2 0.001756 0.000000 0.116168
5 d_Gc_negative.gml neg African women 3 0.002634 0.000002 0.000878
6 d_Gc_negative.gml neg African-American children 1 0.000878 0.000000 0.000000
7 d_Gc_negative.gml neg Alysia Osoff 6 0.005268 0.000517 0.107389
8 d_Gc_negative.gml neg America 2 0.001756 0.000002 0.001580
9 d_Gc_negative.gml neg American Academy of Pediatrics 2 0.001756 0.000264 0.105738
10 d_Gc_negative.gml neg American Nursing Association's Code of Ethics 2 0.001756 0.000000 0.000000
11 d_Gc_negative.gml neg Americans 7 0.006146 0.005954 0.113479
12 d_Gc_negative.gml neg Amish 2 0.001756 0.000000 0.001975
13 d_Gc_negative.gml neg Andrew Wakefield 1 0.000878 0.000000 0.000000
14 d_Gc_negative.gml neg Apartheid 1 0.000878 0.000000 0.000000
15 d_Gc_negative.gml neg Attkisson's website 2 0.001756 0.000499 0.098137
16 d_Gc_negative.gml neg Australia 1 0.000878 0.000000 0.106444
17 d_Gc_negative.gml neg Baby Boom 2 0.001756 0.000000 0.000000
18 d_Gc_negative.gml neg Baby Boomers 5 0.004390 0.000000 0.115392
19 d_Gc_negative.gml neg Baker College nursing school 3 0.002634 0.000003 0.000878
20 d_Gc_negative.gml neg Baker College nursing school instructors 6 0.005268 0.000534 0.102154
21 d_Gc_negative.gml neg Bell's Palsy 1 0.000878 0.000000 0.000000
22 d_Gc_negative.gml neg Big Pharma 13 0.011414 0.009221 0.140364
23 d_Gc_negative.gml neg Big Tobacco 2 0.001756 0.000000 0.136643
24 d_Gc_negative.gml neg Bill of Rights 1 0.000878 0.000000 0.000000
25 d_Gc_negative.gml neg Brian Hooker 1 0.000878 0.000000 0.106318
26 d_Gc_negative.gml neg Bruesewitz v. Wyeth 2 0.001756 0.000495 0.133508
27 d_Gc_negative.gml neg Bush Administration 2 0.001756 0.000000 0.118450
28 d_Gc_negative.gml neg CDC 62 0.054434 0.025962 0.170191
29 d_Gc_negative.gml neg CDC and Big Pharma 3 0.002634 0.000000 0.107171
... ... ... ... ... ... ... ...
1110 d_Gc_negative.gml neg vaccines spread disease 1 0.000878 0.000000 0.000000
1111 d_Gc_negative.gml neg vaccinia virus-naive subjects 2 0.001756 0.000000 0.002744
1112 d_Gc_negative.gml neg value 1 0.000878 0.000000 0.000878
1113 d_Gc_negative.gml neg variant genotypes 1 0.000878 0.000000 0.108150
1114 d_Gc_negative.gml neg victims 4 0.003512 0.001742 0.120312
1115 d_Gc_negative.gml neg violation of Hippocratic Oath 1 0.000878 0.000000 0.000000
1116 d_Gc_negative.gml neg violation of basic human rights 2 0.001756 0.000000 0.000000
1117 d_Gc_negative.gml neg violation of law 1 0.000878 0.000000 0.000000
1118 d_Gc_negative.gml neg vitamin A supplements 2 0.001756 0.000000 0.004946
1119 d_Gc_negative.gml neg vitamin D 1 0.000878 0.000000 0.001171
1120 d_Gc_negative.gml neg vitamin supplements 1 0.000878 0.000000 0.000000
1121 d_Gc_negative.gml neg vulnerable 1 0.000878 0.000000 0.000000
1122 d_Gc_negative.gml neg wander 1 0.000878 0.000000 0.000000
1123 d_Gc_negative.gml neg wander off 1 0.000878 0.000000 0.000000
1124 d_Gc_negative.gml neg water 1 0.000878 0.000000 0.000000
1125 d_Gc_negative.gml neg whistle 1 0.000878 0.000000 0.000000
1126 d_Gc_negative.gml neg whistleblower 3 0.002634 0.000503 0.111686
1127 d_Gc_negative.gml neg whistleblowers 1 0.000878 0.000000 0.000000
1128 d_Gc_negative.gml neg whooping cough 1 0.000878 0.000000 0.000000
1129 d_Gc_negative.gml neg whooping cough outbreaks 1 0.000878 0.000000 0.088525
1130 d_Gc_negative.gml neg widespread fear 2 0.001756 0.001470 0.126619
1131 d_Gc_negative.gml neg widespread health problems 2 0.001756 0.000000 0.113300
1132 d_Gc_negative.gml neg words 2 0.001756 0.000000 0.126402
1133 d_Gc_negative.gml neg world 1 0.000878 0.000000 0.000000
1134 d_Gc_negative.gml neg world's healthiest children 1 0.000878 0.000000 0.000000
1135 d_Gc_negative.gml neg wrong doing 3 0.002634 0.000000 0.000000
1136 d_Gc_negative.gml neg years 1 0.000878 0.000000 0.000000
1137 d_Gc_negative.gml neg you 10 0.008780 0.004824 0.132357
1138 d_Gc_negative.gml neg young adults 1 0.000878 0.000000 0.000000
1139 d_Gc_negative.gml neg young doctors 7 0.006146 0.000498 0.102055

1140 rows × 7 columns


In [115]:
# save the greatest-component node table to CSV
df.to_csv('../output/df/Gc_nodes_neg.csv')

Cutsets


In [66]:
# Number of nodes in whichever graph is currently bound to `graph`.
# NOTE(review): the saved output (1140) matches the greatest component, so
# this presumably ran right after a cell that loaded d_Gc_negative.gml.
print "Greatest component size =", len(graph)


Greatest component size = 1140

In [67]:
# returns all minimum k cutsets of an undirected graph
# i.e., the set(s) of nodes of cardinality equal to the node connectivity of G
# thus if removed, would break G into two or more connected components

#cutsets = list(nx.all_node_cuts(graph))  # must be undirected

print "Greatest component size =", len(graph)
#print "# of cutsets =", len(cutsets)

# returns a set of nodes or edges of minimum cardinality that disconnects G
# NOTE(review): both global cuts come back empty in the saved output;
# presumably because the directed graph is only weakly connected - confirm.
min_ncut = nx.minimum_node_cut(graph)
min_ecut = nx.minimum_edge_cut(graph)

print "Min node cut =", min_ncut
print "Min edge cut =", min_ecut

# min cuts with source and target
# (node / edge sets whose removal separates 'vaccines' from 'autism')
print nx.minimum_node_cut(graph, s='vaccines', t='autism')
print nx.minimum_edge_cut(graph, s='vaccines', t='autism')


Greatest component size = 1140
Min node cut = set([])
Min edge cut = set([])
set([u'toxic chemical ingredients', u'vaccine safety', u'vaccine industry', u'doctors', u'vaccine story', u'behavior changes', u'encephalopathy', u'thimerosal', u'vaccine damage', u'children', u'preservative', u'hepatitis B vaccine'])
set([(u'CDC', u'autism'), (u'thimerosal', u'autism'), (u'children', u'autism'), (u'encephalopathy', u'autism'), (u'vaccines', u'autism'), (u'preservative', u'autism'), (u'link', u'autism'), (u'measles', u'autism'), (u'vaccination', u'autism'), (u'deaths', u'hepatitis B vaccine'), (u'CDC whistleblower', u'autism'), (u'vaccine safety', u'autism'), (u'toxic chemical ingredients', u'autism'), (u'vaccine-injured children', u'autism'), (u'public', u'autism')])

In [61]:
# read edge labels in min cut for Gc
# change source and target
a = nx.minimum_edge_cut(graph, s='vaccines', t='autism')
#a = nx.minimum_edge_cut(graph)

# Map every (source, target) pair to its 'edge' attribute. Keys are cut down
# to their first two elements, so when several parallel edges join the same
# pair only the label seen last is kept.
labels = nx.get_edge_attributes(graph,'edge')
edgelabels = {}
for e in labels.keys():
    e1 = e[0:2]
    edgelabels[e1]=labels[e]

# Print each cut edge with its label, trying the reversed pair when the edge
# is stored in the opposite direction.
for e in a:
    # `in` replaces dict.has_key(), which no longer exists on Python 3.
    if e in edgelabels:
        print("%s %s" % (e, edgelabels[e]))
    else:
        rev_e = e[::-1]
        if rev_e in edgelabels:
            print("%s %s" % (rev_e, edgelabels[rev_e]))
        else:
            # Neither direction carries an 'edge' attribute: report it
            # instead of stopping the cell with a KeyError.
            print("%s <no edge label>" % (e,))


(u'CDC', u'autism') obscure an existing link between vaccines and
(u'thimerosal', u'autism') appeared to be responsible for drastic increase in
(u'children', u'autism') every year tens of thousands more diagnosed with
(u'encephalopathy', u'autism') caused by vaccines produces a permanent injury and creates symptoms of
(u'vaccines', u'autism') illogical and downright ignorant to suggest that have nothing to do with rise in
(u'preservative', u'autism') in vaccine causes
(u'link', u'autism') existing between vaccines and
(u'measles', u'autism') preferable to
(u'vaccination', u'autism') caused
(u'deaths', u'hepatitis B vaccine') 47 reported from
(u'CDC whistleblower', u'autism') admitting vaccines are linked to
(u'vaccine safety', u'autism') questionable in relation to
(u'toxic chemical ingredients', u'autism') may be significant contributing factor to
(u'vaccine-injured children', u'autism') often revealed to have
(u'public', u'autism') wonders why they have

Centrality tables


In [74]:
# make sure you're using the right graph
# NOTE(review): in the saved output gml_files points at the full network while
# gml_graph points at the greatest component, i.e. the cells above were not
# run top to bottom.
print "gml_files = ", gml_files
print "gml_graph = ", gml_graph


gml_files =  ['../output/network/article_neg1.gml']
gml_graph =  ../output/network/d_Gc_negative.gml

In [69]:
# Choose the network used for the centrality tables below: keep exactly one
# of the two read_gml lines active.
graph = nx.read_gml('../output/network/article_neg1.gml')  # full network
#graph = nx.read_gml('../output/network/d_Gc_negative.gml')  # gc network

print nx.info(graph)


Name: 
Type: MultiDiGraph
Number of nodes: 1257
Number of edges: 1898
Average in degree:   1.5099
Average out degree:   1.5099

In [70]:
# One single-column table per centrality measure, indexed by node label and
# sorted ascending so the most central nodes appear at the bottom.

# degree centrality
dc = nx.degree_centrality(graph)
dc_df = (pd.DataFrame.from_dict(dc, orient='index')
         .rename(columns={0: 'degree cent'})
         .sort_values(by=['degree cent']))

# betweenness centrality
bc = nx.betweenness_centrality(graph)
bc_df = (pd.DataFrame.from_dict(bc, orient='index')
         .rename(columns={0: 'betweenness cent'})
         .sort_values(by=['betweenness cent']))

# closeness centrality
cc = nx.closeness_centrality(graph)
cc_df = (pd.DataFrame.from_dict(cc, orient='index')
         .rename(columns={0: 'closeness cent'})
         .sort_values(by=['closeness cent']))

In [75]:
# degree centrality per node, ascending (highest values at the bottom)
dc_df


Out[75]:
degree cent
financial collusion 0.000796
toxic heavy metal 0.000796
syncope 0.000796
labor and delivery floor 0.000796
human carcinogen 0.000796
dissenters 0.000796
generous 0.000796
trustworthy 0.000796
human muscle tissue 0.000796
parental right 0.000796
human right 0.000796
flu season 0.000796
target 0.000796
anaphylactic shock 0.000796
solid marks 0.000796
government healthcare reform 0.000796
outrageous 0.000796
more severe autism 0.000796
compensation 0.000796
New York 0.000796
cellular degeneration 0.000796
guilt 0.000796
nobody 0.000796
CDC scientific fraud 0.000796
intimidation 0.000796
variant genotypes 0.000796
surveys 0.000796
political weapon 0.000796
demand for justice 0.000796
special education services 0.000796
... ...
measles cases 0.013535
scientific fraud 0.014331
Nichole Rolfe 0.014331
vaccine-autism link 0.015127
SV40 0.015127
adverse effects 0.015924
parents 0.015924
hepatitis B vaccine 0.015924
Merck 0.016720
vaccine ingredients 0.016720
informed consent 0.016720
pandemic H1N1 swine flu vaccine 0.017516
people 0.017516
measles mortality 0.017516
United States 0.018312
measles 0.018312
SB 277 0.020701
vaccination 0.021497
mandatory vaccines 0.023089
pharmaceutical companies 0.024682
flu shots 0.026274
doctors 0.031847
mainstream media 0.032643
mercury 0.032643
autism 0.037420
CDC 0.049363
vaccine industry 0.051752
thimerosal 0.057325
children 0.060510
vaccines 0.106688

1257 rows × 1 columns


In [76]:
# betweenness centrality per node, ascending (highest values at the bottom)
bc_df


Out[76]:
betweenness cent
marketing vaccines to children 0.000000
government contracts 0.000000
pertussis vaccine booster 0.000000
criminal conspiracy 0.000000
healthier children 0.000000
health of children 0.000000
pack of lemmings 0.000000
health authorities 0.000000
poor access to clean water 0.000000
cancer 0.000000
inexpensive test 0.000000
vaccine messages 0.000000
uninformed consent 0.000000
reduced oxidative-reduction activity 0.000000
vaccine successes 0.000000
vitamin D deficiency 0.000000
THEY 0.000000
Hitler's minions 0.000000
health care professionals 0.000000
piece of evidence 0.000000
trust 0.000000
Disney 0.000000
flu shots do not work 0.000000
severe brain damage 0.000000
guinea pigs 0.000000
trigger 0.000000
War on Poverty 0.000000
reproductive toxin 0.000000
the herd 0.000000
healthy people 0.000000
... ...
genetic predisposition 0.005725
SV40 0.005895
drug companies 0.006021
vaccine ingredients 0.006164
shingles vaccine 0.006205
pandemic H1N1 swine flu vaccine 0.006207
vaccine court 0.006507
measles 0.006626
scientific fraud 0.006837
people 0.007292
toxic chemical ingredients 0.007512
Big Pharma 0.007582
informed consent 0.007617
vaccine injuries 0.008023
mandatory vaccines 0.008129
Vaccine Injury Compensation Program 0.008603
science 0.009106
doctors 0.010647
vaccination 0.010850
pharmaceutical companies 0.012136
flu shots 0.013237
vaccine damage 0.013714
mercury 0.014344
mainstream media 0.019455
CDC 0.021349
autism 0.026289
thimerosal 0.033104
vaccine industry 0.034441
children 0.036934
vaccines 0.068920

1257 rows × 1 columns


In [77]:
# closeness centrality per node, ascending (highest values at the bottom)
cc_df


Out[77]:
closeness cent
representative 0.000000
Bill of Rights 0.000000
complicated 0.000000
better diagnosis 0.000000
paid damages 0.000000
vaccine consent forms 0.000000
Guillian-Barre Syndrome 0.000000
regional lymphadenopathy 0.000000
greater severity of autism 0.000000
chickenpox 0.000000
scary 0.000000
flu-related hospitalizations 0.000000
world's healthiest children 0.000000
scare tactic 0.000000
deliberate release of pathogens 0.000000
vaccine pushers 0.000000
internet's truth-based alternative 0.000000
outbreak 0.000000
pain and suffering 0.000000
sinister revenue model 0.000000
measles rate 0.000000
to vaccinate children 0.000000
antibodies 0.000000
vaccine risks 0.000000
medical ethics 0.000000
conscientious belief exemption 0.000000
issue 0.000000
seizures 0.000000
tune 0.000000
personal exemption 0.000000
... ...
United States 0.122897
compromised immunity 0.123528
Rep. Bill Posey 0.123779
Big Tobacco 0.123915
Dr. Paul Offit 0.124378
government 0.125477
hepatitis B vaccine 0.125622
antibiotics 0.125719
vaccine story 0.125719
vaccine-injured children 0.126696
National Vaccine Injury Compensation Program 0.126868
Jim Carrey 0.127171
Big Pharma 0.127289
science 0.127938
children 0.128594
people 0.129052
genetically susceptible children 0.130185
autism 0.131710
vaccination 0.132030
pharmaceutical companies 0.133217
doctors 0.133928
flu shots 0.134093
SB 277 0.135881
vaccine damage 0.137717
mercury 0.139009
thimerosal 0.147097
vaccines 0.147697
mainstream media 0.151329
CDC 0.154337
vaccine industry 0.158231

1257 rows × 1 columns


In [ ]: