neutral graph


In [1]:
# 1_network_df

import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import os
from glob import glob

# Plot styling, plus wide DataFrame display so the node tables below are not wrapped.
plt.style.use('ggplot')
pd.set_option('display.width', 5000) 
pd.set_option('display.max_columns', 60)

# load
# The pattern contains no wildcard, so glob() yields either a one-element list
# or, when the file is missing, an empty list (no error is raised here).
gml_files = glob('../output/network/article_u_neu.gml')

undirected graph


In [2]:
def calculate_graph_inf(graph, name=None):
    """Label ``graph`` and print its networkx summary.

    Parameters
    ----------
    graph : networkx graph
        Graph to describe. Its ``name`` attribute is overwritten.
    name : str, optional
        Label to assign to the graph. When omitted, the notebook-level
        ``filename`` variable (set by the loading loops) is used, which is
        what the existing one-argument calls rely on.
    """
    # Only fall back to the global when no explicit name is supplied, so the
    # function no longer *requires* hidden notebook state to be usable.
    graph.name = filename if name is None else name
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(nx.info(graph))

def highest_centrality(cent_dict):
    """Return the ``(node, value)`` pair with the largest centrality value.

    Parameters
    ----------
    cent_dict : dict
        Mapping of node -> centrality value.

    Returns
    -------
    tuple
        ``(node, value)``. When several nodes share the largest value, the
        node that compares highest is returned (same tie-break as sorting
        ``(value, node)`` pairs in descending order).

    Raises
    ------
    IndexError
        If ``cent_dict`` is empty.
    """
    if not cent_dict:
        # Same exception type as indexing the empty sorted list used to raise.
        raise IndexError("highest_centrality() received an empty centrality dictionary")
    # .items() exists on both Python 2 and 3 (iteritems() is Python 2 only).
    node, value = max(cent_dict.items(), key=lambda item: (item[1], item[0]))
    return (node, value)

In [5]:
# ugraph = undirected; dgraph = directed
# Load every graph matched by the glob and print its summary. Only the last
# graph read stays bound to `ugraph` / `filename` for the cells that follow.
for graph_num, gml_graph in enumerate(gml_files):
    ugraph = nx.read_gml(gml_graph)
    # `filename` must be assigned before calculate_graph_inf() runs: that
    # function reads it as a global to label the graph.
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 10)
    print(gml_graph)
    calculate_graph_inf(ugraph)


----------
../output/network/article_u_neu.gml
Name: article_u_neu.gml
Type: MultiGraph
Number of nodes: 201
Number of edges: 236
Average degree:   2.3483

3. all nodes table


In [6]:
# 2_node_df: list all nodes and centrality
# `data` starts as an empty (name, sentiment) frame and `combined_df` as an
# empty frame; both are extended by the next cell.
data_columns = ['name',
                'sentiment'
                ]
data = pd.DataFrame(columns = data_columns)
combined_df = pd.DataFrame()

In [7]:
# Build the per-node table (degree plus three centralities) for the full
# undirected graph. Each centrality is computed exactly once; the earlier
# version of this cell evaluated all three twice.
sent = "neutral"
graph_values = {'name':filename,
                'sentiment':sent
                }
# pd.concat replaces DataFrame.append, which newer pandas releases removed.
data = pd.concat([data, pd.DataFrame([graph_values], columns = data.columns)],
                 ignore_index=True)

# node degree; dict(...) accepts both a plain dict and a (node, degree) view
degree = dict(nx.degree(ugraph))
deg_df = pd.DataFrame.from_dict(degree, orient = 'index')
deg_df.columns = ['degree']

# degree centrality
deg_cent = nx.degree_centrality(ugraph)
dc_df = pd.DataFrame.from_dict(deg_cent, orient = 'index')
dc_df.columns = ['deg cent']

# betweenness centrality
bet_cent = nx.betweenness_centrality(ugraph)
bc_df = pd.DataFrame.from_dict(bet_cent, orient = 'index')
bc_df.columns = ['bet cent']

# closeness centrality
clo_cent = nx.closeness_centrality(ugraph)
cc_df = pd.DataFrame.from_dict(clo_cent, orient = 'index')
cc_df.columns = ['clo cent']

# concat node frames into node_df (aligned on the node index)
frames = [deg_df, dc_df, bc_df, cc_df]
node_df = pd.concat(frames, axis = 1)
node_df.index.name = 'node'
node_df = node_df.reset_index()

# one-row frame holding the graph-level labels
values = pd.DataFrame(graph_values, columns = ('name', 'sentiment'), index = [0])

# df = merges graph_values with node_df for single graph and fill NaNs:
# only row 0 carries name/sentiment after the concat, so forward-fill
# propagates them to every node row.
df = pd.concat([values, node_df], axis = 1)
df = df.ffill()
combined_df = pd.concat([combined_df, df])

In [8]:
# print entire network
combined_df


Out[8]:
name sentiment node degree deg cent bet cent clo cent
0 article_u_neu.gml neutral 7-11 year olds 2 0.010 0.015611 0.147149
1 article_u_neu.gml neutral Alfred and Lisa Claire Dwoskin 1 0.005 0.000000 0.138278
2 article_u_neu.gml neutral Americans 1 0.005 0.000000 0.130533
3 article_u_neu.gml neutral Andrew Wakefield 5 0.025 0.101658 0.133796
4 article_u_neu.gml neutral Australia 1 0.005 0.000000 0.131723
5 article_u_neu.gml neutral Barry Segal 1 0.005 0.000000 0.093953
6 article_u_neu.gml neutral Ben Allen 4 0.020 0.000034 0.132569
7 article_u_neu.gml neutral CDC 4 0.020 0.070193 0.160556
8 article_u_neu.gml neutral California 1 0.005 0.000000 0.132084
9 article_u_neu.gml neutral Chairwoman Carol Liu 2 0.010 0.008492 0.132326
10 article_u_neu.gml neutral Children's Hospital of Philadelphia 1 0.005 0.000000 0.130533
11 article_u_neu.gml neutral Chris Christie 3 0.015 0.008518 0.118540
12 article_u_neu.gml neutral Connie Leyva 3 0.015 0.000067 0.132569
13 article_u_neu.gml neutral Disneyland measles outbreak 1 0.005 0.000000 0.114865
14 article_u_neu.gml neutral Dr. Anna Acosta 2 0.010 0.025176 0.135936
15 article_u_neu.gml neutral Dr. Paul Offit 3 0.015 0.012663 0.154051
16 article_u_neu.gml neutral Drew Downing 1 0.005 0.000000 0.005000
17 article_u_neu.gml neutral Dwoskin Family Foundation 8 0.040 0.295402 0.164954
18 article_u_neu.gml neutral Focus for Health 4 0.020 0.073568 0.105551
19 article_u_neu.gml neutral Gambhir study 3 0.015 0.000151 0.015000
20 article_u_neu.gml neutral Generation Rescue 4 0.020 0.088618 0.118637
21 article_u_neu.gml neutral Immunization Action Coalition 1 0.005 0.000000 0.135173
22 article_u_neu.gml neutral Jenny McCarthy 3 0.015 0.024020 0.119619
23 article_u_neu.gml neutral Lisa and J.B. Handley 1 0.005 0.000000 0.104182
24 article_u_neu.gml neutral MMR vaccine 1 0.005 0.000000 0.085251
25 article_u_neu.gml neutral National Vaccine Information Center 2 0.010 0.008492 0.138543
26 article_u_neu.gml neutral PLOS Computational Biology 1 0.005 0.000000 0.143211
27 article_u_neu.gml neutral Pez dispensers 1 0.005 0.000000 0.161093
28 article_u_neu.gml neutral Rand Paul 3 0.015 0.033392 0.162542
29 article_u_neu.gml neutral Renee Gentry 1 0.005 0.000000 0.005000
... ... ... ... ... ... ... ...
171 article_u_neu.gml neutral teens 1 0.005 0.000000 0.131723
172 article_u_neu.gml neutral the sake of being anti-vaccine 1 0.005 0.000000 0.116626
173 article_u_neu.gml neutral upward trend 1 0.005 0.000000 0.130298
174 article_u_neu.gml neutral vaccinated 1 0.005 0.000000 0.110053
175 article_u_neu.gml neutral vaccinated children 2 0.010 0.008492 0.132326
176 article_u_neu.gml neutral vaccination 5 0.025 0.038961 0.149431
177 article_u_neu.gml neutral vaccination exemption 3 0.015 0.008492 0.132326
178 article_u_neu.gml neutral vaccination schedule 1 0.005 0.000000 0.009000
179 article_u_neu.gml neutral vaccine allergy 1 0.005 0.000000 0.005000
180 article_u_neu.gml neutral vaccine choice 1 0.005 0.000000 0.104107
181 article_u_neu.gml neutral vaccine concerns 1 0.005 0.000000 0.119224
182 article_u_neu.gml neutral vaccine debate 4 0.020 0.017035 0.104786
183 article_u_neu.gml neutral vaccine duration 2 0.010 0.014639 0.160734
184 article_u_neu.gml neutral vaccine efficacy 3 0.015 0.008492 0.153397
185 article_u_neu.gml neutral vaccine refusal 3 0.015 0.000000 0.110053
186 article_u_neu.gml neutral vaccine risk 3 0.015 0.008492 0.116909
187 article_u_neu.gml neutral vaccine safety 2 0.010 0.003857 0.146701
188 article_u_neu.gml neutral vaccine-autism link 4 0.020 0.261307 0.149896
189 article_u_neu.gml neutral vaccine-injured children 3 0.015 0.008342 0.117099
190 article_u_neu.gml neutral vaccine-preventable diseases 1 0.005 0.000000 0.102847
191 article_u_neu.gml neutral vaccines 19 0.095 0.414983 0.198489
192 article_u_neu.gml neutral vaccines cause childhood illnesses 1 0.005 0.000000 0.138278
193 article_u_neu.gml neutral voluntary 1 0.005 0.000000 0.161093
194 article_u_neu.gml neutral vomiting 1 0.005 0.000000 0.131723
195 article_u_neu.gml neutral waning effectiveness 2 0.010 0.000101 0.131006
196 article_u_neu.gml neutral waning immunity 1 0.005 0.000000 0.153397
197 article_u_neu.gml neutral wealthy family foundations 1 0.005 0.000000 0.093346
198 article_u_neu.gml neutral whole-cell vaccine 5 0.025 0.108918 0.181305
199 article_u_neu.gml neutral whooping cough 1 0.005 0.000000 0.131723
200 article_u_neu.gml neutral young people 1 0.005 0.000000 0.114774

201 rows × 7 columns


In [ ]:
# save
#combined_df.to_csv('../output/df/article_u_neu.csv')

4. Draw undirected network


In [9]:
# 7_graph_calculation
def drawIt(graph, what = 'graph'):
    """Draw ``graph`` with a spring layout and announce its size.

    Parameters
    ----------
    graph : networkx graph
        Graph (or component subgraph) to draw.
    what : str, optional
        Word used in the printed message, e.g. ``'graph'`` or ``'component'``.

    Graphs with more than 20 nodes get a dedicated 10x10 figure; graphs with
    more than 40 nodes are additionally drawn with small nodes and a fixed
    font size so labels stay legible.
    """
    nsize = graph.number_of_nodes()
    # %-formatting inside a single-argument print(...) prints the same text
    # on Python 2 and Python 3.
    print("Drawing %s of size %s:" % (what, nsize))

    if nsize > 20:
        plt.figure(figsize=(10, 10))

    if nsize > 40:
        nx.draw_spring(graph, with_labels = True, node_size = 70, font_size = 12)
    else:
        # Small and medium graphs use the default node/font sizes.
        nx.draw_spring(graph, with_labels = True)
    plt.show()

# for undirected graphs
def describeGraph(graph):
    """Print edge/node/component counts, then draw the graph and each component.

    Parameters
    ----------
    graph : networkx graph
        Undirected graph to summarise. Components are drawn from largest to
        smallest.
    """
    components = sorted(nx.connected_components(graph), key = len, reverse = True)
    # The component count comes straight from the sorted list; no per-component
    # subgraph copies are needed just to count them.
    params = (graph.number_of_edges(), graph.number_of_nodes(), len(components))
    print("Graph has %s edges, %s nodes, %s connected components\n" % params)
    drawIt(graph)
    for sub in components:
        drawIt(graph.subgraph(sub), what = 'component')

In [10]:
# UNDIRECTED network graph
describeGraph(ugraph)


Graph has 236 edges, 201 nodes, 12 connected components

Drawing graph of size 201:
Drawing component of size 171:
Drawing component of size 4:
Drawing component of size 4:
Drawing component of size 4:
Drawing component of size 3:
Drawing component of size 3:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:
Drawing component of size 2:

Undirected graph components


In [11]:
# list of connected components by size (undirected graph)
# Sizes only (node counts), largest component first.
connected_components = [len(c) for c in sorted(nx.connected_components(ugraph), key=len, reverse=True)]

# generate connected components as subgraphs (undirected graph)
# NOTE(review): `subgraphs` is not read again in the cells shown here — confirm before removing.
subgraphs = list(nx.connected_component_subgraphs(ugraph))

# greatest component (undirected MultiGraph)
# max(..., key=len) selects the component subgraph with the most nodes.
u_Gc = max(nx.connected_component_subgraphs(ugraph), key=len)
u_Gc.name = "undirected Gc"

In [12]:
print "connected components = ", connected_components
print nx.info(u_Gc)


connected components =  [171, 4, 4, 4, 3, 3, 2, 2, 2, 2, 2, 2]
Name: undirected Gc
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263

5. Greatest component graph


In [13]:
# finally, summary of the greatest component of the undirected graph
print nx.info(u_Gc)


Name: undirected Gc
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263

In [ ]:
# save Gc
#nx.write_gml(u_Gc, "../output/network/u_Gc_neutral2.gml")

7. network stats for UNDIRECTED GC


In [14]:
# load UNdirected Gc
# NOTE(review): the nx.write_gml call that would produce u_Gc_neutral2.gml is
# commented out in the preceding "save Gc" cell, so the file must already
# exist on disk; if it does not, glob() returns [] and the loop in the next
# cell silently does nothing.
Gc_files = glob('../output/network/u_Gc_neutral2.gml')

# Columns of the one-row-per-graph summary table filled in by the next cell.
network_data_columns = ['name',
                    'sentiment',
                    '# nodes',
                    '# edges',
                    #'avg deg',
                    'density',
                    'deg assort coef', 
                    'avg deg cent',
                    'avg bet cent',
                    'avg clo cent',
                    'high deg cent',
                    'high bet cent',
                    'high clo cent',
                    'avg node conn'
                    #'# conn comp',
                    #'gc size'
                    ]
network_data = pd.DataFrame(columns = network_data_columns)

In [15]:
# Gc_files
# Build one summary row per greatest-component graph.
for graph_num, gml_graph in enumerate(Gc_files):
    graph = nx.read_gml(gml_graph)
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 10)
    print(gml_graph)
    calculate_graph_inf(graph)

    # calculate variables
    sent = "neu"
    nodes = nx.number_of_nodes(graph)
    edges = nx.number_of_edges(graph)
    density = float("{0:.4f}".format(nx.density(graph)))

    # Each centrality is computed once and reused for both the mean and the
    # top-ranked node (previously every centrality ran twice per graph).
    deg_cen = nx.degree_centrality(graph)
    bet_cen = nx.betweenness_centrality(graph)
    clo_cen = nx.closeness_centrality(graph)

    # list(...) is needed on Python 3, where dict.values() is a view that
    # np.array would otherwise wrap as a single object.
    avg_deg_cen = np.array(list(deg_cen.values())).mean()
    avg_bet_cen = np.array(list(bet_cen.values())).mean()
    avg_clo_cen = np.array(list(clo_cen.values())).mean()
    #avg_deg = float("{0:.4f}".format(in_deg + out_deg))
    avg_node_con = float("{0:.4f}".format((nx.average_node_connectivity(graph))))
    deg_assort_coeff = float("{0:.4f}".format((nx.degree_assortativity_coefficient(graph))))
    #conn_comp = nx.number_weakly_connected_components(graph) # ugraph
    highest_deg_cen = highest_centrality(deg_cen)
    highest_bet_cen = highest_centrality(bet_cen)
    highest_clo_cen = highest_centrality(clo_cen)
    #Gc = len(max(nx.weakly_connected_component_subgraphs(graph), key=len))

    # save variables into list
    graph_values = {'name':filename,
                    'sentiment':sent,
                    '# nodes':nodes,
                    '# edges':edges,
                    #'avg deg':avg_deg,
                    'density':density,
                    'deg assort coef':deg_assort_coeff,
                    'avg deg cent':"%.4f" % avg_deg_cen,
                    'avg bet cent':"%.4f" % avg_bet_cen,
                    'avg clo cent':"%.4f" % avg_clo_cen,
                    'high deg cent':highest_deg_cen,
                    'high bet cent':highest_bet_cen,
                    'high clo cent':highest_clo_cen,
                    'avg node conn':avg_node_con
                    #'# conn comp':conn_comp,
                    #'gc size':Gc
                    }
    # pd.concat replaces DataFrame.append, which newer pandas releases removed.
    # Reusing network_data.columns keeps the declared column order.
    network_data = pd.concat(
        [network_data, pd.DataFrame([graph_values], columns = network_data.columns)],
        ignore_index=True)


----------
../output/network/u_Gc_neutral2.gml
Name: u_Gc_neutral2.gml
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263

In [16]:
# print network data for greatest component
network_data


Out[16]:
name sentiment # nodes # edges density deg assort coef avg deg cent avg bet cent avg clo cent high deg cent high bet cent high clo cent avg node conn
0 u_Gc_neutral2.gml neu 171.0 216.0 0.0149 -0.3055 0.0149 0.0342 0.1533 (SB 277, 0.182352941176) (vaccines, 0.574881076691) (vaccines, 0.233516483516) 1.035

In [ ]:
# save
#network_data.to_csv('../output/df/u_Gc_neu2.csv')

Gc nodes table (undirected)


In [17]:
gml_files = glob('../output/network/u_Gc_neutral2.gml')

In [18]:
# 2_node_df: list all nodes and centrality
# Rebinds `data` to a fresh, empty (name, sentiment) frame for the
# greatest-component pass, replacing whatever the full-graph pass stored in it.
data_columns = ['name',
                'sentiment'
                ]
data = pd.DataFrame(columns = data_columns)
#combined_df = pd.DataFrame()

In [19]:
# Build the per-node table (degree plus three centralities) for each
# greatest-component graph. After the loop, `df` holds the table of the last
# graph processed.
for graph_num, gml_graph in enumerate(gml_files):
    graph = nx.read_gml(gml_graph)
    (filepath, filename) = os.path.split(gml_graph)
    print('-' * 10)
    print(gml_graph)
    calculate_graph_inf(graph)

    # calculate variables and save into list
    sent = "neu"
    graph_values = {'name':filename,
                    'sentiment':sent
                    }
    # pd.concat replaces DataFrame.append, which newer pandas releases removed.
    data = pd.concat([data, pd.DataFrame([graph_values], columns = data.columns)],
                     ignore_index=True)

    # node degree; dict(...) accepts both a plain dict and a (node, degree) view
    degree = dict(nx.degree(graph))
    deg_df = pd.DataFrame.from_dict(degree, orient = 'index')
    deg_df.columns = ['degree']
    # degree centrality (each centrality is computed once per graph)
    deg_cent = nx.degree_centrality(graph)
    dc_df = pd.DataFrame.from_dict(deg_cent, orient = 'index')
    dc_df.columns = ['deg cent']
    # betweenness centrality
    bet_cent = nx.betweenness_centrality(graph)
    bc_df = pd.DataFrame.from_dict(bet_cent, orient = 'index')
    bc_df.columns = ['bet cent']
    # closeness centrality
    clo_cent = nx.closeness_centrality(graph)
    cc_df = pd.DataFrame.from_dict(clo_cent, orient = 'index')
    cc_df.columns = ['clo cent']
    # concat node frames into node_df (aligned on the node index)
    frames = [deg_df, dc_df, bc_df, cc_df]
    node_df = pd.concat(frames, axis = 1)
    node_df.index.name = 'node'
    node_df = node_df.reset_index()

    # one-row frame holding the graph-level labels
    values = pd.DataFrame(graph_values, columns = ('name', 'sentiment'), index = [0])

    # df = merges graph_values with node_df for single graph and fill NaNs:
    # only row 0 carries name/sentiment after the concat, so forward-fill
    # propagates them to every node row.
    df = pd.concat([values, node_df], axis = 1)
    df = df.ffill()
    #combined_df = combined_df.append(df)


----------
../output/network/u_Gc_neutral2.gml
Name: u_Gc_neutral2.gml
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263

In [20]:
# print neutral gc nodes
df


Out[20]:
name sentiment node degree deg cent bet cent clo cent
0 u_Gc_neutral2.gml neu 7-11 year olds 2 0.011765 0.021627 0.173116
1 u_Gc_neutral2.gml neu Alfred and Lisa Claire Dwoskin 1 0.005882 0.000000 0.162679
2 u_Gc_neutral2.gml neu Americans 1 0.005882 0.000000 0.153568
3 u_Gc_neutral2.gml neu Andrew Wakefield 5 0.029412 0.140828 0.157407
4 u_Gc_neutral2.gml neu Australia 1 0.005882 0.000000 0.154968
5 u_Gc_neutral2.gml neu Barry Segal 1 0.005882 0.000000 0.110533
6 u_Gc_neutral2.gml neu Ben Allen 4 0.023529 0.000046 0.155963
7 u_Gc_neutral2.gml neu CDC 4 0.023529 0.097239 0.188889
8 u_Gc_neutral2.gml neu California 1 0.005882 0.000000 0.155393
9 u_Gc_neutral2.gml neu Chairwoman Carol Liu 2 0.011765 0.011765 0.155678
10 u_Gc_neutral2.gml neu Children's Hospital of Philadelphia 1 0.005882 0.000000 0.153568
11 u_Gc_neutral2.gml neu Chris Christie 3 0.017647 0.011800 0.139459
12 u_Gc_neutral2.gml neu Connie Leyva 3 0.017647 0.000093 0.155963
13 u_Gc_neutral2.gml neu Disneyland measles outbreak 1 0.005882 0.000000 0.135135
14 u_Gc_neutral2.gml neu Dr. Anna Acosta 2 0.011765 0.034876 0.159925
15 u_Gc_neutral2.gml neu Dr. Paul Offit 3 0.017647 0.017543 0.181237
16 u_Gc_neutral2.gml neu Dwoskin Family Foundation 8 0.047059 0.409224 0.194064
17 u_Gc_neutral2.gml neu Focus for Health 4 0.023529 0.101914 0.124178
18 u_Gc_neutral2.gml neu Generation Rescue 4 0.023529 0.122764 0.139573
19 u_Gc_neutral2.gml neu Immunization Action Coalition 1 0.005882 0.000000 0.159027
20 u_Gc_neutral2.gml neu Jenny McCarthy 3 0.017647 0.033275 0.140728
21 u_Gc_neutral2.gml neu Lisa and J.B. Handley 1 0.005882 0.000000 0.122567
22 u_Gc_neutral2.gml neu MMR vaccine 1 0.005882 0.000000 0.100295
23 u_Gc_neutral2.gml neu National Vaccine Information Center 2 0.011765 0.011765 0.162991
24 u_Gc_neutral2.gml neu PLOS Computational Biology 1 0.005882 0.000000 0.168484
25 u_Gc_neutral2.gml neu Pez dispensers 1 0.005882 0.000000 0.189521
26 u_Gc_neutral2.gml neu Rand Paul 3 0.017647 0.046258 0.191226
27 u_Gc_neutral2.gml neu Republican 2 0.011765 0.011625 0.161290
28 u_Gc_neutral2.gml neu Richard Pan 2 0.011765 0.000046 0.135674
29 u_Gc_neutral2.gml neu SB 277 31 0.182353 0.329352 0.183784
... ... ... ... ... ... ... ...
141 u_Gc_neutral2.gml neu state-required vaccinations 1 0.005882 0.000000 0.171371
142 u_Gc_neutral2.gml neu students 1 0.005882 0.000000 0.134814
143 u_Gc_neutral2.gml neu teens 1 0.005882 0.000000 0.154968
144 u_Gc_neutral2.gml neu the sake of being anti-vaccine 1 0.005882 0.000000 0.137207
145 u_Gc_neutral2.gml neu upward trend 1 0.005882 0.000000 0.153291
146 u_Gc_neutral2.gml neu vaccinated 1 0.005882 0.000000 0.129474
147 u_Gc_neutral2.gml neu vaccinated children 2 0.011765 0.011765 0.155678
148 u_Gc_neutral2.gml neu vaccination 5 0.029412 0.053974 0.175801
149 u_Gc_neutral2.gml neu vaccination exemption 3 0.017647 0.011765 0.155678
150 u_Gc_neutral2.gml neu vaccine choice 1 0.005882 0.000000 0.122478
151 u_Gc_neutral2.gml neu vaccine concerns 1 0.005882 0.000000 0.140264
152 u_Gc_neutral2.gml neu vaccine debate 4 0.023529 0.023599 0.123278
153 u_Gc_neutral2.gml neu vaccine duration 2 0.011765 0.020279 0.189099
154 u_Gc_neutral2.gml neu vaccine efficacy 3 0.017647 0.011765 0.180467
155 u_Gc_neutral2.gml neu vaccine refusal 3 0.017647 0.000000 0.129474
156 u_Gc_neutral2.gml neu vaccine risk 3 0.017647 0.011765 0.137540
157 u_Gc_neutral2.gml neu vaccine safety 2 0.011765 0.005344 0.172589
158 u_Gc_neutral2.gml neu vaccine-autism link 4 0.023529 0.361991 0.176349
159 u_Gc_neutral2.gml neu vaccine-injured children 3 0.017647 0.011556 0.137763
160 u_Gc_neutral2.gml neu vaccine-preventable diseases 1 0.005882 0.000000 0.120996
161 u_Gc_neutral2.gml neu vaccines 19 0.111765 0.574881 0.233516
162 u_Gc_neutral2.gml neu vaccines cause childhood illnesses 1 0.005882 0.000000 0.162679
163 u_Gc_neutral2.gml neu voluntary 1 0.005882 0.000000 0.189521
164 u_Gc_neutral2.gml neu vomiting 1 0.005882 0.000000 0.154968
165 u_Gc_neutral2.gml neu waning effectiveness 2 0.011765 0.000139 0.154125
166 u_Gc_neutral2.gml neu waning immunity 1 0.005882 0.000000 0.180467
167 u_Gc_neutral2.gml neu wealthy family foundations 1 0.005882 0.000000 0.109819
168 u_Gc_neutral2.gml neu whole-cell vaccine 5 0.029412 0.150886 0.213300
169 u_Gc_neutral2.gml neu whooping cough 1 0.005882 0.000000 0.154968
170 u_Gc_neutral2.gml neu young people 1 0.005882 0.000000 0.135028

171 rows × 7 columns


In [ ]:
# save
#df.to_csv('../output/df/u_Gc_nodes_neu2.csv')

full network node centrality (undirected)


In [21]:
# make sure you're using the right graph
print "gml_files = ", gml_files
print "gml_graph = ", gml_graph


gml_files =  ['../output/network/u_Gc_neutral2.gml']
gml_graph =  ../output/network/u_Gc_neutral2.gml

In [ ]:
# FULL UNDIRECTED
graph = nx.read_gml('../output/network/article_u_neu.gml')
print nx.info(graph)

In [ ]:
# Three single-column tables, one per centrality measure, for the graph
# currently bound to `graph`. sort_values() sorts ascending by default, so
# the most central nodes end up at the bottom of each table.
# NOTE: this rebinds dc_df / bc_df / cc_df, which earlier cells also assign.

# degree centrality
dc = nx.degree_centrality(graph)
dc_df = pd.DataFrame.from_dict(dc, orient = 'index')
dc_df.columns = ['degree cent']
dc_df = dc_df.sort_values(by = ['degree cent'])
#dc_df

# betweenness centrality
bc = nx.betweenness_centrality(graph)
bc_df = pd.DataFrame.from_dict(bc, orient = 'index')
bc_df.columns = ['betweenness cent']
bc_df = bc_df.sort_values(by = ['betweenness cent'])
#bc_df

# closeness centrality
cc = nx.closeness_centrality(graph)
cc_df = pd.DataFrame.from_dict(cc, orient = 'index')
cc_df.columns = ['closeness cent']
cc_df = cc_df.sort_values(by = ['closeness cent'])
#cc_df

In [ ]:
dc_df

In [ ]:
bc_df

In [ ]:
cc_df

Gc node centrality (undirected)


In [22]:
# Gc undirected
graph = nx.read_gml('../output/network/u_Gc_neutral2.gml')
print nx.info(graph)


Name: undirected Gc
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263

In [ ]:
# Same three centrality tables as for the full graph, recomputed for whatever
# `graph` is bound to now (the preceding cell loads the greatest component).
# sort_values() sorts ascending by default, so the most central nodes end up
# at the bottom of each table.

# degree centrality
dc = nx.degree_centrality(graph)
dc_df = pd.DataFrame.from_dict(dc, orient = 'index')
dc_df.columns = ['degree cent']
dc_df = dc_df.sort_values(by = ['degree cent'])
#dc_df

# betweenness centrality
bc = nx.betweenness_centrality(graph)
bc_df = pd.DataFrame.from_dict(bc, orient = 'index')
bc_df.columns = ['betweenness cent']
bc_df = bc_df.sort_values(by = ['betweenness cent'])
#bc_df

# closeness centrality
cc = nx.closeness_centrality(graph)
cc_df = pd.DataFrame.from_dict(cc, orient = 'index')
cc_df.columns = ['closeness cent']
cc_df = cc_df.sort_values(by = ['closeness cent'])
#cc_df

In [ ]:
dc_df

In [ ]:
bc_df

In [ ]:
cc_df

Cutsets (undirected)


In [ ]:
# Gc undirected
graph = nx.read_gml('../output/network/u_Gc_neutral2.gml')
print nx.info(graph)

In [ ]:
print "Greatest component size =", len(graph)

In [ ]:
# returns all minimum k cutsets of an undirected graph
# i.e., the set(s) of nodes of cardinality equal to the node connectivity of G
# thus if removed, would break G into two or more connected components
# (the all_node_cuts call itself is disabled below; only single minimum cuts are computed)

#cutsets = list(nx.all_node_cuts(graph))  # must be undirected

print "Greatest component size =", len(graph)
#print "# of cutsets =", len(cutsets)

# returns a set of nodes or edges of minimum cardinality that disconnects G
min_ncut = nx.minimum_node_cut(graph)
min_ecut = nx.minimum_edge_cut(graph)

print "Min node cut =", min_ncut
print "Min edge cut =", min_ecut

# min cuts with source and target
# NOTE(review): 'vaccines' appears in the node tables above, but 'autism' is
# not among the rows visible in the truncated output — confirm it is a node
# of this graph before relying on these two calls.
print nx.minimum_node_cut(graph, s='vaccines', t='autism')
print nx.minimum_edge_cut(graph, s='vaccines', t='autism')

In [ ]:
# read edge labels in min cut for Gc
# change source and target
a = nx.minimum_edge_cut(graph, s='vaccines', t='autism')
#a = nx.minimum_edge_cut(graph)

# Map each (u, v) node pair to its 'edge' attribute. Only the first two items
# of every attribute key are kept (for a MultiGraph the key also carries the
# edge key), so parallel edges collapse to the last label seen.
labels = nx.get_edge_attributes(graph,'edge')
edgelabels = {}
for e in labels.keys():
    e1 = e[0:2]
    edgelabels[e1]=labels[e]

for e in a:
    # The cut may report an undirected edge as (v, u); try both orientations.
    rev_e = e[::-1]
    if e in edgelabels:
        print("%s %s" % (e, edgelabels[e]))
    elif rev_e in edgelabels:
        print("%s %s" % (rev_e, edgelabels[rev_e]))
    else:
        # No 'edge' attribute in either orientation: report the edge instead
        # of stopping the loop with a KeyError.
        print("%s (no 'edge' label)" % (e,))

Community detection


In [24]:
graph


Out[24]:
<networkx.classes.multigraph.MultiGraph at 0x118828190>

In [ ]: