Neutral network:

  • Jaccard coefficient
  • Adamic–Adar index
  • closeness vitality
  • PageRank

In [15]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Cap rendered DataFrames at 20 rows so long result tables are truncated.
pd.set_option('display.max_rows', 20)

In [2]:
# Load the multigraph M from its GML export.
M = nx.read_gml('../output/network/u_Gc_neutral2.gml')

# Collapse M into a simple weighted graph G: parallel edges between the same
# pair of nodes are merged into a single edge whose weight is the sum of
# theirs. An edge that carries no 'weight' attribute counts as 1.0.
G = nx.Graph()
# edges(data=True) yields (u, v, attribute-dict) triples and, unlike
# edges_iter, is not tied to the networkx 1.x API.
for u, v, data in M.edges(data=True):
    w = data.get('weight', 1.0)
    if G.has_edge(u, v):
        G[u][v]['weight'] += w
    else:
        G.add_edge(u, v, weight=w)

# Summaries of both graphs, separated by a blank line. The single-argument
# print(...) form parses identically under Python 2 and Python 3.
print(nx.info(M) + '\n')
print(nx.info(G))


Name: undirected Gc
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263 

Name: 
Type: Graph
Number of nodes: 171
Number of edges: 199
Average degree:   2.3275

In [3]:
## jaccard coefficient

# Jaccard link-prediction scores on the simple graph G. Each result is a
# (u, v, score) triple; the generator is materialised into a list so the
# scores can be tabulated in a later cell.
jc = list(nx.jaccard_coefficient(G))

In [16]:
# One row per scored node pair: endpoints u, v and their Jaccard score.
df = pd.DataFrame(jc, columns=['u', 'v', 'jaccard'])
# Keep only pairs with a non-zero score. A boolean mask through .loc selects
# the same rows the deprecated .ix indexer did.
subset_df = df.loc[df['jaccard'] != 0, :]
# Display the surviving pairs, highest score first (subset_df itself stays unsorted).
subset_df.sort_values('jaccard', ascending=False)


Out[16]:
u v jaccard
9978 for-profit imperfect 1.000000
2520 inflammatory disorders immune disorders 1.000000
7357 voluntary pharmaceuticals 1.000000
7345 voluntary miracles 1.000000
7325 voluntary childhood illnesses 1.000000
7322 voluntary Pez dispensers 1.000000
7310 voluntary for-profit 1.000000
11458 infectious disease Senate Education Committee 1.000000
7173 bacteria PLOS Computational Biology 1.000000
11489 infectious disease California 1.000000
... ... ... ...
13644 protested SB 277 0.041667
1442 scientist SB 277 0.041667
5388 Disneyland measles outbreak SB 277 0.041667
10620 options SB 277 0.041667
13759 SB 277 students 0.041667
13754 SB 277 concerns 0.041667
4849 parental choice SB 277 0.041667
13768 SB 277 whole-cell vaccine 0.037037
7791 vaccines pertussis 0.033333
7853 vaccines pertussis vaccine 0.033333

939 rows × 3 columns


In [5]:
# save jaccard
#subset_df.to_csv('jaccard_neutral.csv')

In [6]:
## adamic adar index

# Adamic-Adar link-prediction scores on the simple graph G. Each result is a
# (u, v, score) triple; the generator is materialised into a list so the
# scores can be tabulated in a later cell.
aa = list(nx.adamic_adar_index(G))

In [17]:
# One row per scored node pair: endpoints u, v and their Adamic-Adar score.
df = pd.DataFrame(aa, columns=['u', 'v', 'adamic_adar'])
# Keep only pairs with a non-zero score. A boolean mask through .loc selects
# the same rows the deprecated .ix indexer did.
subset_df = df.loc[df['adamic_adar'] != 0, :]
# Display the surviving pairs, highest score first (subset_df itself stays unsorted).
subset_df.sort_values('adamic_adar', ascending=False)


Out[17]:
u v adamic_adar
2605 Dwoskin Family Foundation vaccines 4.328085
2309 Chris Christie Rand Paul 2.885390
9829 proposed restrictions home-school 2.667592
1646 children parents 2.667592
9383 parents with vaccine-injured children anti-vaccination 2.352934
13757 SB 277 pertussis vaccine 2.000806
1859 Dr. Paul Offit pertussis increase 1.832566
6559 waning effectiveness pertussis vaccine 1.820478
8310 Republican president 1.820478
407 Generation Rescue anti-vaccination 1.531574
... ... ... ...
2804 parents Saron Runner 0.314658
11525 infectious disease Tina Kimmel 0.314658
11488 infectious disease personal belief exemption 0.314658
11523 infectious disease Senate committee 0.314658
11516 infectious disease Saron Runner 0.314658
11514 infectious disease changes 0.314658
11497 infectious disease home-school 0.314658
11490 infectious disease Chairwoman Carol Liu 0.314658
11489 infectious disease California 0.314658
7506 vaccinated children public education 0.314658

939 rows × 3 columns


In [8]:
# save adamic adar
#subset_df.to_csv('adamic_neutral.csv')

In [18]:
# closeness vitality
# For a node, this is the change in the sum of distances between all node
# pairs when that node is excluded.

cv = nx.closeness_vitality(M)

# Turn the node -> value mapping into a one-column frame indexed by node.
# from_dict labels that single column 0, so give it a readable name.
cv_df = (
    pd.DataFrame.from_dict(cv, orient='index')
      .rename(columns={0: 'closeness vitality'})
)
# Display nodes from highest to lowest vitality.
cv_df.sort_values('closeness vitality', ascending=False)


Out[18]:
closeness vitality
vaccines 127564.0
Dwoskin Family Foundation 109972.0
vaccine-autism link 100468.0
SB 277 49768.0
acellular pertussis vaccine 48048.0
artificial vaccine 43430.0
anti-vaccination 41638.0
Generation Rescue 37594.0
immune response 34424.0
Focus for Health 32640.0
... ...
cognitive disorders 1606.0
adolescents 1604.0
Jenny McCarthy 1592.0
celebrities 1550.0
effective 1436.0
children -940.0
protection -1624.0
side effects -1708.0
whole-cell vaccine -2036.0
infants -2528.0

171 rows × 1 columns


In [10]:
# save closeness vitality
#cv_df.to_csv('cv_neutral.csv')

In [19]:
# link analysis: page rank
# PageRank ranks nodes according to the structure of their incoming links.
# NOTE(review): pagerank_numpy is absent from recent networkx releases;
# confirm the installed version before re-running this cell.

pr = nx.pagerank_numpy(M)

# Turn the node -> score mapping into a one-column frame indexed by node.
# from_dict labels that single column 0, so give it a readable name.
pr_df = (
    pd.DataFrame.from_dict(pr, orient='index')
      .rename(columns={0: 'page rank'})
)
# Display nodes from highest to lowest rank.
pr_df.sort_values('page rank', ascending=False)


Out[19]:
page rank
SB 277 0.056590
vaccines 0.040836
anti-vaccination 0.037711
pertussis 0.032642
pertussis vaccine 0.028755
high-dose flu vaccine 0.023436
Dwoskin Family Foundation 0.017205
acellular pertussis vaccine 0.015689
autism 0.014050
parents 0.013750
... ...
vaccinated 0.002523
committee hearing room 0.002429
California 0.002429
Saron Runner 0.002429
Senate Education Committee 0.002429
infectious disease 0.002429
everyone 0.002429
protest 0.002429
state-required vaccinations 0.002399
immunization 0.002338

171 rows × 1 columns


In [12]:
# save page rank
#pr_df.to_csv('pr_neutral.csv')

In [ ]: