Neutral network:

jaccard coefficient
adamic adar index
closeness vitality
page rank



In [15]:

    
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Cap rendered DataFrames at 20 rows; longer tables are shown truncated.
pd.set_option('display.max_rows', 20)



In [2]:

    
# Read the multigraph M from its GML file.
M = nx.read_gml('../output/network/u_Gc_neutral2.gml')


# Collapse M into a simple graph G: parallel edges between the same pair of
# nodes are merged into a single edge whose weight is the sum of their
# weights (an edge without a 'weight' attribute counts as 1.0).
G = nx.Graph()
# M.edges(data=True) is available in both networkx 1.x and 2.x, whereas
# edges_iter() exists only in 1.x.
for u, v, data in M.edges(data=True):
    w = data.get('weight', 1.0)
    if G.has_edge(u, v):
        G[u][v]['weight'] += w
    else:
        G.add_edge(u, v, weight=w)

# Single-argument print(...) behaves the same under Python 2 and Python 3.
# The appended '\n' leaves a blank line between the two summaries.
print(nx.info(M) + '\n')
print(nx.info(G))









    



Name: undirected Gc
Type: MultiGraph
Number of nodes: 171
Number of edges: 216
Average degree:   2.5263 

Name: 
Type: Graph
Number of nodes: 171
Number of edges: 199
Average degree:   2.3275



In [3]:

    
## jaccard coefficient

# nx.jaccard_coefficient yields (u, v, score) triples. Because no explicit
# list of pairs is passed, networkx scores the node pairs that are not yet
# connected by an edge in G. The generator is materialised into a list so the
# scores can be reused by later cells.
jc = list(nx.jaccard_coefficient(G))



In [16]:

    
# Tabulate the Jaccard scores: one row per scored node pair (u, v).
df = pd.DataFrame(jc, columns=['u', 'v', 'jaccard'])

# Keep only the pairs with a non-zero score. Boolean-mask selection goes
# through .loc because the .ix indexer is deprecated and removed in pandas 1.0.
subset_df = df.loc[df['jaccard'] != 0]

# Display the pairs from highest to lowest Jaccard coefficient.
subset_df.sort_values('jaccard', ascending=False)









    Out[16]:






  
    
      
      u
      v
      jaccard
    
  
  
    
      9978
      for-profit
      imperfect
      1.000000
    
    
      2520
      inflammatory disorders
      immune disorders
      1.000000
    
    
      7357
      voluntary
      pharmaceuticals
      1.000000
    
    
      7345
      voluntary
      miracles
      1.000000
    
    
      7325
      voluntary
      childhood illnesses
      1.000000
    
    
      7322
      voluntary
      Pez dispensers
      1.000000
    
    
      7310
      voluntary
      for-profit
      1.000000
    
    
      11458
      infectious disease
      Senate Education Committee
      1.000000
    
    
      7173
      bacteria
      PLOS Computational Biology
      1.000000
    
    
      11489
      infectious disease
      California
      1.000000
    
    
      ...
      ...
      ...
      ...
    
    
      13644
      protested
      SB 277
      0.041667
    
    
      1442
      scientist
      SB 277
      0.041667
    
    
      5388
      Disneyland measles outbreak
      SB 277
      0.041667
    
    
      10620
      options
      SB 277
      0.041667
    
    
      13759
      SB 277
      students
      0.041667
    
    
      13754
      SB 277
      concerns
      0.041667
    
    
      4849
      parental choice
      SB 277
      0.041667
    
    
      13768
      SB 277
      whole-cell vaccine
      0.037037
    
    
      7791
      vaccines
      pertussis
      0.033333
    
    
      7853
      vaccines
      pertussis vaccine
      0.033333
    
  

939 rows × 3 columns



In [5]:

    
# save jaccard
#subset_df.to_csv('jaccard_neutral.csv')



In [6]:

    
## adamic adar index

# nx.adamic_adar_index yields (u, v, score) triples. Because no explicit list
# of pairs is passed, networkx scores the node pairs that are not yet
# connected by an edge in G. The generator is materialised into a list so the
# scores can be reused by later cells.
aa = list(nx.adamic_adar_index(G))



In [17]:

    
# Tabulate the Adamic-Adar scores: one row per scored node pair (u, v).
df = pd.DataFrame(aa, columns=['u', 'v', 'adamic_adar'])

# Keep only the pairs with a non-zero score. Boolean-mask selection goes
# through .loc because the .ix indexer is deprecated and removed in pandas 1.0.
subset_df = df.loc[df['adamic_adar'] != 0]

# Display the pairs from highest to lowest Adamic-Adar index.
subset_df.sort_values('adamic_adar', ascending=False)









    Out[17]:






  
    
      
      u
      v
      adamic_adar
    
  
  
    
      2605
      Dwoskin Family Foundation
      vaccines
      4.328085
    
    
      2309
      Chris Christie
      Rand Paul
      2.885390
    
    
      9829
      proposed restrictions
      home-school
      2.667592
    
    
      1646
      children
      parents
      2.667592
    
    
      9383
      parents with vaccine-injured children
      anti-vaccination
      2.352934
    
    
      13757
      SB 277
      pertussis vaccine
      2.000806
    
    
      1859
      Dr. Paul Offit
      pertussis increase
      1.832566
    
    
      6559
      waning effectiveness
      pertussis vaccine
      1.820478
    
    
      8310
      Republican
      president
      1.820478
    
    
      407
      Generation Rescue
      anti-vaccination
      1.531574
    
    
      ...
      ...
      ...
      ...
    
    
      2804
      parents
      Saron Runner
      0.314658
    
    
      11525
      infectious disease
      Tina Kimmel
      0.314658
    
    
      11488
      infectious disease
      personal belief exemption
      0.314658
    
    
      11523
      infectious disease
      Senate committee
      0.314658
    
    
      11516
      infectious disease
      Saron Runner
      0.314658
    
    
      11514
      infectious disease
      changes
      0.314658
    
    
      11497
      infectious disease
      home-school
      0.314658
    
    
      11490
      infectious disease
      Chairwoman Carol Liu
      0.314658
    
    
      11489
      infectious disease
      California
      0.314658
    
    
      7506
      vaccinated children
      public education
      0.314658
    
  

939 rows × 3 columns



In [8]:

    
# save adamic adar
#subset_df.to_csv('adamic_neutral.csv')



In [18]:

    
## closeness vitality
# The closeness vitality of a node is the change in the sum of distances
# between all node pairs when that node is excluded.

# Computed on the multigraph M; the result is a dict keyed by node.
cv = nx.closeness_vitality(M)

# One row per node (dict keys become the index) with a single score column.
cv_df = pd.DataFrame.from_dict(cv, orient='index')
cv_df.columns = ['closeness vitality']

# Display the nodes from highest to lowest closeness vitality.
cv_df.sort_values('closeness vitality', ascending=False)









    Out[18]:






  
    
      
      closeness vitality
    
  
  
    
      vaccines
      127564.0
    
    
      Dwoskin Family Foundation
      109972.0
    
    
      vaccine-autism link
      100468.0
    
    
      SB 277
      49768.0
    
    
      acellular pertussis vaccine
      48048.0
    
    
      artificial vaccine
      43430.0
    
    
      anti-vaccination
      41638.0
    
    
      Generation Rescue
      37594.0
    
    
      immune response
      34424.0
    
    
      Focus for Health
      32640.0
    
    
      ...
      ...
    
    
      cognitive disorders
      1606.0
    
    
      adolescents
      1604.0
    
    
      Jenny McCarthy
      1592.0
    
    
      celebrities
      1550.0
    
    
      effective
      1436.0
    
    
      children
      -940.0
    
    
      protection
      -1624.0
    
    
      side effects
      -1708.0
    
    
      whole-cell vaccine
      -2036.0
    
    
      infants
      -2528.0
    
  

171 rows × 1 columns



In [10]:

    
# save closeness vitality
#cv_df.to_csv('cv_neutral.csv')



In [19]:

    
## link analysis: page rank
# PageRank ranks nodes based on the structure of their incoming links.

# Computed on the multigraph M; the result is a dict keyed by node.
pr = nx.pagerank_numpy(M)

# One row per node (dict keys become the index) with a single score column.
pr_df = pd.DataFrame.from_dict(pr, orient='index')
pr_df.columns = ['page rank']

# Display the nodes from highest to lowest PageRank.
pr_df.sort_values('page rank', ascending=False)









    Out[19]:






  
    
      
      page rank
    
  
  
    
      SB 277
      0.056590
    
    
      vaccines
      0.040836
    
    
      anti-vaccination
      0.037711
    
    
      pertussis
      0.032642
    
    
      pertussis vaccine
      0.028755
    
    
      high-dose flu vaccine
      0.023436
    
    
      Dwoskin Family Foundation
      0.017205
    
    
      acellular pertussis vaccine
      0.015689
    
    
      autism
      0.014050
    
    
      parents
      0.013750
    
    
      ...
      ...
    
    
      vaccinated
      0.002523
    
    
      committee hearing room
      0.002429
    
    
      California
      0.002429
    
    
      Saron Runner
      0.002429
    
    
      Senate Education Committee
      0.002429
    
    
      infectious disease
      0.002429
    
    
      everyone
      0.002429
    
    
      protest
      0.002429
    
    
      state-required vaccinations
      0.002399
    
    
      immunization
      0.002338
    
  

171 rows × 1 columns



In [12]:

    
# save page rank
#pr_df.to_csv('pr_neutral.csv')



In [ ]:

	u	v	jaccard
9978	for-profit	imperfect	1.000000
2520	inflammatory disorders	immune disorders	1.000000
7357	voluntary	pharmaceuticals	1.000000
7345	voluntary	miracles	1.000000
7325	voluntary	childhood illnesses	1.000000
7322	voluntary	Pez dispensers	1.000000
7310	voluntary	for-profit	1.000000
11458	infectious disease	Senate Education Committee	1.000000
7173	bacteria	PLOS Computational Biology	1.000000
11489	infectious disease	California	1.000000
...	...	...	...
13644	protested	SB 277	0.041667
1442	scientist	SB 277	0.041667
5388	Disneyland measles outbreak	SB 277	0.041667
10620	options	SB 277	0.041667
13759	SB 277	students	0.041667
13754	SB 277	concerns	0.041667
4849	parental choice	SB 277	0.041667
13768	SB 277	whole-cell vaccine	0.037037
7791	vaccines	pertussis	0.033333
7853	vaccines	pertussis vaccine	0.033333

	u	v	adamic_adar
2605	Dwoskin Family Foundation	vaccines	4.328085
2309	Chris Christie	Rand Paul	2.885390
9829	proposed restrictions	home-school	2.667592
1646	children	parents	2.667592
9383	parents with vaccine-injured children	anti-vaccination	2.352934
13757	SB 277	pertussis vaccine	2.000806
1859	Dr. Paul Offit	pertussis increase	1.832566
6559	waning effectiveness	pertussis vaccine	1.820478
8310	Republican	president	1.820478
407	Generation Rescue	anti-vaccination	1.531574
...	...	...	...
2804	parents	Saron Runner	0.314658
11525	infectious disease	Tina Kimmel	0.314658
11488	infectious disease	personal belief exemption	0.314658
11523	infectious disease	Senate committee	0.314658
11516	infectious disease	Saron Runner	0.314658
11514	infectious disease	changes	0.314658
11497	infectious disease	home-school	0.314658
11490	infectious disease	Chairwoman Carol Liu	0.314658
11489	infectious disease	California	0.314658
7506	vaccinated children	public education	0.314658

	closeness vitality
vaccines	127564.0
Dwoskin Family Foundation	109972.0
vaccine-autism link	100468.0
SB 277	49768.0
acellular pertussis vaccine	48048.0
artificial vaccine	43430.0
anti-vaccination	41638.0
Generation Rescue	37594.0
immune response	34424.0
Focus for Health	32640.0
...	...
cognitive disorders	1606.0
adolescents	1604.0
Jenny McCarthy	1592.0
celebrities	1550.0
effective	1436.0
children	-940.0
protection	-1624.0
side effects	-1708.0
whole-cell vaccine	-2036.0
infants	-2528.0

	page rank
SB 277	0.056590
vaccines	0.040836
anti-vaccination	0.037711
pertussis	0.032642
pertussis vaccine	0.028755
high-dose flu vaccine	0.023436
Dwoskin Family Foundation	0.017205
acellular pertussis vaccine	0.015689
autism	0.014050
parents	0.013750
...	...
vaccinated	0.002523
committee hearing room	0.002429
California	0.002429
Saron Runner	0.002429
Senate Education Committee	0.002429
infectious disease	0.002429
everyone	0.002429
protest	0.002429
state-required vaccinations	0.002399
immunization	0.002338