Negative network:

  • Jaccard coefficient
  • Adamic-Adar index
  • closeness vitality
  • PageRank

In [10]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Limit DataFrame previews to 20 rows; longer tables are shown truncated.
pd.set_option('display.max_rows', 20)

In [2]:
# Read the multigraph M (it can hold several parallel edges per node pair).
M = nx.read_gml('../output/network/u_Gc_negative2.gml')


# Collapse M into a simple weighted graph G: all parallel edges between the
# same two nodes become a single edge whose weight is the sum of their
# weights. An edge without a 'weight' attribute contributes 1.0.
G = nx.Graph()
# edges(data=True) is available in both networkx 1.x and 2.x, whereas
# edges_iter() was removed in networkx 2.0.
for u, v, data in M.edges(data=True):
    w = data.get('weight', 1.0)
    if G.has_edge(u, v):
        G[u][v]['weight'] += w
    else:
        G.add_edge(u, v, weight=w)

# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(nx.info(M) + '\n')
print(nx.info(G))


Name: undirected Gc
Type: MultiGraph
Number of nodes: 1140
Number of edges: 1783
Average degree:   3.1281 

Name: 
Type: Graph
Number of nodes: 1140
Number of edges: 1661
Average degree:   2.9140

In [3]:
## jaccard coefficient

# nx.jaccard_coefficient yields (u, v, score) triples; materialise them in a
# list so they can be iterated more than once (e.g. to build a DataFrame).
# No per-pair loop is needed here: formatting each triple into a string
# without printing or storing it has no effect.
jc = list(nx.jaccard_coefficient(G))

In [11]:
# Tabulate the Jaccard scores, one row per (u, v) pair.
df = pd.DataFrame(jc, columns=['u', 'v', 'jaccard'])
# Keep only pairs with a non-zero score. `.loc` is used for the boolean row
# selection because `.ix` is deprecated and has been removed from pandas.
subset_df = df.loc[df['jaccard'] != 0, :]
# Display the pairs from highest to lowest score.
subset_df.sort_values('jaccard', ascending=False)


Out[11]:
u v jaccard
647499 cancer-causing lung cancer 1.000000
342436 convulsions gene products 1.000000
340885 U.S. measles mortality global population 1.000000
340888 U.S. measles mortality outrageous 1.000000
341027 U.S. measles mortality death from suicide 1.000000
341069 U.S. measles mortality death from congenital malformation 1.000000
341084 U.S. measles mortality death from murder 1.000000
341153 U.S. measles mortality death from heart disease 1.000000
341164 U.S. measles mortality death from congenital disease 1.000000
341322 U.S. measles mortality death from car accident 1.000000
... ... ... ...
529221 vaccines behavior changes 0.009804
529366 vaccines measles decline 0.009804
529157 vaccines pro-vaccination 0.009709
304036 vaccine reactions vaccines 0.009709
240061 hepatitis B vaccines 0.009615
529505 vaccines you 0.009524
174507 vaccinated children vaccines 0.009524
529335 vaccines narcolepsy 0.009434
529147 vaccines measles mortality 0.008850
317145 Merck vaccines 0.008772

18693 rows × 3 columns


In [5]:
# save jaccard
#subset_df.to_csv('jaccard_negative.csv')

In [6]:
## adamic adar index

# nx.adamic_adar_index yields (u, v, score) triples; materialise them in a
# list so they can be iterated more than once (e.g. to build a DataFrame).
# No per-pair loop is needed here: formatting each triple into a string
# without printing or storing it has no effect.
aa = list(nx.adamic_adar_index(G))

In [12]:
# Tabulate the Adamic-Adar scores, one row per (u, v) pair.
df = pd.DataFrame(aa, columns=['u', 'v', 'adamic_adar'])
# Keep only pairs with a non-zero score. `.loc` is used for the boolean row
# selection because `.ix` is deprecated and has been removed from pandas.
subset_df = df.loc[df['adamic_adar'] != 0, :]
# Display the pairs from highest to lowest score.
subset_df.sort_values('adamic_adar', ascending=False)


Out[12]:
u v adamic_adar
203451 sanctions harassment 5.947526
386930 prejudice discrimination 5.034327
88273 sick children disabled children 4.547174
529439 vaccines pharmaceutical companies 4.507343
254211 vaccine decisions young doctors 3.795629
51785 vaccine additives vaccines 3.792387
466766 flu shots vaccines 3.766455
469803 formaldehyde monosodium glutamate 3.744558
417885 doctors pharmaceutical companies 3.666743
204164 science mainstream media 3.605236
... ... ... ...
79066 genetically susceptible children trustworthy 0.219089
79041 genetically susceptible children schedule 0.219089
79037 genetically susceptible children sick children 0.219089
79035 genetically susceptible children compromised immunity 0.219089
79023 genetically susceptible children benefit 0.219089
31838 disability vaccine ingredients 0.219089
78998 genetically susceptible children mentally challenged 0.219089
78990 genetically susceptible children side effects 0.219089
78940 genetically susceptible children toxic substances 0.219089
414305 thimerosal widespread fear 0.219089

18693 rows × 3 columns


In [8]:
# save adamic adar
#subset_df.to_csv('adamic_negative.csv')

In [13]:
# Closeness vitality of a node: how much the sum of distances between all
# node pairs changes when that node is excluded from the graph.

cv = nx.closeness_vitality(M)

# One row per node (the dict keys become the index), one score column.
cv_df = pd.DataFrame.from_dict(cv, orient='index')
cv_df.columns = ['closeness vitality']

# Show nodes from highest to lowest vitality.
cv_df.sort_values('closeness vitality', ascending=False)


Out[13]:
closeness vitality
thimerosal 239154.0
MTHFR C677T defect 222220.0
millions of dollars 210944.0
children with autism 201122.0
measles mortality 179468.0
vaccine court 172456.0
National Vaccine Injury Compensation Program 168948.0
anti-vaccination 145200.0
measles 141736.0
adverse effects 141140.0
... ...
families who decline vaccines 1962.0
widespread fear 1420.0
false sense of security 0.0
Surgeon General Vivek Murthy -1452.0
MMR -2658.0
Vaccine Injury Compensation Program -3072.0
aborted baby -3458.0
vaccines are safe -6066.0
vaccine safety -16150.0
lobbying -27364.0

1140 rows × 1 columns


In [15]:
# save closeness vitality
#cv_df.to_csv('out/cv_negative.csv')

In [14]:
# Link analysis: PageRank.
# PageRank ranks nodes based on the structure of the links pointing to them.

pr = nx.pagerank_numpy(M)

# One row per node (the dict keys become the index), one score column.
pr_df = pd.DataFrame.from_dict(pr, orient='index')
pr_df.columns = ['page rank']

# Show nodes from highest to lowest rank.
pr_df.sort_values('page rank', ascending=False)


Out[14]:
page rank
vaccines 0.025987
children 0.015807
thimerosal 0.014948
vaccine industry 0.013321
CDC 0.013005
autism 0.009828
mainstream media 0.009027
doctors 0.008974
mercury 0.007887
pharmaceutical companies 0.007226
... ...
trustworthy 0.000316
Vaccine Adverse Event Reporting System 0.000314
multi-dose vaccines 0.000308
glutathione levels 0.000308
children with severe ASDs 0.000308
inflammation of immune system 0.000308
central nervous system disorders 0.000308
inflammatory adjuvant 0.000301
live viruses 0.000299
false sense of security 0.000132

1140 rows × 1 columns


In [16]:
# save page rank
#pr_df.to_csv('out/pr_negative.csv')

In [ ]: