notebook.community

Edit and run



In [1]:

    
import networkx as nx
import custom_funcs as cf
import matplotlib.pyplot as plt
import numpy as np

from time import time
from joblib import Parallel, delayed

%matplotlib inline

%load_ext autoreload
%autoreload 2



In [2]:

    
G = nx.read_gpickle('20150902_all_ird Final Graph.pkl')
G = cf.impute_reassortant_status(G)
G = cf.impute_weights(G)
G = cf.clean_host_species_names(G)



In [3]:

    
def subtype_domain_graph(G):
    sG = nx.DiGraph()

    for sc, sk, d in G.edges(data=True):
        if d['edge_type'] == 'reassortant':
            reassortant = True
        else:
            reassortant = False
        sc_subtype = G.node[sc]['subtype']
        sk_subtype = G.node[sk]['subtype']

        if not sG.has_node(sc_subtype):
            sG.add_node(sc_subtype)
        if not sG.has_node(sk_subtype):
            sG.add_node(sk_subtype)
        
        if reassortant:
            if not sG.has_edge(sc_subtype, sk_subtype):
                sG.add_edge(sc_subtype, sk_subtype, count=d['weight'])
            if sG.has_edge(sc_subtype, sk_subtype):
                sG.edge[sc_subtype][sk_subtype]['count'] += d['weight']

    return sG

start = time()
sG = subtype_domain_graph(G)
end = time()

print('Total time: {0} seconds.'.format(end - start))









    



Total time: 0.1163487434387207 seconds.



In [4]:

    
sG.predecessors('H8N8')









    Out[4]:





['H6N8', 'H8N4']



In [5]:

    
# import cProfile
# cProfile.run('cf.shuffle_node_attribute_label(G, "subtype", equally=False)')



In [6]:

    
def null_proportion_subtype_reassorting(G, equally=False):
    G_shuffled = cf.shuffle_node_attribute_label(G, 'subtype', equally=True)
    sG_shuf = subtype_domain_graph(G_shuffled)
    
    return sG_shuf

start = time()
iters = 100
results = Parallel(n_jobs=-1)(delayed(null_proportion_subtype_reassorting)(G, equally=True) for i in range(iters))
results
end = time()
print('Total time for {0} iterations: {1} seconds.'.format(iters, end - start))









    



Total time for 100 iterations: 189.6634864807129 seconds.



In [7]:

    
from collections import defaultdict
def weighted_degree_centrality(G):
    centralities = defaultdict(float)
    
    for n, d in G.nodes(data=True):
        predecessors = G.predecessors(n)
        successors = G.successors(n)
        
        for p in predecessors:
            centralities[n] += G.edge[p][n]['count']
        for s in successors:
            centralities[n] += G.edge[n][s]['count']
    return centralities

cents = weighted_degree_centrality(sG)
cents_shufs = []
for sG_shuf in results:
    cents_shuf = weighted_degree_centrality(sG_shuf)
    cents_shufs.append(cents_shuf)



In [8]:

    
subtype = 'H8N8'
cents[subtype], cents_shuf[subtype]









    Out[8]:





(2.0, 74.83333333333334)



In [9]:

    
import pandas as pd
means = pd.DataFrame(cents_shufs).mean()
means = pd.DataFrame(means)
means.columns = ['mean']

stds = pd.DataFrame(cents_shufs).std() * 5
stds = pd.DataFrame(stds)
stds.columns = ['std']

data = pd.DataFrame(list(zip(cents.keys(), cents.values())))
data.set_index(0, inplace=True)
data.columns = ['data']

joined = data.join(means).join(stds)
joined[joined['data'] > joined['mean'] + joined['std'] * 2.5]



In [ ]:

	data	mean	std
0
H3N8	336.238095	70.384871	81.551226
H1N1	300.750000	72.372269	83.179586