In [1]:
import networkx as nx
import custom_funcs as cf
import matplotlib.pyplot as plt
import numpy as np

from time import time
from joblib import Parallel, delayed

%matplotlib inline

%load_ext autoreload
%autoreload 2

In [2]:
G = nx.read_gpickle('20150902_all_ird Final Graph.pkl')
G = cf.impute_reassortant_status(G)
G = cf.impute_weights(G)
G = cf.clean_host_species_names(G)

In [3]:
def subtype_domain_graph(G):
    sG = nx.DiGraph()

    for sc, sk, d in G.edges(data=True):
        if d['edge_type'] == 'reassortant':
            reassortant = True
        else:
            reassortant = False
        sc_subtype = G.node[sc]['subtype']
        sk_subtype = G.node[sk]['subtype']

        if not sG.has_node(sc_subtype):
            sG.add_node(sc_subtype)
        if not sG.has_node(sk_subtype):
            sG.add_node(sk_subtype)
        
        if reassortant:
            if not sG.has_edge(sc_subtype, sk_subtype):
                sG.add_edge(sc_subtype, sk_subtype, count=d['weight'])
            if sG.has_edge(sc_subtype, sk_subtype):
                sG.edge[sc_subtype][sk_subtype]['count'] += d['weight']

    return sG

start = time()
sG = subtype_domain_graph(G)
end = time()

print('Total time: {0} seconds.'.format(end - start))


Total time: 0.1163487434387207 seconds.

In [4]:
sG.predecessors('H8N8')


Out[4]:
['H6N8', 'H8N4']

In [5]:
# import cProfile
# cProfile.run('cf.shuffle_node_attribute_label(G, "subtype", equally=False)')

In [6]:
def null_proportion_subtype_reassorting(G, equally=False):
    G_shuffled = cf.shuffle_node_attribute_label(G, 'subtype', equally=True)
    sG_shuf = subtype_domain_graph(G_shuffled)
    
    return sG_shuf

start = time()
iters = 100
results = Parallel(n_jobs=-1)(delayed(null_proportion_subtype_reassorting)(G, equally=True) for i in range(iters))
results
end = time()
print('Total time for {0} iterations: {1} seconds.'.format(iters, end - start))


Total time for 100 iterations: 189.6634864807129 seconds.

In [7]:
from collections import defaultdict
def weighted_degree_centrality(G):
    centralities = defaultdict(float)
    
    for n, d in G.nodes(data=True):
        predecessors = G.predecessors(n)
        successors = G.successors(n)
        
        for p in predecessors:
            centralities[n] += G.edge[p][n]['count']
        for s in successors:
            centralities[n] += G.edge[n][s]['count']
    return centralities

cents = weighted_degree_centrality(sG)
cents_shufs = []
for sG_shuf in results:
    cents_shuf = weighted_degree_centrality(sG_shuf)
    cents_shufs.append(cents_shuf)

In [8]:
subtype = 'H8N8'
cents[subtype], cents_shuf[subtype]


Out[8]:
(2.0, 74.83333333333334)

In [9]:
import pandas as pd
means = pd.DataFrame(cents_shufs).mean()
means = pd.DataFrame(means)
means.columns = ['mean']

stds = pd.DataFrame(cents_shufs).std() * 5
stds = pd.DataFrame(stds)
stds.columns = ['std']

data = pd.DataFrame(list(zip(cents.keys(), cents.values())))
data.set_index(0, inplace=True)
data.columns = ['data']

joined = data.join(means).join(stds)
joined[joined['data'] > joined['mean'] + joined['std'] * 2.5]


Out[9]:
data mean std
0
H3N8 336.238095 70.384871 81.551226
H1N1 300.750000 72.372269 83.179586

In [ ]: