Background

In the reviews provided, Reviewer 1 requested an analysis that showed that reassortment is favoured between different wild bird species. In the review, one specific example brought up was about genome transfer (clonal & reassortment) between gulls and mallards. In this notebook, I attempt this analysis.


In [100]:
import networkx as nx
import custom_funcs as cf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_context('paper')
sns.set_style('white')

%matplotlib inline

from joblib import Parallel, delayed
from time import time
from tqdm import tqdm


/Users/ericmjl/anaconda/lib/python3.4/site-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))

In [58]:
# Read in the graph data and clean it.
G = nx.read_gpickle('20150902_all_ird Final Graph.pkl')
G = cf.clean_host_species_names(G)
G = cf.impute_reassortant_status(G)
G = cf.impute_weights(G)
G = cf.remove_zero_weighted_edges(G)

In [59]:
G.nodes(data=True)[3]


Out[59]:
('A/swine/Hong Kong/2461/1994',
 {'collection_date': Timestamp('1994-04-18 00:00:00'),
  'country': 'Hong Kong',
  'host_species': 'Swine',
  'reassortant': False,
  'subtype': 'H1N1'})

In [62]:
# Encode any gull species as "Gull" in the "host_label" field.
for n, d in G.nodes(data=True):
    if d['host_species'] == 'Gull' or 'Gull' in d['host_species']:
        G.node[n]['host_label'] = 'Gull'
    elif d['host_species'] == 'Mallard':
        G.node[n]['host_label'] = 'Mallard'
    else:
        G.node[n]['host_label'] = 'Unknown'

In [63]:
[n for n, d in G.nodes(data=True) if d['host_label'] == 'Gull']


Out[63]:
['A/herring gull/Iceland/1359/2011',
 'A/black-headed gull/Republic of Georgia/2/2012',
 'A/laughing gull/NJ/276/1989',
 'A/gull/Delaware/AI09-453/2009',
 'A/lesser black-backed gull/Iceland/145/2010',
 'A/black-headed gull/Sweden/87533/2009',
 'A/black-headed gull/Sweden/1/2005',
 'A/California gull/California/1196P/2013',
 'A/herring gull/New Jersey/AI09-335/2009',
 'A/black-headed gull/Republic of Georgia/4/2012',
 'A/black-headed gull/Republic of Georgia/2/2011',
 'A/herring gull/Delaware Bay/2617/1987',
 'A/laughing gull/New Jersey/75/1985',
 'A/black-headed gull/Republic of Georgia/8/2012',
 'A/laughing gull/New Jersey/177/1990',
 'A/laughing gull/New Jersey/180/1990',
 'A/herring gull/Iceland/1342/2011',
 'A/yellow-legged gull/Republic of Georgia/1/2011',
 'A/ring-billed gull/Quebec/G139/2010',
 'A/laughing gull/New York/AI00-470/2000',
 'A/laughing gull/Delaware Bay/261/1991',
 'A/wild bird/Chile/1805/2008',
 'A/herring gull/Mongolia/454/2008',
 'A/black-headed gull/Republic of Georgia/6/2011',
 'A/laughing gull/Delaware Bay/2952/1988',
 'A/laughing gull/Delaware Bay/208/1994',
 'A/herring gull/Delaware Bay/106/1995',
 'A/brown-headed gull/Qinghai/19/2009',
 'A/laughing gull/Delaware/2971/1988',
 'A/laughing gull/Delaware Bay/2/2002',
 'A/herring gull/Netherlands/4/2006',
 'A/ring-billed gull/Quebec/G018/2010',
 'A/herring gull/Delaware Bay/433/1986',
 'A/California gull/Washington/20371-003/2007',
 'A/laughing gull/Delaware Bay/4/1990',
 'A/herring gull/DE/703/1988',
 'A/yellow-legged gull/Republic of Georgia/1/2013',
 'A/glaucous-winged gull/Southeastern Alaska/10JR01700R0/2010',
 'A/laughing gull/Delaware/42/06',
 'A/yellow-legged gull/Republic of Georgia/6/2012',
 'A/gull/Delaware/AI09-438/2009',
 'A/glaucous-winged gull/Southeastern Alaska/10JR01681R0/2010',
 'A/ring-billed gull/Quebec/G068/2010',
 'A/laughing gull/Delaware Bay/61/2002',
 'A/black-headed gull/Republic of Georgia/4/2011',
 'A/brown-headed gull/Qinghai/9/2009',
 'A/laughing gull/New Jersey/AI09-218/2009',
 'A/ring-billed gull/Quebec/G066/2010',
 'A/laughing gull/NJ/768/2005',
 'A/black-headed gull/Iceland/1298/2011',
 'A/herring gull/Iceland/1394/2011',
 'A/black-headed gull/Sweden/74342/2008',
 'A/herring gull/Delaware Bay/712/1988',
 'A/herring gull/DE/475/1986',
 'A/black-headed gull/Republic of Georgia/3/2011',
 'A/herring gull/Delaware Bay/2610/1987',
 'A/gull/New Jersey/AI09-964/2009',
 'A/black-headed gull/Sweden/74340/2008',
 'A/Mediterranean gull/Republic of Georgia/1/2011',
 'A/herring gull/DE/692/1988',
 'A/black-headed gull/Republic of Georgia/9/2011',
 'A/black-headed gull/Republic of Georgia/9/2012',
 'A/glaucous-winged gull/Southeastern Alaska/10JR01572R0/2010',
 'A/yellow-legged gull/Republic of Georgia/5/2012',
 'A/laughing gull/DE/5/2003',
 'A/gull/Delaware Bay/2673/1988',
 'A/glaucous gull/Iceland/1382/2011',
 'A/black-headed gull/Republic of Georgia/1/2011',
 'A/great black-backed gull/Iceland/1395/2011',
 'A/yellow-legged gull/Republic of Georgia/3/2012',
 'A/laughing gull/Delaware Bay/296/1998',
 'A/herring gull/Iceland/1436/2011',
 'A/laughing gull/Delaware Bay/94/1995',
 'A/herring gull/Iceland/1320/2011',
 'A/laughing gull/New Jersey/194/1990',
 'A/herring gull/New Jersey/AI09-1262/2009',
 'A/glaucous-winged gull/Southeastern Alaska/10JR01856R0/2010',
 'A/herring gull/Delaware Bay/77/1994',
 'A/glaucous gull/Iceland/1444/2011',
 'A/glaucous gull-herring gull hybrid/Iceland/1108/2011',
 'A/black-headed gull/Netherlands/1/2006',
 'A/laughing gull/Delaware Bay/2839/1987',
 'A/laughing gull/Delaware Bay/2718/1987',
 'A/black-headed gull/Republic of Georgia/7/2012',
 'A/black-headed gull/Republic of Georgia/7/2011',
 'A/great black-backed gull/Iceland/1110/2011',
 'A/herring gull/New Jersey/780/1986',
 'A/gull/Delaware Bay/18/2000',
 'A/laughing gull/Delaware Bay/79/2002',
 'A/ring-billed gull/Quebec/02622-1/2009',
 'A/Iceland gull/Iceland/1124/2011',
 'A/black-headed gull/Netherlands/1/2005',
 'A/ring-billed gull/Quebec/G192/2010',
 'A/gull/Moscow/3100/2006',
 'A/herring gull/New Jersey/413/1989',
 'A/laughing gull/Delaware Bay/245/1996',
 'A/gull/Southeastern Alaska/10JR01527R0/2010',
 'A/herring gull/New York/AI00-532/2000',
 'A/black-headed gull/Republic of Georgia/8/2011',
 'A/herring gull/NJ/402/1989',
 'A/great black-headed gull/Qinghai/8/2009',
 'A/laughing gull/Delaware Bay/10/1990',
 'A/herring gull/NJ/782/1986',
 'A/black headed gull/Mongolia/1766/2006',
 'A/herring gull/New Jersey/159/1990',
 'A/herring gull/DE/698/1988',
 'A/laughing gull/Delaware Bay/34/1994',
 'A/great black-backed gull/Iceland/1113/2011',
 'A/laughing gull/New Jersey/AI09-271/2009',
 'A/laughing gull/Delaware Bay/2623/1987',
 'A/gull/Delaware/428/2009',
 'A/yellow-legged gull/Republic of Georgia/2/2013',
 'A/glaucous-winged gull/Southeastern Alaska/9JR0822R0/2009',
 'A/herring gull/Delaware/712/1988',
 'A/black-headed gull/Iceland/713/2010',
 'A/glaucous-winged gull/Southeastern Alaska/10JR01811R0/2010',
 'A/yellow-legged gull/Republic of Georgia/2/2012',
 'A/black headed gull/Mongolia/1756/2006',
 'A/great black-backed gull/Iceland/1393/2011',
 'A/laughing gull/New Jersey/276/1989']

In [64]:
'Gull' in 'Herring Gull'


Out[64]:
True

In [65]:
set([d['host_species'] for n, d in G.nodes(data=True)])


Out[65]:
{'American Black Duck',
 'American Green-Winged Teal',
 'American Wigeon',
 'Aquatic Bird',
 'Avian',
 'Babbler',
 'Baikal Teal',
 'Bar-Headed Goose',
 'Barn Swallow',
 'Barnacle Goose',
 "Bewick's Swan",
 'Bird',
 'Black Duck',
 'Black Scoter',
 'Black-Billed Magpie',
 'Black-Headed Gull',
 'Blue-Winged Teal',
 'Brown-Headed Gull',
 'Bufflehead',
 'Camel',
 'Canada Goose',
 'Canvasback',
 'Chicken',
 'Chinese Francolin',
 'Chukar',
 'Cinnamon Teal',
 'Common Coot',
 'Common Eider',
 'Common Goldeneye',
 'Common Murre',
 'Common Scoter',
 'Condor',
 'Coot',
 'Crane',
 'Crow',
 'Dog',
 'Domestic Cat',
 'Dove',
 'Duck',
 'Dunlin',
 'Eagle',
 'Eagle-Owl',
 'Egret',
 'Emu',
 'Environment',
 'Falcon',
 'Ferret',
 'Finch',
 'Flycatcher',
 'Gadwall',
 'Garganey',
 'Glaucous Gull',
 'Goose',
 'Great Black-Headed Gull',
 'Great Crested Grebe',
 'Greater Scaup',
 'Greater White-Fronted Goose',
 'Grebe',
 'Green-Winged Teal',
 'Grey Heron',
 'Guinea Fowl',
 'Guineafowl',
 'Gull',
 'Heron',
 'Herring Gull',
 'Hooded Merganser',
 'Horse',
 'Human',
 'Iceland Gull',
 'Japanese Quail',
 'Kelp Gull',
 'Knot',
 'Large Cat',
 'Laughing Gull',
 'Least Sandpiper',
 'Lesser Scaup',
 'Little Egret',
 'Little Grebe',
 'Long-Tailed Duck',
 'Magpie',
 'Magpie Robin',
 'Mallard',
 'Mallard-Black Duck Hybrid',
 'Mink',
 'Murre',
 'Muscovy Duck',
 'Mute Swan',
 'Myna',
 'Northern Shoveler',
 'Openbill Stork',
 'Ostrich',
 'Panda',
 'Parakeet',
 'Parrot',
 'Partridge',
 'Pelican',
 'Peregrine Falcon',
 'Pheasant',
 'Pigeon',
 'Pink-Footed Goose',
 'Pintail',
 'Poultry',
 'Quail',
 'Red Knot',
 'Red-Crested Pochard',
 'Red-Necked Stint',
 'Redhead',
 'Redhead Duck',
 'Rhea',
 'Ring-Billed Gull',
 'Ring-Necked Duck',
 'Rook',
 'Rosy-Billed Pochard',
 'Ruddy Turnstone',
 'Saker Falcon',
 'Sanderling',
 'Scaup',
 'Sea Mammal',
 'Semipalmated Sandpiper',
 'Sharp-Tailed Sandpiper',
 'Shorebird',
 'Shoveler',
 'Shrike',
 'Silky Chicken',
 'Sloth Bear',
 'Snow Goose',
 'Softbill',
 'Sooty Tern',
 'Sparrow',
 'Spot-Billed Duck',
 'Starling',
 'Stork',
 'Swan',
 'Swine',
 'Teal',
 'Tree Sparrow',
 'Tufted Duck',
 'Tundra Swan',
 'Turkey',
 'Unknown',
 'Waterfowl',
 'Weasel',
 'Whiskered Tern',
 'White-Fronted Goose',
 'White-Rumped Sandpiper',
 'White-Winged Scoter',
 'Whooper Swan',
 'Widgeon',
 'Wigeon',
 'Wild Chicken',
 'Wild Duck',
 'Wood Duck'}

In [66]:
def counts_reassortant_domain_graph(G, node_attr):
    """
    Computes the weighted counts of reassortant edges when going between different node attributes.
    
    Returns a "domain graph" with counts of weighted reasosrtant edges and sum weighted edges.
    """
    hg_graph = nx.DiGraph()

    for n, node_d in G.nodes(data=True):

        in_edges = G.in_edges(n, data=True)
        total_edges = len(in_edges)
        is_reassortant = node_d['reassortant']

        sk_hg = G.node[n][node_attr]
        if sk_hg not in hg_graph.nodes():
            hg_graph.add_node(sk_hg)

        for sc, _, edge_d in in_edges:
            sc_hg = G.node[sc][node_attr]
            if sc_hg not in hg_graph.nodes():
                hg_graph.add_node(sc_hg)

            if (sc_hg, sk_hg) not in hg_graph.edges():
                hg_graph.add_edge(sc_hg, sk_hg, total=edge_d['weight'], reassortant=0)
            if (sc_hg, sk_hg) in hg_graph.edges():
                hg_graph.edge[sc_hg][sk_hg]['total'] += edge_d['weight']

            if is_reassortant:
                hg_graph.edge[sc_hg][sk_hg]['reassortant'] += edge_d['weight']

    for sc, sk, d in hg_graph.edges(data=True):
        hg_graph.edge[sc][sk]['p_reassortant'] = d['reassortant'] / d['total']
        
    return hg_graph

In [67]:
# Compute the proportion reassortant across different host class pairs.
hg = counts_reassortant_domain_graph(G, 'host_label')

hg.edges(data=True)


Out[67]:
[('Gull',
  'Gull',
  {'p_reassortant': 0.2222222222222222, 'reassortant': 12.0, 'total': 54.0}),
 ('Gull',
  'Mallard',
  {'p_reassortant': 0.5714285714285714, 'reassortant': 2.0, 'total': 3.5}),
 ('Gull',
  'Unknown',
  {'p_reassortant': 0.4235294117647059, 'reassortant': 18.0, 'total': 42.5}),
 ('Mallard',
  'Gull',
  {'p_reassortant': 0.8695652173913043, 'reassortant': 10.0, 'total': 11.5}),
 ('Mallard',
  'Mallard',
  {'p_reassortant': 0.23864756477943258,
   'reassortant': 294.5666666666666,
   'total': 1234.3166666666666}),
 ('Mallard',
  'Unknown',
  {'p_reassortant': 0.43752248710739206,
   'reassortant': 390.8690476190477,
   'total': 893.3690476190473}),
 ('Unknown',
  'Gull',
  {'p_reassortant': 0.45544554455445546, 'reassortant': 23.0, 'total': 50.5}),
 ('Unknown',
  'Mallard',
  {'p_reassortant': 0.38963245348692654,
   'reassortant': 313.43333333333317,
   'total': 804.433333333333}),
 ('Unknown',
  'Unknown',
  {'p_reassortant': 0.07361793995931051,
   'reassortant': 1128.1309523809523,
   'total': 15324.130952380403})]

In [68]:
# A helper function for computing the null distribution.
def null_proportion_domain_graph_reassortant(G, node_attr, equally=False):
    
    G_shuffled = cf.shuffle_node_attribute_label(G, node_attr, equally)
    hg_graph_shuf = counts_reassortant_domain_graph(G_shuffled, node_attr)
    
    return hg_graph_shuf

In [72]:
# Compute the null distribution.
### CAUTION! YOU WILL HAVE TO WAIT 3 MINUTES FOR THIS TO FINISH!
start = time()
results = Parallel(n_jobs=-1)(delayed(null_proportion_domain_graph_reassortant)(G, 'host_label', equally=True) for i in range(100))
len(results)
end = time()
print(end - start)


366.61181807518005

As shown in the analysis, there are insufficient numbers to compare reassortment between Mallards and Gulls (of any species). Within species, it is possible to do so for Mallards but not Gulls. At p_reassortant = 0.23, I would expect this (intuitively) to be greater than the null.


In [86]:
# Summarize the proportion reassortant distribution under null.
def distr_null_p_reassortant(list_of_hg_graphs):
    hg_graph = nx.DiGraph()
    for g in tqdm(list_of_hg_graphs):
        hg_graph.add_nodes_from(g.nodes())
        for sc, sk, d in g.edges(data=True):
            if (sc, sk) not in hg_graph.edges():
                hg_graph.add_edge(sc, sk, p_reassortant=[d['p_reassortant']])
            else:
                hg_graph.edge[sc][sk]['p_reassortant'].append(d['p_reassortant'])
    return hg_graph

In [89]:
summaryG = distr_null_p_reassortant(results)




In [91]:
# Remove "unknowns" from consideration
for n, d in summaryG.nodes(data=True):
    if 'Unknown' in n:
        summaryG.remove_node(n)
summaryG.edges(data=True)


Out[91]:
[('Gull',
  'Gull',
  {'p_reassortant': [0.11186105273958351,
    0.11743158554559348,
    0.11460536974882943,
    0.1376971925510451,
    0.13139767167300093,
    0.11151050856179888,
    0.1166016162651999,
    0.10238016976912594,
    0.1172233233031455,
    0.11860438572451573,
    0.103870413287675,
    0.12854558634994304,
    0.11134131568376841,
    0.11956246205210017,
    0.10781202399673226,
    0.12886973200718627,
    0.11978802152370584,
    0.1171926551393703,
    0.11306710996422856,
    0.1200804495052311,
    0.12272865986924143,
    0.13287737523436713,
    0.12724237644177314,
    0.11671203108180446,
    0.1166801144981119,
    0.10390234209478312,
    0.11036922618196054,
    0.1252267983129836,
    0.10805040804826795,
    0.12452595308290461,
    0.1151475365438372,
    0.10600259597879937,
    0.12647666882994846,
    0.11994100977419073,
    0.10682902516765425,
    0.1210106919493094,
    0.11325430679155178,
    0.11894379480211498,
    0.11813527782860041,
    0.11826949253379532,
    0.144561489632876,
    0.12229543320164736,
    0.1178692112348493,
    0.13195256535043767,
    0.1076945240316991,
    0.12950516400701642,
    0.11974552963074953,
    0.12863005737864977,
    0.10514855180889068,
    0.10024981762519118,
    0.11711993513609893,
    0.11361359144463964,
    0.10276545054453642,
    0.12726875392460654,
    0.12260964362675762,
    0.1387626210809711,
    0.11212526092573742,
    0.13321576743472477,
    0.11751692067204032,
    0.09820954108149267,
    0.10473479300198424,
    0.123820928691585,
    0.12148626196478844,
    0.1133149907902827,
    0.10733557059120064,
    0.11106622157852461,
    0.099990829768444,
    0.11612341326995675,
    0.10966870736968042,
    0.11068813264723432,
    0.11813211606536263,
    0.11113167099912005,
    0.11732377910373636,
    0.12843903379624053,
    0.13077148932532706,
    0.12418791823692286,
    0.11662247255608253,
    0.12365137523604805,
    0.09948142221326221,
    0.1161439645324352,
    0.11455394911178392,
    0.09926582609942788,
    0.12662921763709295,
    0.11855796162378174,
    0.11547820108782558,
    0.13671221311674106,
    0.1251131156057328,
    0.11417704875161902,
    0.12607050027329264,
    0.11682516879488286,
    0.1245021248766852,
    0.11226509939106234,
    0.10802204105855183,
    0.11863942274057389,
    0.11512462706764558,
    0.11662860598590968,
    0.11486264379588747,
    0.11986549840390352,
    0.12145643603502629,
    0.11991698845622674]}),
 ('Gull',
  'Mallard',
  {'p_reassortant': [0.11774649620846803,
    0.12726927788010875,
    0.11147985829484247,
    0.13576656435491224,
    0.12381468757917845,
    0.11109614997237945,
    0.1118161866357862,
    0.11423121315737489,
    0.10695797250202906,
    0.125784655415098,
    0.1062147417933725,
    0.12875621530913084,
    0.1207457871815367,
    0.12127390146372748,
    0.11911951419213418,
    0.12497088372051006,
    0.11273278958806947,
    0.1199666254231097,
    0.10509287219399238,
    0.12186718086488726,
    0.12540697247478522,
    0.11435082124091224,
    0.1212634397006018,
    0.11407909451635605,
    0.11358040566506543,
    0.11429963837592223,
    0.10141042707761576,
    0.12080783870586494,
    0.10851249812521346,
    0.13711355228931105,
    0.11779815759378004,
    0.11176964470520848,
    0.1253933205340037,
    0.12042188995903871,
    0.11618349500680648,
    0.13353814278627554,
    0.1128116383704931,
    0.11578684767183789,
    0.11729258426050818,
    0.11861167649774837,
    0.12544482769946455,
    0.11437915605064267,
    0.12498549082945798,
    0.123036325463703,
    0.12051342531504251,
    0.12160226788731823,
    0.113425047278951,
    0.13221317645228486,
    0.12379216578129623,
    0.10889436079758211,
    0.10828105680358932,
    0.11040340102908117,
    0.11519924650509539,
    0.1329722916630231,
    0.12806793182645512,
    0.12613371671458712,
    0.10870817476053783,
    0.12937346201707547,
    0.10766196640545689,
    0.1073625876217264,
    0.1031282925864682,
    0.10169493569603094,
    0.12316266188120313,
    0.12233705848723184,
    0.11237601152184964,
    0.11833632930807554,
    0.11182295352675023,
    0.12655679558304753,
    0.11204983793747644,
    0.10901257408540037,
    0.12051726761779114,
    0.1180007431262984,
    0.10912995593830771,
    0.13154187691098315,
    0.10999600459722526,
    0.1275237336685877,
    0.11882217382186919,
    0.121667670622862,
    0.11977503945004818,
    0.1292605706179449,
    0.1085616231047107,
    0.10121916695129371,
    0.12950524772602862,
    0.1285217560409359,
    0.12303440568710906,
    0.13663366022455928,
    0.12275823699819752,
    0.1195030539027944,
    0.11228055458454474,
    0.11322726361871213,
    0.1197220621433377,
    0.11360463140838667,
    0.11573391371564686,
    0.1236319861843031,
    0.11915665796616727,
    0.11171953485923344,
    0.12409469026766198,
    0.11557893153998637,
    0.11211202318640108,
    0.11371745842485562]}),
 ('Mallard',
  'Gull',
  {'p_reassortant': [0.12487593620859433,
    0.1189503874191585,
    0.11785164130186142,
    0.10605319425971622,
    0.11576648215479456,
    0.1097099841359158,
    0.11828204918068616,
    0.1228417552247024,
    0.13467857021910956,
    0.12136964825904091,
    0.1282674313067018,
    0.10723398310429857,
    0.11266071237123003,
    0.12630929111071293,
    0.12850725094020474,
    0.10852179413572507,
    0.11187740897354313,
    0.12143465940731775,
    0.11352745619915453,
    0.12517695303660564,
    0.11747154096029405,
    0.1301311125655423,
    0.11377489352794261,
    0.11180766285291716,
    0.12611682348797468,
    0.11205058239443652,
    0.11833003792267871,
    0.12555529672246238,
    0.13496312956875076,
    0.11639606897995172,
    0.12166161195937784,
    0.1405454477851709,
    0.12946334040346674,
    0.12387694346139622,
    0.12222181494159977,
    0.12622600890036714,
    0.11826462562388709,
    0.11253928346522715,
    0.12514906080029484,
    0.12415176222600312,
    0.12074494096479395,
    0.12326990629747851,
    0.10976361824794416,
    0.10638852645435178,
    0.12028611427873633,
    0.13179724284197158,
    0.14427669014616884,
    0.10778544397029925,
    0.11400864138550248,
    0.10791815465365887,
    0.1223148165391059,
    0.11099687707593048,
    0.1305344021425118,
    0.11242094798511727,
    0.12337166466200948,
    0.1215251805338616,
    0.11715123144502061,
    0.12707755675753496,
    0.11467105590251632,
    0.1306118352400234,
    0.12309531929633584,
    0.12990932919885784,
    0.12377140687683895,
    0.11577258216255601,
    0.1369777597137763,
    0.12829114126594277,
    0.11495818477752029,
    0.12584773720418976,
    0.11003969619014667,
    0.11389407430559148,
    0.10816836324896793,
    0.11655586087154321,
    0.10977433787451318,
    0.11143563571955223,
    0.12227718720867264,
    0.11388540733235167,
    0.1150154370242685,
    0.11893027154419047,
    0.12015924812271193,
    0.1209378190464235,
    0.12075197738548457,
    0.11595393236712725,
    0.11390353680472311,
    0.11090812189436747,
    0.13181551278749853,
    0.11934735839682063,
    0.12313843061469719,
    0.11564925873245992,
    0.1130464410192899,
    0.11491527768523643,
    0.11653717171274353,
    0.1052857076360361,
    0.13304974918983123,
    0.11701240583539814,
    0.1214956822010354,
    0.11574336390049374,
    0.11778498561104334,
    0.11192514582220413,
    0.116988286386399,
    0.13113804591653494]}),
 ('Mallard',
  'Mallard',
  {'p_reassortant': [0.11847630447115796,
    0.11782473196991702,
    0.12266009809710697,
    0.10419526848404675,
    0.11512725023839132,
    0.1158038609328965,
    0.10853083651627922,
    0.1225362796520621,
    0.1268965641360678,
    0.13052160999518075,
    0.12803233662412092,
    0.12160167767468287,
    0.12727953996082628,
    0.11529705799482869,
    0.12127167638300196,
    0.11619125850212847,
    0.11620615129791143,
    0.12078636085705882,
    0.13176308927460473,
    0.13002599194154668,
    0.12237995080827815,
    0.12836547885964067,
    0.11660269186108638,
    0.11248015081709362,
    0.11877656361205446,
    0.12938395828353322,
    0.10753563694308557,
    0.11952996156385538,
    0.1330861115945084,
    0.1189248392669619,
    0.1286994103150055,
    0.13129444977669089,
    0.11759153733582664,
    0.12353306818682654,
    0.12312626542259651,
    0.1235859831167058,
    0.11736065233126285,
    0.10470783033808652,
    0.11146637892572946,
    0.12514021722030452,
    0.10878682450192734,
    0.12380251146027356,
    0.11551559465156057,
    0.11310548026446397,
    0.1221332844919585,
    0.12454376748232765,
    0.12789393151272924,
    0.1129933840789423,
    0.11967642817782437,
    0.1207547146174721,
    0.11035683283475699,
    0.11029200698235028,
    0.13142256933501675,
    0.11755597370553657,
    0.1307817736864727,
    0.11378313268409843,
    0.11904228099199003,
    0.1217322979042016,
    0.11648203095140842,
    0.1325208170217611,
    0.1386725794737595,
    0.11974359635974205,
    0.10280257506291124,
    0.12565656911771453,
    0.12662917213766148,
    0.12306812275441087,
    0.1275923739379203,
    0.12615820181034615,
    0.11824603848045535,
    0.11527962237576432,
    0.12110423513669812,
    0.11740220031365524,
    0.119819696767975,
    0.12441667657078162,
    0.12456775916565778,
    0.10851595204503107,
    0.1159996064796846,
    0.1031748207824357,
    0.14097751320019497,
    0.13007712112384406,
    0.12267115700250382,
    0.1284924545855737,
    0.11463513970076561,
    0.12893185065721247,
    0.1281958636646088,
    0.11807074815017132,
    0.1119255480533738,
    0.12603835142214936,
    0.10348024456531939,
    0.1208189005266598,
    0.0999467030608813,
    0.11972816256500021,
    0.11823950901910257,
    0.1134977284731093,
    0.12339116768947281,
    0.10595387217547507,
    0.1193838995187683,
    0.12295699328339771,
    0.12255822591445577,
    0.12817584414273153]})]

In [95]:
# Grab out the "null" model statistics.
means = []  # mean of the distribution under null.
stds  = []  # standard deviation of distribution under null.
names = []  # names
# grab out the 1st, 5th, 95th and 99th percentile of null distribution
percs = dict()
for p in [0.5, 5, 95, 99.5]:
    percs[p] = []

name_map = {'Gull': 'Gull',
            'Mallard': 'Mallard'}

# Reverse name_map for convenience
key_map = {v:k for k, v in name_map.items()}

for sc, sk, d in sorted(summaryG.edges(data=True), key=lambda x:(x[0], x[1])):
    mean = np.mean(d['p_reassortant'])
    std = np.std(d['p_reassortant'])

    names.append('{0}:{1}'.format(name_map[sc], name_map[sk]))
    means.append(mean)
    stds.append(std)
    for p in [0.5, 5, 95, 99.5]:
        percs[p].append(np.percentile(d['p_reassortant'], p))

In [96]:
# Compile the "data" statistics.
data = []
names_data = []
log10weights = []
log10reassort = []
log10clonal = []
for sc, sk, d in sorted(hg.edges(data=True), key=lambda x:(x[0], x[1])):
    if sc == 'Unknown' or sk == 'Unknown':
        pass
    else:
        names_data.append('{0}:{1}'.format(name_map[sc], name_map[sk]))
        data.append(d['p_reassortant'])
        log10weights.append(np.log10(d['total']))
        log10reassort.append(np.log10(d['reassortant']))
        log10clonal.append(np.log10(d['total'] - d['reassortant']))
data


Out[96]:
[0.2222222222222222,
 0.5714285714285714,
 0.8695652173913043,
 0.23864756477943258]

In [115]:
# Plot data vs. null model.
fig = plt.figure(figsize=(4,3))

ind = np.arange(len(means))
width = 0.35

ax = fig.add_subplot(1,1,1)
ax.bar(ind, means, width=width, 
       color='blue', 
       label='Null', 
       #yerr=np.array(stds)*3, 
       yerr=[np.array(means) - percs[0.5],
             percs[99.5] - np.array(means)],
       alpha=0.3)
ax.bar(ind+width, data, width=width, color='blue', label='Data')
ax.set_xticks(ind+width)
ax.set_xticklabels(names, rotation=45, ha='right')
ax.set_ylabel('Proportion Reassortant')
ax.set_xlabel('Bird Transition')

for i, label in enumerate(ax.get_xaxis().get_ticklabels()):
    if log10weights[i] > 3 or log10reassort[i] > 1:
        label.set_weight('bold')

ax2 = ax.twinx()
ax2.scatter(ind+width, log10weights, color='orange', label='Total', alpha=0.3)
ax2.scatter(ind+width, log10reassort, color='green', label='Reassortant', alpha=0.3)
ax2.set_ylabel('log10(Num. Events)')
ax.legend(loc='upper left')
ax2.legend(loc='upper center')

ax2.axhline(y=1, color='green', alpha=0.3, linestyle='--')
ax2.axhline(y=3, color='orange', alpha=0.3, linestyle='--')

# ax.annotate('B', xy=(0,1), xycoords='figure fraction', va='top', ha='left')
# plt.legend()
plt.subplots_adjust(left=0.10, right=0.92, bottom=0.23)
plt.savefig('figures/Proportion Reassortant Gull Mallard.pdf', bbox_inches='tight')



In [ ]: