In [1]:
%matplotlib inline

import os
import random
from collections import OrderedDict
from statistics import stdev, mean
from math import log10
import math

import numpy as np
import pandas as pd
import networkx as nx
from IPython.display import display
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from atntools import foodwebs
from atntools.foodwebs import draw_food_web, get_source_nodes, get_basal_species, get_plant_eaters, predator_complete_subweb

#species_table = foodwebs.read_species_table()

#def show_species_table(node_ids):
#    global species_table
#    df = species_table
#    display(df.loc[node_ids].sort_values('trophic_level', 0, False))

serengeti = foodwebs.read_serengeti()
print("{} nodes, {} edges".format(serengeti.number_of_nodes(), serengeti.number_of_edges()))


#species_table['trophic_level'].describe()

# Draw a food web
subweb = serengeti.subgraph([31, 44, 45, 47, 49, 50, 66, 92, 7, 5])
draw_food_web(subweb, show_names=False, show_legend=True)


#plt.savefig('food-web-10.png', dpi=600, transparent=True)


87 nodes, 538 edges

Random subgraph

As a starter exercise, generate a random subgraph of G by choosing N nodes at random, and including all of the existing edges between them. With the Serengeti food web, this usually results in few edges, and so is not useful in itself.


In [2]:
def random_subgraph(graph, N):
    nodes = random.sample(graph.nodes(), N)
    return graph.subgraph(nodes)

subweb = random_subgraph(serengeti, 5)
draw_food_web(subweb, show_legend=True)
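
The "few edges" claim can be checked directly. A rough sketch (averages will vary from run to run):


In [ ]:
# Average edge count over repeated 10-node random samples of the Serengeti web
edge_counts = [random_subgraph(serengeti, 10).number_of_edges() for _ in range(100)]
print(mean(edge_counts))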


Random connected subgraph

Generate a random connected subgraph of G by choosing a node at random, and iteratively adding neighbors to the subgraph until N nodes have been added. This produces some potentially viable food webs, but doesn't guarantee adequate representation of all trophic levels (crucially, basal species aren't always included).


In [3]:
def random_connected_subgraph(G, N, seed_nodes=None, retry=1):
    """seed_nodes: list of nodes from which to randomly choose a starting node"""
    
    if seed_nodes is None:
        seed_nodes = G.nodes()
    nodes = set([random.choice(seed_nodes)])
    
    for i in range(N-1):
        
        # Find the neighbor nodes of the subgraph nodes
        neighbors = set()
        for node in nodes:
            neighbors = neighbors | set(G.predecessors(node) + G.successors(node))
        neighbors = neighbors - nodes # Exclude nodes already in the subgraph
        
        if len(neighbors) == 0:
            print("random_connected_subgraph: no candidate neighbors found. ", end='')
            if (retry > 0):
                print("Retrying.")
                return random_connected_subgraph(G, N, seed_nodes, retry - 1)
            else:
                print("Giving up.")
                return None

        # Add a random neighbor to the subgraph
        nodes.add(random.choice(list(neighbors)))
    
    return G.subgraph(list(nodes))

subweb = random_connected_subgraph(serengeti, 5)

draw_food_web(subweb, show_legend=True)
#show_species_table([int(node_id) for node_id in subweb.nodes()])
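
To quantify the caveat about basal species, here is a rough sketch that counts how often a small random connected sub-web contains no basal species at all (exact fractions will vary from run to run):


In [ ]:
basal = set(get_basal_species(serengeti))
trials = 100
missing_basal = 0
for _ in range(trials):
    sw = random_connected_subgraph(serengeti, 5)
    if sw is not None and not (set(sw.nodes()) & basal):
        missing_basal += 1
print(missing_basal / trials)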


Random successor subgraph

Generate a random connected subgraph of the given graph. On each iteration, add only successors of nodes already in the subgraph, so that, when seeded with basal species, the web is built upward from producers.


In [11]:
def random_successor_subgraph(G, N, seed_nodes=None, seed_size=1, retry=1):
    """seed_nodes: list of nodes from which to randomly choose seed_size starting nodes"""
    
    if seed_nodes is None:
        seed_nodes = G.nodes()
    nodes = set(random.sample(seed_nodes, seed_size))
    
    for i in range(N - seed_size):
        
        # Find the neighbor nodes of the subgraph nodes
        neighbors = set()
        for node in nodes:
            neighbors = neighbors | set(G.successors(node))
        neighbors = neighbors - nodes # Exclude nodes already in the subgraph
        
        if len(neighbors) == 0:
            print("random_successor_subgraph: no candidate neighbors found. ", end='')
            if (retry > 0):
                print("Retrying.")
                return random_successor_subgraph(G, N, seed_nodes, seed_size=seed_size,
                                                 retry=retry-1)
            else:
                print("Giving up.")
                return None

        # Add a random neighbor to the subgraph
        nodes.add(random.choice(list(neighbors)))
    
    subgraph = G.subgraph(list(nodes))
    num_components = nx.number_connected_components(subgraph.to_undirected())
    if num_components > 1:
        print("random_successor_subgraph: {} connected components. ".format(num_components),
              end='')
        if (retry > 0):
            print("Retrying.")
            return random_successor_subgraph(G, N, seed_nodes, seed_size=seed_size,
                                             retry=retry-1)
        else:
            print("Giving up.")
            return None
    
    return subgraph

# For the seed nodes, use the basal species excluding Decaying Material (89)
seed_nodes = get_basal_species(serengeti)
if 89 in seed_nodes:
    seed_nodes.remove(89)
    
subweb = random_successor_subgraph(serengeti, 10, seed_nodes=seed_nodes, seed_size=3, retry=3)

draw_food_web(subweb)
print(str(subweb.nodes()))


random_successor_subgraph: 3 connected components. Retrying.
random_successor_subgraph: 2 connected components. Retrying.
random_successor_subgraph: 2 connected components. Retrying.
[32, 1, 66, 3, 4, 37, 27, 36, 83, 28]
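
A quick sanity check of the construction (assuming the seed nodes are basal species): every consumer in the sub-web should have at least one prey within it, since each non-seed node was added as a successor of a node already in the set.


In [ ]:
if subweb is not None:
    for node in subweb.nodes():
        if float(subweb.node[node]['trophic_level']) > 1:
            # Consumers must have at least one prey (predecessor) in the sub-web
            assert len(subweb.predecessors(node)) > 0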

Finding all simple paths (food chains) from a basal species


In [8]:
def all_simple_paths_from(graph, source, visited=set()):
    paths = []
    for successor in graph.successors(source):
        if successor not in visited and successor != source:
            paths.append([source, successor])
            for successor_path in all_simple_paths_from(graph, successor, visited | set([source])):
                paths.append([source] + successor_path)
    return paths

# Visual testing
graph = nx.fast_gnp_random_graph(5, 0.25, directed=True)
nx.draw_networkx(graph)
print(all_simple_paths_from(graph, 0))


[[0, 1], [0, 2], [0, 2, 4], [0, 4]]
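
As a cross-check, the result should match NetworkX's built-in nx.all_simple_paths (which enumerates simple paths between a given source and target) when the paths to every possible target are collected. A sketch:


In [ ]:
expected = []
for target in graph.nodes():
    if target != 0:
        expected.extend(nx.all_simple_paths(graph, 0, target))
assert sorted(expected) == sorted(all_simple_paths_from(graph, 0))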

Simple cycles


In [9]:
graph = nx.fast_gnp_random_graph(5, 0.25, directed=True)

# Add a self-loop to node 0. We want to exclude self-loops from food chain loops.
graph.add_edge(0, 0)
# It ends up being counted as a simple cycle. Can simply exclude cycles with length 1.

nx.draw_networkx(graph)

print(list(nx.simple_cycles(graph)))


[[0, 4, 2], [0, 2], [0]]

Adding nodes to subgraphs along with edges


In [7]:
g = nx.DiGraph()
g.add_node(3, {'something': 'hello', 'else': 'world'})
g.add_edge(1,2)
g.add_edge(2,3)
g.add_edge(3,4)
g.add_edge(4,1)
g.add_edge(3,1)
plt.figure()
nx.draw_networkx(g)

def add_node_with_edges(graph, subgraph, node):
    """Add a node from a directed graph to a subgraph of it, along with all edges from the original graph
    that are adjacent to that node and another node existing in the subgraph.
    """
    subgraph.add_node(node)
    for edge in graph.out_edges_iter(node):
        if subgraph.has_node(edge[1]):
            subgraph.add_edge(*edge)
    for edge in graph.in_edges_iter(node):
        if subgraph.has_node(edge[0]):
            subgraph.add_edge(*edge)

plt.figure()
h = g.subgraph([1, 3])
nx.draw_networkx(h)
plt.figure()
add_node_with_edges(g, h, 2)
#add_node_with_edges(g, h, 3)
#add_node_with_edges(g, h, 4)
nx.draw_networkx(h)

# Demonstrate how to copy the attributes of a node from one graph to another
h.node[3] = g.node[3]
print(h.node[3])


{'else': 'world', 'something': 'hello'}

Completing food chains of a graph

The most important constraint on a food web is that it contains no incomplete food chains: every consumer must be connected, through its prey, down to a basal species. This function takes a sub-web and returns a new graph in which all food chains are completed, where necessary, by adding nodes (and their edges) from the full graph.


In [8]:
def complete_food_chains(full_graph, subgraph):
    
    # Make a copy of the subgraph to modify and return
    subgraph2 = subgraph.copy()
    
    # Get the source nodes in the subgraph which are consumers (non-plants).
    # The goal is to have none of these, at which point all food chains will be complete.
    consumer_source_nodes = []
    source_nodes = get_source_nodes(subgraph2)
    for node in source_nodes:
        if float(subgraph2.node[node]['trophic_level']) > 1:
            consumer_source_nodes.append(node)
    
    # Add predecessors to consumer source nodes until there are no consumer source nodes left.
    while len(consumer_source_nodes) > 0:
        
        # Choose a random consumer source node
        consumer_node = random.choice(consumer_source_nodes)
        consumer_source_nodes.remove(consumer_node)
        
        # Add a random predecessor and associated edges
        new_node = random.choice(full_graph.predecessors(consumer_node))
        add_node_with_edges(full_graph, subgraph2, new_node)
        subgraph2.node[new_node] = full_graph.node[new_node] # Copy node attributes
        
        # If the new node is also a consumer source node, add it to the list
        if float(subgraph2.node[new_node]['trophic_level']) > 1:
            consumer_source_nodes.append(new_node)
                
    return subgraph2

subweb = random_connected_subgraph(serengeti, 4)
complete_subweb = complete_food_chains(serengeti, subweb)

draw_food_web(subweb)
plt.figure()
draw_food_web(complete_subweb)


<matplotlib.figure.Figure at 0x10ff56f28>
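
A quick check that the completion worked (assuming get_source_nodes returns the nodes with no prey in the sub-web): every remaining source node should be a plant.


In [ ]:
for node in get_source_nodes(complete_subweb):
    # All remaining source nodes should be basal species (trophic level 1)
    assert float(complete_subweb.node[node]['trophic_level']) == 1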

Predator-complete sub-webs


In [10]:
# For the seed nodes, use the basal species, excluding node 1 (if present)
seed_nodes = get_basal_species(serengeti)
if 1 in seed_nodes:
    seed_nodes.remove(1)

subweb = predator_complete_subweb(serengeti, 10, seed_nodes=seed_nodes, seed_size=2, retry=3)

draw_food_web(subweb, False, True)
node_ids = sorted(subweb.nodes())
print("Node IDs: {}".format(node_ids))
print("Food web ID: {}".format('-'.join(map(str, node_ids))))


Node IDs: [3, 7, 16, 28, 49, 50, 65, 80, 86, 87]
Food web ID: 3-7-16-28-49-50-65-80-86-87

In [15]:
# Generate a bunch of random subwebs

node_id_lists = []
for i in range(25):
    subweb = predator_complete_subweb(serengeti, 15, seed_nodes=seed_nodes, seed_size=3, retry=5)
    node_ids = sorted(subweb.nodes())
    node_id_lists.append(node_ids)
    
print('[\n' + ',\n'.join(map(str, sorted(node_id_lists))) + '\n]')


random_successor_subgraph: 3 connected components. Retrying.
random_successor_subgraph: 2 connected components. Retrying.
random_successor_subgraph: 2 connected components. Retrying.
random_successor_subgraph: 2 connected components. Retrying.
random_successor_subgraph: 3 connected components. Retrying.
random_successor_subgraph: 2 connected components. Retrying.
random_successor_subgraph: 2 connected components. Retrying.
random_successor_subgraph: 2 connected components. Retrying.
random_successor_subgraph: 2 connected components. Retrying.
random_successor_subgraph: 2 connected components. Giving up.
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-15-9ee8b86b1fe1> in <module>()
      4 for i in range(25):
      5     subweb = predator_complete_subweb(serengeti, 15, seed_nodes=seed_nodes, seed_size=3, retry=5)
----> 6     species_ids = sorted(subweb.nodes())
      7     species_id_lists.append(species_ids)
      8 

AttributeError: 'NoneType' object has no attribute 'nodes'
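
predator_complete_subweb returns None when it exhausts its retries, which is what triggers the AttributeError above. A guarded version of the loop might look like this (a sketch that simply discards failed attempts):


In [ ]:
node_id_lists = []
for i in range(25):
    subweb = predator_complete_subweb(serengeti, 15, seed_nodes=seed_nodes, seed_size=3, retry=5)
    if subweb is None:
        # Generation failed after all retries; skip this attempt
        continue
    node_id_lists.append(sorted(subweb.nodes()))

print('[\n' + ',\n'.join(map(str, sorted(node_id_lists))) + '\n]')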

Swapping in trophically equivalent species


In [10]:
import copy

def swap_species(full, sub):
    """
    Attempt to swap a species in a food sub-web with a trophically equivalent species.
    
    "Trophically equivalent" is defined here as having exactly the same predators and prey
    within the sub-web.
    
    Parameters
    ----------
    full : DiGraph
        The full food web from which to choose a trophically equivalent species.
    sub : DiGraph
        The sub-web.
        
    Returns
    -------
    DiGraph
        The new sub-web with species swapped, or None if no swap was possible.
    """
    
    orig_node = None
    new_node = None
    
    # Iterate through subweb nodes in random order, trying to find one that can be replaced
    for node in random.sample(sub.nodes(), len(sub)):
        orig_node = node
        pred = set(sub.successors(node))
        prey = set(sub.predecessors(node))
        orig_is_cannibal = orig_node in pred
        
        # Find a species in the full graph with the same predators and prey
        
        # Start with candidate species sharing any predators or prey
        new_node_candidates = set()
        for prey_node in prey:
            new_node_candidates |= set(full.successors(prey_node))
        for pred_node in pred:
            new_node_candidates |= set(full.predecessors(pred_node))
        new_node_candidates -= {node}
            
        for candidate in new_node_candidates:
            
            # Original and new node must either be both cannibals or both non-cannibals
            candidate_is_cannibal = full.has_edge(candidate, candidate)
            if candidate_is_cannibal != orig_is_cannibal:
                continue
            
            # If the candidate shares the same predators and prey in the subweb,
            # we've found the species to swap in
            pred_of_candidate = set(full.successors(candidate)) & set(sub.nodes())
            prey_of_candidate = set(full.predecessors(candidate)) & set(sub.nodes())
            if (pred_of_candidate == pred and prey_of_candidate == prey):
                new_node = candidate
                break
                
        if new_node is not None:
            break
    
    if new_node is None:
        print("Could not find a trophically equivalent species.")
        return None
    
    print("Substituted {} for {}".format(new_node, orig_node))
    
    new_sub = full.subgraph(set(sub.nodes()) - {orig_node} | {new_node})
    return new_sub

seed_nodes = get_basal_species(serengeti)
if 1 in seed_nodes:
    seed_nodes.remove(1)
    
subweb = predator_complete_subweb(serengeti, 5, seed_nodes=seed_nodes, seed_size=1, retry=3)

draw_food_web(subweb)
print("Original: " + str(sorted(subweb.nodes())))

# Do a few swaps
desired_swaps = 5
attempts_allowed = 10
successful_swaps = 0
subweb2 = subweb
for i in range(attempts_allowed):
    new_subweb = swap_species(serengeti, subweb2)
    if new_subweb is not None:
        subweb2 = new_subweb
        successful_swaps += 1
        if successful_swaps == desired_swaps:
            break
    
if subweb2 is not None:
    print("New:      " + str(sorted(subweb2.nodes())))
    draw_food_web(subweb2)


Original: [8, 49, 55, 88, 1002]
Substituted 15 for 8
Substituted 8 for 15
Substituted 15 for 8
Substituted 8 for 15
Substituted 15 for 8
New:      [15, 49, 55, 88, 1002]
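
To confirm that a swap really preserved trophic equivalence, the predators and prey of the two species can be compared within the sub-web. trophically_equivalent below is a hypothetical helper (it ignores any links between the two species themselves):


In [ ]:
def trophically_equivalent(full, sub_nodes, a, b):
    """Do a and b have the same predators and prey among the other sub-web nodes?"""
    others = set(sub_nodes) - {a, b}
    return (set(full.successors(a)) & others == set(full.successors(b)) & others and
            set(full.predecessors(a)) & others == set(full.predecessors(b)) & others)

# 8 and 15 are the species swapped back and forth in the run above
print(trophically_equivalent(serengeti, subweb2.nodes(), 8, 15))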

Structural properties of food webs

Williams & Martinez 2000 define 12 properties of the network structure of food webs.
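
For orientation, here is a toy illustration (a hypothetical 3-species chain, not Serengeti data) of three of the quantities computed in the code below: connectance (L/S²) and the normalized generality and vulnerability (prey or predator count divided by L/S).


In [ ]:
toy = nx.DiGraph([(1, 2), (2, 3)])  # edges point from prey to predator
S = toy.number_of_nodes()           # S = 3 species
L = toy.number_of_edges()           # L = 2 links
print('connectance:', L / S**2)     # C = L / S^2 = 2/9
# Generality of a species = prey count / (L/S); vulnerability = predator count / (L/S)
print('generality:', [toy.in_degree(n) / (L / S) for n in sorted(toy.nodes())])
print('vulnerability:', [toy.out_degree(n) / (L / S) for n in sorted(toy.nodes())])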


In [11]:
def get_food_chains(graph):
    """
    Return all food chains in the graph, as a list of lists of nodes.
    A food chain is defined here as a simple path from a source node
    (a node with no predecessors) to another node.
    NOTE: The source node is not required to be a basal species.
    """
    food_chains = []
    for source in get_source_nodes(graph):
        food_chains += all_simple_paths_from(graph, source)
    return food_chains

def get_loops(graph):
    """
    Return a list of all simple cycles in the graph with more than one node.
    """
    return [cycle for cycle in nx.simple_cycles(graph) if len(cycle) > 1]

def food_web_properties(graph):
    
    species_count = graph.number_of_nodes()
    link_count = graph.size() # Yes, graph.size() is the number of edges
    
    consumers_without_prey = []
    
    top_predator_count = 0
    intermediate_species_count = 0
    basal_species_count = 0
    generality = [None] * species_count
    vulnerability = [None] * species_count
    max_similarity = [None] * species_count
    
    food_chains = get_food_chains(graph)                  
    #print("Food chains: {}\n".format(food_chains))
    food_chain_lengths = [len(chain) - 1 for chain in food_chains] # subtract 1 to count edges
    
    loops = get_loops(graph)
    species_in_loops = set()
    for loop in loops:
        species_in_loops |= set(loop)
    #print("Loops: {}\n".format(loops))
    #print("Species in loops: {}\n".format(species_in_loops))
    
    # Species with multiple prey and food chains of different lengths
    omnivores = []
    # Food chains indexed by ID of the species at the top
    food_chains_by_species = {}
    for chain in food_chains:
        node_id = chain[-1]
        if node_id not in food_chains_by_species:
            food_chains_by_species[node_id] = []
        food_chains_by_species[chain[-1]].append(chain)
    #print("Food chains by species: {}".format(food_chains_by_species))
    
    for i, (node_id, species_attributes) in enumerate(graph.nodes(data=True)):
        predators = graph.successors(node_id)
        prey = graph.predecessors(node_id)
        trophic_level = float(species_attributes['trophic_level'])
        
        if trophic_level > 1 and (len(prey) == 0 or
                                  # Cannibal with no other prey in the subweb:
                                  (len(prey) == 1 and prey[0] == node_id)):
            consumers_without_prey.append(node_id)
        
        # Identify top predators by their position in this sub-web
        if (len(predators) == 0 or
                # Allow for cannibalism in top predators
                (len(predators) == 1 and predators[0] == node_id)):
            top_predator_count += 1
            
        # Identify basal species by trophic level in the full Serengeti food web
        elif trophic_level == 1:
            basal_species_count += 1
            
        # Count all other species as intermediate
        else:
            intermediate_species_count += 1
            
        generality[i] = len(prey) / (link_count / species_count)
        vulnerability[i] = len(predators) / (link_count / species_count)
        
        # Trophic similarity of a pair of species (s[i,j]) is the number of
        # predators and prey shared in common divided by the pair's total
        # number of predators and prey. (Williams & Martinez 2000)
        # FIXME: This could probably be much more efficient with matrices
        trophic_similarity = [None] * species_count
        for j, j_node_id in enumerate(graph.nodes()):
            if j == i:
                trophic_similarity[j] = 0
                continue
            j_predators = graph.successors(j_node_id)
            j_prey = graph.predecessors(j_node_id)
            shared_predators = set(predators) & set(j_predators)
            shared_prey = set(prey) & set(j_prey)
            total_predators = set(predators + j_predators)
            total_prey = set(prey + j_prey)
            
            # TODO: Is this interpretation correct?
            shared_count = len(shared_predators) + len(shared_prey)
            total_count = len(total_predators)  + len(total_prey)
            if total_count == 0:
                trophic_similarity[j] = 0.0
            else:
                trophic_similarity[j] = shared_count / total_count
   
        max_similarity[i] = max(trophic_similarity)
    
        # Omnivory
        if len(prey) > 1:
            # Subtract 1 to count edges
            chain_lengths = [len(chain) - 1 for chain in food_chains_by_species.get(node_id, [])]
            if len(set(chain_lengths)) > 1:
                omnivores.append(node_id)
    
    #print("Omnivores: {}".format(omnivores))
    
    #print("Consumers without prey: {}".format(consumers_without_prey))
            
    props = OrderedDict()
    props['species_count'] = species_count
    props['link_count'] = link_count
    props['connectance'] = link_count / species_count ** 2
    props['frac_top_pred'] = top_predator_count / species_count
    props['frac_intermed'] = intermediate_species_count / species_count
    props['frac_basal'] = basal_species_count / species_count
    props['GenSD'] = stdev(generality)
    props['VulSD'] = stdev(vulnerability)
    props['MxSim'] = sum(max_similarity) / species_count
    props['ChnLg'] = mean(food_chain_lengths)
    props['ChnSD'] = stdev(food_chain_lengths)
    props['ChnNo'] = log10(len(food_chains))
    props['Cannib'] = graph.number_of_selfloops() / species_count
    props['Loop'] = len(species_in_loops) / species_count
    props['Omniv'] = len(omnivores) / species_count
    #props['food_chains'] = sorted(food_chains, key=len, reverse=True)
    
    return props

def print_food_web_properties(properties):
    print("Properties\n------------------")
    for k, v in properties.items():
        if isinstance(v, float) or isinstance(v, int):
            print("{:15s} {:.4f}".format(k, v))
        elif k == 'food_chains':
            print("{:15s} Food chains:")
            if len(v) > 20:
                print("More than 20")
            else:
                for chain in v:
                    print("{:15s} {}".format('', chain))

#basal_species = get_basal_species(serengeti)
#subweb = random_connected_subgraph(serengeti, 6, basal_species, retry=3)
#subweb = random_successor_subgraph(serengeti, 6, basal_species, retry=3)
#subweb = complete_food_chains(serengeti, random_connected_subgraph(serengeti, 6))
#draw_food_web(subweb)

#properties = food_web_properties(subweb)
#print_food_web_properties(properties)

for food_web_id in '15-17-55-80-1002 2-5-57-61-1005 26-40-49-73-1004 5-14-67-85-1005 53-74-82-86-1004'.split():
    node_ids = [int(i) for i in food_web_id.split('-')]
    subweb = serengeti.subgraph(node_ids)
    properties = food_web_properties(subweb)
    print_food_web_properties(properties)


Properties
------------------
species_count   5.0000
link_count      4.0000
connectance     0.1600
frac_top_pred   0.4000
frac_intermed   0.4000
frac_basal      0.2000
GenSD           0.5590
VulSD           1.0458
MxSim           0.1333
ChnLg           1.5000
ChnSD           0.5774
ChnNo           0.6021
Cannib          0.0000
Loop            0.0000
Omniv           0.0000
Properties
------------------
species_count   5.0000
link_count      4.0000
connectance     0.1600
frac_top_pred   0.6000
frac_intermed   0.2000
frac_basal      0.2000
GenSD           0.5590
VulSD           1.6298
MxSim           0.6000
ChnLg           1.7500
ChnSD           0.5000
ChnNo           0.6021
Cannib          0.0000
Loop            0.0000
Omniv           0.0000
Properties
------------------
species_count   5.0000
link_count      5.0000
connectance     0.2000
frac_top_pred   0.4000
frac_intermed   0.4000
frac_basal      0.2000
GenSD           0.7071
VulSD           1.2247
MxSim           0.5000
ChnLg           1.4000
ChnSD           0.5477
ChnNo           0.6990
Cannib          0.0000
Loop            0.0000
Omniv           0.0000
Properties
------------------
species_count   5.0000
link_count      6.0000
connectance     0.2400
frac_top_pred   0.6000
frac_intermed   0.2000
frac_basal      0.2000
GenSD           0.6972
VulSD           1.3693
MxSim           0.4467
ChnLg           1.4000
ChnSD           0.5477
ChnNo           0.6990
Cannib          0.2000
Loop            0.0000
Omniv           0.2000
Properties
------------------
species_count   5.0000
link_count      6.0000
connectance     0.2400
frac_top_pred   0.4000
frac_intermed   0.4000
frac_basal      0.2000
GenSD           0.9129
VulSD           0.6972
MxSim           0.3667
ChnLg           1.6000
ChnSD           0.5477
ChnNo           0.6990
Cannib          0.2000
Loop            0.0000
Omniv           0.0000

Properties of full Serengeti food web


In [12]:
#print("ORIGINAL SERENGETI FOOD WEB")
#print_food_web_properties(food_web_properties(serengeti_orig))
#print()
print("PRUNED SERENGETI FOOD WEB")
serengeti_properties = food_web_properties(serengeti)
print_food_web_properties(serengeti_properties)


PRUNED SERENGETI FOOD WEB
Properties
------------------
species_count   87.0000
link_count      538.0000
connectance     0.0711
frac_top_pred   0.2184
frac_intermed   0.7126
frac_basal      0.0690
GenSD           0.8989
VulSD           1.1828
MxSim           0.4502
ChnLg           6.5461
ChnSD           1.7752
ChnNo           4.8983
Cannib          0.1379
Loop            0.0460
Omniv           0.6437

Components


In [13]:
serengeti_u = serengeti.to_undirected()
components = list(nx.connected_component_subgraphs(serengeti_u))
print("Number of components: {}".format(len(components)))
print("Sizes: {}".format([g.number_of_nodes() for g in components]))

for component in components:
    plt.figure()
    draw_food_web(component)
    
# Originally, this revealed 11 isolated species, now removed in the first cell of code


Number of components: 1
Sizes: [87]
<matplotlib.figure.Figure at 0x10eb26f98>

Generate food sub-webs and compare their properties to the Serengeti food web

Generate and save the data


In [14]:
foodweb_count = 1001
df = pd.DataFrame(index=np.arange(foodweb_count), columns=['species_list'] + list(serengeti_properties.keys()))
df.loc[0] = serengeti_properties
for i in range(1, foodweb_count):
    desired_species_count = random.randint(3, 86)
    subweb = complete_food_chains(serengeti, random_connected_subgraph(serengeti, desired_species_count))
    data = food_web_properties(subweb)
    data['species_list'] = ' '.join([str(i) for i in sorted(subweb.nodes())])
    df.loc[i] = data
    
df.to_csv('food-webs-{}.csv'.format(foodweb_count))


---------------------------------------------------------------------------
StatisticsError                           Traceback (most recent call last)
<ipython-input-14-a787926643cd> in <module>()
      5     desired_species_count = random.randint(3, 86)
      6     subweb = complete_food_chains(serengeti, random_connected_subgraph(serengeti, desired_species_count))
----> 7     data = food_web_properties(subweb)
      8     data['species_list'] = ' '.join([str(i) for i in sorted(subweb.nodes())])
      9     df.loc[i] = data

<ipython-input-11-cc69cae11293> in food_web_properties(graph)
    127     props['VulSD'] = stdev(vulnerability)
    128     props['MxSim'] = sum(max_similarity) / species_count
--> 129     props['ChnLg'] = mean(food_chain_lengths)
    130     props['ChnSD'] = stdev(food_chain_lengths)
    131     props['ChnNo'] = log10(len(food_chains))

/Users/ben/miniconda3/envs/atn-data/lib/python3.5/statistics.py in mean(data)
    290     n = len(data)
    291     if n < 1:
--> 292         raise StatisticsError('mean requires at least one data point')
    293     return _sum(data)/n
    294 

StatisticsError: mean requires at least one data point
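
food_web_properties raises this StatisticsError when get_food_chains finds no food chains at all (e.g. when the sub-web has no source nodes), and random_connected_subgraph can also return None after exhausting its retries. A more defensive version of the generation loop might look like the sketch below, which simply discards failed attempts:


In [ ]:
from statistics import StatisticsError

foodweb_count = 1001
df = pd.DataFrame(index=np.arange(foodweb_count),
                  columns=['species_list'] + list(serengeti_properties.keys()))
df.loc[0] = serengeti_properties
i = 1
while i < foodweb_count:
    desired_species_count = random.randint(3, 86)
    subweb = random_connected_subgraph(serengeti, desired_species_count)
    if subweb is None:
        continue  # Could not build a connected sub-web; try again
    subweb = complete_food_chains(serengeti, subweb)
    try:
        data = food_web_properties(subweb)
    except StatisticsError:
        continue  # No food chains (or too few for the chain statistics); try again
    data['species_list'] = ' '.join(str(n) for n in sorted(subweb.nodes()))
    df.loc[i] = data
    i += 1

df.to_csv('food-webs-{}.csv'.format(foodweb_count))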

Plot properties


In [ ]:
properties_to_plot = [
    'link_count',
    'connectance',
    'frac_top_pred',
    'frac_intermed',
    'frac_basal',
    'GenSD',
    'VulSD',
    'MxSim',
    'ChnLg',
    'ChnSD',
    'ChnNo',
    'Cannib',
    'Loop',
    'Omniv',
]

fig, axes = plt.subplots(nrows=math.ceil(len(properties_to_plot)/2), ncols=2)

for i, prop in enumerate(properties_to_plot):
    df.plot.scatter('species_count', prop, ax=axes[i // 2, i % 2], figsize=(14,30))

Pick some


In [ ]:
df7 = df.loc[df['species_count'] == 7]
#df7 = df.query('species_count == 7 and frac_intermed > 2 * frac_top_pred')
display(df.iloc[0])
df7 = df7.assign(ratio_top_intermed = df7['frac_top_pred'] / df7['frac_intermed'])
df7 = df7.loc[df7['ratio_top_intermed'] <= 0.5]
display(df7)
df7.describe()

Calculating trophic levels


In [ ]:
def calc_trophic_level(node, graph, visited=set()):
    species = graph.node[node]
    if 'calc_trophic_level' in species:
        return species['calc_trophic_level']  # Return memoized value
    
    # Track the nodes visited along this path to avoid getting stuck in trophic loops
    visited = visited | {node}
    
    prey = set(graph.predecessors(node)) - visited
    if len(prey) == 0:
        level = 1  # basal species
    else:
        level = 1 + mean([calc_trophic_level(n, graph, visited) for n in prey])
    species['calc_trophic_level'] = level  # Memoize
    return level

def get_top_predators(graph):
    top = []
    for node, out_degree in graph.out_degree_iter():
        if out_degree == 0 or (
                # Top predator with cannibalism
                out_degree == 1 and graph.successors(node)[0] == node):
            top.append(node)
    return top

for node in get_top_predators(serengeti):
    calc_trophic_level(node, serengeti)

species_table['calc_trophic_level'] = None
for node, species in serengeti.nodes_iter(data=True):
    species_table.set_value(node, 'calc_trophic_level', species['calc_trophic_level'])
species_table['calc_trophic_level_err'] = species_table['calc_trophic_level'] - species_table['trophic_level']
species_table['calc_trophic_level_err2'] = species_table['calc_trophic_level_err'] ** 2
rmse = math.sqrt(species_table['calc_trophic_level_err2'].mean())
print("Root mean squared error: {}".format(rmse))
print("Bias (mean error): {}".format(species_table['calc_trophic_level_err'].mean()))
species_table.plot.scatter('trophic_level', 'calc_trophic_level_err')
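
As a toy check of the prey-averaging formula (a hypothetical 3-species web, not Serengeti data): a plant, a herbivore, and an omnivore that eats both should come out at trophic levels 1, 2, and 1 + mean(1, 2) = 2.5.


In [ ]:
toy = nx.DiGraph([(1, 2), (1, 3), (2, 3)])  # edges point from prey to predator
print([calc_trophic_level(n, toy) for n in [1, 2, 3]])  # [1, 2, 2.5]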

In [ ]:
species_table.loc[:, ('name', 'trophic_level', 'calc_trophic_level', 'calc_trophic_level_err2')].to_csv('calc_trophic_level.csv')

Topological sort

This puts prey before predators. I hoped it would fix bad ATNEngine behavior, but it doesn't.


In [ ]:
subgraph_nodes = map(int, '64 16 26 69 87 1001 42 1003 45 31'.split())
g = serengeti.subgraph(subgraph_nodes)
g.remove_edges_from(g.selfloop_edges())
sorted_nodes = nx.topological_sort(g)
print(' '.join(map(str, sorted_nodes)))
print("\nTrophic levels:")
for node in sorted_nodes:
    print(serengeti.node[node]['trophic_level'])
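
A quick verification of the prey-before-predators property: every edge of the (self-loop-free) subgraph should go from an earlier node to a later node in the topological order.


In [ ]:
position = {node: i for i, node in enumerate(sorted_nodes)}
assert all(position[u] < position[v] for u, v in g.edges())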

Replacing a species in an existing sub-web


In [ ]:
# Convergence ecosystem 0
orig_nodes = [1005, 14, 31, 42, 2]
#orig_subweb = serengeti.subgraph(orig_nodes)
#draw_food_web(orig_subweb)

orig_nodes.remove(2)
new_subweb = random_successor_subgraph(serengeti, 5, orig_nodes, 4)
print(new_subweb.nodes())
draw_food_web(new_subweb)

Scratch


In [ ]:
for col in species_table.columns:
    print(type(col))

In [ ]: