In [1]:
%matplotlib inline
import os
import random
from collections import OrderedDict
from statistics import stdev, mean
from math import log10
import math
import numpy as np
import pandas as pd
import networkx as nx
from IPython.display import display
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from atntools import foodwebs
from atntools.foodwebs import draw_food_web, get_source_nodes, get_basal_species, get_plant_eaters, predator_complete_subweb
#species_table = foodwebs.read_species_table()
#def show_species_table(node_ids):
# global species_table
# df = species_table
# display(df.loc[node_ids].sort_values('trophic_level', 0, False))
serengeti = foodwebs.read_serengeti()
print("{} nodes, {} edges".format(serengeti.number_of_nodes(), serengeti.number_of_edges()))
#species_table['trophic_level'].describe()
# Draw a food web
subweb = serengeti.subgraph([31, 44, 45, 47, 49, 50, 66, 92, 7, 5])
draw_food_web(subweb, show_names=False, show_legend=True)
#plt.savefig('food-web-10.png', dpi=600, transparent=True)
In [2]:
def random_subgraph(graph, N):
nodes = random.sample(graph.nodes(), N)
return graph.subgraph(nodes)
subweb = random_subgraph(serengeti, 5)
draw_food_web(subweb, show_legend=True)
Generate a random connected subgraph of G by choosing a node at random, and iteratively adding neighbors to the subgraph until N nodes have been added. This produces some potentially viable food webs, but doesn't guarantee adequate representation of all trophic levels (crucially, basal species aren't always included).
In [3]:
def random_connected_subgraph(G, N, seed_nodes=None, retry=1):
"""seed_nodes: list of nodes from which to randomly choose a starting node"""
if seed_nodes is None:
seed_nodes = G.nodes()
nodes = set([random.choice(seed_nodes)])
for i in range(N-1):
# Find the neighbor nodes of the subgraph nodes
neighbors = set()
for node in nodes:
neighbors = neighbors | set(G.predecessors(node) + G.successors(node))
neighbors = neighbors - nodes # Exclude nodes already in the subgraph
if len(neighbors) == 0:
print("random_connected_subgraph: no candidate neighbors found. ", end='')
if (retry > 0):
print("Retrying.")
return random_connected_subgraph(G, N, seed_nodes, retry - 1)
else:
print("Giving up.")
return None
# Add a random neighbor to the subgraph
nodes.add(random.choice(list(neighbors)))
return G.subgraph(list(nodes))
subweb = random_connected_subgraph(serengeti, 5)
draw_food_web(subweb, show_legend=True)
#show_species_table([int(node_id) for node_id in subweb.nodes()])
In [11]:
def random_successor_subgraph(G, N, seed_nodes=None, seed_size=1, retry=1):
"""seed_nodes: list of nodes from which to randomly choose seed_size starting nodes"""
if seed_nodes is None:
seed_nodes = G.nodes()
nodes = set(random.sample(seed_nodes, seed_size))
for i in range(N - seed_size):
# Find the neighbor nodes of the subgraph nodes
neighbors = set()
for node in nodes:
neighbors = neighbors | set(G.successors(node))
neighbors = neighbors - nodes # Exclude nodes already in the subgraph
if len(neighbors) == 0:
print("random_successor_subgraph: no candidate neighbors found. ", end='')
if (retry > 0):
print("Retrying.")
return random_successor_subgraph(G, N, seed_nodes, seed_size=seed_size,
retry=retry-1)
else:
print("Giving up.")
return None
# Add a random neighbor to the subgraph
nodes.add(random.choice(list(neighbors)))
subgraph = G.subgraph(list(nodes))
num_components = nx.number_connected_components(subgraph.to_undirected())
if num_components > 1:
print("random_successor_subgraph: {} connected components. ".format(num_components),
end='')
if (retry > 0):
print("Retrying.")
return random_successor_subgraph(G, N, seed_nodes, seed_size=seed_size,
retry=retry-1)
else:
print("Giving up.")
return None
return subgraph
# For the seed nodes, use the basal species excluding Decaying Material (89)
seed_nodes = get_basal_species(serengeti)
if 89 in seed_nodes:
seed_nodes.remove(89)
subweb = random_successor_subgraph(serengeti, 10, seed_nodes=seed_nodes, seed_size=3, retry=3)
draw_food_web(subweb)
print(str(subweb.nodes()))
In [8]:
def all_simple_paths_from(graph, source, visited=set()):
paths = []
for successor in graph.successors(source):
if successor not in visited and successor != source:
paths.append([source, successor])
for successor_path in all_simple_paths_from(graph, successor, visited | set([source])):
paths.append([source] + successor_path)
return paths
# Visual testing
graph = nx.fast_gnp_random_graph(5, 0.25, directed=True)
nx.draw_networkx(graph)
print(all_simple_paths_from(graph, 0))
In [9]:
graph = nx.fast_gnp_random_graph(5, 0.25, directed=True)
# Add a self-loop to node 0. We want to exclude self-loops from food chain loops.
graph.add_edge(0, 0)
# It ends up being counted as a simple cycle. Can simply exclude cycles with length 1.
nx.draw_networkx(graph)
print(list(nx.simple_cycles(graph)))
In [7]:
g = nx.DiGraph()
g.add_node(3, {'something': 'hello', 'else': 'world'})
g.add_edge(1,2)
g.add_edge(2,3)
g.add_edge(3,4)
g.add_edge(4,1)
g.add_edge(3,1)
plt.figure()
nx.draw_networkx(g)
def add_node_with_edges(graph, subgraph, node):
"""Add a node from a directed graph to a subgraph of it, along with all edges from the original graph
that are adjacent to that node and another node existing in the subgraph.
"""
subgraph.add_node(node)
for edge in graph.out_edges_iter(node):
if subgraph.has_node(edge[1]):
subgraph.add_edge(*edge)
for edge in graph.in_edges_iter(node):
if subgraph.has_node(edge[0]):
subgraph.add_edge(*edge)
plt.figure()
h = g.subgraph([1, 3])
nx.draw_networkx(h)
plt.figure()
add_node_with_edges(g, h, 2)
#add_node_with_edges(g, h, 3)
#add_node_with_edges(g, h, 4)
nx.draw_networkx(h)
# Demonstrate how to copy the attributes of a node from one graph to another
h.node[3] = g.node[3]
print(h.node[3])
In [8]:
def complete_food_chains(full_graph, subgraph):
# Make a copy of the subgraph to modify and return
subgraph2 = subgraph.copy()
# Get the source nodes in the subgraph which are consumers (non-plants).
# The goal is to have none of these, at which point all food chains will be complete.
consumer_source_nodes = []
source_nodes = get_source_nodes(subgraph2)
for node in source_nodes:
if float(subgraph2.node[node]['trophic_level']) > 1:
consumer_source_nodes.append(node)
# Add predecessors to consumer source nodes until there are no consumer source nodes left.
while len(consumer_source_nodes) > 0:
# Choose a random consumer source node
consumer_node = random.choice(consumer_source_nodes)
consumer_source_nodes.remove(consumer_node)
# Add a random predecessor and associated edges
new_node = random.choice(full_graph.predecessors(consumer_node))
add_node_with_edges(full_graph, subgraph2, new_node)
subgraph2.node[new_node] = full_graph.node[new_node] # Copy node attributes
# If the new node is also a consumer source node, add it to the list
if float(subgraph2.node[new_node]['trophic_level']) > 1:
consumer_source_nodes.append(new_node)
return subgraph2
subweb = random_connected_subgraph(serengeti, 4)
complete_subweb = complete_food_chains(serengeti, subweb)
draw_food_web(subweb)
plt.figure()
draw_food_web(complete_subweb)
In [10]:
# For the seed nodes, use the basal species excluding Decaying Material (89)
seed_nodes = get_basal_species(serengeti)
if 1 in seed_nodes:
seed_nodes.remove(1)
subweb = predator_complete_subweb(serengeti, 10, seed_nodes=seed_nodes, seed_size=2, retry=3)
draw_food_web(subweb, False, True)
node_ids = sorted(subweb.nodes())
print("Node IDs: {}".format(node_ids))
print("Food web ID: {}".format('-'.join(map(str, node_ids))))
In [15]:
# Generate a bunch of random subwebs
node_id_lists = []
for i in range(25):
subweb = predator_complete_subweb(serengeti, 15, seed_nodes=seed_nodes, seed_size=3, retry=5)
node_ids = sorted(subweb.nodes())
node_id_lists.append(node_ids)
print('[\n' + ',\n'.join(map(str, sorted(node_id_lists))) + '\n]')
In [10]:
import copy
def swap_species(full, sub):
"""
Attempt to swap a species in a food sub-web with a trophically equivalent species.
"Trophically equivalent" is defined here as having exactly the same predators and prey
within the sub-web.
Parameters
----------
full : DiGraph
The full food web from which to choose a trophically equivalent species.
sub : DiGraph
The sub-web.
Returns
-------
DiGraph
The new sub-web with species swapped, or None if no swap was possible.
"""
orig_node = None
new_node = None
# Iterate through subweb nodes in random order, trying to find one that can be replaced
for node in random.sample(sub.nodes(), len(sub)):
orig_node = node
pred = set(sub.successors(node))
prey = set(sub.predecessors(node))
orig_is_cannibal = orig_node in pred
# Find a species in the full graph with the same predators and prey
# Start with candidate species sharing any predators or prey
new_node_candidates = set()
for prey_node in prey:
new_node_candidates |= set(full.successors(prey_node))
for pred_node in pred:
new_node_candidates |= set(full.predecessors(pred_node))
new_node_candidates -= {node}
for candidate in new_node_candidates:
# Original and new node must either be both cannibals or both non-cannibals
candidate_is_cannibal = full.has_edge(candidate, candidate)
if candidate_is_cannibal != orig_is_cannibal:
continue
# If the candidate shares the same predators and prey in the subweb,
# we've found the species to swap in
pred_of_candidate = set(full.successors(candidate)) & set(sub.nodes())
prey_of_candidate = set(full.predecessors(candidate)) & set(sub.nodes())
if (pred_of_candidate == pred and prey_of_candidate == prey):
new_node = candidate
break
if new_node is not None:
break
if new_node is None:
print("Could not find a trophically equivalent species.")
return None
print("Substituted {} for {}".format(new_node, orig_node))
new_sub = full.subgraph(set(sub.nodes()) - {orig_node} | {new_node})
return new_sub
seed_nodes = get_basal_species(serengeti)
if 1 in seed_nodes:
seed_nodes.remove(1)
subweb = predator_complete_subweb(serengeti, 5, seed_nodes=seed_nodes, seed_size=1, retry=3)
draw_food_web(subweb)
print("Original: " + str(sorted(subweb.nodes())))
# Do a few swaps
desired_swaps = 5
attempts_allowed = 10
successful_swaps = 0
subweb2 = subweb
for i in range(attempts_allowed):
new_subweb = swap_species(serengeti, subweb2)
if new_subweb is not None:
subweb2 = new_subweb
successful_swaps += 1
if successful_swaps == desired_swaps:
break
if subweb2 is not None:
print("New: " + str(sorted(subweb2.nodes())))
draw_food_web(subweb2)
In [11]:
def get_food_chains(graph):
"""
Return all food chains in the graph, as a list of lists of nodes.
A food chain is defined here as a simple path from a source node
(a node with no predecessors) to another node.
NOTE: The source node is not required to be a basal species.
"""
food_chains = []
for source in get_source_nodes(graph):
food_chains += all_simple_paths_from(graph, source)
return food_chains
def get_loops(graph):
"""
Return a list of all simple cycles in the graph with more than one node.
"""
return [cycle for cycle in nx.simple_cycles(graph) if len(cycle) > 1]
def food_web_properties(graph):
species_count = graph.number_of_nodes()
link_count = graph.size() # Yes, graph.size() is the number of edges
consumers_without_prey = []
top_predator_count = 0
intermediate_species_count = 0
basal_species_count = 0
generality = [None] * species_count
vulnerability = [None] * species_count
max_similarity = [None] * species_count
food_chains = get_food_chains(graph)
#print("Food chains: {}\n".format(food_chains))
food_chain_lengths = [len(chain) - 1 for chain in food_chains] # subtract 1 to count edges
loops = get_loops(graph)
species_in_loops = set()
for loop in loops:
species_in_loops |= set(loop)
#print("Loops: {}\n".format(loops))
#print("Species in loops: {}\n".format(species_in_loops))
# Species with multiple prey and food chains of different lengths
omnivores = []
# Food chains indexed by ID of the species at the top
food_chains_by_species = {}
for chain in food_chains:
node_id = chain[-1]
if node_id not in food_chains_by_species:
food_chains_by_species[node_id] = []
food_chains_by_species[chain[-1]].append(chain)
#print("Food chains by species: {}".format(food_chains_by_species))
for i, (node_id, species_attributes) in enumerate(graph.nodes(data=True)):
predators = graph.successors(node_id)
prey = graph.predecessors(node_id)
trophic_level = float(species_attributes['trophic_level'])
if trophic_level > 1 and (len(prey) == 0 or
# Cannibal with no other prey in the subweb:
(len(prey) == 1 and prey[0] == node_id)):
consumers_without_prey.append(node_id)
# Identify top predators by their position in this sub-web
if (len(predators) == 0 or
# Allow for cannibalism in top predators
(len(predators) == 1 and predators[0] == node_id)):
top_predator_count += 1
# Identify basal species by trophic level in the full Serengeti food web
elif trophic_level == 1:
basal_species_count += 1
# Count all other species as intermediate
else:
intermediate_species_count += 1
generality[i] = len(prey) / (link_count / species_count)
vulnerability[i] = len(predators) / (link_count / species_count)
# Trophic similarity of a pair of species (s[i,j]) is the number of
# predators and prey shared in common divided by the pair's total
# number of predators and prey. (Williams & Martinez 2000)
# FIXME: This could probably be much more efficient with matrices
trophic_similarity = [None] * species_count
for j, j_node_id in enumerate(graph.nodes()):
if j == i:
trophic_similarity[j] = 0
continue
j_predators = graph.successors(j_node_id)
j_prey = graph.predecessors(j_node_id)
shared_predators = set(predators) & set(j_predators)
shared_prey = set(prey) & set(j_prey)
total_predators = set(predators + j_predators)
total_prey = set(prey + j_prey)
# TODO: Is this interpretation correct?
shared_count = len(shared_predators) + len(shared_prey)
total_count = len(total_predators) + len(total_prey)
if total_count == 0:
trophic_similarity[j] = 0.0
else:
trophic_similarity[j] = shared_count / total_count
max_similarity[i] = max(trophic_similarity)
# Omnivory
if len(prey) > 1:
# subtract 1 to count edges
chain_lengths = [len(chain) - 1 for chain in food_chains_by_species.get(node_id, [])]
if len(set(chain_lengths)) > 1:
omnivores.append(node_id)
#print("Omnivores: {}".format(omnivores))
#print("Consumers without prey: {}".format(consumers_without_prey))
props = OrderedDict()
props['species_count'] = species_count
props['link_count'] = link_count
props['connectance'] = link_count / species_count ** 2
props['frac_top_pred'] = top_predator_count / species_count
props['frac_intermed'] = intermediate_species_count / species_count
props['frac_basal'] = basal_species_count / species_count
props['GenSD'] = stdev(generality)
props['VulSD'] = stdev(vulnerability)
props['MxSim'] = sum(max_similarity) / species_count
props['ChnLg'] = mean(food_chain_lengths)
props['ChnSD'] = stdev(food_chain_lengths)
props['ChnNo'] = log10(len(food_chains))
props['Cannib'] = graph.number_of_selfloops() / species_count
props['Loop'] = len(species_in_loops) / species_count
props['Omniv'] = len(omnivores) / species_count
#props['food_chains'] = sorted(food_chains, key=len, reverse=True)
return props
def print_food_web_properties(properties):
print("Properties\n------------------")
for k, v in properties.items():
if isinstance(v, float) or isinstance(v, int):
print("{:15s} {:.4f}".format(k, v))
elif k == 'food_chains':
print("{:15s} Food chains:")
if len(v) > 20:
print("More than 20")
else:
for chain in v:
print("{:15s} {}".format('', chain))
#basal_species = get_basal_species(serengeti)
#subweb = random_connected_subgraph(serengeti, 6, basal_species, retry=3)
#subweb = random_successor_subgraph(serengeti, 6, basal_species, retry=3)
#subweb = complete_food_chains(serengeti, random_connected_subgraph(serengeti, 6))
#draw_food_web(subweb)
#properties = food_web_properties(subweb)
#print_food_web_properties(properties)
for food_web_id in '15-17-55-80-1002 2-5-57-61-1005 26-40-49-73-1004 5-14-67-85-1005 53-74-82-86-1004'.split():
node_ids = [int(i) for i in food_web_id.split('-')]
subweb = serengeti.subgraph(node_ids)
properties = food_web_properties(subweb)
print_food_web_properties(properties)
In [12]:
#print("ORIGINAL SERENGETI FOOD WEB")
#print_food_web_properties(food_web_properties(serengeti_orig))
#print()
print("PRUNED SERENGETI FOOD WEB")
serengeti_properties = food_web_properties(serengeti)
print_food_web_properties(serengeti_properties)
In [13]:
serengeti_u = serengeti.to_undirected()
components = list(nx.connected_component_subgraphs(serengeti_u))
print("Number of components: {}".format(len(components)))
print("Sizes: {}".format([g.number_of_nodes() for g in components]))
for component in components:
plt.figure()
draw_food_web(component)
# Originally, this revealed 11 isolated species, now removed in the first cell of code
In [14]:
foodweb_count = 1001
df = pd.DataFrame(index=np.arange(foodweb_count), columns=['species_list'] + list(serengeti_properties.keys()))
df.loc[0] = serengeti_properties
for i in range(1, foodweb_count):
desired_species_count = random.randint(3, 86)
subweb = complete_food_chains(serengeti, random_connected_subgraph(serengeti, desired_species_count))
data = food_web_properties(subweb)
data['species_list'] = ' '.join([str(i) for i in sorted(subweb.nodes())])
df.loc[i] = data
df.to_csv('food-webs-{}.csv'.format(foodweb_count))
In [ ]:
properties_to_plot = [
'link_count',
'connectance',
'frac_top_pred',
'frac_intermed',
'frac_basal',
'GenSD',
'VulSD',
'MxSim',
'ChnLg',
'ChnSD',
'ChnNo',
'Cannib',
'Loop',
'Omniv',
]
fig, axes = plt.subplots(nrows=math.ceil(len(properties_to_plot)/2), ncols=2)
for i, prop in enumerate(properties_to_plot):
df.plot.scatter('species_count', prop, ax=axes[i // 2, i % 2], figsize=(14,30))
Pick some
In [ ]:
df7 = df.loc[df['species_count'] == 7]
#df7 = df.query('species_count == 7 and frac_intermed > 2 * frac_top_pred')
display(df.iloc[0])
df7 = df7.assign(ratio_top_intermed = df7['frac_top_pred'] / df7['frac_intermed'])
df7 = df7.loc[df7['ratio_top_intermed'] <= 0.5]
display(df7)
df7.describe()
In [ ]:
def calc_trophic_level(node, graph, visited=set()):
species = graph.node[node]
if 'calc_trophic_level' in species:
return species['calc_trophic_level'] # Return memoized value
# Track the nodes visited along this path to avoid getting stuck in trophic loops
visited = visited | {node}
prey = set(graph.predecessors(node)) - visited
if len(prey) == 0:
level = 1 # basal species
else:
level = 1 + mean([calc_trophic_level(n, graph, visited) for n in prey])
species['calc_trophic_level'] = level # Memoize
return level
def get_top_predators(graph):
top = []
for node, out_degree in graph.out_degree_iter():
if out_degree == 0 or (
# Top predator with cannibalism
out_degree == 1 and graph.successors(node)[0] == node):
top.append(node)
return top
for node in get_top_predators(serengeti):
calc_trophic_level(node, serengeti)
species_table['calc_trophic_level'] = None
for node, species in serengeti.nodes_iter(data=True):
species_table.set_value(node, 'calc_trophic_level', species['calc_trophic_level'])
species_table['calc_trophic_level_err'] = species_table['calc_trophic_level'] - species_table['trophic_level']
species_table['calc_trophic_level_err2'] = species_table['calc_trophic_level_err'] ** 2
rmse = math.sqrt(species_table['calc_trophic_level_err2'].mean())
print("Root mean squared error: {}".format(rmse))
print("Bias (mean error): {}".format(species_table['calc_trophic_level_err'].mean()))
species_table.plot.scatter('trophic_level', 'calc_trophic_level_err')
In [ ]:
species_table.loc[:, ('name', 'trophic_level', 'calc_trophic_level', 'calc_trophic_level_err2')].to_csv('calc_trophic_level.csv')
In [ ]:
subgraph_nodes = map(int, '64 16 26 69 87 1001 42 1003 45 31'.split())
g = serengeti.subgraph(subgraph_nodes)
g.remove_edges_from(g.selfloop_edges())
sorted_nodes = nx.topological_sort(g)
print(' '.join(map(str, sorted_nodes)))
print("\nTrophic levels:")
for node in sorted_nodes:
print(serengeti.node[node]['trophic_level'])
In [ ]:
# Convergence ecosystem 0
orig_nodes = [1005, 14, 31, 42, 2]
#orig_subweb = serengeti.subgraph(orig_nodes)
#draw_food_web(orig_subweb)
orig_nodes.remove(2)
new_subweb = random_successor_subgraph(serengeti, 5, orig_nodes, 4)
print(new_subweb.nodes())
draw_food_web(new_subweb)
In [ ]:
for col in species_table.columns:
print(type(col))
In [ ]: