In [373]:
%matplotlib inline
import os.path
import random
import re
import json
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, f1_score
from sklearn.model_selection import cross_val_score
from atntools import util
In [374]:
initial_uniform_set = 0
first_multi_region_set = 13
second_multi_region_set = 14
set_number = first_multi_region_set
training_batch = 0
training_batch_2 = 2
training_batch_3 = 3
test_batch = 1
set_dir = util.find_set_dir(set_number)
food_web_id = re.match(r'.*?((\d+-)+\d+).*', set_dir).group(1)
print(set_dir)
print(food_web_id)
In [393]:
def get_batch_summary(set_dir, batch_num):
    summary_file = os.path.join(util.find_batch_dir(set_dir, batch_num), 'summary.csv')
    return pd.read_csv(summary_file)
training_df = get_batch_summary(set_dir, training_batch)
# Add more training data
training_df = training_df.append(get_batch_summary(set_dir, training_batch_2))
#training_df = training_df.append(get_batch_summary(set_dir, training_batch_3))
training_df.head()
Out[393]:
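In [ ]:
# Side note, not part of the original analysis: DataFrame.append was removed in pandas 2.0,
# so on newer pandas the training batches above would be combined with pd.concat instead.
# A minimal equivalent sketch, left commented out so the cell above remains the one that runs:
#training_df = pd.concat(
#    [get_batch_summary(set_dir, b) for b in (training_batch, training_batch_2)],
#    ignore_index=True)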
In [394]:
# Look at distribution of stop_event
training_df['stop_event'].value_counts()
Out[394]:
In [395]:
# Remove inconclusive simulations
training_df_filtered = training_df[
    (training_df['stop_event'] != 'NONE') &
    (training_df['stop_event'] != 'UNKNOWN_EVENT')].copy()
training_df_filtered['stop_event'].value_counts()
Out[395]:
In [396]:
# Look at distribution of extinction_count among simulations with conclusive stop_events
ec = training_df_filtered['extinction_count']
ec.value_counts(normalize=True).sort_index()
Out[396]:
In [397]:
median_extinction_count = ec.median()
median_extinction_count
Out[397]:
In [398]:
# Assign balanced classes based on median extinction count
def class_label(x):
    return 1 if x < median_extinction_count else 0
training_df_filtered['class_label'] = ec.map(class_label)
pd.crosstab(training_df_filtered['extinction_count'], training_df_filtered['class_label'])
Out[398]:
In [399]:
training_df_filtered['class_label'].value_counts()
Out[399]:
In [400]:
# Wrap the filtering and labeling above into a reusable function
def label_dataset(df, median_extinction_count=None):
    df = df[
        (df['stop_event'] != 'NONE') &
        (df['stop_event'] != 'UNKNOWN_EVENT')].copy()
    if median_extinction_count is None:
        median_extinction_count = df['extinction_count'].median()
    df['class_label'] = df['extinction_count'].map(lambda x: 1 if x < median_extinction_count else 0)
    return df, median_extinction_count
training_df_filtered, median_extinction_count = label_dataset(training_df)
training_df_filtered['class_label'].value_counts()
Out[400]:
In [401]:
# Balance training set by undersampling
#df0 = training_df_filtered[training_df_filtered['class_label'] == 0].sample(n=145)
#df1 = training_df_filtered[training_df_filtered['class_label'] == 1]
#training_df_filtered = df0.append(df1)
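In [ ]:
# Hedged alternative to the hard-coded sample size above: undersample each class down to the size
# of the smaller class. This is only a sketch and is left commented out, like the cell above,
# since class_weight='balanced' in the classifier handles the imbalance instead.
#min_class_size = training_df_filtered['class_label'].value_counts().min()
#training_df_filtered = (training_df_filtered
#                        .groupby('class_label', group_keys=False)
#                        .apply(lambda g: g.sample(n=min_class_size)))
#training_df_filtered['class_label'].value_counts()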
In [402]:
# Prepare X_train
X_cols = []
for col in training_df.columns:
    if col[0] in ('K', 'X'):
        X_cols.append(col)
    elif col.startswith('initialBiomass'):
        X_cols.append(col)
X_train = training_df_filtered[X_cols]
X_train.head()
Out[402]:
In [403]:
# Prepare y_train
y_train = training_df_filtered['class_label']
y_train.head()
Out[403]:
In [407]:
# Create the model and do cross-validation on the training data
clf = DecisionTreeClassifier(
    min_samples_leaf=0.01,
    class_weight='balanced',
    #min_weight_fraction_leaf=0.01
)
cross_val_score(clf, X_train, y_train, cv=10)
Out[407]:
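In [ ]:
# Summarize the cross-validation scores, and as a rough baseline comparison cross-validate a
# RandomForestClassifier (imported above but otherwise unused). This is an added sketch:
# n_estimators=100 is an arbitrary choice, not a value from the original analysis.
scores = cross_val_score(clf, X_train, y_train, cv=10)
print("Decision tree CV accuracy: {:.3f} +/- {:.3f}".format(scores.mean(), scores.std()))
rf = RandomForestClassifier(n_estimators=100, min_samples_leaf=0.01, class_weight='balanced')
rf_scores = cross_val_score(rf, X_train, y_train, cv=10)
print("Random forest CV accuracy: {:.3f} +/- {:.3f}".format(rf_scores.mean(), rf_scores.std()))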
In [408]:
# Fit the model on all the training data
clf = clf.fit(X_train, y_train)
In [409]:
# Visualize the decision tree
from sklearn import tree
import pydotplus
from IPython.display import Image
def visualize_tree(classifier, feature_names):
    dot_data = tree.export_graphviz(
        classifier, out_file=None,
        feature_names=feature_names,
        class_names=['bad', 'good'],
        filled=True, rounded=True, node_ids=True
    )
    graph = pydotplus.graph_from_dot_data(dot_data)
    return Image(graph.create_png())
visualize_tree(clf, X_cols)
Out[409]:
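In [ ]:
# Optional extra (not in the original workflow): save the rendered tree to disk instead of
# only displaying it inline. pydotplus graphs support write_png; the filename is arbitrary.
#dot_data = tree.export_graphviz(clf, out_file=None, feature_names=X_cols,
#                                class_names=['bad', 'good'], filled=True, rounded=True, node_ids=True)
#pydotplus.graph_from_dot_data(dot_data).write_png('decision_tree.png')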
In [410]:
# Every classifier has a "score" method
clf.score(X_train, y_train)
Out[410]:
In [411]:
# Evaluation on training data
y_predict = clf.predict(X_train)
print("Feature importances:")
for importance, feature in sorted(zip(clf.feature_importances_, X_cols), reverse=True):
print("{:2.2f} {}".format(importance, feature))
print(classification_report(y_train, y_predict))
print(confusion_matrix(y_train, y_predict))
f1_score(y_train, y_predict, average=None)
Out[411]:
In [412]:
test_df = get_batch_summary(set_dir, test_batch)
test_df, _ = label_dataset(test_df, median_extinction_count)
test_df['class_label'].value_counts()
Out[412]:
In [413]:
X_test = test_df[X_cols]
y_test = test_df['class_label']
y_predict = clf.predict(X_test)
print(classification_report(y_test, y_predict))
print(confusion_matrix(y_test, y_predict))
f1_score(y_test, y_predict, average=None)
Out[413]:
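In [ ]:
# Added for a single-number comparison with the training accuracy above:
# mean accuracy on the held-out test batch.
clf.score(X_test, y_test)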
In [21]:
#help(clf.tree_)
In [22]:
from sklearn.tree._tree import TREE_LEAF
In [23]:
def get_leaves(tree):
""" Return the node IDs of the leaves in the given sklearn Tree """
return [i for i in range(tree.node_count) if tree.children_left[i] == TREE_LEAF]
#get_leaves(clf.tree_)
In [24]:
#for leaf in get_leaves(clf.tree_):
# print("leaf = {}, value = {}, shape = {}".format(leaf, clf.tree_.value[leaf], clf.tree_.value[leaf].shape))
In [25]:
def dfs(tree, node_id=0):
    print(node_id)
    if tree.children_left[node_id] == TREE_LEAF:
        return
    dfs(tree, tree.children_left[node_id])
    dfs(tree, tree.children_right[node_id])
#dfs(clf.tree_)
In [26]:
from copy import deepcopy
# NOT USED
def get_leaf_regions(tree, node_id=0, subtree_region={}):
"""
Calculate the feature bounds of each leaf defined by the path from the root to the leaf.
Parameters
----------
tree : sklearn.tree._tree.Tree
The underlying Tree object of a trained classifier
node_id : int
The node ID of the root node of the subtree to traverse
subtree_region : dict
A dict describing the feature bounds to which this subtree is constrained.
Key: feature_id
Value: [lower_bound, upper_bound] (Either value may be None)
Returns
-------
dict
A dict of dicts describing the feature bounds to which each leaf of this subtree is constrained.
Key: node_id of leaf
Value: dict in the same format as subtree_region
"""
if tree.children_left[node_id] == TREE_LEAF:
return {node_id: deepcopy(subtree_region)}
feature_id = tree.feature[node_id]
feature_threshold = tree.threshold[node_id]
left_subtree_region = deepcopy(subtree_region)
if feature_id not in left_subtree_region:
left_subtree_region[feature_id] = [None, feature_threshold]
elif left_subtree_region[feature_id][1] is None:
left_subtree_region[feature_id][1] = feature_threshold
else:
left_subtree_region[feature_id][1] = min(left_subtree_region[feature_id][1], feature_threshold)
right_subtree_region = deepcopy(subtree_region)
if feature_id not in right_subtree_region:
right_subtree_region[feature_id] = [feature_threshold, None]
elif right_subtree_region[feature_id][0] is None:
right_subtree_region[feature_id][0] = feature_threshold
else:
right_subtree_region[feature_id][0] = max(left_subtree_region[feature_id][0], feature_threshold)
leaf_regions = get_leaf_regions(tree, tree.children_left[node_id], left_subtree_region)
leaf_regions.update(get_leaf_regions(tree, tree.children_right[node_id], right_subtree_region))
return leaf_regions
#leaf_regions = get_leaf_regions(clf.tree_)
#leaf_regions
In [27]:
def get_good_leaves(tree):
    return [
        i for i in range(tree.node_count)
        if tree.children_left[i] == TREE_LEAF
        and tree.value[i, 0, 1] > tree.value[i, 0, 0]
    ]
get_good_leaves(clf.tree_)
Out[27]:
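In [ ]:
# Context for the comparison in get_good_leaves (added note): tree_.value[i, 0, :] holds the
# per-class sample counts at node i, weighted here because the classifier was fit with
# class_weight='balanced'. A leaf counts as 'good' when class 1 outweighs class 0.
for leaf in get_good_leaves(clf.tree_):
    print("leaf {}: class weights {}".format(leaf, clf.tree_.value[leaf, 0, :]))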
In [28]:
def parse_feature_name(feature_name):
""" Convert a feature name such as "X8" into a node-id, param-name pair such as (8, 'X') """
match = re.match(r'^([a-zA-Z]+)(\d+)$', feature_name)
if match is None:
raise RuntimeError("Invalid feature name {}".format(feature_name))
param = match.group(1)
node_id = int(match.group(2))
return node_id, param
print(parse_feature_name('X8'))
print(parse_feature_name('initialBiomass14'))
In [29]:
def get_metaparameters_for_set(set_identifier):
""" Parse the metaparameters.json file for the given set (directory or set number)
and return the parsed dict.
"""
if isinstance(set_identifier, int):
set_dir = util.find_set_dir(set_identifier)
else:
set_dir = set_identifier
with open(os.path.join(set_dir, 'metaparameters.json'), 'r') as f:
metaparameters = json.load(f)
return metaparameters
metaparameters = get_metaparameters_for_set(set_dir)
metaparameters
Out[29]:
In [31]:
def get_root_bounds_from_uniform_metaparameters(feature_names, metaparameters):
    root_bounds = np.zeros((len(feature_names), 2))
    param_ranges = metaparameters['args']['param_ranges']
    for feature_id, feature_name in enumerate(feature_names):
        if feature_name.startswith('X'):
            bounds = param_ranges['X']
        elif feature_name.startswith('K'):
            bounds = param_ranges['K']
        elif feature_name.startswith('initialBiomass'):
            bounds = param_ranges['initialBiomass']
        else:
            raise RuntimeError("Unexpected feature name '{}'".format(feature_name))
        root_bounds[feature_id, :] = bounds
    return root_bounds
#get_root_bounds_from_uniform_metaparameters(X_cols, metaparameters)
In [32]:
def get_root_bounds_from_multi_region_metaparameters(feature_names, metaparameters):
"""
Outermost bounds for each feature (node-id, param pair)
"""
root_bounds = np.empty((len(feature_names), 2))
root_bounds[:, 0] = np.inf # Smallest lower bound found so far for each feature
root_bounds[:, 1] = -np.inf # Largest upper bound found so far for each feature
for feature_id, feature_name in enumerate(feature_names):
node_id, param = parse_feature_name(feature_name)
for region in metaparameters['args']['regions']:
lower, upper = region['bounds'][str(node_id)][param]
if lower < root_bounds[feature_id, 0]:
root_bounds[feature_id, 0] = lower
if upper > root_bounds[feature_id, 1]:
root_bounds[feature_id, 1] = upper
return root_bounds
multi_region_metaparameters = get_metaparameters_for_set(first_multi_region_set)
multi_root_bounds = get_root_bounds_from_multi_region_metaparameters(X_cols, multi_region_metaparameters)
for feature_id, bounds in enumerate(multi_root_bounds):
print("{}: {}".format(X_cols[feature_id], bounds))
In [33]:
def get_root_bounds_from_metaparameters(feature_names, metaparameters):
    generator = metaparameters['generator']
    if generator == 'uniform':
        return get_root_bounds_from_uniform_metaparameters(feature_names, metaparameters)
    elif generator == 'multi-region':
        return get_root_bounds_from_multi_region_metaparameters(feature_names, metaparameters)
    else:
        raise RuntimeError("Unexpected generator '{}' in metaparameters".format(generator))
get_root_bounds_from_metaparameters(X_cols, metaparameters)
Out[33]:
In [34]:
def is_leaf(tree, node_id):
    return tree.children_left[node_id] == TREE_LEAF


def _calculate_node_bounds(tree, node_id, node_bounds):
    """
    Populate the node_bounds array rows for the children of node_id.

    Parameters
    ----------
    tree : sklearn.tree._tree.Tree
        The underlying Tree object of a trained classifier
    node_id : int
        The node whose children's bounds are being filled in
    node_bounds : numpy.ndarray, shape = (node_count, n_features, 2)
        Per-node feature bounds, updated in place; row node_id must already be populated
    """
    if is_leaf(tree, node_id):
        return
    feature_id = tree.feature[node_id]
    feature_threshold = tree.threshold[node_id]

    # Calculate left child node bounds
    left_child_id = tree.children_left[node_id]
    node_bounds[left_child_id, :, :] = node_bounds[node_id, :, :]
    node_bounds[left_child_id, feature_id, 1] = min(feature_threshold, node_bounds[node_id, feature_id, 1])

    # Calculate right child node bounds
    right_child_id = tree.children_right[node_id]
    node_bounds[right_child_id, :, :] = node_bounds[node_id, :, :]
    node_bounds[right_child_id, feature_id, 0] = max(feature_threshold, node_bounds[node_id, feature_id, 0])

    # Traverse subtrees
    _calculate_node_bounds(tree, left_child_id, node_bounds)
    _calculate_node_bounds(tree, right_child_id, node_bounds)


def get_node_bounds(tree, root_bounds):
    node_bounds = np.zeros((tree.node_count, tree.n_features, 2))
    node_bounds[0, :, :] = root_bounds
    _calculate_node_bounds(tree, 0, node_bounds)
    return node_bounds
root_bounds = get_root_bounds_from_metaparameters(X_cols, metaparameters)
node_bounds = get_node_bounds(clf.tree_, root_bounds)
#node_bounds
In [35]:
# Checking results of get_node_bounds
leaf_id = 14
for feature_id, feature_name in enumerate(X_cols):
print("{}: {}".format(feature_name, node_bounds[leaf_id, feature_id]))
# Results are correct
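In [ ]:
# Programmatic sanity check (an addition, not part of the original spot check above): every leaf's
# bounds should lie within the root bounds, and each feature's lower bound should not exceed its upper bound.
for leaf in get_leaves(clf.tree_):
    assert np.all(node_bounds[leaf, :, 0] >= root_bounds[:, 0])
    assert np.all(node_bounds[leaf, :, 1] <= root_bounds[:, 1])
    assert np.all(node_bounds[leaf, :, 0] <= node_bounds[leaf, :, 1])
print("All leaf bounds are consistent with the root bounds")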
In [36]:
# NOT USED
# Synthesize data based on bounds of 'good' leaves
def synthesize_from_regions(regions, n_samples):
"""
Parameters
----------
regions : array_like, shape = (n_regions, n_features, 2)
regions[region_id, feature_id, [lower_bound, upper_bound]]
Returns
-------
numpy.ndarray
shape = (n_samples, n_features)
"""
n_features = regions.shape[1]
data = np.empty((n_samples, n_features))
for i in range(n_samples):
# Choose a region at random
region_id = random.randrange(0, len(regions))
region = regions[region_id]
for feature_id in range(n_features):
data[i, feature_id] = random.uniform(region[feature_id, 0], region[feature_id, 1])
return data
#regions = node_bounds[get_good_leaves(clf.tree_)]
#synthesized_data = synthesize_from_regions(regions, 10)
#synthesized_data
# This needs looking over
# Skip this: should be done by node config generator based on generated metaparameter file
In [37]:
# Map feature IDs to node_id, param pairs
def features_to_node_param_pairs(feature_names):
    pairs = []
    for feature_name in feature_names:
        pairs.append(parse_feature_name(feature_name))
    return pairs
features_to_node_param_pairs(X_cols)
Out[37]:
In [38]:
# Generate the list of weighted regions from the node_bounds
import json
good_leaf_bounds = node_bounds[get_good_leaves(clf.tree_)]
def make_region_list(feature_names, leaf_bounds):
    regions = []
    node_param_pairs = features_to_node_param_pairs(feature_names)
    node_ids = [p[0] for p in node_param_pairs]
    for bounds in leaf_bounds:
        region = {
            'weight': 1,
            'bounds': {i: {} for i in node_ids}
        }
        for feature_id, (lower, upper) in enumerate(bounds):
            node_id, param = node_param_pairs[feature_id]
            region['bounds'][node_id][param] = (lower, upper)
        regions.append(region)
    return regions
# regions = make_region_list(X_cols, good_leaf_bounds)
#print(good_leaf_bounds)
#print(json.dumps(regions, indent=4))
# Checked for a couple of leaves - correct
In [39]:
# Create the metaparameter dict
def make_multi_region_metaparameters(feature_names, leaf_bounds, count):
    return {
        'generator': 'multi-region',
        'args': {
            'count': count,
            'regions': make_region_list(feature_names, leaf_bounds)
        }
    }
#print(json.dumps(make_multi_region_metaparameters(X_cols, good_leaf_bounds, 1000), indent=4))
metaparameters = make_multi_region_metaparameters(X_cols, good_leaf_bounds, 1000)
In [ ]:
#util.create_set_dir(food_web_id, metaparameters)
# (13, '/Users/ben/SFSU/thesis/data/5-species/2-8-9-26-41/set-13')
# (14, '/Users/ben/SFSU/thesis/data/5-species/2-8-9-26-41/set-14')
In [ ]: