All of the code below has been moved into taxonomy.py.


In [1]:
from collections import defaultdict
from itertools import takewhile
from pprint import pprint

DATA_PATH = '../data/chart_tabs.txt'

In [2]:
def is_tab(ch):
    r"""Return True when ``ch`` equals the tab character, else False.

    A plain ``==`` comparison is used rather than the bound method
    ``'\t'.__eq__``: the bound method returns ``NotImplemented`` instead of
    ``False`` when its argument is not a ``str``, which makes it unsafe to
    use as a boolean predicate (for example with ``itertools.takewhile``).
    """
    return ch == '\t'

def tree():
    """Create an empty, self-nesting tree.

    The result is a ``defaultdict`` whose default factory is this same
    function, so looking up a missing key creates another empty tree under
    that key.
    """
    node = defaultdict(tree)
    return node
def add(t, path):
    """Walk ``path`` downward from ``t``, indexing one key at a time.

    Each key in ``path`` is looked up in the node reached so far.  When ``t``
    creates missing keys on lookup (as the mapping returned by ``tree()``
    does), the walk itself inserts any nodes that were not there yet.
    Nothing is returned.
    """
    cursor = t
    for key in path:
        cursor = cursor[key]
def dicts(t):
    """Return a copy of the nested mapping ``t`` made of plain ``dict`` objects.

    Every value is converted recursively with this same function, so an
    empty mapping becomes ``{}``.
    """
    plain = {}
    for key in t:
        plain[key] = dicts(t[key])
    return plain

def build_tree(lines):
    """Build a nested tree from tab-indented lines.

    Each line names one node.  The number of leading tab characters is the
    node's depth, and the node is placed under the most recent entry seen at
    the previous depth.  Only leading whitespace is removed from a name, so
    any trailing whitespace on the line stays part of it.

    Lines that are empty or contain only whitespace are skipped, so they
    neither create an empty-string node nor disturb the current path.

    Args:
        lines: iterable of strings, one entry per line.

    Returns:
        The mapping created by ``tree()``, populated with every entry.
    """
    path = []
    ret = tree()
    for line in lines:
        entry = line.lstrip()
        if not entry:
            # Blank line: there is no name to insert.
            continue
        # Depth counts leading tabs only; other leading whitespace is
        # dropped from the name but does not affect the depth.
        indent = len(line) - len(line.lstrip('\t'))
        # Cut the path back to this depth and put the new entry at its end.
        path[indent:] = [entry]
        add(ret, path)
    return ret

def build_trace(lines):
    """Collect entry-to-root paths from tab-indented lines.

    Each line names one entry, and the number of leading tab characters is
    its depth.  An entry is recorded when its first character is unchanged
    by ``lower()`` (i.e. it does not start with an upper-case letter).  Only
    leading whitespace is removed from a name, so any trailing whitespace on
    the line stays part of it.

    Lines that are empty or contain only whitespace are skipped rather than
    raising ``IndexError`` on the first-character check.

    Args:
        lines: iterable of strings, one entry per line.

    Returns:
        A list of tuples, in input order.  Each tuple starts with the
        recorded entry and continues through its ancestors up to the
        top-level entry.
    """
    path = []
    ret = []
    for line in lines:
        entry = line.lstrip()
        if not entry:
            # Blank line: there is no first character to inspect.
            continue
        # Depth counts leading tabs only; other leading whitespace is
        # dropped from the name but does not affect the depth.
        indent = len(line) - len(line.lstrip('\t'))
        # Cut the path back to this depth and put the new entry at its end.
        path[indent:] = [entry]
        if entry[0].lower() == entry[0]:
            # Stored reversed so the recorded entry comes first.
            ret.append(tuple(reversed(path)))
    return ret

In [52]:
# Read the chart file, removing trailing newline characters from each line.
with open(DATA_PATH, 'r') as f:
    lines = [raw.rstrip('\n') for raw in f]

# Derive both views of the hierarchy from the same lines.
t = build_tree(lines)
trace = build_trace(lines)

In [4]:
# Number of paths recorded by build_trace.
len(trace)


Out[4]:
121

In [36]:
# Convert the tree to plain dicts first so pprint renders it as nested dict
# literals.
pprint(dicts(t))


{'Omega': {'Artifact': {'artifacts': {}, 'artifacts_edge': {}},
           'Plankton': {'Chaetognath': {'chaetognath_non_sagitta': {},
                                        'chaetognath_other': {},
                                        'chaetognath_sagitta': {}},
                        'Crustacean': {'Copepod': {'Copepod_Calnoid': {'Copepod_Calanoid_Large': {'copepod_calanoid_large': {},
                                                                                                  'copepod_calanoid_large_side_antennatucked': {}},
                                                                       'copepod_calanoid': {},
                                                                       'copepod_calanoid_eggs': {},
                                                                       'copepod_calanoid_eucalanus': {},
                                                                       'copepod_calanoid_flatheads': {},
                                                                       'copepod_calanoid_frillyAntennae': {},
                                                                       'copepod_calanoid_octomoms': {},
                                                                       'copepod_calanoid_small_longantennae': {},
                                                                       'copepod_other': {}},
                                                   'Cyclopoid_Copepod': {'Cyclopoid_Copepod_Oithona': {'copepod_cyclopoid_oithona': {},
                                                                                                       'copepod_cyclopoid_oithona_eggs': {}},
                                                                         'copepod_cyclopoid_copilia': {}}},
                                       'Shrimp-like': {'Decapod': {'decapods': {},
                                                                   'shrimp_caridean': {},
                                                                   'shrimp_sergestidae': {},
                                                                   'shrimp_zoea': {}},
                                                       'Euphauslid': {'euphausiids': {},
                                                                      'euphausiids_young': {}},
                                                       'shrimp-like_other': {}},
                                       'amphipods': {},
                                       'crustacean_other': {},
                                       'stomatopod': {}},
                        'Detritus': {'detritus_blob': {},
                                     'detritus_filamentous': {},
                                     'detritus_other': {},
                                     'fecal_pellet': {}},
                        'Diatom': {'diatom_chain_string': {},
                                   'diatom_chain_tube': {}},
                        'Fish': {'fish_larvae_deep_body': {},
                                 'fish_larvae_leptocephali': {},
                                 'fish_larvae_medium_body': {},
                                 'fish_larvae_myctophids': {},
                                 'fish_larvae_thin_body': {},
                                 'fish_larvae_very_thin_body': {}},
                        'Gastropod': {'Pteropod': {'pteropod_butterfly': {},
                                                   'pteropod_theco_dev_seq': {},
                                                   'pteropod_triangle': {}},
                                      'heteropod': {}},
                        'Gelatinous_Zooplankton': {'Ctenophore': {'ctenophore_cestid': {},
                                                                  'ctenophore_cydippid_no_tentacles': {},
                                                                  'ctenophore_cydippid_tentacles': {},
                                                                  'ctenophore_lobate': {}},
                                                   'Hydromedusae': {'Hydromedusae_Narcomedusae': {'Hydromedusae_Haliscera  ': {'hydromedusae_haliscera': {},
                                                                                                                               'hydromedusae_haliscera_small_sideview': {}},
                                                                                                  'Hydromedusae_Solmaris': {'hydromedusae_narco_young': {},
                                                                                                                            'hydromedusae_solmaris': {}},
                                                                                                  'Other_Hydromedusae': {'Hydromedusae_Shape_A_Sideview': {'hydromedusae_shapeA': {},
                                                                                                                                                           'hydromedusae_shapeA_sideview_small': {},
                                                                                                                                                           'hydromedusae_sideview_big': {}},
                                                                                                                         'Hydromedusae_Type_D_Bell_And_Tentacles': {'hydromedusae_bell_and_tentacles': {},
                                                                                                                                                                    'hydromedusae_typeD': {},
                                                                                                                                                                    'hydromedusae_typeD_bell_and_tentacles': {}},
                                                                                                                         'hydromedusae_h15': {},
                                                                                                                         'hydromedusae_other': {},
                                                                                                                         'hydromedusae_partial_dark': {},
                                                                                                                         'hydromedusae_shapeB': {},
                                                                                                                         'hydromedusae_typeE': {},
                                                                                                                         'hydromedusae_typeF': {}},
                                                                                                  'hydromedusae_aglaura': {},
                                                                                                  'hydromedusae_liriope': {},
                                                                                                  'hydromedusae_narco_dark': {},
                                                                                                  'hydromedusae_narcomedusae': {},
                                                                                                  'hydromedusae_solmundella': {}}},
                                                   'Pelagic_Tunicate': {'Appendicularian': {'appendicularian_fritillaridae': {},
                                                                                            'appendicularian_s_shape': {},
                                                                                            'appendicularian_slight_curve': {},
                                                                                            'appendicularian_straight': {}},
                                                                        'Tunicate': {'Tunicate_Doliolid': {'tunicate_doliolid': {},
                                                                                                           'tunicate_doliolid_nurse': {}},
                                                                                     'Tunicate_Salp': {'tunicate_salp': {},
                                                                                                       'tunicate_salp_chains': {}},
                                                                                     'tunicate_partial': {}}},
                                                   'Siphonophore': {'Calycophoran_Siphonophore': {'Calycophoran_Siphonophore_Rocketship': {'siphonophore_calycophoran_rocketship_adult': {},
                                                                                                                                           'siphonophore_calycophoran_rocketship_young': {}},
                                                                                                  'Calycophoran_Siphonophore_Sphaeronectes': {'siphonophore_calycophoran_sphaeronectes': {},
                                                                                                                                              'siphonophore_calycophoran_sphaeronectes_stem': {},
                                                                                                                                              'siphonophore_calycophoran_sphaeronectes_young': {}},
                                                                                                  'siphonophore_calycophoran_abylidae': {}},
                                                                    'Siphonophore_Physonect': {'siphonophore_physonect': {},
                                                                                               'siphonophore_physonect_young': {}},
                                                                    'siphonophore_other_parts': {},
                                                                    'siphonophore_partial': {}},
                                                   'ephyra': {},
                                                   'jellies_tentacles': {}},
                        'Other_Invert_Larvae': {'Echinoderm': {'Echinoderm_Larva_Seastar': {'echinoderm_larva_seastar_bipinnaria': {},
                                                                                            'echinoderm_larva_seastar_brachiolaria': {}},
                                                               'echinoderm_larva_pluteus_brittlestar': {},
                                                               'echinoderm_larva_pluteus_early': {},
                                                               'echinoderm_larva_pluteus_typeC': {},
                                                               'echinoderm_larva_pluteus_urchin': {},
                                                               'echinoderm_seacucumber_auricularia_larva': {},
                                                               'echinopluteus': {}},
                                                'invertebrate_larvae_other_A': {},
                                                'invertebrate_larvae_other_B': {},
                                                'tornaria_acorn_worm_larvae': {},
                                                'trochophore_larvae': {}},
                        'Protist': {'Acantharia_Protist': {'acantharia_protist': {},
                                                           'acantharia_protist_big_center': {},
                                                           'acantharia_protist_halo': {}},
                                    'Protist_Other': {'protist_dark_center': {},
                                                      'protist_fuzzy_olive': {},
                                                      'protist_other': {},
                                                      'protist_star': {}},
                                    'Radiolarian_Colony': {'radiolarian_chain': {},
                                                           'radiolarian_colony': {}},
                                    'protist_noctiluca': {}},
                        'Trichodesmium': {'trichodesmium_bowtie': {},
                                          'trichodesmium_multiple': {},
                                          'trichodesmium_puff': {},
                                          'trichodesmium_tuft': {}},
                        'Unknown': {'unknown_blobs_and_smudges': {},
                                    'unknown_sticks': {},
                                    'unknown_unclassified': {}},
                        'chordate_type1': {},
                        'polychaete': {}}}}

In [14]:
# Pad every path at its front so that all paths are as long as the longest
# one: the first element is repeated until the path has max_len elements.
max_len = max(map(len, trace))
trace_ext = []
for path in trace:
    missing = max_len - len(path)
    if missing > 0:
        path = (path[0],) * missing + path
    trace_ext.append(path)

In [23]:
# For each position in the padded paths, report how many distinct names
# occur at that position.
for n in range(max_len):
    # A single pre-formatted argument prints "n count" identically whether
    # print is the Python 2 statement or the Python 3 function.
    print('%d %d' % (n, len({p[n] for p in trace_ext})))

# Map the first element of each padded path to the remaining elements.
trace_d = {p[0]: list(p[1:]) for p in trace_ext}


0 121
1 117
2 101
3 72
4 48
5 15
6 2
7 1

In [38]:
from sklearn.preprocessing import LabelEncoder
# One LabelEncoder per position, fitted on the sorted distinct names found at
# that position of the padded paths.
depth_le = {}
for n in range(max_len):
    names_at_depth = sorted({p[n] for p in trace_ext})
    depth_le[n] = LabelEncoder().fit(names_at_depth)

In [51]:
# Inspect the encoder fitted for position 5 and show the labels it holds.
q = depth_le[5]
q.classes_


Out[51]:
array(['Chaetognath', 'Crustacean', 'Detritus', 'Diatom', 'Fish',
       'Gastropod', 'Gelatinous_Zooplankton', 'Other_Invert_Larvae',
       'Protist', 'Trichodesmium', 'Unknown', 'artifacts',
       'artifacts_edge', 'chordate_type1', 'polychaete'], 
      dtype='|S22')

In [5]:
import taxonomy as tax
# Report the number of distinct names at positions 0-6 of the padded paths.
# NOTE(review): the bound is hard-coded to 7, while the earlier loop over
# range(max_len) printed positions 0-7; confirm whether max_len was intended.
for n in range(7):
    # A single pre-formatted argument prints "n count" identically whether
    # print is the Python 2 statement or the Python 3 function.
    print('%d %d' % (n, len({p[n] for p in trace_ext})))


Out[5]:
[('acantharia_protist',
  'acantharia_protist',
  'acantharia_protist',
  'acantharia_protist',
  'Acantharia_Protist',
  'Protist',
  'Plankton',
  'Omega'),
 ('acantharia_protist_big_center',
  'acantharia_protist_big_center',
  'acantharia_protist_big_center',
  'acantharia_protist_big_center',
  'Acantharia_Protist',
  'Protist',
  'Plankton',
  'Omega'),
 ('acantharia_protist_halo',
  'acantharia_protist_halo',
  'acantharia_protist_halo',
  'acantharia_protist_halo',
  'Acantharia_Protist',
  'Protist',
  'Plankton',
  'Omega'),
 ('protist_noctiluca',
  'protist_noctiluca',
  'protist_noctiluca',
  'protist_noctiluca',
  'protist_noctiluca',
  'Protist',
  'Plankton',
  'Omega'),
 ('protist_other',
  'protist_other',
  'protist_other',
  'protist_other',
  'Protist_Other',
  'Protist',
  'Plankton',
  'Omega'),
 ('protist_star',
  'protist_star',
  'protist_star',
  'protist_star',
  'Protist_Other',
  'Protist',
  'Plankton',
  'Omega'),
 ('protist_fuzzy_olive',
  'protist_fuzzy_olive',
  'protist_fuzzy_olive',
  'protist_fuzzy_olive',
  'Protist_Other',
  'Protist',
  'Plankton',
  'Omega'),
 ('protist_dark_center',
  'protist_dark_center',
  'protist_dark_center',
  'protist_dark_center',
  'Protist_Other',
  'Protist',
  'Plankton',
  'Omega'),
 ('radiolarian_colony',
  'radiolarian_colony',
  'radiolarian_colony',
  'radiolarian_colony',
  'Radiolarian_Colony',
  'Protist',
  'Plankton',
  'Omega'),
 ('radiolarian_chain',
  'radiolarian_chain',
  'radiolarian_chain',
  'radiolarian_chain',
  'Radiolarian_Colony',
  'Protist',
  'Plankton',
  'Omega'),
 ('trichodesmium_tuft',
  'trichodesmium_tuft',
  'trichodesmium_tuft',
  'trichodesmium_tuft',
  'trichodesmium_tuft',
  'Trichodesmium',
  'Plankton',
  'Omega'),
 ('trichodesmium_bowtie',
  'trichodesmium_bowtie',
  'trichodesmium_bowtie',
  'trichodesmium_bowtie',
  'trichodesmium_bowtie',
  'Trichodesmium',
  'Plankton',
  'Omega'),
 ('trichodesmium_puff',
  'trichodesmium_puff',
  'trichodesmium_puff',
  'trichodesmium_puff',
  'trichodesmium_puff',
  'Trichodesmium',
  'Plankton',
  'Omega'),
 ('trichodesmium_multiple',
  'trichodesmium_multiple',
  'trichodesmium_multiple',
  'trichodesmium_multiple',
  'trichodesmium_multiple',
  'Trichodesmium',
  'Plankton',
  'Omega'),
 ('diatom_chain_string',
  'diatom_chain_string',
  'diatom_chain_string',
  'diatom_chain_string',
  'diatom_chain_string',
  'Diatom',
  'Plankton',
  'Omega'),
 ('diatom_chain_tube',
  'diatom_chain_tube',
  'diatom_chain_tube',
  'diatom_chain_tube',
  'diatom_chain_tube',
  'Diatom',
  'Plankton',
  'Omega'),
 ('jellies_tentacles',
  'jellies_tentacles',
  'jellies_tentacles',
  'jellies_tentacles',
  'jellies_tentacles',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('appendicularian_fritillaridae',
  'appendicularian_fritillaridae',
  'appendicularian_fritillaridae',
  'Appendicularian',
  'Pelagic_Tunicate',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('appendicularian_s_shape',
  'appendicularian_s_shape',
  'appendicularian_s_shape',
  'Appendicularian',
  'Pelagic_Tunicate',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('appendicularian_slight_curve',
  'appendicularian_slight_curve',
  'appendicularian_slight_curve',
  'Appendicularian',
  'Pelagic_Tunicate',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('appendicularian_straight',
  'appendicularian_straight',
  'appendicularian_straight',
  'Appendicularian',
  'Pelagic_Tunicate',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('tunicate_doliolid',
  'tunicate_doliolid',
  'Tunicate_Doliolid',
  'Tunicate',
  'Pelagic_Tunicate',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('tunicate_doliolid_nurse',
  'tunicate_doliolid_nurse',
  'Tunicate_Doliolid',
  'Tunicate',
  'Pelagic_Tunicate',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('tunicate_salp',
  'tunicate_salp',
  'Tunicate_Salp',
  'Tunicate',
  'Pelagic_Tunicate',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('tunicate_salp_chains',
  'tunicate_salp_chains',
  'Tunicate_Salp',
  'Tunicate',
  'Pelagic_Tunicate',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('tunicate_partial',
  'tunicate_partial',
  'tunicate_partial',
  'Tunicate',
  'Pelagic_Tunicate',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('siphonophore_calycophoran_abylidae',
  'siphonophore_calycophoran_abylidae',
  'siphonophore_calycophoran_abylidae',
  'Calycophoran_Siphonophore',
  'Siphonophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('siphonophore_calycophoran_rocketship_adult',
  'siphonophore_calycophoran_rocketship_adult',
  'Calycophoran_Siphonophore_Rocketship',
  'Calycophoran_Siphonophore',
  'Siphonophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('siphonophore_calycophoran_rocketship_young',
  'siphonophore_calycophoran_rocketship_young',
  'Calycophoran_Siphonophore_Rocketship',
  'Calycophoran_Siphonophore',
  'Siphonophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('siphonophore_calycophoran_sphaeronectes',
  'siphonophore_calycophoran_sphaeronectes',
  'Calycophoran_Siphonophore_Sphaeronectes',
  'Calycophoran_Siphonophore',
  'Siphonophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('siphonophore_calycophoran_sphaeronectes_young',
  'siphonophore_calycophoran_sphaeronectes_young',
  'Calycophoran_Siphonophore_Sphaeronectes',
  'Calycophoran_Siphonophore',
  'Siphonophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('siphonophore_calycophoran_sphaeronectes_stem',
  'siphonophore_calycophoran_sphaeronectes_stem',
  'Calycophoran_Siphonophore_Sphaeronectes',
  'Calycophoran_Siphonophore',
  'Siphonophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('siphonophore_physonect',
  'siphonophore_physonect',
  'siphonophore_physonect',
  'Siphonophore_Physonect',
  'Siphonophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('siphonophore_physonect_young',
  'siphonophore_physonect_young',
  'siphonophore_physonect_young',
  'Siphonophore_Physonect',
  'Siphonophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('siphonophore_other_parts',
  'siphonophore_other_parts',
  'siphonophore_other_parts',
  'siphonophore_other_parts',
  'Siphonophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('siphonophore_partial',
  'siphonophore_partial',
  'siphonophore_partial',
  'siphonophore_partial',
  'Siphonophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('ephyra',
  'ephyra',
  'ephyra',
  'ephyra',
  'ephyra',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_narcomedusae',
  'hydromedusae_narcomedusae',
  'hydromedusae_narcomedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_narco_dark',
  'hydromedusae_narco_dark',
  'hydromedusae_narco_dark',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_solmaris',
  'hydromedusae_solmaris',
  'Hydromedusae_Solmaris',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_narco_young',
  'hydromedusae_narco_young',
  'Hydromedusae_Solmaris',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_solmundella',
  'hydromedusae_solmundella',
  'hydromedusae_solmundella',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_aglaura',
  'hydromedusae_aglaura',
  'hydromedusae_aglaura',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_liriope',
  'hydromedusae_liriope',
  'hydromedusae_liriope',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_haliscera',
  'hydromedusae_haliscera',
  'Hydromedusae_Haliscera  ',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_haliscera_small_sideview',
  'hydromedusae_haliscera_small_sideview',
  'Hydromedusae_Haliscera  ',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_bell_and_tentacles',
  'Hydromedusae_Type_D_Bell_And_Tentacles',
  'Other_Hydromedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_typeD_bell_and_tentacles',
  'Hydromedusae_Type_D_Bell_And_Tentacles',
  'Other_Hydromedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_typeD',
  'Hydromedusae_Type_D_Bell_And_Tentacles',
  'Other_Hydromedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_shapeA',
  'Hydromedusae_Shape_A_Sideview',
  'Other_Hydromedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_shapeA_sideview_small',
  'Hydromedusae_Shape_A_Sideview',
  'Other_Hydromedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_sideview_big',
  'Hydromedusae_Shape_A_Sideview',
  'Other_Hydromedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_typeE',
  'hydromedusae_typeE',
  'Other_Hydromedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_shapeB',
  'hydromedusae_shapeB',
  'Other_Hydromedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_typeF',
  'hydromedusae_typeF',
  'Other_Hydromedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_h15',
  'hydromedusae_h15',
  'Other_Hydromedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_other',
  'hydromedusae_other',
  'Other_Hydromedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('hydromedusae_partial_dark',
  'hydromedusae_partial_dark',
  'Other_Hydromedusae',
  'Hydromedusae_Narcomedusae',
  'Hydromedusae',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('ctenophore_cestid',
  'ctenophore_cestid',
  'ctenophore_cestid',
  'ctenophore_cestid',
  'Ctenophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('ctenophore_cydippid_tentacles',
  'ctenophore_cydippid_tentacles',
  'ctenophore_cydippid_tentacles',
  'ctenophore_cydippid_tentacles',
  'Ctenophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('ctenophore_cydippid_no_tentacles',
  'ctenophore_cydippid_no_tentacles',
  'ctenophore_cydippid_no_tentacles',
  'ctenophore_cydippid_no_tentacles',
  'Ctenophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('ctenophore_lobate',
  'ctenophore_lobate',
  'ctenophore_lobate',
  'ctenophore_lobate',
  'Ctenophore',
  'Gelatinous_Zooplankton',
  'Plankton',
  'Omega'),
 ('chordate_type1',
  'chordate_type1',
  'chordate_type1',
  'chordate_type1',
  'chordate_type1',
  'chordate_type1',
  'Plankton',
  'Omega'),
 ('fish_larvae_leptocephali',
  'fish_larvae_leptocephali',
  'fish_larvae_leptocephali',
  'fish_larvae_leptocephali',
  'fish_larvae_leptocephali',
  'Fish',
  'Plankton',
  'Omega'),
 ('fish_larvae_myctophids',
  'fish_larvae_myctophids',
  'fish_larvae_myctophids',
  'fish_larvae_myctophids',
  'fish_larvae_myctophids',
  'Fish',
  'Plankton',
  'Omega'),
 ('fish_larvae_very_thin_body',
  'fish_larvae_very_thin_body',
  'fish_larvae_very_thin_body',
  'fish_larvae_very_thin_body',
  'fish_larvae_very_thin_body',
  'Fish',
  'Plankton',
  'Omega'),
 ('fish_larvae_thin_body',
  'fish_larvae_thin_body',
  'fish_larvae_thin_body',
  'fish_larvae_thin_body',
  'fish_larvae_thin_body',
  'Fish',
  'Plankton',
  'Omega'),
 ('fish_larvae_medium_body',
  'fish_larvae_medium_body',
  'fish_larvae_medium_body',
  'fish_larvae_medium_body',
  'fish_larvae_medium_body',
  'Fish',
  'Plankton',
  'Omega'),
 ('fish_larvae_deep_body',
  'fish_larvae_deep_body',
  'fish_larvae_deep_body',
  'fish_larvae_deep_body',
  'fish_larvae_deep_body',
  'Fish',
  'Plankton',
  'Omega'),
 ('crustacean_other',
  'crustacean_other',
  'crustacean_other',
  'crustacean_other',
  'crustacean_other',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_cyclopoid_oithona',
  'copepod_cyclopoid_oithona',
  'Cyclopoid_Copepod_Oithona',
  'Cyclopoid_Copepod',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_cyclopoid_oithona_eggs',
  'copepod_cyclopoid_oithona_eggs',
  'Cyclopoid_Copepod_Oithona',
  'Cyclopoid_Copepod',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_cyclopoid_copilia',
  'copepod_cyclopoid_copilia',
  'copepod_cyclopoid_copilia',
  'Cyclopoid_Copepod',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_calanoid',
  'copepod_calanoid',
  'copepod_calanoid',
  'Copepod_Calnoid',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_calanoid_small_longantennae',
  'copepod_calanoid_small_longantennae',
  'copepod_calanoid_small_longantennae',
  'Copepod_Calnoid',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_calanoid_frillyAntennae',
  'copepod_calanoid_frillyAntennae',
  'copepod_calanoid_frillyAntennae',
  'Copepod_Calnoid',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_calanoid_flatheads',
  'copepod_calanoid_flatheads',
  'copepod_calanoid_flatheads',
  'Copepod_Calnoid',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_calanoid_eggs',
  'copepod_calanoid_eggs',
  'copepod_calanoid_eggs',
  'Copepod_Calnoid',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_calanoid_octomoms',
  'copepod_calanoid_octomoms',
  'copepod_calanoid_octomoms',
  'Copepod_Calnoid',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_calanoid_large',
  'copepod_calanoid_large',
  'Copepod_Calanoid_Large',
  'Copepod_Calnoid',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_calanoid_large_side_antennatucked',
  'copepod_calanoid_large_side_antennatucked',
  'Copepod_Calanoid_Large',
  'Copepod_Calnoid',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_calanoid_eucalanus',
  'copepod_calanoid_eucalanus',
  'copepod_calanoid_eucalanus',
  'Copepod_Calnoid',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('copepod_other',
  'copepod_other',
  'copepod_other',
  'Copepod_Calnoid',
  'Copepod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('stomatopod',
  'stomatopod',
  'stomatopod',
  'stomatopod',
  'stomatopod',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('amphipods',
  'amphipods',
  'amphipods',
  'amphipods',
  'amphipods',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('shrimp-like_other',
  'shrimp-like_other',
  'shrimp-like_other',
  'shrimp-like_other',
  'Shrimp-like',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('euphausiids',
  'euphausiids',
  'euphausiids',
  'Euphauslid',
  'Shrimp-like',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('euphausiids_young',
  'euphausiids_young',
  'euphausiids_young',
  'Euphauslid',
  'Shrimp-like',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('decapods',
  'decapods',
  'decapods',
  'Decapod',
  'Shrimp-like',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('shrimp_zoea',
  'shrimp_zoea',
  'shrimp_zoea',
  'Decapod',
  'Shrimp-like',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('shrimp_caridean',
  'shrimp_caridean',
  'shrimp_caridean',
  'Decapod',
  'Shrimp-like',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('shrimp_sergestidae',
  'shrimp_sergestidae',
  'shrimp_sergestidae',
  'Decapod',
  'Shrimp-like',
  'Crustacean',
  'Plankton',
  'Omega'),
 ('chaetognath_sagitta',
  'chaetognath_sagitta',
  'chaetognath_sagitta',
  'chaetognath_sagitta',
  'chaetognath_sagitta',
  'Chaetognath',
  'Plankton',
  'Omega'),
 ('chaetognath_non_sagitta',
  'chaetognath_non_sagitta',
  'chaetognath_non_sagitta',
  'chaetognath_non_sagitta',
  'chaetognath_non_sagitta',
  'Chaetognath',
  'Plankton',
  'Omega'),
 ('chaetognath_other',
  'chaetognath_other',
  'chaetognath_other',
  'chaetognath_other',
  'chaetognath_other',
  'Chaetognath',
  'Plankton',
  'Omega'),
 ('polychaete',
  'polychaete',
  'polychaete',
  'polychaete',
  'polychaete',
  'polychaete',
  'Plankton',
  'Omega'),
 ('heteropod',
  'heteropod',
  'heteropod',
  'heteropod',
  'heteropod',
  'Gastropod',
  'Plankton',
  'Omega'),
 ('pteropod_butterfly',
  'pteropod_butterfly',
  'pteropod_butterfly',
  'pteropod_butterfly',
  'Pteropod',
  'Gastropod',
  'Plankton',
  'Omega'),
 ('pteropod_triangle',
  'pteropod_triangle',
  'pteropod_triangle',
  'pteropod_triangle',
  'Pteropod',
  'Gastropod',
  'Plankton',
  'Omega'),
 ('pteropod_theco_dev_seq',
  'pteropod_theco_dev_seq',
  'pteropod_theco_dev_seq',
  'pteropod_theco_dev_seq',
  'Pteropod',
  'Gastropod',
  'Plankton',
  'Omega'),
 ('trochophore_larvae',
  'trochophore_larvae',
  'trochophore_larvae',
  'trochophore_larvae',
  'trochophore_larvae',
  'Other_Invert_Larvae',
  'Plankton',
  'Omega'),
 ('echinoderm_larva_seastar_bipinnaria',
  'echinoderm_larva_seastar_bipinnaria',
  'echinoderm_larva_seastar_bipinnaria',
  'Echinoderm_Larva_Seastar',
  'Echinoderm',
  'Other_Invert_Larvae',
  'Plankton',
  'Omega'),
 ('echinoderm_larva_seastar_brachiolaria',
  'echinoderm_larva_seastar_brachiolaria',
  'echinoderm_larva_seastar_brachiolaria',
  'Echinoderm_Larva_Seastar',
  'Echinoderm',
  'Other_Invert_Larvae',
  'Plankton',
  'Omega'),
 ('echinoderm_larva_pluteus_early',
  'echinoderm_larva_pluteus_early',
  'echinoderm_larva_pluteus_early',
  'echinoderm_larva_pluteus_early',
  'Echinoderm',
  'Other_Invert_Larvae',
  'Plankton',
  'Omega'),
 ('echinoderm_larva_pluteus_urchin',
  'echinoderm_larva_pluteus_urchin',
  'echinoderm_larva_pluteus_urchin',
  'echinoderm_larva_pluteus_urchin',
  'Echinoderm',
  'Other_Invert_Larvae',
  'Plankton',
  'Omega'),
 ('echinoderm_larva_pluteus_typeC',
  'echinoderm_larva_pluteus_typeC',
  'echinoderm_larva_pluteus_typeC',
  'echinoderm_larva_pluteus_typeC',
  'Echinoderm',
  'Other_Invert_Larvae',
  'Plankton',
  'Omega'),
 ('echinoderm_larva_pluteus_brittlestar',
  'echinoderm_larva_pluteus_brittlestar',
  'echinoderm_larva_pluteus_brittlestar',
  'echinoderm_larva_pluteus_brittlestar',
  'Echinoderm',
  'Other_Invert_Larvae',
  'Plankton',
  'Omega'),
 ('echinopluteus',
  'echinopluteus',
  'echinopluteus',
  'echinopluteus',
  'Echinoderm',
  'Other_Invert_Larvae',
  'Plankton',
  'Omega'),
 ('echinoderm_seacucumber_auricularia_larva',
  'echinoderm_seacucumber_auricularia_larva',
  'echinoderm_seacucumber_auricularia_larva',
  'echinoderm_seacucumber_auricularia_larva',
  'Echinoderm',
  'Other_Invert_Larvae',
  'Plankton',
  'Omega'),
 ('tornaria_acorn_worm_larvae',
  'tornaria_acorn_worm_larvae',
  'tornaria_acorn_worm_larvae',
  'tornaria_acorn_worm_larvae',
  'tornaria_acorn_worm_larvae',
  'Other_Invert_Larvae',
  'Plankton',
  'Omega'),
 ('invertebrate_larvae_other_A',
  'invertebrate_larvae_other_A',
  'invertebrate_larvae_other_A',
  'invertebrate_larvae_other_A',
  'invertebrate_larvae_other_A',
  'Other_Invert_Larvae',
  'Plankton',
  'Omega'),
 ('invertebrate_larvae_other_B',
  'invertebrate_larvae_other_B',
  'invertebrate_larvae_other_B',
  'invertebrate_larvae_other_B',
  'invertebrate_larvae_other_B',
  'Other_Invert_Larvae',
  'Plankton',
  'Omega'),
 ('fecal_pellet',
  'fecal_pellet',
  'fecal_pellet',
  'fecal_pellet',
  'fecal_pellet',
  'Detritus',
  'Plankton',
  'Omega'),
 ('detritus_blob',
  'detritus_blob',
  'detritus_blob',
  'detritus_blob',
  'detritus_blob',
  'Detritus',
  'Plankton',
  'Omega'),
 ('detritus_filamentous',
  'detritus_filamentous',
  'detritus_filamentous',
  'detritus_filamentous',
  'detritus_filamentous',
  'Detritus',
  'Plankton',
  'Omega'),
 ('detritus_other',
  'detritus_other',
  'detritus_other',
  'detritus_other',
  'detritus_other',
  'Detritus',
  'Plankton',
  'Omega'),
 ('unknown_blobs_and_smudges',
  'unknown_blobs_and_smudges',
  'unknown_blobs_and_smudges',
  'unknown_blobs_and_smudges',
  'unknown_blobs_and_smudges',
  'Unknown',
  'Plankton',
  'Omega'),
 ('unknown_sticks',
  'unknown_sticks',
  'unknown_sticks',
  'unknown_sticks',
  'unknown_sticks',
  'Unknown',
  'Plankton',
  'Omega'),
 ('unknown_unclassified',
  'unknown_unclassified',
  'unknown_unclassified',
  'unknown_unclassified',
  'unknown_unclassified',
  'Unknown',
  'Plankton',
  'Omega'),
 ('artifacts',
  'artifacts',
  'artifacts',
  'artifacts',
  'artifacts',
  'artifacts',
  'Artifact',
  'Omega'),
 ('artifacts_edge',
  'artifacts_edge',
  'artifacts_edge',
  'artifacts_edge',
  'artifacts_edge',
  'artifacts_edge',
  'Artifact',
  'Omega')]