All of the code below has since been moved into taxonomy.py.


In [1]:
from collections import defaultdict
from itertools import takewhile
from pprint import pprint

# Path of the text file that is opened and parsed into the taxonomy below.
DATA_PATH = '../data/chart_tabs.txt'

In [2]:
def is_tab(char):
    """Return True if ``char`` is a tab character, False otherwise.

    This is a real ``==`` comparison rather than the bound method
    ``'\\t'.__eq__``: the bound method returns ``NotImplemented`` (which is
    truthy) for operands the str type cannot compare against, so a
    ``takewhile(is_tab, ...)`` would wrongly treat such items as tabs.
    """
    return char == '\t'

def tree():
    """Create an auto-vivifying nested mapping.

    Looking up a missing key inserts and returns a fresh sub-tree of the
    same kind, so arbitrarily deep paths can be indexed without any setup.
    """
    return defaultdict(tree)
def add(t, path):
    """Walk ``path`` downwards from ``t``, indexing one key per step.

    When ``t`` is an auto-vivifying tree, every missing node along the path
    is created as a side effect of the lookups. Nothing is returned.
    """
    cursor = t
    for key in path:
        cursor = cursor[key]
def dicts(t):
    """Return a deep copy of the nested mapping ``t`` built from plain dicts."""
    plain = {}
    for key in t:
        # Recurse so every level is converted, not only the top one.
        plain[key] = dicts(t[key])
    return plain

def build_tree(lines):
    """Build a nested tree from a tab-indented outline.

    Each non-blank line names one node. The number of leading tab characters
    is its depth: the current path is truncated to that depth and the node is
    appended, so it becomes a child of the most recent shallower entry.

    Args:
        lines: iterable of strings, one outline entry per line.

    Returns:
        The nested mapping created by ``tree()``, keyed by node name at every
        level; leaves map to empty sub-trees.
    """
    path = []
    ret = tree()
    for line in lines:
        # Strip both ends so trailing blanks or a stray carriage return never
        # become part of a node name.
        entry = line.strip()
        if not entry:
            # A blank line names no node; skipping it avoids inserting a
            # bogus '' child into the tree.
            continue
        indent = len(line) - len(line.lstrip('\t'))
        # Cut the path back to this depth, then descend into the new entry.
        path[indent:] = [entry]
        add(ret, path)
    return ret

def build_trace(lines):
    """Collect the leaf-to-root path of every leaf in a tab-indented outline.

    Depth is the number of leading tab characters on a line. An entry counts
    as a leaf when its first character is unchanged by ``lower()`` (in the
    data shown, leaf names start lowercase and group names start uppercase).

    Args:
        lines: iterable of strings, one outline entry per line.

    Returns:
        A list of tuples, one per leaf in input order, each running from the
        leaf itself up to its top-level ancestor.
    """
    path = []
    ret = []
    for line in lines:
        # Strip both ends so trailing blanks or a stray carriage return never
        # become part of a node name.
        entry = line.strip()
        if not entry:
            # Blank lines would otherwise raise IndexError on entry[0].
            continue
        indent = len(line) - len(line.lstrip('\t'))
        # Cut the path back to this depth, then append the new entry.
        path[indent:] = [entry]
        if entry[0].lower() == entry[0]:
            ret.append(tuple(path[::-1]))
    return ret

In [52]:
# Read the outline, dropping only the newline so leading tabs stay intact.
with open(DATA_PATH, 'r') as f:
    lines = [raw_line.rstrip('\n') for raw_line in f]

# Nested tree of every node, and the leaf-to-root path of each leaf.
t = build_tree(lines)
trace = build_trace(lines)

In [4]:
# Number of paths collected by build_trace.
len(trace)


Out[4]:
121

In [36]:
# Convert the tree to plain dicts first so pprint shows a plain nested literal.
pprint(dicts(t))


{'Omega': {'Artifact': {'artifacts': {}, 'artifacts_edge': {}},
           'Plankton': {'Chaetognath': {'chaetognath_non_sagitta': {},
                                        'chaetognath_other': {},
                                        'chaetognath_sagitta': {}},
                        'Crustacean': {'Copepod': {'Copepod_Calnoid': {'Copepod_Calanoid_Large': {'copepod_calanoid_large': {},
                                                                                                  'copepod_calanoid_large_side_antennatucked': {}},
                                                                       'copepod_calanoid': {},
                                                                       'copepod_calanoid_eggs': {},
                                                                       'copepod_calanoid_eucalanus': {},
                                                                       'copepod_calanoid_flatheads': {},
                                                                       'copepod_calanoid_frillyAntennae': {},
                                                                       'copepod_calanoid_octomoms': {},
                                                                       'copepod_calanoid_small_longantennae': {},
                                                                       'copepod_other': {}},
                                                   'Cyclopoid_Copepod': {'Cyclopoid_Copepod_Oithona': {'copepod_cyclopoid_oithona': {},
                                                                                                       'copepod_cyclopoid_oithona_eggs': {}},
                                                                         'copepod_cyclopoid_copilia': {}}},
                                       'Shrimp-like': {'Decapod': {'decapods': {},
                                                                   'shrimp_caridean': {},
                                                                   'shrimp_sergestidae': {},
                                                                   'shrimp_zoea': {}},
                                                       'Euphauslid': {'euphausiids': {},
                                                                      'euphausiids_young': {}},
                                                       'shrimp-like_other': {}},
                                       'amphipods': {},
                                       'crustacean_other': {},
                                       'stomatopod': {}},
                        'Detritus': {'detritus_blob': {},
                                     'detritus_filamentous': {},
                                     'detritus_other': {},
                                     'fecal_pellet': {}},
                        'Diatom': {'diatom_chain_string': {},
                                   'diatom_chain_tube': {}},
                        'Fish': {'fish_larvae_deep_body': {},
                                 'fish_larvae_leptocephali': {},
                                 'fish_larvae_medium_body': {},
                                 'fish_larvae_myctophids': {},
                                 'fish_larvae_thin_body': {},
                                 'fish_larvae_very_thin_body': {}},
                        'Gastropod': {'Pteropod': {'pteropod_butterfly': {},
                                                   'pteropod_theco_dev_seq': {},
                                                   'pteropod_triangle': {}},
                                      'heteropod': {}},
                        'Gelatinous_Zooplankton': {'Ctenophore': {'ctenophore_cestid': {},
                                                                  'ctenophore_cydippid_no_tentacles': {},
                                                                  'ctenophore_cydippid_tentacles': {},
                                                                  'ctenophore_lobate': {}},
                                                   'Hydromedusae': {'Hydromedusae_Narcomedusae': {'Hydromedusae_Haliscera  ': {'hydromedusae_haliscera': {},
                                                                                                                               'hydromedusae_haliscera_small_sideview': {}},
                                                                                                  'Hydromedusae_Solmaris': {'hydromedusae_narco_young': {},
                                                                                                                            'hydromedusae_solmaris': {}},
                                                                                                  'Other_Hydromedusae': {'Hydromedusae_Shape_A_Sideview': {'hydromedusae_shapeA': {},
                                                                                                                                                           'hydromedusae_shapeA_sideview_small': {},
                                                                                                                                                           'hydromedusae_sideview_big': {}},
                                                                                                                         'Hydromedusae_Type_D_Bell_And_Tentacles': {'hydromedusae_bell_and_tentacles': {},
                                                                                                                                                                    'hydromedusae_typeD': {},
                                                                                                                                                                    'hydromedusae_typeD_bell_and_tentacles': {}},
                                                                                                                         'hydromedusae_h15': {},
                                                                                                                         'hydromedusae_other': {},
                                                                                                                         'hydromedusae_partial_dark': {},
                                                                                                                         'hydromedusae_shapeB': {},
                                                                                                                         'hydromedusae_typeE': {},
                                                                                                                         'hydromedusae_typeF': {}},
                                                                                                  'hydromedusae_aglaura': {},
                                                                                                  'hydromedusae_liriope': {},
                                                                                                  'hydromedusae_narco_dark': {},
                                                                                                  'hydromedusae_narcomedusae': {},
                                                                                                  'hydromedusae_solmundella': {}}},
                                                   'Pelagic_Tunicate': {'Appendicularian': {'appendicularian_fritillaridae': {},
                                                                                            'appendicularian_s_shape': {},
                                                                                            'appendicularian_slight_curve': {},
                                                                                            'appendicularian_straight': {}},
                                                                        'Tunicate': {'Tunicate_Doliolid': {'tunicate_doliolid': {},
                                                                                                           'tunicate_doliolid_nurse': {}},
                                                                                     'Tunicate_Salp': {'tunicate_salp': {},
                                                                                                       'tunicate_salp_chains': {}},
                                                                                     'tunicate_partial': {}}},
                                                   'Siphonophore': {'Calycophoran_Siphonophore': {'Calycophoran_Siphonophore_Rocketship': {'siphonophore_calycophoran_rocketship_adult': {},
                                                                                                                                           'siphonophore_calycophoran_rocketship_young': {}},
                                                                                                  'Calycophoran_Siphonophore_Sphaeronectes': {'siphonophore_calycophoran_sphaeronectes': {},
                                                                                                                                              'siphonophore_calycophoran_sphaeronectes_stem': {},
                                                                                                                                              'siphonophore_calycophoran_sphaeronectes_young': {}},
                                                                                                  'siphonophore_calycophoran_abylidae': {}},
                                                                    'Siphonophore_Physonect': {'siphonophore_physonect': {},
                                                                                               'siphonophore_physonect_young': {}},
                                                                    'siphonophore_other_parts': {},
                                                                    'siphonophore_partial': {}},
                                                   'ephyra': {},
                                                   'jellies_tentacles': {}},
                        'Other_Invert_Larvae': {'Echinoderm': {'Echinoderm_Larva_Seastar': {'echinoderm_larva_seastar_bipinnaria': {},
                                                                                            'echinoderm_larva_seastar_brachiolaria': {}},
                                                               'echinoderm_larva_pluteus_brittlestar': {},
                                                               'echinoderm_larva_pluteus_early': {},
                                                               'echinoderm_larva_pluteus_typeC': {},
                                                               'echinoderm_larva_pluteus_urchin': {},
                                                               'echinoderm_seacucumber_auricularia_larva': {},
                                                               'echinopluteus': {}},
                                                'invertebrate_larvae_other_A': {},
                                                'invertebrate_larvae_other_B': {},
                                                'tornaria_acorn_worm_larvae': {},
                                                'trochophore_larvae': {}},
                        'Protist': {'Acantharia_Protist': {'acantharia_protist': {},
                                                           'acantharia_protist_big_center': {},
                                                           'acantharia_protist_halo': {}},
                                    'Protist_Other': {'protist_dark_center': {},
                                                      'protist_fuzzy_olive': {},
                                                      'protist_other': {},
                                                      'protist_star': {}},
                                    'Radiolarian_Colony': {'radiolarian_chain': {},
                                                           'radiolarian_colony': {}},
                                    'protist_noctiluca': {}},
                        'Trichodesmium': {'trichodesmium_bowtie': {},
                                          'trichodesmium_multiple': {},
                                          'trichodesmium_puff': {},
                                          'trichodesmium_tuft': {}},
                        'Unknown': {'unknown_blobs_and_smudges': {},
                                    'unknown_sticks': {},
                                    'unknown_unclassified': {}},
                        'chordate_type1': {},
                        'polychaete': {}}}}

In [14]:
# Pad every leaf-to-root path to the length of the longest one by repeating
# its first element (the leaf) at the front, so all paths share one width.
max_len = max(len(p) for p in trace)
trace_ext = []
for leaf_path in trace:
    missing = max_len - len(leaf_path)
    if missing > 0:
        leaf_path = (leaf_path[0],) * missing + leaf_path
    trace_ext.append(leaf_path)

In [23]:
# For each column of the padded paths, report how many distinct labels it has.
for depth in range(max_len):
    distinct = set(path[depth] for path in trace_ext)
    print('%d %d' % (depth, len(distinct)))

# Map each leaf label (column 0) to the list of its remaining columns.
trace_d = dict((path[0], list(path[1:])) for path in trace_ext)


0 121
1 117
2 101
3 72
4 48
5 15
6 2
7 1

In [38]:
from sklearn.preprocessing import LabelEncoder
# One LabelEncoder per column of the padded paths, fitted on the sorted
# distinct labels found in that column.
depth_le = dict(
    (depth, LabelEncoder().fit(sorted(set(path[depth] for path in trace_ext))))
    for depth in range(max_len)
)

In [51]:
# Inspect the classes learned by the encoder fitted on column 5 of trace_ext.
q = depth_le[5]
q.classes_


Out[51]:
array(['Chaetognath', 'Crustacean', 'Detritus', 'Diatom', 'Fish',
       'Gastropod', 'Gelatinous_Zooplankton', 'Other_Invert_Larvae',
       'Protist', 'Trichodesmium', 'Unknown', 'artifacts',
       'artifacts_edge', 'chordate_type1', 'polychaete'], 
      dtype='|S22')

In [13]:
import taxonomy as tax
# Dump, for each of the 8 columns, the count and the set of distinct labels,
# followed by an empty line.
for depth in range(8):
    labels = set(path[depth] for path in tax.trace_ext)
    print('%d %d %s' % (depth, len(labels), labels))
    print('')


0 121 set(['echinoderm_larva_seastar_bipinnaria', 'unknown_sticks', 'tornaria_acorn_worm_larvae', 'echinoderm_seacucumber_auricularia_larva', 'ctenophore_lobate', 'pteropod_triangle', 'ctenophore_cestid', 'appendicularian_slight_curve', 'detritus_blob', 'chaetognath_non_sagitta', 'tunicate_doliolid', 'copepod_calanoid_octomoms', 'shrimp_caridean', 'hydromedusae_typeE', 'hydromedusae_typeD', 'hydromedusae_narco_young', 'siphonophore_calycophoran_rocketship_young', 'hydromedusae_haliscera_small_sideview', 'chaetognath_sagitta', 'hydromedusae_narco_dark', 'shrimp_zoea', 'ctenophore_cydippid_tentacles', 'hydromedusae_h15', 'acantharia_protist_halo', 'copepod_other', 'copepod_calanoid_large_side_antennatucked', 'copepod_calanoid_eggs', 'copepod_calanoid_eucalanus', 'copepod_calanoid_flatheads', 'chordate_type1', 'hydromedusae_solmundella', 'heteropod', 'hydromedusae_aglaura', 'radiolarian_colony', 'stomatopod', 'trichodesmium_multiple', 'copepod_cyclopoid_oithona', 'pteropod_butterfly', 'fish_larvae_leptocephali', 'tunicate_salp', 'protist_other', 'detritus_other', 'echinoderm_larva_pluteus_urchin', 'radiolarian_chain', 'unknown_blobs_and_smudges', 'crustacean_other', 'invertebrate_larvae_other_B', 'tunicate_salp_chains', 'fecal_pellet', 'siphonophore_other_parts', 'siphonophore_calycophoran_sphaeronectes_stem', 'trochophore_larvae', 'acantharia_protist', 'hydromedusae_shapeB', 'hydromedusae_shapeA', 'fish_larvae_thin_body', 'fish_larvae_very_thin_body', 'protist_dark_center', 'hydromedusae_other', 'copepod_calanoid_large', 'fish_larvae_myctophids', 'amphipods', 'siphonophore_calycophoran_sphaeronectes_young', 'siphonophore_calycophoran_sphaeronectes', 'hydromedusae_partial_dark', 'trichodesmium_tuft', 'pteropod_theco_dev_seq', 'hydromedusae_sideview_big', 'appendicularian_fritillaridae', 'hydromedusae_haliscera', 'appendicularian_straight', 'hydromedusae_narcomedusae', 'shrimp-like_other', 'siphonophore_partial', 'hydromedusae_solmaris', 'ephyra', 'artifacts', 
'trichodesmium_puff', 'protist_star', 'echinoderm_larva_seastar_brachiolaria', 'trichodesmium_bowtie', 'hydromedusae_typeF', 'echinoderm_larva_pluteus_early', 'copepod_calanoid_small_longantennae', 'unknown_unclassified', 'artifacts_edge', 'detritus_filamentous', 'siphonophore_physonect_young', 'copepod_calanoid', 'hydromedusae_shapeA_sideview_small', 'siphonophore_calycophoran_abylidae', 'fish_larvae_deep_body', 'echinopluteus', 'hydromedusae_liriope', 'siphonophore_calycophoran_rocketship_adult', 'siphonophore_physonect', 'appendicularian_s_shape', 'echinoderm_larva_pluteus_typeC', 'copepod_calanoid_frillyAntennae', 'invertebrate_larvae_other_A', 'hydromedusae_typeD_bell_and_tentacles', 'jellies_tentacles', 'decapods', 'protist_fuzzy_olive', 'copepod_cyclopoid_oithona_eggs', 'shrimp_sergestidae', 'hydromedusae_bell_and_tentacles', 'fish_larvae_medium_body', 'chaetognath_other', 'euphausiids_young', 'ctenophore_cydippid_no_tentacles', 'diatom_chain_string', 'copepod_cyclopoid_copilia', 'diatom_chain_tube', 'euphausiids', 'tunicate_partial', 'acantharia_protist_big_center', 'echinoderm_larva_pluteus_brittlestar', 'tunicate_doliolid_nurse', 'polychaete', 'protist_noctiluca'])

1 117 set(['echinoderm_larva_seastar_bipinnaria', 'unknown_sticks', 'tornaria_acorn_worm_larvae', 'echinoderm_seacucumber_auricularia_larva', 'shrimp_zoea', 'pteropod_triangle', 'ctenophore_cestid', 'appendicularian_slight_curve', 'detritus_blob', 'chaetognath_non_sagitta', 'tunicate_doliolid', 'copepod_calanoid_octomoms', 'hydromedusae_typeE', 'hydromedusae_narco_young', 'siphonophore_calycophoran_rocketship_young', 'hydromedusae_haliscera_small_sideview', 'chaetognath_sagitta', 'hydromedusae_narco_dark', 'ctenophore_lobate', 'ctenophore_cydippid_tentacles', 'hydromedusae_h15', 'acantharia_protist_halo', 'copepod_other', 'copepod_calanoid_large_side_antennatucked', 'copepod_calanoid_eggs', 'copepod_calanoid_eucalanus', 'copepod_calanoid_flatheads', 'chordate_type1', 'hydromedusae_solmundella', 'heteropod', 'hydromedusae_aglaura', 'radiolarian_colony', 'stomatopod', 'trichodesmium_multiple', 'copepod_cyclopoid_oithona', 'pteropod_butterfly', 'fish_larvae_leptocephali', 'tunicate_salp', 'protist_other', 'detritus_other', 'echinoderm_larva_pluteus_urchin', 'radiolarian_chain', 'unknown_blobs_and_smudges', 'crustacean_other', 'invertebrate_larvae_other_B', 'tunicate_salp_chains', 'fecal_pellet', 'siphonophore_other_parts', 'siphonophore_calycophoran_sphaeronectes_stem', 'trochophore_larvae', 'acantharia_protist', 'hydromedusae_shapeB', 'fish_larvae_thin_body', 'fish_larvae_very_thin_body', 'protist_dark_center', 'hydromedusae_other', 'copepod_calanoid_large', 'fish_larvae_myctophids', 'amphipods', 'siphonophore_calycophoran_sphaeronectes_young', 'siphonophore_calycophoran_sphaeronectes', 'hydromedusae_partial_dark', 'trichodesmium_tuft', 'pteropod_theco_dev_seq', 'appendicularian_fritillaridae', 'hydromedusae_haliscera', 'appendicularian_straight', 'hydromedusae_narcomedusae', 'shrimp-like_other', 'siphonophore_partial', 'hydromedusae_solmaris', 'ephyra', 'artifacts', 'trichodesmium_puff', 'protist_star', 'echinoderm_larva_seastar_brachiolaria', 
'trichodesmium_bowtie', 'hydromedusae_typeF', 'echinoderm_larva_pluteus_early', 'copepod_calanoid_small_longantennae', 'unknown_unclassified', 'Hydromedusae_Shape_A_Sideview', 'artifacts_edge', 'detritus_filamentous', 'siphonophore_physonect_young', 'copepod_calanoid', 'shrimp_caridean', 'siphonophore_calycophoran_abylidae', 'fish_larvae_deep_body', 'echinopluteus', 'hydromedusae_liriope', 'siphonophore_calycophoran_rocketship_adult', 'siphonophore_physonect', 'appendicularian_s_shape', 'Hydromedusae_Type_D_Bell_And_Tentacles', 'echinoderm_larva_pluteus_typeC', 'copepod_calanoid_frillyAntennae', 'invertebrate_larvae_other_A', 'jellies_tentacles', 'decapods', 'protist_fuzzy_olive', 'copepod_cyclopoid_oithona_eggs', 'shrimp_sergestidae', 'fish_larvae_medium_body', 'chaetognath_other', 'euphausiids_young', 'ctenophore_cydippid_no_tentacles', 'diatom_chain_string', 'copepod_cyclopoid_copilia', 'diatom_chain_tube', 'euphausiids', 'tunicate_partial', 'acantharia_protist_big_center', 'echinoderm_larva_pluteus_brittlestar', 'tunicate_doliolid_nurse', 'polychaete', 'protist_noctiluca'])

2 101 set(['echinoderm_larva_seastar_bipinnaria', 'unknown_sticks', 'tornaria_acorn_worm_larvae', 'echinoderm_seacucumber_auricularia_larva', 'ctenophore_lobate', 'pteropod_triangle', 'ctenophore_cestid', 'appendicularian_slight_curve', 'invertebrate_larvae_other_A', 'chaetognath_non_sagitta', 'copepod_calanoid_octomoms', 'chaetognath_sagitta', 'hydromedusae_narco_dark', 'shrimp_zoea', 'Cyclopoid_Copepod_Oithona', 'ctenophore_cydippid_tentacles', 'acantharia_protist_halo', 'copepod_other', 'siphonophore_physonect_young', 'copepod_calanoid_eggs', 'copepod_calanoid_eucalanus', 'radiolarian_colony', 'chordate_type1', 'hydromedusae_solmundella', 'heteropod', 'hydromedusae_aglaura', 'copepod_calanoid_flatheads', 'stomatopod', 'trichodesmium_multiple', 'pteropod_butterfly', 'Copepod_Calanoid_Large', 'fish_larvae_leptocephali', 'protist_other', 'detritus_other', 'echinoderm_larva_pluteus_urchin', 'radiolarian_chain', 'unknown_blobs_and_smudges', 'fish_larvae_medium_body', 'invertebrate_larvae_other_B', 'fecal_pellet', 'siphonophore_other_parts', 'echinoderm_larva_pluteus_brittlestar', 'trochophore_larvae', 'acantharia_protist', 'trichodesmium_puff', 'fish_larvae_thin_body', 'fish_larvae_very_thin_body', 'protist_dark_center', 'fish_larvae_myctophids', 'amphipods', 'Calycophoran_Siphonophore_Rocketship', 'trichodesmium_tuft', 'pteropod_theco_dev_seq', 'appendicularian_fritillaridae', 'appendicularian_straight', 'hydromedusae_narcomedusae', 'shrimp-like_other', 'siphonophore_partial', 'Hydromedusae_Solmaris', 'ephyra', 'artifacts', 'echinopluteus', 'protist_star', 'echinoderm_larva_seastar_brachiolaria', 'trichodesmium_bowtie', 'echinoderm_larva_pluteus_early', 'copepod_calanoid_small_longantennae', 'unknown_unclassified', 'artifacts_edge', 'detritus_filamentous', 'copepod_calanoid', 'shrimp_caridean', 'siphonophore_calycophoran_abylidae', 'fish_larvae_deep_body', 'hydromedusae_liriope', 'siphonophore_physonect', 'Tunicate_Salp', 'appendicularian_s_shape', 
'echinoderm_larva_pluteus_typeC', 'copepod_calanoid_frillyAntennae', 'Other_Hydromedusae', 'Hydromedusae_Haliscera  ', 'jellies_tentacles', 'decapods', 'protist_fuzzy_olive', 'shrimp_sergestidae', 'crustacean_other', 'chaetognath_other', 'euphausiids_young', 'ctenophore_cydippid_no_tentacles', 'diatom_chain_string', 'copepod_cyclopoid_copilia', 'diatom_chain_tube', 'Calycophoran_Siphonophore_Sphaeronectes', 'euphausiids', 'tunicate_partial', 'Tunicate_Doliolid', 'acantharia_protist_big_center', 'detritus_blob', 'polychaete', 'protist_noctiluca'])

3 72 set(['trichodesmium_multiple', 'Calycophoran_Siphonophore', 'unknown_sticks', 'echinoderm_seacucumber_auricularia_larva', 'polychaete', 'trichodesmium_tuft', 'ctenophore_lobate', 'pteropod_triangle', 'ctenophore_cestid', 'fish_larvae_leptocephali', 'pteropod_theco_dev_seq', 'echinoderm_larva_pluteus_typeC', 'protist_other', 'invertebrate_larvae_other_A', 'Tunicate', 'detritus_other', 'Hydromedusae_Narcomedusae', 'shrimp-like_other', 'siphonophore_partial', 'jellies_tentacles', 'fish_larvae_very_thin_body', 'ephyra', 'artifacts', 'acantharia_protist', 'tornaria_acorn_worm_larvae', 'radiolarian_chain', 'echinopluteus', 'protist_star', 'crustacean_other', 'invertebrate_larvae_other_B', 'protist_fuzzy_olive', 'trichodesmium_bowtie', 'chaetognath_non_sagitta', 'Euphauslid', 'Appendicularian', 'echinoderm_larva_pluteus_early', 'chaetognath_sagitta', 'Siphonophore_Physonect', 'pteropod_butterfly', 'unknown_unclassified', 'echinoderm_larva_pluteus_urchin', 'fish_larvae_medium_body', 'Copepod_Calnoid', 'chaetognath_other', 'artifacts_edge', 'siphonophore_other_parts', 'ctenophore_cydippid_tentacles', 'ctenophore_cydippid_no_tentacles', 'echinoderm_larva_pluteus_brittlestar', 'diatom_chain_string', 'unknown_blobs_and_smudges', 'trochophore_larvae', 'acantharia_protist_halo', 'diatom_chain_tube', 'trichodesmium_puff', 'fish_larvae_deep_body', 'detritus_filamentous', 'fish_larvae_thin_body', 'detritus_blob', 'Decapod', 'acantharia_protist_big_center', 'protist_dark_center', 'Cyclopoid_Copepod', 'radiolarian_colony', 'chordate_type1', 'heteropod', 'Echinoderm_Larva_Seastar', 'fecal_pellet', 'fish_larvae_myctophids', 'stomatopod', 'amphipods', 'protist_noctiluca'])

4 48 set(['trichodesmium_multiple', 'polychaete', 'unknown_sticks', 'tornaria_acorn_worm_larvae', 'Shrimp-like', 'trichodesmium_tuft', 'Pelagic_Tunicate', 'Copepod', 'fish_larvae_leptocephali', 'Protist_Other', 'invertebrate_larvae_other_A', 'Acantharia_Protist', 'detritus_other', 'chaetognath_non_sagitta', 'jellies_tentacles', 'fish_larvae_very_thin_body', 'ephyra', 'artifacts', 'Radiolarian_Colony', 'Pteropod', 'unknown_blobs_and_smudges', 'crustacean_other', 'invertebrate_larvae_other_B', 'trichodesmium_bowtie', 'chaetognath_sagitta', 'unknown_unclassified', 'Hydromedusae', 'fish_larvae_medium_body', 'trochophore_larvae', 'chaetognath_other', 'artifacts_edge', 'detritus_filamentous', 'Echinoderm', 'diatom_chain_string', 'detritus_blob', 'Ctenophore', 'diatom_chain_tube', 'trichodesmium_puff', 'fish_larvae_deep_body', 'fish_larvae_thin_body', 'Siphonophore', 'chordate_type1', 'heteropod', 'fecal_pellet', 'fish_larvae_myctophids', 'stomatopod', 'amphipods', 'protist_noctiluca'])

5 15 set(['Gelatinous_Zooplankton', 'Chaetognath', 'Other_Invert_Larvae', 'Detritus', 'Fish', 'chordate_type1', 'artifacts_edge', 'Protist', 'artifacts', 'Crustacean', 'Trichodesmium', 'Diatom', 'Unknown', 'polychaete', 'Gastropod'])

6 2 set(['Artifact', 'Plankton'])

7 1 set(['Omega'])