In [1]:
import json
from collections import defaultdict

import numpy as np
import pandas as pd
from BCBio import GFF
from Bio import SeqIO

from IPython.display import display

In [2]:
pd.options.display.max_colwidth = 1000
pd.options.display.max_rows = 500

In [3]:
with open('data/candidate_homologues.json', 'r') as handle:
    candidate_homologues = json.load(handle)

with open('data/candidate_scaffolds.json', 'r') as handle:
    candidate_scaffolds = json.load(handle)

In [4]:
GENOMES = dict()
GENOMES['MNH120'] = SeqIO.to_dict(GFF.parse('data/augustus.hints.GeneWithUTR.withNCBI_Fungi.gff3'))
GENOMES['B04'] = SeqIO.to_dict(GFF.parse('data/B04.genes.gff3'))
GENOMES['USR5'] = SeqIO.to_dict(GFF.parse('data/I5V.PredictedPass.gff3'))

for genome, scaffolds in GENOMES.items():
    for scaffold, seq in scaffolds.items():
        seq.features.sort(key=lambda f: f.location.start)

In [5]:
tpsi_cols = [
    'qid',
    'qlen',
    'search_method',
    'db_name',
    'sid',
    'qstart',
    'qend',
    'sstart',
    'send',
    'pid',
    'psim',
    'HSP_score',
    'bitscore',
    'description',
    'qframe',
    'qstrand',
    'slen',
    'evalue',
    'pvalue',
    ]
cazy_cols = ['seqid', 'subfamily', 'family_name', 'desc', 'notes']
location_cols = ['seqid', 'location', 'membrane']
panther_cols = ['seqid', 'family_id', 'family_name']
superfamilies_cols = ['seqid', 'category', 'short_catname', 
                      'long_catname', 'id', 'name']
go_cols = ['seqid', 'goid', 'goname', 'godomain']
ips_cols = ['seqid', 'md5', 'length', 'analysis', 'accession',
            'description', 'start', 'end', 'evalue', 'status',
            'date', 'ipracc', 'iprdesc', 'goterms', 'pathterms']

ANALYSES = defaultdict(dict)
ANALYSES['MNH120']['cazy'] = pd.read_table(
    'data/MNH120.cazy_fams.tsv',
    names=cazy_cols
    )
ANALYSES['MNH120']['tpsi'] = pd.read_table(
    'data/MNH120.combined.TPSI.topHits',
    names=tpsi_cols
    )
ANALYSES['MNH120']['location'] = pd.read_table(
    'data/MNH120.location.tsv',
    names=location_cols
    )
ANALYSES['MNH120']['panther'] = pd.read_table(
    'data/MNH120.panther_fams.tsv',
    names=panther_cols
    )
ANALYSES['MNH120']['superfamilies'] = pd.read_table(
    'data/MNH120.superfamilies.tsv',
    names=superfamilies_cols
    )
ANALYSES['MNH120']['go'] = pd.read_table(
    'data/MNH120.goterms.tsv',
    names=go_cols
    )
ANALYSES['MNH120']['ips'] = pd.read_table(
    'data/MNH120.combined.tsv',
    names=ips_cols
    )
ANALYSES['MNH120']['pfr'] = pd.read_table(
    'data/atg.Description.txt',
    names=['seqid', 'description']
    )


ANALYSES['B04']['cazy'] = pd.read_table(
    'data/B04.cazy_fams.tsv',
    names=cazy_cols
    )
ANALYSES['B04']['tpsi'] = pd.read_table(
    'data/B04.combined.TPSI.topHits',
    names=tpsi_cols
    )
ANALYSES['B04']['location'] = pd.read_table(
    'data/B04.location.tsv',
    names=location_cols
    )
ANALYSES['B04']['panther'] = pd.read_table(
    'data/B04.panther_fams.tsv',
    names=panther_cols
    )
ANALYSES['B04']['superfamilies'] = pd.read_table(
    'data/B04.superfamilies.tsv',
    names=superfamilies_cols
    )
ANALYSES['B04']['go'] = pd.read_table(
    'data/B04.goterms.tsv',
    names=go_cols
    )
ANALYSES['B04']['ips'] = pd.read_table(
    'data/B04.combined.tsv',
    names=ips_cols
    )
ANALYSES['B04']['pfr'] = pd.read_table(
    'data/B04.Description.txt',
    names=['seqid', 'description']
    )

ANALYSES['USR5']['cazy'] = pd.read_table(
    'data/I5V.cazy_fams.tsv',
    names=cazy_cols
    )
ANALYSES['USR5']['tpsi'] = pd.read_table(
    'data/I5V.combined.TPSI.topHits',
    names=tpsi_cols
    )
ANALYSES['USR5']['location'] = pd.read_table(
    'data/I5V.location.tsv',
    names=location_cols
    )
ANALYSES['USR5']['panther'] = pd.read_table(
    'data/I5V.panther_fams.tsv',
    names=panther_cols
    )
ANALYSES['USR5']['superfamilies'] = pd.read_table(
    'data/I5V.superfamilies.tsv',
    names=superfamilies_cols
    )
ANALYSES['USR5']['go'] = pd.read_table(
    'data/I5V.goterms.tsv',
    names=go_cols
    )
ANALYSES['USR5']['ips'] = pd.read_table(
    'data/I5V.combined.tsv', 
    names=ips_cols)

In [6]:
def get(seqid):
    idcols = {
        'panther': 'seqid',
        'cazy': 'seqid',
        'tpsi': 'sid',
        'location': 'seqid',
        'superfamilies': 'seqid',
        'go': 'seqid',
        'ips': 'seqid',
        'pfr': 'seqid'
        }
    for isolate, analyses in ANALYSES.items():
        for analysis, table in analyses.items():
            filtered = table[table[idcols[analysis]] == seqid]
            if len(filtered) == 0:
                continue
            else:
                print(isolate, analysis)
                display(filtered)

def getblast(seqid, isolate, thresh=1e-10):
    files = {
        'MNH120': 'data/MNH120.swiss.combined.tsv',
        'B04': 'data/B04.swiss.combined.tsv',
        'USR5': 'data/I5V.swiss.combined.tsv',
        }
    cols = (
        "qseqid qlen sallseqid sgi sacc saccver slen qstart "
        "qend sstart send qseq sseq evalue bitscore score "
        "length pident nident mismatch positive gapopen gaps "
        "ppos frames qframe sframe btop staxids sscinames "
        "scomnames sblastnames sskingdoms stitle salltitles "
        "sstrand qcovs qcovhsp"
        ).split(' ')
    interesting_cols = [
        'qseqid', 'qlen', 'sacc', 'slen', 'qstart', 'qend',
        'sstart', 'send', 'evalue', 'bitscore', 'sscinames',
        'salltitles',
        ]
    out = list()
    with open(files[isolate], 'r') as handle:
        for line in handle:
            if not line.startswith(seqid):
                continue
            line = line.rstrip('\n')
            line = line.split('\t')
            line = dict(zip(cols, line))
            if line['qseqid'] == seqid:
                if float(line['evalue']) < thresh:
                    out.append(line)
    if len(out) > 0:
        return pd.DataFrame(out)[interesting_cols]
    else:
        print('No matches')

In [7]:
def subset_features(record, start, end):
    """ Filters features to include what's in the bounds.

    Because the slice operator on SeqRecord objects does
    not handle features well, we need this function.

    Keyword arguments:
    record -- a SeqRecord object containing features.
    start -- the lower bound to include.
    end -- the upper bound to include.

    returns:
    A list of features.
    """

    new_features = list()
    for feature in record.features:
        f_start = feature.location.start
        f_end = feature.location.end
        if (
                (start <= f_start < end) or
                (start < f_end <= end) or
                ((f_start < start) and (f_end > end))
                ):
            new_features.append(feature)
    return new_features

In [28]:
def scaf_to_homologues(scaf):
    isolate = None
    for i, d in GENOMES.items():
        if scaf in d:
            qisolate = i
        
    seq = GENOMES[qisolate][scaf]
    features = [f for f in seq.features if f.type == 'gene']
    features.sort(key=lambda f: f.location.start)
    ids = [f.id for f in features]
    table = list()
    for id_ in ids:
        try:
            homologues = candidate_homologues[id_]
        except KeyError:
            continue
        group = dict()
        for isolate, hlogs in homologues.items():
            if isolate not in GENOMES or len(hlogs) == 0:
                continue
            elif isolate == qisolate:
                scaffold = scaf
                group[qisolate + '_scaffold'] = scaffold
                group[qisolate + '_id'] = id_
                continue
                
            hlog = hlogs[0]
            scaffold = hlog['scaffold']
            start = hlog['start']
            end = hlog['end']
            if scaffold not in GENOMES[isolate]:
                continue
            features = subset_features(GENOMES[isolate][scaffold], start, end)
            features = [f.id for f in features if f.type == 'gene']
            if len(features) > 0:
                feature = features[0]
            else:
                continue
            group[isolate + '_scaffold'] = scaffold
            group[isolate + '_id'] = feature
        table.append(group)
    return pd.DataFrame(table)

Region surrounding atg140

scaffold 38


In [417]:
"""{
"MNH120 scaffold":, "MNH120 id":, "MNH120 function":, 
"B04 scaffold":, "B04 id":, "B04 function":, 
"USR5 scaffold":, "USR5 id":, "USR5 function":,
}"""
table = [
    {
     "MNH120 scaffold": 'scaffold_38', "MNH120 id": 'atg136', "MNH120 function": "Unknown",
     "B04 scaffold": 'B04S72', "B04 id": 'B04S72.g5566', "B04 function": 'Unknown', 
     "USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04813', "USR5 function": 'Unknown',
    },
    {
     "USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04814', "USR5 function": 'Unknown',
    },
    {
     "USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04815', "USR5 function": 'Unknown',
    },
    {
     "MNH120 scaffold": 'scaffold_38', "MNH120 id": 'atg137', "MNH120 function": "Membrane associated protein with unknown function",
     "B04 scaffold": 'B04S72', "B04 id": 'B04S72.g5565', "B04 function": 'Membrane associated protein with unknown function', 
     "USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04816', "USR5 function": 'Membrane associated protein with unknown function',
    },
    {
     "USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04817', "USR5 function": 'Unknown',
    },
    {
     "USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04818', "USR5 function": 'Unknown',
    },
    {
     "MNH120 scaffold": 'scaffold_38', "MNH120 id": 'atg138', "MNH120 function": "DNA binding cell division control protein",
     "B04 scaffold": 'B04S72', "B04 id": 'B04S72.g5564', "B04 function": 'DNA binding cell division control protein', 
     "USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04819', "USR5 function": 'DNA binding cell division control protein',
    },
    {
     "MNH120 scaffold": 'scaffold_38', "MNH120 id": 'atg139', "MNH120 function": "Putative V-type ATP synthase subunit I homologue",
     "B04 scaffold": 'B04S72', "B04 id": 'B04S72.g5563', "B04 function": 'Putative V-type ATP synthase subunit I homologue', 
     "USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04820', "USR5 function": 'Unknown',
    },
    {
     "MNH120 scaffold": 'scaffold_38', "MNH120 id": 'atg140', "MNH120 function": "Secreted protein with unknown function",
     "B04 scaffold": 'B04S196', "B04 id": 'B04S196.g9853', "B04 function": 'Secreted protein with unknown function', 
     "USR5 scaffold": 'NODE_35582_length_606476_cov_15.460932', "USR5 id": 'exon.CUFF.10985.1.77', "USR5 function": 'Secreted protein with unknown function',
    },
]
table = pd.DataFrame(table)
table = table[["MNH120 scaffold", "MNH120 id", "USR5 scaffold", "USR5 id", "B04 scaffold", "B04 id", "MNH120 function", "USR5 function", "B04 function"]]
table.to_csv('scaffold_38.function.tsv', sep='\t', index=False)
table.to_latex('scaffold_38.function.tex', index=False, na_rep='', longtable=True)
table


Out[417]:
MNH120 scaffold MNH120 id USR5 scaffold USR5 id B04 scaffold B04 id MNH120 function USR5 function B04 function
0 scaffold_38 atg136 NODE_28888_length_496775_cov_15.218104 NS.04813 B04S72 B04S72.g5566 Unknown Unknown Unknown
1 NaN NaN NODE_28888_length_496775_cov_15.218104 NS.04814 NaN NaN NaN Unknown NaN
2 NaN NaN NODE_28888_length_496775_cov_15.218104 NS.04815 NaN NaN NaN Unknown NaN
3 scaffold_38 atg137 NODE_28888_length_496775_cov_15.218104 NS.04816 B04S72 B04S72.g5565 Membrane associated protein with unknown function Membrane associated protein with unknown function Membrane associated protein with unknown function
4 NaN NaN NODE_28888_length_496775_cov_15.218104 NS.04817 NaN NaN NaN Unknown NaN
5 NaN NaN NODE_28888_length_496775_cov_15.218104 NS.04818 NaN NaN NaN Unknown NaN
6 scaffold_38 atg138 NODE_28888_length_496775_cov_15.218104 NS.04819 B04S72 B04S72.g5564 DNA binding cell division control protein DNA binding cell division control protein DNA binding cell division control protein
7 scaffold_38 atg139 NODE_28888_length_496775_cov_15.218104 NS.04820 B04S72 B04S72.g5563 Putative V-type ATP synthase subunit I homologue Unknown Putative V-type ATP synthase subunit I homologue
8 scaffold_38 atg140 NODE_35582_length_606476_cov_15.460932 exon.CUFF.10985.1.77 B04S196 B04S196.g9853 Secreted protein with unknown function Secreted protein with unknown function Secreted protein with unknown function

scaffolds containing most probable atg140 homologue


In [416]:
"""{
"MNH120 scaffold":, "MNH120 id":, "MNH120 function":, 
"B04 scaffold":, "B04 id":, "B04 function":, 
"USR5 scaffold":, "USR5 id":, "USR5 function":,
}"""
table = [
    {
    "MNH120 scaffold": 'scaffold_997', "MNH120 id": 'atg10842', "MNH120 function": "Membrane associated PQ loop repeat protein",
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9843', "B04 function": "Membrane associated PQ loop repeat protein", 
    "USR5 scaffold": 'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06676', "USR5 function":"Membrane associated PQ loop repeat protein",
    },
    {
    "MNH120 scaffold":'atg10843', "MNH120 id":'scaffold_997', "MNH120 function":"Pyridine nucleotide-disulphide oxidoreductase", 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9844', "B04 function":"Pyridine nucleotide-disulphide oxidoreductase",
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06675', "USR5 function":"Pyridine nucleotide-disulphide oxidoreductase",
    },
    {
    "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10844', "MNH120 function":"Sodium-dependent phosphate transporter", 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9845', "B04 function":"Sodium-dependent phosphate transporter", 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06674', "USR5 function":"Sodium-dependent phosphate transporter",
    }, ## Possibly need to split into USR5 and MNH120 vs Bo4
    {
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06673',
    },
    {
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06672', "USR5 function":"Transcription factor",
    },
    {
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06671',
    }, # Possible split with NS.06670
    {
    "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10846', "MNH120 function":"NMT1/THI5 like protein, pyrimidine biosynthesis", 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9846', ## Unknown
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06670', "USR5 function":'NMT1/THI5 like protein, pyrimidine biosynthesis',
    },
    {
    "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10847', "MNH120 function":'DNA replication licensing factor MCM5', 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9847', "B04 function":"DNA replication licensing factor MCM5", 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10994.1.84', "USR5 function":"DNA replication licensing factor MCM5",
    },
    {
    "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10848', "MNH120 function":"Cysteine proteinase", 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9848', "B04 function":"Cysteine proteinase", 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06669', "USR5 function":"Cysteine proteinase",
    },
    {
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10998.1.83',
    },
    {
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9849',
    },
    {
    "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10849', "MNH120 function":"Eukaryotic translation initiation factor 4E", 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9850', "B04 function":"Eukaryotic translation initiation factor 4E",
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06668', "USR5 function":"Eukaryotic translation initiation factor 4E",
    },
    {
    "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10850', "MNH120 function": "Protein phosphatase inhibitor", 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9851', "B04 function":'Protein phosphatase inhibitor', 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.11014.2.81', "USR5 function":'Protein phosphatase inhibitor',
    },
    {
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06667',
    },
    {
    "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10851', 
    },
    {
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06666', "USR5 function":"Aminopeptidase",
    }, # possible split with 'exon.CUFF.11014.2.80'
    {
    "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10852', "MNH120 function":"Aminopeptidase", 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9852', "B04 function":"Aminopeptidase", 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.11014.2.80', "USR5 function":"Aminopeptidase",
    },
    {
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06665',
    },
    {
    "MNH120 scaffold": 'scaffold_38', "MNH120 id": 'atg140', "MNH120 function": "Secreted protein with unknown function",
    "B04 scaffold": 'B04S196', "B04 id": 'B04S196.g9853', "B04 function": 'Secreted protein with unknown function', 
    "USR5 scaffold": 'NODE_35582_length_606476_cov_15.460932', "USR5 id": 'exon.CUFF.10985.1.77', "USR5 function": 'Secreted protein with unknown function',
    },
    {
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06664',
    },
    {
    "MNH120 scaffold":'scaffold_887', "MNH120 id":'atg7428', "MNH120 function":"Transmembrane ion transporter", 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9854', "B04 function":"Transmembrane ion transporter", 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06663', "USR5 function":"Transmembrane ion transporter",
    },
    {
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9855', 
    },
    {
    "MNH120 scaffold":'scaffold_887', "MNH120 id":'atg7429', "MNH120 function":"Methylenetetrahydrofolate reductase", 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9856', "B04 function":"Methylenetetrahydrofolate reductase", 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10988.1.76',
    },
    {
    "MNH120 scaffold":'scaffold_887', "MNH120 id":'atg7430', "MNH120 function":'Mannosyltransferase', 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10988.1.75', "USR5 function":'Mannosyltransferase',
    }, ## Split with over 'B04S196.g9856'
    {
    "MNH120 scaffold":'scaffold_887', "MNH120 id":'atg7431', "MNH120 function":"Gamma-tubulin protein", 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9857', "B04 function":"Gamma-tubulin protein", 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10983.1.74', "USR5 function":"Gamma-tubulin protein",
    },
    {
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06662',
    },
    {
    "MNH120 scaffold":'scaffold_887', "MNH120 id":'atg7432', 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10983.1.73',
    },
    {
    "MNH120 scaffold":'scaffold_244', "MNH120 id":'atg1389', 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9858', 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06661',
    },
    {
    "MNH120 scaffold":'scaffold_244', "MNH120 id":'atg1390', "MNH120 function":'Transcription factor', 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9859', "B04 function":'Transcription factor', 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10989.1.72', "USR5 function":'Transcription factor',
    },
    {
    "MNH120 scaffold":'scaffold_244', "MNH120 id":'atg1391',
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06660',
    },
    {
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10986.1.71', "USR5 function": "DNA binding protein",
    },
    {
    "MNH120 scaffold":'scaffold_878', "MNH120 id":'atg7619', "MNH120 function":'Dimethylaniline monooxygenase', 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9860', "B04 function": 'Flavin mononucleotide binding protein', 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.11171.1.68', "USR5 function":'Flavin mononucleotide binding protein',
    },
    {
    "MNH120 scaffold":'scaffold_878', "MNH120 id":'atg7620', "MNH120 function": "Zn/Fe transporter", 
    "B04 scaffold":'B04S196', "B04 id":'B04S196.g9861', "B04 function": "Zn/Fe transporter", 
    "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06659', "USR5 function": "Zn/Fe transporter",
    },
    ]
table = pd.DataFrame(table)
table = table[["MNH120 scaffold", "MNH120 id", "USR5 scaffold", "USR5 id", "B04 scaffold", "B04 id", "MNH120 function", "USR5 function", "B04 function"]]
table.to_csv('atg140.function.tsv', sep='\t', index=False)
table.to_latex('atg140.function.tex', index=False, na_rep='', longtable=True)
table


Out[416]:
MNH120 scaffold MNH120 id USR5 scaffold USR5 id B04 scaffold B04 id MNH120 function USR5 function B04 function
0 scaffold_997 atg10842 NODE_35582_length_606476_cov_15.460932 NS.06676 B04S196 B04S196.g9843 Membrane associated PQ loop repeat protein Membrane associated PQ loop repeat protein Membrane associated PQ loop repeat protein
1 atg10843 scaffold_997 NODE_35582_length_606476_cov_15.460932 NS.06675 B04S196 B04S196.g9844 Pyridine nucleotide-disulphide oxidoreductase Pyridine nucleotide-disulphide oxidoreductase Pyridine nucleotide-disulphide oxidoreductase
2 scaffold_997 atg10844 NODE_35582_length_606476_cov_15.460932 NS.06674 B04S196 B04S196.g9845 Sodium-dependent phosphate transporter Sodium-dependent phosphate transporter Sodium-dependent phosphate transporter
3 NaN NaN NODE_35582_length_606476_cov_15.460932 NS.06673 NaN NaN NaN NaN NaN
4 NaN NaN NODE_35582_length_606476_cov_15.460932 NS.06672 NaN NaN NaN Transcription factor NaN
5 NaN NaN NODE_35582_length_606476_cov_15.460932 NS.06671 NaN NaN NaN NaN NaN
6 scaffold_997 atg10846 NODE_35582_length_606476_cov_15.460932 NS.06670 B04S196 B04S196.g9846 NMT1/THI5 like protein, pyrimidine biosynthesis NMT1/THI5 like protein, pyrimidine biosynthesis NaN
7 scaffold_997 atg10847 NODE_35582_length_606476_cov_15.460932 exon.CUFF.10994.1.84 B04S196 B04S196.g9847 DNA replication licensing factor MCM5 DNA replication licensing factor MCM5 DNA replication licensing factor MCM5
8 scaffold_997 atg10848 NODE_35582_length_606476_cov_15.460932 NS.06669 B04S196 B04S196.g9848 Cysteine proteinase Cysteine proteinase Cysteine proteinase
9 NaN NaN NODE_35582_length_606476_cov_15.460932 exon.CUFF.10998.1.83 NaN NaN NaN NaN NaN
10 NaN NaN NaN NaN B04S196 B04S196.g9849 NaN NaN NaN
11 scaffold_997 atg10849 NODE_35582_length_606476_cov_15.460932 NS.06668 B04S196 B04S196.g9850 Eukaryotic translation initiation factor 4E Eukaryotic translation initiation factor 4E Eukaryotic translation initiation factor 4E
12 scaffold_997 atg10850 NODE_35582_length_606476_cov_15.460932 exon.CUFF.11014.2.81 B04S196 B04S196.g9851 Protein phosphatase inhibitor Protein phosphatase inhibitor Protein phosphatase inhibitor
13 NaN NaN NODE_35582_length_606476_cov_15.460932 NS.06667 NaN NaN NaN NaN NaN
14 scaffold_997 atg10851 NaN NaN NaN NaN NaN NaN NaN
15 NaN NaN NODE_35582_length_606476_cov_15.460932 NS.06666 NaN NaN NaN Aminopeptidase NaN
16 scaffold_997 atg10852 NODE_35582_length_606476_cov_15.460932 exon.CUFF.11014.2.80 B04S196 B04S196.g9852 Aminopeptidase Aminopeptidase Aminopeptidase
17 NaN NaN NODE_35582_length_606476_cov_15.460932 NS.06665 NaN NaN NaN NaN NaN
18 scaffold_38 atg140 NODE_35582_length_606476_cov_15.460932 exon.CUFF.10985.1.77 B04S196 B04S196.g9853 Secreted protein with unknown function Secreted protein with unknown function Secreted protein with unknown function
19 NaN NaN NODE_35582_length_606476_cov_15.460932 NS.06664 NaN NaN NaN NaN NaN
20 scaffold_887 atg7428 NODE_35582_length_606476_cov_15.460932 NS.06663 B04S196 B04S196.g9854 Transmembrane ion transporter Transmembrane ion transporter Transmembrane ion transporter
21 NaN NaN NaN NaN B04S196 B04S196.g9855 NaN NaN NaN
22 scaffold_887 atg7429 NODE_35582_length_606476_cov_15.460932 exon.CUFF.10988.1.76 B04S196 B04S196.g9856 Methylenetetrahydrofolate reductase NaN Methylenetetrahydrofolate reductase
23 scaffold_887 atg7430 NODE_35582_length_606476_cov_15.460932 exon.CUFF.10988.1.75 NaN NaN Mannosyltransferase Mannosyltransferase NaN
24 scaffold_887 atg7431 NODE_35582_length_606476_cov_15.460932 exon.CUFF.10983.1.74 B04S196 B04S196.g9857 Gamma-tubulin protein Gamma-tubulin protein Gamma-tubulin protein
25 NaN NaN NODE_35582_length_606476_cov_15.460932 NS.06662 NaN NaN NaN NaN NaN
26 scaffold_887 atg7432 NODE_35582_length_606476_cov_15.460932 exon.CUFF.10983.1.73 NaN NaN NaN NaN NaN
27 scaffold_244 atg1389 NODE_35582_length_606476_cov_15.460932 NS.06661 B04S196 B04S196.g9858 NaN NaN NaN
28 scaffold_244 atg1390 NODE_35582_length_606476_cov_15.460932 exon.CUFF.10989.1.72 B04S196 B04S196.g9859 Transcription factor Transcription factor Transcription factor
29 scaffold_244 atg1391 NODE_35582_length_606476_cov_15.460932 NS.06660 NaN NaN NaN NaN NaN
30 NaN NaN NODE_35582_length_606476_cov_15.460932 exon.CUFF.10986.1.71 NaN NaN NaN DNA binding protein NaN
31 scaffold_878 atg7619 NODE_35582_length_606476_cov_15.460932 exon.CUFF.11171.1.68 B04S196 B04S196.g9860 Dimethylaniline monooxygenase Flavin mononucleotide binding protein Flavin mononucleotide binding protein
32 scaffold_878 atg7620 NODE_35582_length_606476_cov_15.460932 NS.06659 B04S196 B04S196.g9861 Zn/Fe transporter Zn/Fe transporter Zn/Fe transporter

atg12487


In [412]:
"""{
"MNH120 scaffold":, "MNH120 id":, "MNH120 function":, 
"B04 scaffold":, "B04 id":, "B04 function":, 
"USR5 scaffold":, "USR5 id":, "USR5 function":,
}"""
table = [
    { ## Very weak match to Fbox domain
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12496",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11385.1.31",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12495", "MNH120 function": "WD40 repeat domain-containing protein", 
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6037", "B04 function": "WD40 repeat domain-containing protein", 
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11385.1.32", "USR5 function": "WD40 repeat domain-containing protein",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12494", "MNH120 function": "Cell division control protein", 
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6038", "B04 function": "Cell division control protein", 
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11385.1.33", "USR5 function": "Cell division control protein",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12493", "MNH120 function": "Cell division control protein", 
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6038", "B04 function": "Cell division control protein", 
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11385.1.34", "USR5 function": "Cell division control protein",
    },
    { # B04S84.g6039 is a small gene within B04S84.g6038, no rearrangement
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6039",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12492", "MNH120 function": "WD40 repeat domain-containing protein, possibly ribosome associated, possible protease activity", 
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6040", "B04 function": "WD40 repeat domain-containing protein, possibly ribosome associated, possible protease activity",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06862", "USR5 function": "WD40 repeat domain-containing protein, possibly ribosome associated, possible protease activity",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12491",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06861",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12490", "MNH120 function": "GATS-like protein", 
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6041", "B04 function": "GATS-like protein", 
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11309.1.28", "USR5 function": "GATS-like protein",
    },
    {
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06860",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12489", 
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6042", 
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06859",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12488",  
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6043",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06858",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12488",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06857",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12487", "MNH120 function": "Cellobiose dehydrogenase", 
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6044", "B04 function": "Cellobiose dehydrogenase", 
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11325.4.26", "USR5 function": "Cellobiose dehydrogenase",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12486", "MNH120 function": "GDP-mannose transporter", 
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6045", "B04 function": "GDP-mannose transporter", 
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11301.1.19", "USR5 function": "GDP-mannose transporter",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12485",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12484", "MNH120 function": "Dimeric alpha-beta barrel protein, secondary metabolite associated", 
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6046", "B04 function": "Dimeric alpha-beta barrel protein, secondary metabolite associated", 
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06856", "USR5 function": "Dimeric alpha-beta barrel protein, secondary metabolite associated",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12483", "MNH120 function": "Cytochrome P450", 
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6047", "B04 function": "Cytochrome P450", 
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06855", "USR5 function": "Cytochrome P450",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12483", "MNH120 function": "Cytochrome P450", 
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6047", "B04 function": "Cytochrome P450", 
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11297.1.18", "USR5 function": "Cytochrome P450",
    },
    {
    "MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12483", "MNH120 function": "Cytochrome P450", 
    "B04 scaffold": "B04S84", "B04 id": "B04S84.g6047", "B04 function": "Cytochrome P450", 
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06854",
    },
    ]
table = pd.DataFrame(table[::-1])
table = table[["MNH120 scaffold", "MNH120 id", "USR5 scaffold", "USR5 id", "B04 scaffold", "B04 id", "MNH120 function", "USR5 function", "B04 function"]]
table.to_csv('atg12487.function.tsv', sep='\t', index=False)
table.to_latex('atg12487.function.tex', index=False, na_rep='', longtable=True)
table


Out[412]:
MNH120 scaffold MNH120 id USR5 scaffold USR5 id B04 scaffold B04 id MNH120 function USR5 function B04 function
0 scaffold_978 atg12483 NODE_36160_length_249094_cov_15.165792 NS.06854 B04S84 B04S84.g6047 Cytochrome P450 NaN Cytochrome P450
1 scaffold_978 atg12483 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11297.1.18 B04S84 B04S84.g6047 Cytochrome P450 Cytochrome P450 Cytochrome P450
2 scaffold_978 atg12483 NODE_36160_length_249094_cov_15.165792 NS.06855 B04S84 B04S84.g6047 Cytochrome P450 Cytochrome P450 Cytochrome P450
3 scaffold_978 atg12484 NODE_36160_length_249094_cov_15.165792 NS.06856 B04S84 B04S84.g6046 Dimeric alpha-beta barrel protein, secondary metabolite associated Dimeric alpha-beta barrel protein, secondary metabolite associated Dimeric alpha-beta barrel protein, secondary metabolite associated
4 scaffold_978 atg12485 NaN NaN NaN NaN NaN NaN NaN
5 scaffold_978 atg12486 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11301.1.19 B04S84 B04S84.g6045 GDP-mannose transporter GDP-mannose transporter GDP-mannose transporter
6 scaffold_978 atg12487 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11325.4.26 B04S84 B04S84.g6044 Cellobiose dehydrogenase Cellobiose dehydrogenase Cellobiose dehydrogenase
7 scaffold_978 atg12488 NODE_36160_length_249094_cov_15.165792 NS.06857 NaN NaN NaN NaN NaN
8 scaffold_978 atg12488 NODE_36160_length_249094_cov_15.165792 NS.06858 B04S84 B04S84.g6043 NaN NaN NaN
9 scaffold_978 atg12489 NODE_36160_length_249094_cov_15.165792 NS.06859 B04S84 B04S84.g6042 NaN NaN NaN
10 NaN NaN NODE_36160_length_249094_cov_15.165792 NS.06860 NaN NaN NaN NaN NaN
11 scaffold_978 atg12490 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11309.1.28 B04S84 B04S84.g6041 GATS-like protein GATS-like protein GATS-like protein
12 scaffold_978 atg12491 NODE_36160_length_249094_cov_15.165792 NS.06861 NaN NaN NaN NaN NaN
13 scaffold_978 atg12492 NODE_36160_length_249094_cov_15.165792 NS.06862 B04S84 B04S84.g6040 WD40 repeat domain-containing protein, possibly ribosome associated, possible protease activity WD40 repeat domain-containing protein, possibly ribosome associated, possible protease activity WD40 repeat domain-containing protein, possibly ribosome associated, possible protease activity
14 NaN NaN NaN NaN B04S84 B04S84.g6039 NaN NaN NaN
15 scaffold_978 atg12493 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11385.1.34 B04S84 B04S84.g6038 Cell division control protein Cell division control protein Cell division control protein
16 scaffold_978 atg12494 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11385.1.33 B04S84 B04S84.g6038 Cell division control protein Cell division control protein Cell division control protein
17 scaffold_978 atg12495 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11385.1.32 B04S84 B04S84.g6037 WD40 repeat domain-containing protein WD40 repeat domain-containing protein WD40 repeat domain-containing protein
18 scaffold_978 atg12496 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11385.1.31 NaN NaN NaN NaN NaN

rest


In [414]:
table = [
    {
    "MNH120 scaffold": "scaffold_48", "MNH120 id": "atg162",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10047",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06847",
    },
    {
    "MNH120 scaffold": "scaffold_48", "MNH120 id": "atg161",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10048",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06846",
    },
    {
    "MNH120 scaffold": "scaffold_48", "MNH120 id": "atg160", "MNH120 function": "Sulfite oxidase", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10049", "B04 function": "Sulfite oxidase",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11287.1.9", "USR5 function": "Sulfite oxidase",
    },
    {
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10050",
    },
    {
    "MNH120 scaffold": "scaffold_48", "MNH120 id": "atg159", "MNH120 function": "Amun-like protein", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10051", "B04 function": "Amun-like protein",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11284.2.7", "USR5 function": "Amun-like protein",
    },
    {
    "MNH120 scaffold": "scaffold_893", "MNH120 id": "atg7462", "MNH120 function": "Ubiquitin carboxyl-terminal hydrolase", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10052", "B04 function": "Ubiquitin carboxyl-terminal hydrolase",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11303.1.5", "USR5 function": "Ubiquitin carboxyl-terminal hydrolase",
    },
    {
    "MNH120 scaffold": "scaffold_893", "MNH120 id": "atg7463", "MNH120 function": "Pyruvate dehydrogenase kinase", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10053", "B04 function": "Pyruvate dehydrogenase kinase",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11302.1.4", "USR5 function": "Pyruvate dehydrogenase kinase",
    },
    {
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06845",
    },
    {
    "MNH120 scaffold": "scaffold_893", "MNH120 id": "atg7464",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10054",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11302.1.3",
    },
    {
    "MNH120 scaffold": "scaffold_893", "MNH120 id": "atg7465", "MNH120 function": "Beta-glucosidase family 1", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10055", "B04 function": "Beta-glucosidase family 1",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11282.1.2", "USR5 function": "Beta-glucosidase family 1",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10702", "MNH120 function": "Membrane protein with unknown function", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10056", "B04 function": "Membrane protein with unknown function",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06844", "USR5 function": "Membrane protein with unknown function",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10703", "MNH120 function": "Secreted protein with unknown function", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10057", "B04 function": "Secreted protein with unknown function",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11283.1.0", "USR5 function": "Secreted protein with unknown function",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10704",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10058",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06843", # Possible RNI-like superfamily
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10705", "MNH120 function": "GPI transamidase component PIG-U", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10059", "B04 function": "GPI transamidase component PIG-U",
    "USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06842", "USR5 function": "GPI transamidase component PIG-U",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08710", "USR5 function": "Oxidoreductase",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14260.1.0", "USR5 function": "Aromatic-L-amino acid decarboxylase",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10706", "MNH120 function": "Possible glycosyltransferase", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10060", "B04 function": "Possible glycosyltransferase",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08711", ## 3' tail of atg10706
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08712",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08713",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08714",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08715",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08716", "USR5 function": "Partial Cytochrome P450 match",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08717", "USR5 function": "Partial ribonuclease H/Integrase, possible Gypsy TE",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08718", "USR5 function": "Partial polymerase, possible Gypsy TE",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08719", "USR5 function": "Partial polymerase, possible Gypsy TE",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08720", "USR5 function": "Weak partial Gypsy TE match",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08721", "USR5 function": "Partial phospholipase",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08722",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08723",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10706", "MNH120 function": "Possible glycosyltransferase",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10060", "B04 function": "Possible glycosyltransferase",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08724", "USR5 function": "Possible glycosyltransferase",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10707", "MNH120 function": "MM3350-like protein, possible DNA-binding", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10062", "B04 function": "MM3350-like protein, possible DNA-binding",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14230.1.3", "USR5 function": "MM3350-like protein, possible DNA-binding",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10708",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10063",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08725",
    },
    {
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10064", "B04 function": "NCS1 family purine/pyrimidine transporter",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08726", "USR5 function": "NCS1 family purine/pyrimidine transporter",
    },
    {
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10065", "B04 function": "Clavaminate synthase-like protein", #### Split?
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14211.1.7", "USR5 function": "Transcriptional regulatory protein",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10709",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08729", ## Partial plexin repeat
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14232.2.11",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10710", "MNH120 function": "Chloride conductance regulatory protein ICLN", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10066", "B04 function": "Chloride conductance regulatory protein ICLN",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14232.2.10", "USR5 function": "Chloride conductance regulatory protein ICLN",
    },
    {
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10067", "B04 function": "Glycosyltransferase family 25 member",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14214.1.12", "USR5 function": "Glycosyltransferase family 25 member, possibly inactive",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10711", "MNH120 function": "Glycosyltransferase family 25 member, possibly inactive", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10067", "B04 function": "Glycosyltransferase family 25 member",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08730", "USR5 function": "Glycosyltransferase family 25 member, possibly inactive",
    },
    {
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10068", "B04 function": "Glycosyltransferase family 25 member",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08730", "USR5 function": "Glycosyltransferase family 25 member, possibly inactive",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10712",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10069",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08731",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10713",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10070",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14217.1.14",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10713",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10070",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14217.2.15",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10713",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10070",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14217.3.16",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08732",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10714", "MNH120 function": "Vacuolar protein sorting-associated protein 54", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10071", "B04 function": "Vacuolar protein sorting-associated protein 54",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14228.1.17", "USR5 function": "Vacuolar protein sorting-associated protein 54",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10715",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10072",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08733",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08734",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10716",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10073",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08735",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10717",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10074",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08736",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10718", "MNH120 function": "MFS transporter", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10075", "B04 function": "MFS transporter",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08737", "USR5 function": "MFS transporter",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10719", "MNH120 function": "Secreted protein with unknown function", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10076", "B04 function": "Secreted protein with unknown function",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08738", "USR5 function": "Secreted protein with unknown function",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10720",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10721", "MNH120 function": "Possible M-phase phosphoprotein 6", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10077", "B04 function": "Possible M-phase phosphoprotein 6",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08739", "USR5 function": "Possible M-phase phosphoprotein 6",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10722", "MNH120 function": "NADH-ubiquinone oxidoreductase", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10078", "B04 function": "NADH-ubiquinone oxidoreductase",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08740", "USR5 function": "NADH-ubiquinone oxidoreductase",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08741", "USR5 function": "",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10723", "MNH120 function": "DNA damage repair protein, possible tRNA-splicing endonuclease",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10079", "B04 function": "DNA damage repair protein, possible tRNA-splicing endonuclease",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14234.1.22", "USR5 function": "DNA damage repair protein, possible tRNA-splicing endonuclease",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08742",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10724", "MNH120 function": "Secreted protein with possible transmembrane domain, unknown function", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10080", "B04 function": "Secreted protein with possible transmembrane domain, unknown function",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08743", "USR5 function": "Possible transmembrane protein, unknown function", # has extra bit on 5' end
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10725", "MNH120 function": "YjbQ-like protein, possible secondary thiamine-phosphate synthase activity", 
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10082", "B04 function": "YjbQ-like protein, possible secondary thiamine-phosphate synthase activity",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08744", "USR5 function": "YjbQ-like protein, possible secondary thiamine-phosphate synthase activity",
    },
    {
    "MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10726",
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10083",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14238.1.25",
    },
    {
    "B04 scaffold": "B04S209", "B04 id": "B04S209.g10084",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.16500", "USR5 id": "NS.08745",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.16500", "USR5 id": "NS.08746",
    },
    {
    "MNH120 scaffold": "scaffold_554", "MNH120 id": "atg4020", "MNH120 function": "Possible nuclear transport factor 2-like protein", 
    "B04 scaffold": "B04S181", "B04 id": "B04S181.g9491", "B04 function": "Possible nuclear transport factor 2-like protein",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08747", "USR5 function": "Possible nuclear transport factor 2-like protein",
    },
    {
    "MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10943", "MNH120 function": "Nuclear movement protein NUDC, possible HSP20-like chaperone", 
    "B04 scaffold": "B04S181", "B04 id": "B04S181.g9490", "B04 function": "Nuclear movement protein NUDC, possible HSP20-like chaperone",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14241.1.26", "USR5 function": "Nuclear movement protein NUDC, possible HSP20-like chaperone",
    },
    {
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.16500", "USR5 id": "exon.CUFF.14242.1.27",
    },
    {
    "MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10942", "MNH120 function": "Probable lipid transporter/ligase, possible Acetyl-CoA synthetase-like protien", 
    "B04 scaffold": "B04S181", "B04 id": "B04S181.g9489", "B04 function": "Probable lipid transporter/ligase, possible Acetyl-CoA synthetase-like protien",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08748", "USR5 function": "Probable lipid transporter/ligase, possible Acetyl-CoA synthetase-like protien",
    },
    {
    "B04 scaffold": "B04S181", "B04 id": "B04S181.g9488",        
    },
    {
    "MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10941", "MNH120 function": "NCS1 family purine-pyrimidine transporter/permease",
    "B04 scaffold": "B04S181", "B04 id": "B04S181.g9487", "B04 function": "NCS1 family purine-pyrimidine transporter/permease",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08749", "USR5 function": "NCS1 family purine-pyrimidine transporter/permease",
    },
    {
    "MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10941", "MNH120 function": "NCS1 family purine-pyrimidine transporter/permease", 
    "B04 scaffold": "B04S181", "B04 id": "B04S181.g9487", "B04 function": "NCS1 family purine-pyrimidine transporter/permease",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08750", "USR5 function": "NCS1 family purine-pyrimidine transporter/permease",
    },
    {
    "MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10940", "MNH120 function": "Sterol reductase/Lamin B receptor",
    "B04 scaffold": "B04S181", "B04 id": "B04S181.g9486", "B04 function": "Sterol reductase/Lamin B receptor",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14250.2.30", "USR5 function": "Sterol reductase/Lamin B receptor",
    },
    {
    "MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10939", "MNH120 function": "Zinc finger C2H2 type domain protein", 
    "B04 scaffold": "B04S181", "B04 id": "B04S181.g9485", # Truncated at 5' end compared to USR5 and MNH120
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14248.1.31", "USR5 function": "Zinc finger C2H2 type domain protein",
    },
    {
    "MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10906",
    "B04 scaffold": "B04S181", "B04 id": "B04S181.g9484",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08751",
    },
    {
    "B04 scaffold": "B04S181", "B04 id": "B04S181.g9484",
    "USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08752",
    },
    ]
table = pd.DataFrame(table)
table = table[["MNH120 scaffold", "MNH120 id", "USR5 scaffold", "USR5 id", "B04 scaffold", "B04 id", "MNH120 function", "USR5 function", "B04 function"]]
table.to_csv('group.function.tsv', sep='\t', index=False)
table.to_latex('group.function.tex', index=False, na_rep='', longtable=True)
table


Out[414]:
MNH120 scaffold MNH120 id USR5 scaffold USR5 id B04 scaffold B04 id MNH120 function USR5 function B04 function
0 scaffold_48 atg162 NODE_36160_length_249094_cov_15.165792 NS.06847 B04S209 B04S209.g10047 NaN NaN NaN
1 scaffold_48 atg161 NODE_36160_length_249094_cov_15.165792 NS.06846 B04S209 B04S209.g10048 NaN NaN NaN
2 scaffold_48 atg160 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11287.1.9 B04S209 B04S209.g10049 Sulfite oxidase Sulfite oxidase Sulfite oxidase
3 NaN NaN NaN NaN B04S209 B04S209.g10050 NaN NaN NaN
4 scaffold_48 atg159 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11284.2.7 B04S209 B04S209.g10051 Amun-like protein Amun-like protein Amun-like protein
5 scaffold_893 atg7462 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11303.1.5 B04S209 B04S209.g10052 Ubiquitin carboxyl-terminal hydrolase Ubiquitin carboxyl-terminal hydrolase Ubiquitin carboxyl-terminal hydrolase
6 scaffold_893 atg7463 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11302.1.4 B04S209 B04S209.g10053 Pyruvate dehydrogenase kinase Pyruvate dehydrogenase kinase Pyruvate dehydrogenase kinase
7 NaN NaN NODE_36160_length_249094_cov_15.165792 NS.06845 NaN NaN NaN NaN NaN
8 scaffold_893 atg7464 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11302.1.3 B04S209 B04S209.g10054 NaN NaN NaN
9 scaffold_893 atg7465 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11282.1.2 B04S209 B04S209.g10055 Beta-glucosidase family 1 Beta-glucosidase family 1 Beta-glucosidase family 1
10 scaffold_996 atg10702 NODE_36160_length_249094_cov_15.165792 NS.06844 B04S209 B04S209.g10056 Membrane protein with unknown function Membrane protein with unknown function Membrane protein with unknown function
11 scaffold_996 atg10703 NODE_36160_length_249094_cov_15.165792 exon.CUFF.11283.1.0 B04S209 B04S209.g10057 Secreted protein with unknown function Secreted protein with unknown function Secreted protein with unknown function
12 scaffold_996 atg10704 NODE_36160_length_249094_cov_15.165792 NS.06843 B04S209 B04S209.g10058 NaN NaN NaN
13 scaffold_996 atg10705 NODE_36160_length_249094_cov_15.165792 NS.06842 B04S209 B04S209.g10059 GPI transamidase component PIG-U GPI transamidase component PIG-U GPI transamidase component PIG-U
14 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08710 NaN NaN NaN Oxidoreductase NaN
15 NaN NaN NODE_5090_length_307152_cov_15.165000 exon.CUFF.14260.1.0 NaN NaN NaN Aromatic-L-amino acid decarboxylase NaN
16 scaffold_996 atg10706 NaN NaN B04S209 B04S209.g10060 Possible glycosyltransferase NaN Possible glycosyltransferase
17 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08711 NaN NaN NaN NaN NaN
18 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08712 NaN NaN NaN NaN NaN
19 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08713 NaN NaN NaN NaN NaN
20 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08714 NaN NaN NaN NaN NaN
21 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08715 NaN NaN NaN NaN NaN
22 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08716 NaN NaN NaN Partial Cytochrome P450 match NaN
23 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08717 NaN NaN NaN Partial ribonuclease H/Integrase, possible Gypsy TE NaN
24 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08718 NaN NaN NaN Partial polymerase, possible Gypsy TE NaN
25 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08719 NaN NaN NaN Partial polymerase, possible Gypsy TE NaN
26 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08720 NaN NaN NaN Weak partial Gypsy TE match NaN
27 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08721 NaN NaN NaN Partial phospholipase NaN
28 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08722 NaN NaN NaN NaN NaN
29 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08723 NaN NaN NaN NaN NaN
30 scaffold_996 atg10706 NODE_5090_length_307152_cov_15.165000 NS.08724 B04S209 B04S209.g10060 Possible glycosyltransferase Possible glycosyltransferase Possible glycosyltransferase
31 scaffold_996 atg10707 NODE_5090_length_307152_cov_15.165000 exon.CUFF.14230.1.3 B04S209 B04S209.g10062 MM3350-like protein, possible DNA-binding MM3350-like protein, possible DNA-binding MM3350-like protein, possible DNA-binding
32 scaffold_996 atg10708 NODE_5090_length_307152_cov_15.165000 NS.08725 B04S209 B04S209.g10063 NaN NaN NaN
33 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08726 B04S209 B04S209.g10064 NaN NCS1 family purine/pyrimidine transporter NCS1 family purine/pyrimidine transporter
34 NaN NaN NODE_5090_length_307152_cov_15.165000 exon.CUFF.14211.1.7 B04S209 B04S209.g10065 NaN Transcriptional regulatory protein Clavaminate synthase-like protein
35 scaffold_996 atg10709 NODE_5090_length_307152_cov_15.165000 NS.08729 NaN NaN NaN NaN NaN
36 NaN NaN NODE_5090_length_307152_cov_15.165000 exon.CUFF.14232.2.11 NaN NaN NaN NaN NaN
37 scaffold_996 atg10710 NODE_5090_length_307152_cov_15.165000 exon.CUFF.14232.2.10 B04S209 B04S209.g10066 Chloride conductance regulatory protein ICLN Chloride conductance regulatory protein ICLN Chloride conductance regulatory protein ICLN
38 NaN NaN NODE_5090_length_307152_cov_15.165000 exon.CUFF.14214.1.12 B04S209 B04S209.g10067 NaN Glycosyltransferase family 25 member, possibly inactive Glycosyltransferase family 25 member
39 scaffold_996 atg10711 NODE_5090_length_307152_cov_15.165000 NS.08730 B04S209 B04S209.g10067 Glycosyltransferase family 25 member, possibly inactive Glycosyltransferase family 25 member, possibly inactive Glycosyltransferase family 25 member
40 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08730 B04S209 B04S209.g10068 NaN Glycosyltransferase family 25 member, possibly inactive Glycosyltransferase family 25 member
41 scaffold_996 atg10712 NODE_5090_length_307152_cov_15.165000 NS.08731 B04S209 B04S209.g10069 NaN NaN NaN
42 scaffold_996 atg10713 NODE_5090_length_307152_cov_15.165000 exon.CUFF.14217.1.14 B04S209 B04S209.g10070 NaN NaN NaN
43 scaffold_996 atg10713 NODE_5090_length_307152_cov_15.165000 exon.CUFF.14217.2.15 B04S209 B04S209.g10070 NaN NaN NaN
44 scaffold_996 atg10713 NODE_5090_length_307152_cov_15.165000 exon.CUFF.14217.3.16 B04S209 B04S209.g10070 NaN NaN NaN
45 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08732 NaN NaN NaN NaN NaN
46 scaffold_996 atg10714 NODE_5090_length_307152_cov_15.165000 exon.CUFF.14228.1.17 B04S209 B04S209.g10071 Vacuolar protein sorting-associated protein 54 Vacuolar protein sorting-associated protein 54 Vacuolar protein sorting-associated protein 54
47 scaffold_996 atg10715 NODE_5090_length_307152_cov_15.165000 NS.08733 B04S209 B04S209.g10072 NaN NaN NaN
48 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08734 NaN NaN NaN NaN NaN
49 scaffold_996 atg10716 NODE_5090_length_307152_cov_15.165000 NS.08735 B04S209 B04S209.g10073 NaN NaN NaN
50 scaffold_996 atg10717 NODE_5090_length_307152_cov_15.165000 NS.08736 B04S209 B04S209.g10074 NaN NaN NaN
51 scaffold_996 atg10718 NODE_5090_length_307152_cov_15.165000 NS.08737 B04S209 B04S209.g10075 MFS transporter MFS transporter MFS transporter
52 scaffold_996 atg10719 NODE_5090_length_307152_cov_15.165000 NS.08738 B04S209 B04S209.g10076 Secreted protein with unknown function Secreted protein with unknown function Secreted protein with unknown function
53 scaffold_996 atg10720 NaN NaN NaN NaN NaN NaN NaN
54 scaffold_996 atg10721 NODE_5090_length_307152_cov_15.165000 NS.08739 B04S209 B04S209.g10077 Possible M-phase phosphoprotein 6 Possible M-phase phosphoprotein 6 Possible M-phase phosphoprotein 6
55 scaffold_996 atg10722 NODE_5090_length_307152_cov_15.165000 NS.08740 B04S209 B04S209.g10078 NADH-ubiquinone oxidoreductase NADH-ubiquinone oxidoreductase NADH-ubiquinone oxidoreductase
56 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08741 NaN NaN NaN NaN
57 scaffold_996 atg10723 NODE_5090_length_307152_cov_15.165000 exon.CUFF.14234.1.22 B04S209 B04S209.g10079 DNA damage repair protein, possible tRNA-splicing endonuclease DNA damage repair protein, possible tRNA-splicing endonuclease DNA damage repair protein, possible tRNA-splicing endonuclease
58 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08742 NaN NaN NaN NaN NaN
59 scaffold_996 atg10724 NODE_5090_length_307152_cov_15.165000 NS.08743 B04S209 B04S209.g10080 Secreted protein with possible transmembrane domain, unknown function Possible transmembrane protein, unknown function Secreted protein with possible transmembrane domain, unknown function
60 scaffold_996 atg10725 NODE_5090_length_307152_cov_15.165000 NS.08744 B04S209 B04S209.g10082 YjbQ-like protein, possible secondary thiamine-phosphate synthase activity YjbQ-like protein, possible secondary thiamine-phosphate synthase activity YjbQ-like protein, possible secondary thiamine-phosphate synthase activity
61 scaffold_996 atg10726 NODE_5090_length_307152_cov_15.165000 exon.CUFF.14238.1.25 B04S209 B04S209.g10083 NaN NaN NaN
62 NaN NaN NODE_5090_length_307152_cov_15.16500 NS.08745 B04S209 B04S209.g10084 NaN NaN NaN
63 NaN NaN NODE_5090_length_307152_cov_15.16500 NS.08746 NaN NaN NaN NaN NaN
64 scaffold_554 atg4020 NODE_5090_length_307152_cov_15.165000 NS.08747 B04S181 B04S181.g9491 Possible nuclear transport factor 2-like protein Possible nuclear transport factor 2-like protein Possible nuclear transport factor 2-like protein
65 scaffold_987 atg10943 NODE_5090_length_307152_cov_15.165000 exon.CUFF.14241.1.26 B04S181 B04S181.g9490 Nuclear movement protein NUDC, possible HSP20-like chaperone Nuclear movement protein NUDC, possible HSP20-like chaperone Nuclear movement protein NUDC, possible HSP20-like chaperone
66 NaN NaN NODE_5090_length_307152_cov_15.16500 exon.CUFF.14242.1.27 NaN NaN NaN NaN NaN
67 scaffold_987 atg10942 NODE_5090_length_307152_cov_15.165000 NS.08748 B04S181 B04S181.g9489 Probable lipid transporter/ligase, possible Acetyl-CoA synthetase-like protien Probable lipid transporter/ligase, possible Acetyl-CoA synthetase-like protien Probable lipid transporter/ligase, possible Acetyl-CoA synthetase-like protien
68 NaN NaN NaN NaN B04S181 B04S181.g9488 NaN NaN NaN
69 scaffold_987 atg10941 NODE_5090_length_307152_cov_15.165000 NS.08749 B04S181 B04S181.g9487 NCS1 family purine-pyrimidine transporter/permease NCS1 family purine-pyrimidine transporter/permease NCS1 family purine-pyrimidine transporter/permease
70 scaffold_987 atg10941 NODE_5090_length_307152_cov_15.165000 NS.08750 B04S181 B04S181.g9487 NCS1 family purine-pyrimidine transporter/permease NCS1 family purine-pyrimidine transporter/permease NCS1 family purine-pyrimidine transporter/permease
71 scaffold_987 atg10940 NODE_5090_length_307152_cov_15.165000 exon.CUFF.14250.2.30 B04S181 B04S181.g9486 Sterol reductase/Lamin B receptor Sterol reductase/Lamin B receptor Sterol reductase/Lamin B receptor
72 scaffold_987 atg10939 NODE_5090_length_307152_cov_15.165000 exon.CUFF.14248.1.31 B04S181 B04S181.g9485 Zinc finger C2H2 type domain protein Zinc finger C2H2 type domain protein NaN
73 scaffold_987 atg10906 NODE_5090_length_307152_cov_15.165000 NS.08751 B04S181 B04S181.g9484 NaN NaN NaN
74 NaN NaN NODE_5090_length_307152_cov_15.165000 NS.08752 B04S181 B04S181.g9484 NaN NaN NaN

In [405]:
pd.DataFrame.from_dict?

In [ ]: