In [1]:
import json
from collections import defaultdict
import numpy as np
import pandas as pd
from BCBio import GFF
from Bio import SeqIO
from IPython.display import display
In [2]:
# Widen pandas' display limits so long text columns and tables of up to 500
# rows are rendered in full instead of being truncated.
pd.set_option('display.max_colwidth', 1000)
pd.set_option('display.max_rows', 500)
In [3]:
# Load the candidate homologue table and the candidate scaffold table from
# their JSON files; the structure of each object is whatever the file holds.
with open('data/candidate_homologues.json', 'r') as homologues_json:
    candidate_homologues = json.loads(homologues_json.read())

with open('data/candidate_scaffolds.json', 'r') as scaffolds_json:
    candidate_scaffolds = json.loads(scaffolds_json.read())
In [4]:
# Gene annotations per isolate: isolate name -> {record id -> record}, parsed
# from each isolate's GFF3 file.
GENOMES = {
    'MNH120': SeqIO.to_dict(GFF.parse('data/augustus.hints.GeneWithUTR.withNCBI_Fungi.gff3')),
    'B04': SeqIO.to_dict(GFF.parse('data/B04.genes.gff3')),
    'USR5': SeqIO.to_dict(GFF.parse('data/I5V.PredictedPass.gff3')),
}

# Order the features of every record by start coordinate (in place).
for scaffolds in GENOMES.values():
    for record in scaffolds.values():
        record.features.sort(key=lambda feat: feat.location.start)
In [5]:
# Column names passed as `names=` when reading each kind of annotation table.
tpsi_cols = [
    'qid', 'qlen', 'search_method', 'db_name', 'sid', 'qstart', 'qend',
    'sstart', 'send', 'pid', 'psim', 'HSP_score', 'bitscore', 'description',
    'qframe', 'qstrand', 'slen', 'evalue', 'pvalue',
]
cazy_cols = ['seqid', 'subfamily', 'family_name', 'desc', 'notes']
location_cols = ['seqid', 'location', 'membrane']
panther_cols = ['seqid', 'family_id', 'family_name']
superfamilies_cols = [
    'seqid', 'category', 'short_catname', 'long_catname', 'id', 'name',
]
go_cols = ['seqid', 'goid', 'goname', 'godomain']
ips_cols = [
    'seqid', 'md5', 'length', 'analysis', 'accession', 'description',
    'start', 'end', 'evalue', 'status', 'date', 'ipracc', 'iprdesc',
    'goterms', 'pathterms',
]


def _read_analyses():
    """Read every annotation table into an isolate -> analysis -> DataFrame map.

    The tables are registered in the order listed in `sources`, which is the
    order in which they are later iterated. USR5 has no 'pfr' table.

    returns:
    A defaultdict(dict) keyed by isolate, then by analysis name.
    """

    pfr_cols = ['seqid', 'description']

    # (isolate, analysis, path, column names)
    sources = [
        ('MNH120', 'cazy', 'data/MNH120.cazy_fams.tsv', cazy_cols),
        ('MNH120', 'tpsi', 'data/MNH120.combined.TPSI.topHits', tpsi_cols),
        ('MNH120', 'location', 'data/MNH120.location.tsv', location_cols),
        ('MNH120', 'panther', 'data/MNH120.panther_fams.tsv', panther_cols),
        ('MNH120', 'superfamilies', 'data/MNH120.superfamilies.tsv', superfamilies_cols),
        ('MNH120', 'go', 'data/MNH120.goterms.tsv', go_cols),
        ('MNH120', 'ips', 'data/MNH120.combined.tsv', ips_cols),
        ('MNH120', 'pfr', 'data/atg.Description.txt', pfr_cols),
        ('B04', 'cazy', 'data/B04.cazy_fams.tsv', cazy_cols),
        ('B04', 'tpsi', 'data/B04.combined.TPSI.topHits', tpsi_cols),
        ('B04', 'location', 'data/B04.location.tsv', location_cols),
        ('B04', 'panther', 'data/B04.panther_fams.tsv', panther_cols),
        ('B04', 'superfamilies', 'data/B04.superfamilies.tsv', superfamilies_cols),
        ('B04', 'go', 'data/B04.goterms.tsv', go_cols),
        ('B04', 'ips', 'data/B04.combined.tsv', ips_cols),
        ('B04', 'pfr', 'data/B04.Description.txt', pfr_cols),
        ('USR5', 'cazy', 'data/I5V.cazy_fams.tsv', cazy_cols),
        ('USR5', 'tpsi', 'data/I5V.combined.TPSI.topHits', tpsi_cols),
        ('USR5', 'location', 'data/I5V.location.tsv', location_cols),
        ('USR5', 'panther', 'data/I5V.panther_fams.tsv', panther_cols),
        ('USR5', 'superfamilies', 'data/I5V.superfamilies.tsv', superfamilies_cols),
        ('USR5', 'go', 'data/I5V.goterms.tsv', go_cols),
        ('USR5', 'ips', 'data/I5V.combined.tsv', ips_cols),
    ]

    analyses = defaultdict(dict)
    for isolate, analysis, path, columns in sources:
        analyses[isolate][analysis] = pd.read_table(path, names=columns)
    return analyses


ANALYSES = _read_analyses()
In [6]:
def get(seqid):
    """ Shows every annotation row recorded for a sequence id.

    Each table in ANALYSES is filtered on its id column. For every table
    with at least one matching row, the isolate and analysis names are
    printed and the matching rows are displayed.

    Keyword arguments:
    seqid -- the id to look up (compared for equality with the id column).

    returns:
    None; the results are only printed/displayed.
    """

    # Every table is keyed on 'seqid' except 'tpsi', which is matched on 'sid'.
    id_column = dict.fromkeys(
        ('panther', 'cazy', 'location', 'superfamilies', 'go', 'ips', 'pfr'),
        'seqid',
    )
    id_column['tpsi'] = 'sid'

    for isolate, analyses in ANALYSES.items():
        for analysis, table in analyses.items():
            matches = table[table[id_column[analysis]] == seqid]
            if len(matches) > 0:
                print(isolate, analysis)
                display(matches)
def getblast(seqid, isolate, thresh=1e-10):
    """ Collects the BLAST hits of one query sequence from an isolate's table.

    The isolate's tab-separated hit file is scanned line by line; the fields
    of each line are named positionally using the column list below.

    Keyword arguments:
    seqid -- the query id; a line matches when its 'qseqid' equals this.
    isolate -- 'MNH120', 'B04' or 'USR5'; selects the file to scan.
    thresh -- hits are kept when their evalue is strictly below this.

    returns:
    A DataFrame of the kept hits restricted to a subset of the columns
    (values are the unparsed strings from the file), or None after
    printing 'No matches' when no hit is kept.
    """

    blast_tables = {
        'MNH120': 'data/MNH120.swiss.combined.tsv',
        'B04': 'data/B04.swiss.combined.tsv',
        'USR5': 'data/I5V.swiss.combined.tsv',
    }
    all_columns = (
        "qseqid qlen sallseqid sgi sacc saccver slen qstart "
        "qend sstart send qseq sseq evalue bitscore score "
        "length pident nident mismatch positive gapopen gaps "
        "ppos frames qframe sframe btop staxids sscinames "
        "scomnames sblastnames sskingdoms stitle salltitles "
        "sstrand qcovs qcovhsp"
    ).split(' ')
    reported_columns = [
        'qseqid', 'qlen', 'sacc', 'slen', 'qstart', 'qend',
        'sstart', 'send', 'evalue', 'bitscore', 'sscinames',
        'salltitles',
    ]

    hits = []
    with open(blast_tables[isolate], 'r') as handle:
        for raw in handle:
            # Cheap prefix test first; the exact comparison below rejects
            # ids that merely start with `seqid`.
            if not raw.startswith(seqid):
                continue
            record = dict(zip(all_columns, raw.rstrip('\n').split('\t')))
            if record['qseqid'] == seqid and float(record['evalue']) < thresh:
                hits.append(record)

    if not hits:
        print('No matches')
        return None
    return pd.DataFrame(hits)[reported_columns]
In [7]:
def subset_features(record, start, end):
    """ Selects the features of a record that touch a coordinate window.

    Slicing a SeqRecord does not handle features well, so the selection
    is done here by comparing feature coordinates with the window.

    A feature is kept when it starts inside the window, ends inside the
    window, or extends beyond the window on both sides.

    Keyword arguments:
    record -- a SeqRecord object containing features.
    start -- the lower bound to include.
    end -- the upper bound to include.

    returns:
    A list of features, in the order they appear in record.features.
    """

    def touches_window(feature):
        lo = feature.location.start
        hi = feature.location.end
        starts_inside = start <= lo < end
        ends_inside = start < hi <= end
        spans_window = (lo < start) and (hi > end)
        return starts_inside or ends_inside or spans_window

    return [feature for feature in record.features if touches_window(feature)]
In [28]:
def scaf_to_homologues(scaf):
    """ Tabulates, gene by gene, the homologues of the genes on a scaffold.

    The scaffold is looked up in GENOMES to find the isolate it belongs to
    (the query isolate). Each 'gene' feature on it, in order of start
    coordinate, that has an entry in candidate_homologues yields one row.

    For the query isolate the row records the scaffold and the gene id.
    For every other isolate present in GENOMES, only the first listed
    homologue is used: the first 'gene' feature found by subset_features
    within that homologue's scaffold/start/end is recorded. Isolates with
    no homologues, an unknown scaffold, or no gene in the region are left
    out of the row.

    Keyword arguments:
    scaf -- the id of a scaffold present in one of the GENOMES.

    returns:
    A DataFrame with '<isolate>_scaffold' and '<isolate>_id' columns.

    raises:
    ValueError -- if scaf is not found in any genome.
    """

    # If several genomes contain the id, the last one wins.
    qisolate = None
    for isolate, scaffolds in GENOMES.items():
        if scaf in scaffolds:
            qisolate = isolate
    if qisolate is None:
        raise ValueError(
            'scaffold {!r} was not found in any genome'.format(scaf)
        )

    genes = [f for f in GENOMES[qisolate][scaf].features if f.type == 'gene']
    genes.sort(key=lambda f: f.location.start)

    table = list()
    for gene in genes:
        id_ = gene.id
        try:
            homologues = candidate_homologues[id_]
        except KeyError:
            # Genes without a candidate homologue entry contribute no row.
            continue

        group = dict()
        for isolate, hlogs in homologues.items():
            if isolate not in GENOMES or len(hlogs) == 0:
                continue
            if isolate == qisolate:
                group[qisolate + '_scaffold'] = scaf
                group[qisolate + '_id'] = id_
                continue

            # Only the first listed homologue is considered.
            hlog = hlogs[0]
            scaffold = hlog['scaffold']
            if scaffold not in GENOMES[isolate]:
                continue

            region = subset_features(
                GENOMES[isolate][scaffold], hlog['start'], hlog['end']
            )
            gene_ids = [f.id for f in region if f.type == 'gene']
            if len(gene_ids) == 0:
                continue

            group[isolate + '_scaffold'] = scaffold
            group[isolate + '_id'] = gene_ids[0]
        table.append(group)
    return pd.DataFrame(table)
In [417]:
"""{
"MNH120 scaffold":, "MNH120 id":, "MNH120 function":,
"B04 scaffold":, "B04 id":, "B04 function":,
"USR5 scaffold":, "USR5 id":, "USR5 function":,
}"""
# Rows of the scaffold_38 comparison table. Each dict is one row and may hold,
# for each of the isolates MNH120, B04 and USR5, a '<isolate> scaffold',
# '<isolate> id' and '<isolate> function' entry. Rows that only carry USR5 keys
# have no MNH120/B04 entry recorded for that gene.
# NOTE(review): ids and function labels look hand-curated -- confirm provenance.
table = [
{
"MNH120 scaffold": 'scaffold_38', "MNH120 id": 'atg136', "MNH120 function": "Unknown",
"B04 scaffold": 'B04S72', "B04 id": 'B04S72.g5566', "B04 function": 'Unknown',
"USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04813', "USR5 function": 'Unknown',
},
{
"USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04814', "USR5 function": 'Unknown',
},
{
"USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04815', "USR5 function": 'Unknown',
},
{
"MNH120 scaffold": 'scaffold_38', "MNH120 id": 'atg137', "MNH120 function": "Membrane associated protein with unknown function",
"B04 scaffold": 'B04S72', "B04 id": 'B04S72.g5565', "B04 function": 'Membrane associated protein with unknown function',
"USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04816', "USR5 function": 'Membrane associated protein with unknown function',
},
{
"USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04817', "USR5 function": 'Unknown',
},
{
"USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04818', "USR5 function": 'Unknown',
},
{
"MNH120 scaffold": 'scaffold_38', "MNH120 id": 'atg138', "MNH120 function": "DNA binding cell division control protein",
"B04 scaffold": 'B04S72', "B04 id": 'B04S72.g5564', "B04 function": 'DNA binding cell division control protein',
"USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04819', "USR5 function": 'DNA binding cell division control protein',
},
{
"MNH120 scaffold": 'scaffold_38', "MNH120 id": 'atg139', "MNH120 function": "Putative V-type ATP synthase subunit I homologue",
"B04 scaffold": 'B04S72', "B04 id": 'B04S72.g5563', "B04 function": 'Putative V-type ATP synthase subunit I homologue',
"USR5 scaffold": 'NODE_28888_length_496775_cov_15.218104', "USR5 id": 'NS.04820', "USR5 function": 'Unknown',
},
# In this last row the B04 and USR5 entries are on different scaffolds
# (B04S196, NODE_35582_...) from the rows above (B04S72, NODE_28888_...).
{
"MNH120 scaffold": 'scaffold_38', "MNH120 id": 'atg140', "MNH120 function": "Secreted protein with unknown function",
"B04 scaffold": 'B04S196', "B04 id": 'B04S196.g9853', "B04 function": 'Secreted protein with unknown function',
"USR5 scaffold": 'NODE_35582_length_606476_cov_15.460932', "USR5 id": 'exon.CUFF.10985.1.77', "USR5 function": 'Secreted protein with unknown function',
},
]
# Turn the rows into a frame with a fixed column order (scaffold/id pairs
# first, then the function labels), write it out as TSV and as a LaTeX
# longtable, and leave the frame as the cell's displayed value.
table = pd.DataFrame(table)[[
    "MNH120 scaffold", "MNH120 id",
    "USR5 scaffold", "USR5 id",
    "B04 scaffold", "B04 id",
    "MNH120 function", "USR5 function", "B04 function",
]]
table.to_csv('scaffold_38.function.tsv', sep='\t', index=False)
table.to_latex(
    'scaffold_38.function.tex',
    index=False,
    na_rep='',
    longtable=True,
)
table
Out[417]:
In [416]:
"""{
"MNH120 scaffold":, "MNH120 id":, "MNH120 function":,
"B04 scaffold":, "B04 id":, "B04 function":,
"USR5 scaffold":, "USR5 id":, "USR5 function":,
}"""
# Rows of the atg140-region comparison table. Each dict is one row and may
# hold, for each of the isolates MNH120, B04 and USR5, a '<isolate> scaffold',
# '<isolate> id' and '<isolate> function' entry; keys are simply omitted where
# nothing is recorded for an isolate.
# NOTE(review): ids and function labels look hand-curated -- confirm provenance.
table = [
    {
        "MNH120 scaffold": 'scaffold_997', "MNH120 id": 'atg10842', "MNH120 function": "Membrane associated PQ loop repeat protein",
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9843', "B04 function": "Membrane associated PQ loop repeat protein",
        "USR5 scaffold": 'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06676', "USR5 function":"Membrane associated PQ loop repeat protein",
    },
    {
        # Fixed: the scaffold and id values were swapped in this row.
        "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10843', "MNH120 function":"Pyridine nucleotide-disulphide oxidoreductase",
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9844', "B04 function":"Pyridine nucleotide-disulphide oxidoreductase",
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06675', "USR5 function":"Pyridine nucleotide-disulphide oxidoreductase",
    },
    {
        "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10844', "MNH120 function":"Sodium-dependent phosphate transporter",
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9845', "B04 function":"Sodium-dependent phosphate transporter",
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06674', "USR5 function":"Sodium-dependent phosphate transporter",
    }, ## Possibly need to split into USR5 and MNH120 vs B04
    {
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06673',
    },
    {
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06672', "USR5 function":"Transcription factor",
    },
    {
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06671',
    }, # Possible split with NS.06670
    {
        "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10846', "MNH120 function":"NMT1/THI5 like protein, pyrimidine biosynthesis",
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9846', ## Unknown
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06670', "USR5 function":'NMT1/THI5 like protein, pyrimidine biosynthesis',
    },
    {
        "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10847', "MNH120 function":'DNA replication licensing factor MCM5',
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9847', "B04 function":"DNA replication licensing factor MCM5",
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10994.1.84', "USR5 function":"DNA replication licensing factor MCM5",
    },
    {
        "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10848', "MNH120 function":"Cysteine proteinase",
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9848', "B04 function":"Cysteine proteinase",
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06669', "USR5 function":"Cysteine proteinase",
    },
    {
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10998.1.83',
    },
    {
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9849',
    },
    {
        "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10849', "MNH120 function":"Eukaryotic translation initiation factor 4E",
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9850', "B04 function":"Eukaryotic translation initiation factor 4E",
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06668', "USR5 function":"Eukaryotic translation initiation factor 4E",
    },
    {
        "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10850', "MNH120 function": "Protein phosphatase inhibitor",
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9851', "B04 function":'Protein phosphatase inhibitor',
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.11014.2.81', "USR5 function":'Protein phosphatase inhibitor',
    },
    {
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06667',
    },
    {
        "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10851',
    },
    {
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06666', "USR5 function":"Aminopeptidase",
    }, # possible split with 'exon.CUFF.11014.2.80'
    {
        "MNH120 scaffold":'scaffold_997', "MNH120 id":'atg10852', "MNH120 function":"Aminopeptidase",
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9852', "B04 function":"Aminopeptidase",
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.11014.2.80', "USR5 function":"Aminopeptidase",
    },
    {
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06665',
    },
    {
        "MNH120 scaffold": 'scaffold_38', "MNH120 id": 'atg140', "MNH120 function": "Secreted protein with unknown function",
        "B04 scaffold": 'B04S196', "B04 id": 'B04S196.g9853', "B04 function": 'Secreted protein with unknown function',
        "USR5 scaffold": 'NODE_35582_length_606476_cov_15.460932', "USR5 id": 'exon.CUFF.10985.1.77', "USR5 function": 'Secreted protein with unknown function',
    },
    {
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06664',
    },
    {
        "MNH120 scaffold":'scaffold_887', "MNH120 id":'atg7428', "MNH120 function":"Transmembrane ion transporter",
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9854', "B04 function":"Transmembrane ion transporter",
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06663', "USR5 function":"Transmembrane ion transporter",
    },
    {
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9855',
    },
    {
        "MNH120 scaffold":'scaffold_887', "MNH120 id":'atg7429', "MNH120 function":"Methylenetetrahydrofolate reductase",
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9856', "B04 function":"Methylenetetrahydrofolate reductase",
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10988.1.76',
    },
    {
        "MNH120 scaffold":'scaffold_887', "MNH120 id":'atg7430', "MNH120 function":'Mannosyltransferase',
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10988.1.75', "USR5 function":'Mannosyltransferase',
    }, ## Split with over 'B04S196.g9856'
    {
        "MNH120 scaffold":'scaffold_887', "MNH120 id":'atg7431', "MNH120 function":"Gamma-tubulin protein",
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9857', "B04 function":"Gamma-tubulin protein",
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10983.1.74', "USR5 function":"Gamma-tubulin protein",
    },
    {
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06662',
    },
    {
        "MNH120 scaffold":'scaffold_887', "MNH120 id":'atg7432',
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10983.1.73',
    },
    {
        "MNH120 scaffold":'scaffold_244', "MNH120 id":'atg1389',
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9858',
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06661',
    },
    {
        "MNH120 scaffold":'scaffold_244', "MNH120 id":'atg1390', "MNH120 function":'Transcription factor',
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9859', "B04 function":'Transcription factor',
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10989.1.72', "USR5 function":'Transcription factor',
    },
    {
        "MNH120 scaffold":'scaffold_244', "MNH120 id":'atg1391',
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06660',
    },
    {
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.10986.1.71', "USR5 function": "DNA binding protein",
    },
    {
        "MNH120 scaffold":'scaffold_878', "MNH120 id":'atg7619', "MNH120 function":'Dimethylaniline monooxygenase',
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9860', "B04 function": 'Flavin mononucleotide binding protein',
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'exon.CUFF.11171.1.68', "USR5 function":'Flavin mononucleotide binding protein',
    },
    {
        "MNH120 scaffold":'scaffold_878', "MNH120 id":'atg7620', "MNH120 function": "Zn/Fe transporter",
        "B04 scaffold":'B04S196', "B04 id":'B04S196.g9861', "B04 function": "Zn/Fe transporter",
        "USR5 scaffold":'NODE_35582_length_606476_cov_15.460932', "USR5 id":'NS.06659', "USR5 function": "Zn/Fe transporter",
    },
]
# Turn the rows into a frame with a fixed column order (scaffold/id pairs
# first, then the function labels), write it out as TSV and as a LaTeX
# longtable, and leave the frame as the cell's displayed value.
table = pd.DataFrame(table)[[
    "MNH120 scaffold", "MNH120 id",
    "USR5 scaffold", "USR5 id",
    "B04 scaffold", "B04 id",
    "MNH120 function", "USR5 function", "B04 function",
]]
table.to_csv('atg140.function.tsv', sep='\t', index=False)
table.to_latex(
    'atg140.function.tex',
    index=False,
    na_rep='',
    longtable=True,
)
table
Out[416]:
In [412]:
"""{
"MNH120 scaffold":, "MNH120 id":, "MNH120 function":,
"B04 scaffold":, "B04 id":, "B04 function":,
"USR5 scaffold":, "USR5 id":, "USR5 function":,
}"""
# Rows of the atg12487-region comparison table. Each dict is one row and may
# hold, for each of the isolates MNH120, B04 and USR5, a '<isolate> scaffold',
# '<isolate> id' and '<isolate> function' entry; keys are omitted where nothing
# is recorded for an isolate. All entries sit on scaffold_978 (MNH120),
# B04S84 (B04) and NODE_36160_... (USR5).
# Some ids recur in several rows (B04S84.g6038, atg12488, and the pair
# atg12483 / B04S84.g6047), each time alongside a different USR5 id.
# NOTE(review): ids and function labels look hand-curated -- confirm provenance.
table = [
{ ## Very weak match to Fbox domain
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12496",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11385.1.31",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12495", "MNH120 function": "WD40 repeat domain-containing protein",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6037", "B04 function": "WD40 repeat domain-containing protein",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11385.1.32", "USR5 function": "WD40 repeat domain-containing protein",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12494", "MNH120 function": "Cell division control protein",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6038", "B04 function": "Cell division control protein",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11385.1.33", "USR5 function": "Cell division control protein",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12493", "MNH120 function": "Cell division control protein",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6038", "B04 function": "Cell division control protein",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11385.1.34", "USR5 function": "Cell division control protein",
},
{ # B04S84.g6039 is a small gene within B04S84.g6038, no rearrangement
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6039",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12492", "MNH120 function": "WD40 repeat domain-containing protein, possibly ribosome associated, possible protease activity",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6040", "B04 function": "WD40 repeat domain-containing protein, possibly ribosome associated, possible protease activity",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06862", "USR5 function": "WD40 repeat domain-containing protein, possibly ribosome associated, possible protease activity",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12491",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06861",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12490", "MNH120 function": "GATS-like protein",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6041", "B04 function": "GATS-like protein",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11309.1.28", "USR5 function": "GATS-like protein",
},
{
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06860",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12489",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6042",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06859",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12488",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6043",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06858",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12488",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06857",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12487", "MNH120 function": "Cellobiose dehydrogenase",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6044", "B04 function": "Cellobiose dehydrogenase",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11325.4.26", "USR5 function": "Cellobiose dehydrogenase",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12486", "MNH120 function": "GDP-mannose transporter",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6045", "B04 function": "GDP-mannose transporter",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11301.1.19", "USR5 function": "GDP-mannose transporter",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12485",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12484", "MNH120 function": "Dimeric alpha-beta barrel protein, secondary metabolite associated",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6046", "B04 function": "Dimeric alpha-beta barrel protein, secondary metabolite associated",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06856", "USR5 function": "Dimeric alpha-beta barrel protein, secondary metabolite associated",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12483", "MNH120 function": "Cytochrome P450",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6047", "B04 function": "Cytochrome P450",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06855", "USR5 function": "Cytochrome P450",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12483", "MNH120 function": "Cytochrome P450",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6047", "B04 function": "Cytochrome P450",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11297.1.18", "USR5 function": "Cytochrome P450",
},
{
"MNH120 scaffold": "scaffold_978", "MNH120 id": "atg12483", "MNH120 function": "Cytochrome P450",
"B04 scaffold": "B04S84", "B04 id": "B04S84.g6047", "B04 function": "Cytochrome P450",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06854",
},
]
# Build the frame from the rows in reverse order, with a fixed column order
# (scaffold/id pairs first, then the function labels), write it out as TSV and
# as a LaTeX longtable, and leave the frame as the cell's displayed value.
table = pd.DataFrame(table[::-1])[[
    "MNH120 scaffold", "MNH120 id",
    "USR5 scaffold", "USR5 id",
    "B04 scaffold", "B04 id",
    "MNH120 function", "USR5 function", "B04 function",
]]
table.to_csv('atg12487.function.tsv', sep='\t', index=False)
table.to_latex(
    'atg12487.function.tex',
    index=False,
    na_rep='',
    longtable=True,
)
table
Out[412]:
In [414]:
table = [
{
"MNH120 scaffold": "scaffold_48", "MNH120 id": "atg162",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10047",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06847",
},
{
"MNH120 scaffold": "scaffold_48", "MNH120 id": "atg161",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10048",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06846",
},
{
"MNH120 scaffold": "scaffold_48", "MNH120 id": "atg160", "MNH120 function": "Sulfite oxidase",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10049", "B04 function": "Sulfite oxidase",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11287.1.9", "USR5 function": "Sulfite oxidase",
},
{
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10050",
},
{
"MNH120 scaffold": "scaffold_48", "MNH120 id": "atg159", "MNH120 function": "Amun-like protein",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10051", "B04 function": "Amun-like protein",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11284.2.7", "USR5 function": "Amun-like protein",
},
{
"MNH120 scaffold": "scaffold_893", "MNH120 id": "atg7462", "MNH120 function": "Ubiquitin carboxyl-terminal hydrolase",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10052", "B04 function": "Ubiquitin carboxyl-terminal hydrolase",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11303.1.5", "USR5 function": "Ubiquitin carboxyl-terminal hydrolase",
},
{
"MNH120 scaffold": "scaffold_893", "MNH120 id": "atg7463", "MNH120 function": "Pyruvate dehydrogenase kinase",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10053", "B04 function": "Pyruvate dehydrogenase kinase",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11302.1.4", "USR5 function": "Pyruvate dehydrogenase kinase",
},
{
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06845",
},
{
"MNH120 scaffold": "scaffold_893", "MNH120 id": "atg7464",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10054",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11302.1.3",
},
{
"MNH120 scaffold": "scaffold_893", "MNH120 id": "atg7465", "MNH120 function": "Beta-glucosidase family 1",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10055", "B04 function": "Beta-glucosidase family 1",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11282.1.2", "USR5 function": "Beta-glucosidase family 1",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10702", "MNH120 function": "Membrane protein with unknown function",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10056", "B04 function": "Membrane protein with unknown function",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06844", "USR5 function": "Membrane protein with unknown function",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10703", "MNH120 function": "Secreted protein with unknown function",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10057", "B04 function": "Secreted protein with unknown function",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "exon.CUFF.11283.1.0", "USR5 function": "Secreted protein with unknown function",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10704",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10058",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06843", # Possible RNI-like superfamily
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10705", "MNH120 function": "GPI transamidase component PIG-U",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10059", "B04 function": "GPI transamidase component PIG-U",
"USR5 scaffold": "NODE_36160_length_249094_cov_15.165792", "USR5 id": "NS.06842", "USR5 function": "GPI transamidase component PIG-U",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08710", "USR5 function": "Oxidoreductase",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14260.1.0", "USR5 function": "Aromatic-L-amino acid decarboxylase",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10706", "MNH120 function": "Possible glycosyltransferase",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10060", "B04 function": "Possible glycosyltransferase",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08711", ## 3' tail of atg10706
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08712",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08713",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08714",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08715",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08716", "USR5 function": "Partial Cytochrome P450 match",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08717", "USR5 function": "Partial ribonuclease H/Integrase, possible Gypsy TE",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08718", "USR5 function": "Partial polymerase, possible Gypsy TE",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08719", "USR5 function": "Partial polymerase, possible Gypsy TE",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08720", "USR5 function": "Weak partial Gypsy TE match",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08721", "USR5 function": "Partial phospholipase",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08722",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08723",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10706", "MNH120 function": "Possible glycosyltransferase",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10060", "B04 function": "Possible glycosyltransferase",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08724", "USR5 function": "Possible glycosyltransferase",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10707", "MNH120 function": "MM3350-like protein, possible DNA-binding",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10062", "B04 function": "MM3350-like protein, possible DNA-binding",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14230.1.3", "USR5 function": "MM3350-like protein, possible DNA-binding",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10708",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10063",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08725",
},
{
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10064", "B04 function": "NCS1 family purine/pyrimidine transporter",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08726", "USR5 function": "NCS1 family purine/pyrimidine transporter",
},
{
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10065", "B04 function": "Clavaminate synthase-like protein", #### Split?
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14211.1.7", "USR5 function": "Transcriptional regulatory protein",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10709",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08729", ## Partial plexin repeat
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14232.2.11",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10710", "MNH120 function": "Chloride conductance regulatory protein ICLN",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10066", "B04 function": "Chloride conductance regulatory protein ICLN",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14232.2.10", "USR5 function": "Chloride conductance regulatory protein ICLN",
},
{
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10067", "B04 function": "Glycosyltransferase family 25 member",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14214.1.12", "USR5 function": "Glycosyltransferase family 25 member, possibly inactive",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10711", "MNH120 function": "Glycosyltransferase family 25 member, possibly inactive",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10067", "B04 function": "Glycosyltransferase family 25 member",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08730", "USR5 function": "Glycosyltransferase family 25 member, possibly inactive",
},
{
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10068", "B04 function": "Glycosyltransferase family 25 member",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08730", "USR5 function": "Glycosyltransferase family 25 member, possibly inactive",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10712",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10069",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08731",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10713",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10070",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14217.1.14",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10713",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10070",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14217.2.15",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10713",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10070",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14217.3.16",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08732",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10714", "MNH120 function": "Vacuolar protein sorting-associated protein 54",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10071", "B04 function": "Vacuolar protein sorting-associated protein 54",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14228.1.17", "USR5 function": "Vacuolar protein sorting-associated protein 54",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10715",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10072",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08733",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08734",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10716",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10073",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08735",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10717",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10074",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08736",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10718", "MNH120 function": "MFS transporter",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10075", "B04 function": "MFS transporter",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08737", "USR5 function": "MFS transporter",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10719", "MNH120 function": "Secreted protein with unknown function",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10076", "B04 function": "Secreted protein with unknown function",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08738", "USR5 function": "Secreted protein with unknown function",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10720",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10721", "MNH120 function": "Possible M-phase phosphoprotein 6",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10077", "B04 function": "Possible M-phase phosphoprotein 6",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08739", "USR5 function": "Possible M-phase phosphoprotein 6",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10722", "MNH120 function": "NADH-ubiquinone oxidoreductase",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10078", "B04 function": "NADH-ubiquinone oxidoreductase",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08740", "USR5 function": "NADH-ubiquinone oxidoreductase",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08741", "USR5 function": "",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10723", "MNH120 function": "DNA damage repair protein, possible tRNA-splicing endonuclease",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10079", "B04 function": "DNA damage repair protein, possible tRNA-splicing endonuclease",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14234.1.22", "USR5 function": "DNA damage repair protein, possible tRNA-splicing endonuclease",
},
{
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08742",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10724", "MNH120 function": "Secreted protein with possible transmembrane domain, unknown function",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10080", "B04 function": "Secreted protein with possible transmembrane domain, unknown function",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08743", "USR5 function": "Possible transmembrane protein, unknown function", # has extra bit on 5' end
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10725", "MNH120 function": "YjbQ-like protein, possible secondary thiamine-phosphate synthase activity",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10082", "B04 function": "YjbQ-like protein, possible secondary thiamine-phosphate synthase activity",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08744", "USR5 function": "YjbQ-like protein, possible secondary thiamine-phosphate synthase activity",
},
{
"MNH120 scaffold": "scaffold_996", "MNH120 id": "atg10726",
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10083",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14238.1.25",
},
{
# B04/USR5 pair; no MNH120 counterpart is listed for this row.
"B04 scaffold": "B04S209", "B04 id": "B04S209.g10084",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08745",
},
{
# USR5-only row; no MNH120 or B04 counterpart is listed.
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08746",
},
{
"MNH120 scaffold": "scaffold_554", "MNH120 id": "atg4020", "MNH120 function": "Possible nuclear transport factor 2-like protein",
"B04 scaffold": "B04S181", "B04 id": "B04S181.g9491", "B04 function": "Possible nuclear transport factor 2-like protein",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08747", "USR5 function": "Possible nuclear transport factor 2-like protein",
},
{
"MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10943", "MNH120 function": "Nuclear movement protein NUDC, possible HSP20-like chaperone",
"B04 scaffold": "B04S181", "B04 id": "B04S181.g9490", "B04 function": "Nuclear movement protein NUDC, possible HSP20-like chaperone",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14241.1.26", "USR5 function": "Nuclear movement protein NUDC, possible HSP20-like chaperone",
},
{
# USR5-only row; no MNH120 or B04 counterpart is listed.
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14242.1.27",
},
{
"MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10942", "MNH120 function": "Probable lipid transporter/ligase, possible Acetyl-CoA synthetase-like protien",
"B04 scaffold": "B04S181", "B04 id": "B04S181.g9489", "B04 function": "Probable lipid transporter/ligase, possible Acetyl-CoA synthetase-like protien",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08748", "USR5 function": "Probable lipid transporter/ligase, possible Acetyl-CoA synthetase-like protien",
},
{
"B04 scaffold": "B04S181", "B04 id": "B04S181.g9488",
},
{
"MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10941", "MNH120 function": "NCS1 family purine-pyrimidine transporter/permease",
"B04 scaffold": "B04S181", "B04 id": "B04S181.g9487", "B04 function": "NCS1 family purine-pyrimidine transporter/permease",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08749", "USR5 function": "NCS1 family purine-pyrimidine transporter/permease",
},
{
"MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10941", "MNH120 function": "NCS1 family purine-pyrimidine transporter/permease",
"B04 scaffold": "B04S181", "B04 id": "B04S181.g9487", "B04 function": "NCS1 family purine-pyrimidine transporter/permease",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08750", "USR5 function": "NCS1 family purine-pyrimidine transporter/permease",
},
{
"MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10940", "MNH120 function": "Sterol reductase/Lamin B receptor",
"B04 scaffold": "B04S181", "B04 id": "B04S181.g9486", "B04 function": "Sterol reductase/Lamin B receptor",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14250.2.30", "USR5 function": "Sterol reductase/Lamin B receptor",
},
{
"MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10939", "MNH120 function": "Zinc finger C2H2 type domain protein",
"B04 scaffold": "B04S181", "B04 id": "B04S181.g9485", # Truncated at 5' end compared to USR5 and MNH120
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "exon.CUFF.14248.1.31", "USR5 function": "Zinc finger C2H2 type domain protein",
},
{
"MNH120 scaffold": "scaffold_987", "MNH120 id": "atg10906",
"B04 scaffold": "B04S181", "B04 id": "B04S181.g9484",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08751",
},
{
"B04 scaffold": "B04S181", "B04 id": "B04S181.g9484",
"USR5 scaffold": "NODE_5090_length_307152_cov_15.165000", "USR5 id": "NS.08752",
},
]
# Column order for the exported table: the scaffold/id pair of each genome
# first, followed by the free-text function annotation of each genome.
column_order = [
    "MNH120 scaffold", "MNH120 id",
    "USR5 scaffold", "USR5 id",
    "B04 scaffold", "B04 id",
    "MNH120 function", "USR5 function", "B04 function",
]

# Turn the list of per-row dicts into a frame; keys absent from a row become NaN.
table = pd.DataFrame(table)
table = table[column_order]

# Tab-separated copy of the table, without the integer index.
table.to_csv('group.function.tsv', sep='\t', index=False)

# LaTeX longtable copy with missing cells left blank.
# escape=True is passed explicitly because the ids and scaffold names contain
# underscores, and pandas 2.0+ no longer escapes LaTeX special characters by
# default, which would yield a .tex file that does not compile.
table.to_latex('group.function.tex', index=False, na_rep='', longtable=True, escape=True)

# Display the final table as the cell output.
table
Out[414]:
In [405]:
# Leftover interactive help lookup removed (was: `pd.DataFrame.from_dict?`).
# IPython's `?` suffix only displays documentation and binds no names.
In [ ]: