In [1]:
from cameo import models
In [2]:
# Load the iJO1366 model from cameo's BiGG model collection.
model = models.bigg.iJO1366
In [3]:
from marsi.cobra.utils import annotate_model
In [4]:
# Annotate the model with marsi's helper. Later cells read `met.annotation`
# (e.g. the 'inchi' key), so this presumably fills in those annotations in place
# -- TODO confirm against marsi.cobra.utils.annotate_model.
annotate_model(model)
In [5]:
# Spot-check: display the annotation of a single metabolite (aacald_c).
model.metabolites.aacald_c.annotation
Out[5]:
In [6]:
from cameo.flux_analysis.analysis import find_essential_metabolites
In [7]:
# Compute the essential metabolites of the model with cameo; the result is used
# below to exclude them from the analogue search.
essential_metabolites = find_essential_metabolites(model)
In [8]:
# Collect the metabolites that are not essential and whose id ends in "_c"
# (presumably the cytosolic compartment in BiGG naming -- confirm).
non_essential_metabolites = []
for met in model.metabolites:
    if met not in essential_metabolites and met.id.endswith("_c"):
        non_essential_metabolites.append(met)
In [9]:
# Search results keyed by metabolite id; filled by the search loop below.
analogues = dict()
In [10]:
from marsi.chemistry.molecule import Molecule
from marsi.chemistry.common import dynamic_fingerprint_cut
from marsi.nearest_neighbors import search_closest_compounds
In [11]:
import os

from pandas import DataFrame, read_csv
In [ ]:
# Search structural analogues for every non-essential metabolite that carries an
# InChI annotation. Results are cached on disk as "<met.id>_analogues.csv"
# (tab-separated) so that re-running the notebook does not repeat the search.

# These chemicals are currently breaking the software when running in a loop
# like this, so they are skipped (together with every id ending in "coa_c").
skipped_ids = {"23dhb_c", "3dhguln_c", "3hpp_c"}

for met in non_essential_metabolites:
    if met.id in skipped_ids or met.id.endswith("coa_c"):
        continue
    # Nothing to do without a structure, or when this metabolite was already
    # processed in an earlier run of this cell.
    if 'inchi' not in met.annotation or met.id in analogues:
        continue

    name = "%s_analogues.csv" % met.id
    if os.path.exists(name):
        # `DataFrame.from_csv` was deprecated in pandas 0.21 and removed in 1.0.
        # `read_csv(..., index_col=0)` restores the index column that `to_csv`
        # wrote below, without the implicit date parsing `from_csv` applied.
        analogues[met.id] = read_csv(name, sep="\t", index_col=0)
    else:
        molecule = Molecule.from_inchi(met.annotation['inchi'])
        # The fingerprint cut-off is derived from the molecule's atom count.
        fp_cut = dynamic_fingerprint_cut(molecule.num_atoms)
        analogues[met.id] = search_closest_compounds(molecule, fp_cut=fp_cut)
        analogues[met.id].to_csv(name, sep='\t')
In [15]:
# Sanity check: number of metabolites with search results vs. number of
# non-essential metabolites that carry an InChI annotation.
(
    len(analogues),
    sum('inchi' in met.annotation for met in non_essential_metabolites),
)
Out[15]:
In [16]:
from pandas import DataFrame
In [29]:
# Empty summary table; rows are added per metabolite in the following cell.
df = DataFrame(
    columns=[
        "metabolite name",
        "number of hits",
        "number of hits (50% similarity)",
    ]
)
In [30]:
# Fill the summary table: one row per metabolite that has at least one hit.
for met_id, hits in analogues.items():
    if len(hits) == 0:
        continue
    # Exact id lookup. `query(met_id)[0]` performs a regex/substring search over
    # the ids and returns the first match, which may be a different metabolite
    # whose id merely contains `met_id`.
    name = model.metabolites.get_by_id(met_id).name
    # Number of hits whose structural score exceeds 0.5.
    n_similar = int((hits.structural_score > 0.5).sum())
    df.loc[met_id] = [name, len(hits), n_similar]
In [35]:
# Display the summary table built above.
df
Out[35]:
In [ ]: