Load a model from the BiGG database


In [1]:
from cameo import models



In [2]:
# Load the iJO1366 model from cameo's BiGG model collection.
model = models.bigg.iJO1366

Annotate the model with metabolite information


In [3]:
from marsi.cobra.utils import annotate_model


Looking for local setup.cfg
Not available [Errno 2] No such file or directory: 'setup.cfg'
/Users/joaca/Documents/repositories/marsi/marsi/io/bigg.py:23 FutureWarning: from_csv is deprecated. Please use read_csv(...) instead. Note that some of the default arguments are different, so please refer to the documentation for from_csv when changing your function calls

In [4]:
# Annotate the model's metabolites; the return value is not captured, the
# annotations are read back from `model` itself in the following cell.
annotate_model(model)



In [5]:
# Spot-check one metabolite (aacald_c): its annotation should now hold an 'inchi' entry.
model.metabolites.aacald_c.annotation


Out[5]:
{'inchi': 'InChI=1S/C2H5NO/c3-1-2-4/h2H,1,3H2'}

In [6]:
from cameo.flux_analysis.analysis import find_essential_metabolites

In [7]:
# Essential metabolites of the model, as reported by cameo's find_essential_metabolites.
essential_metabolites = find_essential_metabolites(model)

In [8]:
# Keep the metabolites whose id ends in "_c" (presumably the cytosol
# compartment suffix -- confirm) and that are not in the essential set.
non_essential_metabolites = [
    metabolite
    for metabolite in model.metabolites
    if metabolite.id.endswith("_c") and metabolite not in essential_metabolites
]

In [9]:
# Analogue search results keyed by metabolite id; filled in by the search loop below.
analogues = {}

In [10]:
from marsi.chemistry.molecule import Molecule
from marsi.chemistry.common import dynamic_fingerprint_cut
from marsi.nearest_neighbors import search_closest_compounds


/Users/joaca/Documents/repositories/marsi/marsi/chemistry/rdkit.py:25 DeprecationWarning: The rdkit.Chem.MCS module is deprecated; please use rdkit.Chem.rdFMCS instead.

In [11]:
import os

from pandas import DataFrame, read_csv

In [ ]:
# Build the table of closest compounds for every non-essential metabolite that
# carries an InChI annotation. Each result is stored in `analogues` under the
# metabolite id and written to a tab-separated "<metabolite id>_analogues.csv"
# file in the working directory; a file that already exists is loaded instead
# of running the search again.
for met in non_essential_metabolites:
    # These chemicals are currently breaking the software when running in a
    # loop like this, so they are skipped (together with every id ending in
    # "coa_c").
    if met.id in ["23dhb_c", "3dhguln_c", "3hpp_c"] or met.id.endswith("coa_c"):
        continue

    # Nothing to do without an InChI string, or when this metabolite has
    # already been processed in this session.
    if 'inchi' not in met.annotation or met.id in analogues:
        continue

    name = "%s_analogues.csv" % met.id
    if os.path.exists(name):
        # DataFrame.from_csv is deprecated and removed in pandas >= 1.0.
        # read_csv with index_col=0 and parse_dates=True reproduces the
        # defaults from_csv used, so cached files load exactly as before.
        analogues[met.id] = read_csv(name, sep="\t", index_col=0, parse_dates=True)
    else:
        molecule = Molecule.from_inchi(met.annotation['inchi'])
        # The fingerprint cut-off is derived from the molecule's atom count.
        fp_cut = dynamic_fingerprint_cut(molecule.num_atoms)
        analogues[met.id] = search_closest_compounds(molecule, fp_cut=fp_cut)
        analogues[met.id].to_csv(name, sep='\t')

In [15]:
# (metabolites with an analogue table, non-essential metabolites that carry an InChI annotation)
len(analogues), sum(1 for m in non_essential_metabolites if 'inchi' in m.annotation)


Out[15]:
(47, 479)

In [16]:
from pandas import DataFrame

In [29]:
# Empty summary table; rows are added per metabolite id by the loop in the next cell.
df = DataFrame(columns=["metabolite name", "number of hits", "number of hits (50% similarity)"])

In [30]:
# Fill the summary table with one row per metabolite that has at least one
# hit: its name, the total number of hits, and the number of hits whose
# structural_score is above 0.5.
for met_id, hits in analogues.items():
    if len(hits) == 0:
        continue  # metabolites without any hit are left out of the table

    # get_by_id is an exact id lookup. query() treats the string as a search
    # pattern, so taking its first match could pick a different metabolite
    # whose id merely contains met_id.
    name = model.metabolites.get_by_id(met_id).name
    close_hits = hits[hits.structural_score > 0.5]
    df.loc[met_id] = [name, len(hits), len(close_hits)]

In [35]:
# Display the summary table built above.
df


Out[35]:
metabolite name number of hits number of hits (50% similarity)
3hpppn_c 3-(3-hydroxy-phenyl)propionate 6298 4129
23ddhb_c 2,3-Dihydro-2,3-dihydroxybenzoate 2296 1519
3hddcoa_c (S)-3-Hydroxydodecanoyl-CoA 6 0
2dr5p_c 2-Deoxy-D-ribose 5-phosphate 226 155
35cgmp_c 3',5'-Cyclic GMP 1254 428
2amsa_c 2-Aminomalonate semialdehyde 1785 449
13dpg_c 3-Phospho-D-glyceroyl phosphate 346 200
23dhba_c (2,3-Dihydroxybenzoyl)adenylate 159 4
3ohcoa_c 3-Oxohexanoyl-CoA 5 0
3dhgulnp_c 3-keto-L-gulonate-6-phosphate 65 48
2dr1p_c 2-Deoxy-D-ribose 1-phosphate 1149 508
2pg_c D-Glycerate 2-phosphate 1056 472
15dap_c 1,5-Diaminopentane 3181 925
2mcacn_c Cis-2-Methylaconitate 902 566
12ppd__R_c (R)-Propane-1,2-diol 244 193
2h3oppan_c 2-Hydroxy-3-oxopropanoate 964 361
2dh3dgal_c 2-Dehydro-3-deoxy-D-galactonate 1902 890
3hcinnm_c 3-hydroxycinnamic acid 6608 4684
1ddecg3p_c 1-dodecanoyl-sn-glycerol 3-phosphate 1308 41
3hhcoa_c (S)-3-Hydroxyhexanoyl-CoA 4 0
2mcit_c 2-Methylcitrate 1061 517
23doguln_c 2,3-Dioxo-L-gulonate 1951 905
3oddcoa_c 3-Oxododecanoyl-CoA 9 4
1tdecg3p_c 1-tetradecanoyl-sn-glycerol 3-phosphate 918 26
3odcoa_c 3-Oxodecanoyl-CoA 12 4
23dappa_c 2,3-diaminopropionate 3025 851
12ppd__S_c (S)-Propane-1,2-diol 1207 575
2ddg6p_c 2-Dehydro-3-deoxy-D-gluconate 6-phosphate 102 73
2tpr3dpcoa_c 2'-(5''-triphosphoribosyl)-3'-dephospho-CoA 10 1
3hadpcoa_c (3S)-3-Hydroxyadipyl-CoA 2 0
3ohdcoa_c 3-Oxohexadecanoyl-CoA 3 0
2aobut_c L-2-Amino-3-oxobutanoate 2856 818
2shchc_c 2-Succinyl-6-hydroxy-2,4-cyclohexadiene-1-carb... 319 182
2dh3dgal6p_c 2-Dehydro-3-deoxy-D-galactonate 6-phosphate 102 73
2ddglcn_c 2-Dehydro-3-deoxy-D-gluconate 1902 890
3hocoa_c (S)-3-Hydroxyoctanoyl-CoA 6 1
3ohodcoa_c 3-Oxooctadecanoyl-CoA 2 0
25dkglcn_c 2,5-diketo-D-gluconate 1833 779
3hdcoa_c (S)-3-Hydroxydecanoyl-CoA 12 2
2pglyc_c 2-Phosphoglycolate 1121 564

In [ ]: