In [76]:
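# Source of the MetaCyc compound table:
#   http://metacyc.org/META/class-instances?object=Compounds
# Export procedure:
#   1. Open the compound list as a SmartTable.
#   2. Add the required columns (the column list was not recorded in this note).
#   3. Export the table with web links included.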

In [3]:
# Environment setup cell: extends sys.path and LD_LIBRARY_PATH with shared
# project directories, then imports metatlas, csv and the RDKit modules used below.
# Keep the statement order: each sys.path change is placed before the import
# that presumably relies on it.
import sys,os
# Give the metatlas site-packages directory highest priority on the module search path.
sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
from metatlas import metatlas_objects as metob
import csv


# Prefix for LD_LIBRARY_PATH; it is empty, so the value assigned below starts with ':'.
# NOTE(review): an empty path element is usually treated as the current directory
# by the dynamic loader — confirm this is intended.
curr_ld_lib_path = ''

# Overwrites LD_LIBRARY_PATH in os.environ with the boost and jupyterhub library directories.
# NOTE(review): presumably needed so RDKit's shared libraries can be found; changing
# this variable after the interpreter has started may not affect the running
# process — TODO confirm.
os.environ['LD_LIBRARY_PATH'] = curr_ld_lib_path + ':/project/projectdirs/openmsi/jupyterhub_libs/boost_1_55_0/lib' + ':/project/projectdirs/openmsi/jupyterhub_libs/lib'
# Duplicate of the import at the top of this cell; harmless.
import sys
# sys.path.remove('/anaconda/lib/python2.7/site-packages')
# Appended (lowest priority): the openmsi anaconda site-packages directory.
sys.path.append('/global/project/projectdirs/openmsi/jupyterhub_libs/anaconda/lib/python2.7/site-packages')
# Inserted first (highest priority): the pactolus project directory.
sys.path.insert(0,'/project/projectdirs/openmsi/projects/meta-iq/pactolus/pactolus' )

# RDKit pieces used in this notebook: Chem for parsing, Descriptors for the exact
# mass, rdMolDescriptors for the formula, AllChem for substructure replacement.
from rdkit import Chem
# from rdkit.Chem.rdMolDescriptors import ExactMolWt
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import AllChem

In [4]:
""" contribution from Hans de Winter """
def _InitialiseNeutralisationReactions():
    """Build the (query, replacement) molecule pairs used to neutralise charges.

    Each charged-group SMARTS is compiled with Chem.MolFromSmarts and paired
    with its neutral replacement parsed by Chem.MolFromSmiles(smiles, False).

    Returns:
        list of (query molecule, replacement molecule) tuples, in table order.
    """
    charged_to_neutral = (
        ('[n+;H]','n'),                        # imidazoles
        ('[N+;!H0]','N'),                      # amines
        ('[$([O-]);!$([O-][#7])]','O'),        # carboxylic acids and alcohols
        ('[S-;X1]','S'),                       # thiols
        ('[$([N-;X2]S(=O)=O)]','N'),           # sulfonamides
        ('[$([N-;X2][C,N]=C)]','N'),           # enamines
        ('[n-]','[nH]'),                       # tetrazoles
        ('[$([S-]=O)]','S'),                   # sulfoxides
        ('[$([N-]C=O)]','N'),                  # amides
        )
    reaction_pairs = []
    for charged_smarts, neutral_smiles in charged_to_neutral:
        query = Chem.MolFromSmarts(charged_smarts)
        replacement = Chem.MolFromSmiles(neutral_smiles,False)
        reaction_pairs.append((query, replacement))
    return reaction_pairs

# Module-level cache of the default (query, replacement) pairs; built on first use.
_reactions=None
def NeutraliseCharges(mol, reactions=None):
    """Neutralise charged groups in an RDKit molecule.

    Every (query, replacement) pair is applied repeatedly until the query no
    longer matches. After each replacement the molecule is rebuilt through a
    SMILES round-trip.

    Args:
        mol: RDKit molecule to neutralise.
        reactions: optional iterable of (query molecule, replacement molecule)
            pairs. When None, the default set from
            _InitialiseNeutralisationReactions() is built once and cached in
            the module-level _reactions.

    Returns:
        Tuple (mol, replaced): the resulting molecule (the input object itself
        when nothing matched) and True if at least one replacement was made.

    Raises:
        ValueError: if a rewritten structure cannot be parsed back from SMILES.
    """
    global _reactions
    if reactions is None:
        if _reactions is None:
            _reactions=_InitialiseNeutralisationReactions()
        reactions=_reactions
    replaced = False
    for reactant, product in reactions:
        while mol.HasSubstructMatch(reactant):
            replaced = True
            rms = AllChem.ReplaceSubstructs(mol, reactant, product)
            # Request isomeric SMILES explicitly: on RDKit releases where the
            # default is non-isomeric, the round-trip would silently drop
            # isotope labels and stereochemistry, corrupting the exact mass of
            # labelled compounds.
            rms_smiles = Chem.MolToSmiles(rms[0], isomericSmiles=True)
            mol = Chem.MolFromSmiles(rms_smiles)
            if mol is None:
                # Fail with a clear message instead of an AttributeError on the
                # next loop iteration (or a (None, True) return).
                raise ValueError(
                    'Neutralisation produced an unparsable SMILES: %r' % (rms_smiles,))
    return (mol, replaced)

In [7]:
datafile = '/global/homes/b/bpb/notebooks/bpb_data_analysis_metatlas/150624 ISTDs.txt'
with open(datafile, 'rUb') as f:
    new_compounds = list(csv.DictReader(f, dialect='excel-tab'))
print new_compounds[0].keys()


['InChI', 'name', 'description']

In [14]:
for c in new_compounds:
    myInChI = c['InChI']#'InChI=1S/C7H6O2/c8-7(9)6-4-2-1-3-5-6/h1-5H,(H,8,9)/i1D,2D,3D,4D,5D'
    myMol = Chem.MolFromInchi(myInChI)
    print 
    print c['name'],"|",Descriptors.ExactMolWt(myMol), "|",rdMolDescriptors.CalcMolFormula(myMol)


3,6-Dihydroxy-4-methylpyridazine | 126.042927432 | C5H6N2O2

d5-benzoic acid | 127.068163162 | C7H6O2

4-(3,3-dimethyl-ureido)benzoic acid | 208.084792244 | C10H12N2O3

9-anthracene carboxylic acid | 222.06807956 | C15H10O2

13C-15N-L-phenylalanine | 175.106207052 | C9H11NO2

13C-glucose | 186.083517144 | C6H12O6

d8-lysine | 154.155741656 | C6H14N2O2

d4-lysine | 150.130634672 | C6H14N2O2

ABMBA | 228.973840596 | C8H8BrNO2

In [10]:
all_compounds = []
for myCompound in new_compounds:
    myMol = Chem.MolFromInchi(myCompound['InChI'])
    try:
        myCharge = Chem.GetFormalCharge(myMol)
        if  myCharge != 0:
            (myMol, neutralised) = NeutraliseCharges(myMol)
#             if neutralised == False:
#                 print neutralised, myCompound['Common-Name']
#                 print rdMolDescriptors.CalcMolFormula(neutral_mol)
        c = metob.Compound()
        c.InChI = myCompound['InChI']
        c.formula = rdMolDescriptors.CalcMolFormula(myMol)
        c.MonoIsotopic_molecular_weight = Descriptors.ExactMolWt(myMol)
        c.description = myCompound['description']
        c.name = myCompound['name']
        all_compounds.append(c)

                
    except:
        print "Can not parse ", myCompound['Common-Name']

In [46]:
# Add the Compound objects collected in all_compounds to the database via metob.store.
# NOTE(review): re-running this cell presumably stores the same compounds again —
# confirm how metob.store treats entries that already exist before re-executing.
metob.store(all_compounds)

In [11]:
allCompounds = metob.retrieve('Compound', name='ABMBA')
for c in allCompounds:
    print c.name,c.formula,c.MonoIsotopic_molecular_weight,c.description, c.InChI


ABMBA C8H8BrNO2 228.974 https://pubchem.ncbi.nlm.nih.gov/compound/2774400 InChI=1S/C8H8BrNO2/c1-4-2-5(8(11)12)7(10)6(9)3-4/h2-3H,10H2,1H3,(H,11,12)

In [12]:
%%javascript
// Hand this notebook's location to the Python kernel: after this cell runs,
// the kernel has a variable NOTEBOOK_FULL_PATH set to base_url + notebook_path.
var notebook = IPython.notebook;
var fullPath = notebook.base_url + notebook.notebook_path;
notebook.kernel.execute("NOTEBOOK_FULL_PATH = '" + fullPath + "'");



In [13]:
# Publish a copy of this notebook to the web-served project directory and print
# the nbviewer link for it.
# NOTEBOOK_FULL_PATH is assigned by the %%javascript cell, so that cell must
# have been executed before this one.
filename = os.path.basename(NOTEBOOK_FULL_PATH)
# Copy by bare file name, i.e. relative to the kernel's working directory.
# NOTE(review): $filename is expanded unquoted into the shell command, so a
# notebook name containing spaces would presumably break the copy — confirm.
%system cp $filename /project/projectdirs/openmsi/www/
# Path of the published copy.
temp = '%s/%s'%('/project/projectdirs/openmsi/www',filename)
# Set mode 775 on the published copy.
%system chmod 775 $temp
print 'http://nbviewer.ipython.org/url/portal.nersc.gov/project/openmsi/%s?flush_cache=true'%filename


http://nbviewer.ipython.org/url/portal.nersc.gov/project/openmsi/MetAtlas_003_Add_Compounds_To_Database.ipynb?flush_cache=true

In [ ]: