In [76]:
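# Data source: MetaCyc compound list, http://metacyc.org/META/class-instances?object=Compounds
# Open the list as a Smart Table,
# add the required columns (NOTE: the column list was not recorded here),
# and export it with weblinks.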
In [3]:
import sys,os
sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
from metatlas import metatlas_objects as metob
import csv
curr_ld_lib_path = ''
os.environ['LD_LIBRARY_PATH'] = curr_ld_lib_path + ':/project/projectdirs/openmsi/jupyterhub_libs/boost_1_55_0/lib' + ':/project/projectdirs/openmsi/jupyterhub_libs/lib'
import sys
# sys.path.remove('/anaconda/lib/python2.7/site-packages')
sys.path.append('/global/project/projectdirs/openmsi/jupyterhub_libs/anaconda/lib/python2.7/site-packages')
sys.path.insert(0,'/project/projectdirs/openmsi/projects/meta-iq/pactolus/pactolus' )
from rdkit import Chem
# from rdkit.Chem.rdMolDescriptors import ExactMolWt
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import AllChem
In [4]:
""" contribution from Hans de Winter """
def _InitialiseNeutralisationReactions():
    """Compile the table of charged-group -> neutral-group replacements.

    Each entry pairs a SMARTS pattern matching a charged functional group
    with the SMILES of the fragment that should replace it.

    Returns:
        list of (query, replacement) tuples, where ``query`` comes from
        ``Chem.MolFromSmarts`` and ``replacement`` from ``Chem.MolFromSmiles``
        called with ``False`` as its second positional argument.
    """
    charged_to_neutral = [
        ('[n+;H]','n'),                      # imidazoles
        ('[N+;!H0]','N'),                    # amines
        ('[$([O-]);!$([O-][#7])]','O'),      # carboxylic acids and alcohols
        ('[S-;X1]','S'),                     # thiols
        ('[$([N-;X2]S(=O)=O)]','N'),         # sulfonamides
        ('[$([N-;X2][C,N]=C)]','N'),         # enamines
        ('[n-]','[nH]'),                     # tetrazoles
        ('[$([S-]=O)]','S'),                 # sulfoxides
        ('[$([N-]C=O)]','N'),                # amides
    ]
    compiled = []
    for smarts, smiles in charged_to_neutral:
        query = Chem.MolFromSmarts(smarts)
        replacement = Chem.MolFromSmiles(smiles,False)
        compiled.append((query, replacement))
    return compiled
# Lazily-built cache of the default (pattern, replacement) pairs, so they are
# compiled at most once per kernel session.
_reactions=None
def NeutraliseCharges(mol, reactions=None):
    """Replace charged functional groups in ``mol`` with neutral equivalents.

    Args:
        mol: molecule object exposing ``HasSubstructMatch`` (an RDKit Mol in
            this notebook).
        reactions: optional iterable of (pattern, replacement) pairs. When
            None, the default table from
            ``_InitialiseNeutralisationReactions`` is built once and reused.

    Returns:
        (mol, replaced): the possibly rebuilt molecule and a bool that is True
        when at least one substitution was made.

    Raises:
        ValueError: if a substituted structure cannot be parsed back from its
            SMILES (``Chem.MolFromSmiles`` returned None).
    """
    global _reactions
    if reactions is None:
        if _reactions is None:
            _reactions=_InitialiseNeutralisationReactions()
        reactions=_reactions
    replaced = False
    for reactant, product in reactions:
        # Keep substituting until no instance of this charged group is left.
        while mol.HasSubstructMatch(reactant):
            replaced = True
            rms = AllChem.ReplaceSubstructs(mol, reactant, product)
            # Round-trip through SMILES to get a freshly parsed molecule.
            # isomericSmiles is requested explicitly: older RDKit releases
            # default it to False, which would drop isotope labels and
            # stereochemistry and so change the mass/formula computed later.
            rms_smiles = Chem.MolToSmiles(rms[0], isomericSmiles=True)
            mol = Chem.MolFromSmiles(rms_smiles)
            if mol is None:
                # Fail with a readable message instead of an AttributeError
                # on the next HasSubstructMatch call.
                raise ValueError(
                    'Neutralised structure could not be parsed: %r' % (rms_smiles,))
    return (mol, replaced)
In [7]:
datafile = '/global/homes/b/bpb/notebooks/bpb_data_analysis_metatlas/150624 ISTDs.txt'
with open(datafile, 'rUb') as f:
new_compounds = list(csv.DictReader(f, dialect='excel-tab'))
print new_compounds[0].keys()
In [14]:
for c in new_compounds:
myInChI = c['InChI']#'InChI=1S/C7H6O2/c8-7(9)6-4-2-1-3-5-6/h1-5H,(H,8,9)/i1D,2D,3D,4D,5D'
myMol = Chem.MolFromInchi(myInChI)
print
print c['name'],"|",Descriptors.ExactMolWt(myMol), "|",rdMolDescriptors.CalcMolFormula(myMol)
In [10]:
all_compounds = []
for myCompound in new_compounds:
myMol = Chem.MolFromInchi(myCompound['InChI'])
try:
myCharge = Chem.GetFormalCharge(myMol)
if myCharge != 0:
(myMol, neutralised) = NeutraliseCharges(myMol)
# if neutralised == False:
# print neutralised, myCompound['Common-Name']
# print rdMolDescriptors.CalcMolFormula(neutral_mol)
c = metob.Compound()
c.InChI = myCompound['InChI']
c.formula = rdMolDescriptors.CalcMolFormula(myMol)
c.MonoIsotopic_molecular_weight = Descriptors.ExactMolWt(myMol)
c.description = myCompound['description']
c.name = myCompound['name']
all_compounds.append(c)
except:
print "Can not parse ", myCompound['Common-Name']
In [46]:
# Add the Compound objects collected in all_compounds to the database.
# NOTE(review): presumably re-running this cell stores the same compounds a
# second time -- confirm metob.store's duplicate handling before re-executing.
metob.store(all_compounds)
In [11]:
allCompounds = metob.retrieve('Compound', name='ABMBA')
for c in allCompounds:
print c.name,c.formula,c.MonoIsotopic_molecular_weight,c.description, c.InChI
In [12]:
%%javascript
var nb = IPython.notebook;
var kernel = IPython.notebook.kernel;
var command = "NOTEBOOK_FULL_PATH = '" + nb.base_url + nb.notebook_path + "'";
kernel.execute(command);
In [13]:
filename = os.path.basename(NOTEBOOK_FULL_PATH)
%system cp $filename /project/projectdirs/openmsi/www/
temp = '%s/%s'%('/project/projectdirs/openmsi/www',filename)
%system chmod 775 $temp
print 'http://nbviewer.ipython.org/url/portal.nersc.gov/project/openmsi/%s?flush_cache=true'%filename
In [ ]: