In [1]:
import sys
import os
import re
import glob
from matplotlib import pylab as plt
import pandas as pd
%matplotlib notebook
# %matplotlib inline
%config InlineBackend.figure_format = 'retina'
# curr_ld_lib_path = ''
# os.environ['LD_LIBRARY_PATH'] = curr_ld_lib_path + ':/project/projectdirs/openmsi/jupyterhub_libs/boost_1_55_0/lib' + ':/project/projectdirs/openmsi/jupyterhub_libs/lib'
# import sys
sys.path.append('/global/project/projectdirs/openmsi/jupyterhub_libs/anaconda/lib/python2.7/site-packages')
# sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
sys.path.append('/project/projectdirs/openmsi/projects/meta-iq/pactolus/pactolus')
sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
from metatlas import metatlas_objects as metob
from metatlas import h5_query as h5q
from metatlas import mzml_to_hdf
import score_frag_dag
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
# from rdkit.Chem.rdMolDescriptors import ExactMolWt
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.Draw import IPythonConsole
from IPython.display import SVG,display
In [ ]:
# These are the tests:
# if len(Chem.GetMolFrags(mol, sanitizeFrags=False, asMols=False)) != 1:
# raise TypeError('Molecule must be fully connected by covalent bonds.')
# if Chem.RemoveHs(mol).GetNumAtoms() != mol.GetNumAtoms():
# raise TypeError('Molecule must have only implicit H atoms.')
# if GetFormalCharge(mol) != 0:
# raise TypeError('Molecule must have overall neutral charge.')
In [2]:
""" contribution from Hans de Winter """
def _InitialiseNeutralisationReactions():
patts= (
# Imidazoles
('[n+;H]','n'),
# Amines
('[N+;!H0]','N'),
# Carboxylic acids and alcohols
('[$([O-]);!$([O-][#7])]','O'),
# Thiols
('[S-;X1]','S'),
# Sulfonamides
('[$([N-;X2]S(=O)=O)]','N'),
# Enamines
('[$([N-;X2][C,N]=C)]','N'),
# Tetrazoles
('[n-]','[nH]'),
# Sulfoxides
('[$([S-]=O)]','S'),
# Amides
('[$([N-]C=O)]','N'),
)
return [(Chem.MolFromSmarts(x),Chem.MolFromSmiles(y,False)) for x,y in patts]
_reactions=None
def NeutraliseCharges(mol, reactions=None):
global _reactions
if reactions is None:
if _reactions is None:
_reactions=_InitialiseNeutralisationReactions()
reactions=_reactions
# mol = Chem.MolFromSmiles(smiles)
replaced = False
for i,(reactant, product) in enumerate(reactions):
while mol.HasSubstructMatch(reactant):
replaced = True
rms = AllChem.ReplaceSubstructs(mol, reactant, product)
rms_smiles = Chem.MolToSmiles(rms[0])
mol = Chem.MolFromSmiles(rms_smiles)
if replaced:
return (mol, True) #Chem.MolToSmiles(mol,True)
else:
return (mol, False)
def drawStructure_ShowingFragment(pactolus_tree,fragment_idx,myMol,myMol_w_Hs):
drawer = rdMolDraw2D.MolDraw2DSVG(600,300)
fragment_atoms = np.where(pactolus_tree[fragment_idx]['atom_bool_arr'])[0]
mark_atoms_no_H = []
for a_index in fragment_atoms:
if myMol_w_Hs.GetAtomWithIdx(a_index).GetSymbol() != 'H':
mark_atoms_no_H.append(a_index)
rdDepictor.Compute2DCoords(myMol)
drawer.DrawMolecule(myMol,highlightAtoms=mark_atoms_no_H)
drawer.FinishDrawing()
svg = drawer.GetDrawingText().replace('svg:','')
return svg
def drawStructure_Fragment(pactolus_tree,fragment_idx,myMol,myMol_w_Hs):
fragment_atoms = np.where(pactolus_tree[fragment_idx]['atom_bool_arr'])[0]
depth_of_hit = np.sum(pactolus_tree[fragment_idx]['bond_bool_arr'])
mol2 = deepcopy(myMol_w_Hs)
# Now set the atoms you'd like to remove to dummy atoms with atomic number 0
fragment_atoms = np.where(pactolus_tree[fragment_idx]['atom_bool_arr']==False)[0]
for f in fragment_atoms:
mol2.GetAtomWithIdx(f).SetAtomicNum(0)
# Now remove dummy atoms using a query
mol3 = Chem.DeleteSubstructs(mol2, Chem.MolFromSmarts('[#0]'))
mol3 = Chem.RemoveHs(mol3)
# You get what you are looking for
return moltosvg(mol3),depth_of_hit
def moltosvg(mol,molSize=(450,150),kekulize=True):
mc = Chem.Mol(mol.ToBinary())
if kekulize:
try:
Chem.Kekulize(mc)
except:
mc = Chem.Mol(mol.ToBinary())
if not mc.GetNumConformers():
rdDepictor.Compute2DCoords(mc)
drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0],molSize[1])
drawer.DrawMolecule(mc)
drawer.FinishDrawing()
svg = drawer.GetDrawingText()
# It seems that the svg renderer used doesn't quite hit the spec.
# Here are some fixes to make it work in the notebook, although I think
# the underlying issue needs to be resolved at the generation step
return svg.replace('svg:','')
def get_ion_from_fragment(frag_info,spectrum):
hit_indices = np.where(np.sum(frag_info,axis=1))
hit = spectrum[hit_indices,:][0]
return hit,hit_indices
In [4]:
allCompounds = metob.retrieve('Compounds',inchi='InchI%',username='*')
print len(allCompounds)
allCompounds[0]
#11539 trees of depth 5 Curt had before I messed with it.
Out[4]:
In [6]:
# all_my_h5_files = glob.glob('/global/project/projectdirs/openmsi/projects/ben_trees/*_hdf5_5_*.h5')
# inchi_keys = []
# for f in all_my_h5_files:
# temp = f.replace('/global/project/projectdirs/openmsi/projects/ben_trees/FragTreeLibrary_test_hdf5_5_','').replace('h5','').replace('.','')
# inchi_keys.append(temp)
# len(inchi_keys)
# #was 13189 before
In [7]:
# get inchi key for neutralized molecules
ik = []
num_pieces = []
inchi = []
charge = []
compound_list = []
for idx,myCompound in enumerate(allCompounds):
compound_list.append(myCompound.name)
try:
myMol = Chem.MolFromInchi(str(myCompound.inchi))
(myMol, neutralised) = NeutraliseCharges(myMol)
num_pieces.append(len(Chem.GetMolFrags(myMol, sanitizeFrags=False, asMols=False)))
charge.append(Chem.GetFormalCharge(myMol))
i = Chem.MolToInchi(myMol)
inchi.append(i)
ik.append(Chem.InchiToInchiKey(i))
except:
print "Can not parse ", myCompound.name, myCompound.InChI
In [9]:
cols = ['InChI',
'MonoIsotopic_molecular_weight',
'formula',
'InChI-Key',
'charge',
'num_pieces',
'description']
# print myAtlas[0].compound_identifications[0].compound
df = pd.DataFrame(index=compound_list, columns=cols)
df['name'] = compound_list
df.set_index('name',drop=True)
for idx,myCompound in enumerate(allCompounds):
n = myCompound.name
df.ix[n,'formula'] = myCompound.formula
df.ix[n,'Monoisotopic'] = myCompound.mono_isotopic_molecular_weight
df.ix[n,'num_pieces'] = num_pieces[idx]
df.ix[n,'Charge'] = charge[idx]
df.ix[n,'InChI-Key'] = ik[idx]
df.ix[n,'InChI'] = inchi[idx]
df.ix[n,'description'] = myCompound.description
df.to_csv('20151214_lookup_table_metatlas_to_inchi_key.csv')
In [ ]: