In [1]:
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem.Draw.MolDrawing import MolDrawing, DrawingOptions
# Turn off the SVG flag of RDKit's IPython integration
# (presumably so molecules are rendered as raster images -- TODO confirm).
IPythonConsole.ipython_useSVG = False
# Ask the drawing options to include atom numbers in molecule depictions.
DrawingOptions.includeAtomNumbers=True
In [2]:
from molpher.core import ExplorationTree as ETree
from molpher.core import MolpherMol
# Build the source molecule from the "captopril.sdf" file
# (path is relative -- presumably to the notebook's working directory).
captopril = MolpherMol("captopril.sdf")
# Create the exploration tree with captopril as its source molecule.
tree = ETree.create(source=captopril)
In [3]:
# Cell output: the first leaf of the tree converted with asRDMol()
# (presumably the source molecule, since nothing has been generated yet).
tree.leaves[0].asRDMol()
Out[3]:
In [4]:
# Print the 'source' entry of the tree's parameters.
print('Source: ', tree.params['source'])
In [5]:
# Cell output: the tree's current parameters.
tree.params
Out[5]:
In [6]:
# Assign new values for two parameters, then show the resulting
# parameter set as the cell output.
tree.params = dict(
    non_producing_survive=2,
    weight_max=500.0,
)
tree.params
Out[6]:
In [7]:
print(tree.leaves) # show the current leaves of the tree (only the source so far)
# Cell output: the first leaf converted with asRDMol().
tree.leaves[0].asRDMol()
Out[7]:
In [8]:
# Run morph generation with default arguments; the results are read back
# through tree.candidates below.
tree.generateMorphs()
# Report how many candidates the tree now holds.
print(len(tree.candidates))
In [9]:
# Cell output: the set of distinct dist_to_target values among the candidates.
{x.dist_to_target for x in tree.candidates}
Out[9]:
In [10]:
def sascore_as_obj(morph, operator):
    """Use a morph's ``sascore`` as its objective value.

    Overwrites ``morph.dist_to_target`` with ``morph.sascore``.
    ``operator`` is accepted but not used. Returns ``None``.
    """
    score = morph.sascore
    morph.dist_to_target = score
# Generate morphs again, this time passing sascore_as_obj as the only collector.
tree.generateMorphs([sascore_as_obj])
print(len(tree.candidates))
# Cell output: dist_to_target of every candidate.
[candidate.dist_to_target for candidate in tree.candidates]
Out[10]:
In [11]:
# Let the tree sort its candidates, then list them as
# (SMILES, dist_to_target) pairs for the cell output.
tree.sortMorphs()
[(morph.smiles, morph.dist_to_target) for morph in tree.candidates]
Out[11]:
In [12]:
# Show the mask as it stands before filtering (every position is on by default).
print("Old mask:", tree.candidates_mask)
# Keep only the ten leading morphs of the sorted list, i.e. the ones with
# the lowest distance to target.
new_mask = [position < 10 for position in range(len(tree.candidates_mask))]
# Hand the new mask over to the tree.
tree.candidates_mask = new_mask
# Report the outcome.
print("New mask:", tree.candidates_mask)
print("Molecules that passed the filter:")
[
    (morph.smiles, morph.dist_to_target)
    for morph, accepted in zip(tree.candidates, tree.candidates_mask)
    if accepted  # only molecules whose mask position is on
]
Out[12]:
In [13]:
# get the number of generations before
print(tree.generation_count)
tree.extend() # connect the accepted morphs to the tree as new leaves
# get the number of generations after
print(tree.generation_count)
# grab the new leaves as a list sorted according to their distance from target
sorted(
[
(x.getSMILES(), x.getDistToTarget())
for x in tree.leaves
], key=lambda item : item[1]
)
Out[13]:
In [14]:
from rdkit.Chem.Draw import MolsToGridImage
def get_locked_atoms(mol):
    """Return ``(index, atom)`` pairs for every atom of ``mol`` whose ``is_locked`` is truthy.

    Indices are positions within ``mol.atoms``; the result is a list in
    iteration order (empty when no atom is locked).
    """
    locked = []
    for position, atom in enumerate(mol.atoms):
        if atom.is_locked:
            locked.append((position, atom))
    return locked
def show_mol_grid(mols):
    """Draw ``mols`` as a grid image via ``MolsToGridImage``.

    Each molecule is converted with ``asRDMol()``, drawn in a 250x200 cell,
    captioned with its ``parent_operator`` and has the indices reported by
    ``get_locked_atoms`` passed as its highlight list.
    Returns whatever ``MolsToGridImage`` returns.
    """
    # One list of locked-atom indices per molecule, in the order of ``mols``.
    highlight_lists = []
    for mol in mols:
        highlight_lists.append([entry[0] for entry in get_locked_atoms(mol)])
    rdkit_mols = [mol.asRDMol() for mol in mols]
    captions = [mol.parent_operator for mol in mols]
    return MolsToGridImage(
        rdkit_mols,
        subImgSize=(250, 200),
        highlightAtomLists=highlight_lists,
        legends=captions,
    )
# Cell output: grid image of the tree's current leaves.
show_mol_grid(tree.leaves)
Out[14]:
In [15]:
# Cell output: the tree's parameters at this point of the notebook.
tree.params
Out[15]:
In [16]:
class PenalizeKnown:
    """Callable that adds a fixed penalty to morphs a given tree already contains."""

    def __init__(self, tree, penalty):
        """Store the tree used for the lookup and the penalty to add."""
        self._tree = tree
        self._penalty = penalty

    def __call__(self, morph, operator):
        """Add the penalty to ``morph.dist_to_target`` when ``tree.hasMol(morph)`` is truthy.

        ``operator`` is accepted but not used. Returns ``None``.
        """
        if not self._tree.hasMol(morph):
            return  # molecule not reported by the tree: leave it untouched
        morph.dist_to_target += self._penalty
# Nine rounds of generate -> sort -> filter -> extend.
for iter_idx in range(1, 10):
    # Collectors passed: sascore_as_obj, then a PenalizeKnown with penalty 10.
    tree.generateMorphs([sascore_as_obj, PenalizeKnown(tree, 10)])
    tree.sortMorphs()

    # Accept a morph only when it sits in the first 50 sorted positions
    # and its sascore is below 6.
    tree.candidates_mask = [
        position < 50 and tree.candidates[position].sascore < 6
        for position in range(len(tree.candidates_mask))
    ]

    tree.extend()
In [17]:
# Cell output: the tree's generation counter after the loop above.
tree.generation_count
Out[17]:
In [18]:
# Cell output: the tree's mol_count value.
tree.mol_count
Out[18]:
In [19]:
# Cell output: grid image of the leaves the tree has at this point.
show_mol_grid(tree.leaves)
Out[19]:
In [20]:
# Make enalapril (given as a SMILES string) the tree's target, then show
# the resulting parameters as the cell output.
tree.params = dict(
    target=MolpherMol("O=C(O)[CH]2N(C(=O)[CH](N[CH](C(=O)OCC)CCc1ccccc1)C)CCC2"),
)
tree.params
Out[20]:
In [21]:
class FindClosest:
    """Callable that remembers the molecule with the smallest ``dist_to_target`` it was called with.

    After at least one call, ``closest_mol`` holds that molecule and
    ``closest_distance`` its ``dist_to_target``; both are ``None`` before.
    """

    def __init__(self):
        # Best molecule seen so far and its distance; None until the first call.
        self.closest_mol = None
        self.closest_distance = None

    def __call__(self, morph):
        """Record ``morph`` if it is the first one seen or strictly closer than the current best.

        Returns ``None``; the result is read from the instance attributes.
        """
        # Compare against None explicitly. Relying on the truthiness of the
        # stored molecule would treat any falsy molecule object as "nothing
        # stored yet" and let a farther morph overwrite it.
        if self.closest_mol is None or self.closest_distance > morph.dist_to_target:
            self.closest_mol = morph
            self.closest_distance = morph.dist_to_target
# Tracker that is handed to tree.traverse() in every round below.
closest_info = FindClosest()

# Keep exploring until the tree reports that a path has been found.
while not tree.path_found:
    # One round: generate, sort, filter, extend, prune -- in this order.
    tree.generateMorphs()
    tree.sortMorphs()
    tree.filterMorphs()
    tree.extend()
    tree.prune()

    # Pass the tracker to the traversal so it can update its closest molecule.
    tree.traverse(closest_info)

    # Progress report for this round.
    print('Generation #', tree.generation_count, sep='')
    print('Molecules in tree:', tree.mol_count)
    print(
        'Closest molecule to target: {0} (Tanimoto distance: {1})'.format(
            closest_info.closest_mol.getSMILES(),
            closest_info.closest_distance,
        )
    )
In [23]:
# Fetch the path leading to the molecule stored under the 'target' parameter.
path = tree.fetchPathTo(tree.params['target'])
# Length of the returned path.
print(len(path))
# Cell output: the path drawn as a grid image.
show_mol_grid(path)
Out[23]:
In [ ]: