This notebook was created to check that a change to a smirnoff forcefield doesn't change the way it types
molecules. This concern came from switching the decorator 'R'
to 'x'
so that the SMIRKS in the SMIRNOFF format would be compatible with OEtoolkits and RDKit. See smirnoff99Frosst issue#54
This notebook will:
Authors:
In [1]:
# Imports
from __future__ import print_function
from convert_frcmod import *
import openeye.oechem as oechem
import openeye.oeiupac as oeiupac
import openeye.oeomega as oeomega
import openeye.oedepict as oedepict
from IPython.display import display
from openforcefield.typing.engines.smirnoff.forcefield import *
from openforcefield.typing.engines.smirnoff.forcefield_utils import get_molecule_parameterIDs
from openforcefield.utils import *
% matplotlib inline
import matplotlib
import numpy as np
import pylab as pl
import matplotlib.pyplot as plt
import time
import IPython
import pickle
import glob
In [2]:
def depictAtomByIdx(mol_copy, atomIdxList, supH = True, width=900, height=500):
mol = oechem.OEMol(mol_copy)
OEGenerate2DCoordinates(mol)
atomBondSet = oechem.OEAtomBondSet()
for atom in mol.GetAtoms():
if atom.GetIdx() in atomIdxList:
atomBondSet.AddAtom( atom)
for bond in atom.GetBonds():
nbrAtom = bond.GetNbr(atom)
nbrIdx = nbrAtom.GetIdx()
if (nbrIdx in atomIdxList) and nbrIdx>atom.GetIdx():
atomBondSet.AddBond( bond)
from IPython.display import Image
dopt = oedepict.OEPrepareDepictionOptions()
dopt.SetDepictOrientation( oedepict.OEDepictOrientation_Horizontal)
dopt.SetSuppressHydrogens(supH)
oedepict.OEPrepareDepiction(mol, dopt)
opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
disp = oedepict.OE2DMolDisplay(mol, opts)
aroStyle = oedepict.OEHighlightStyle_Color
aroColor = oechem.OEColor(oechem.OEGrey)
oedepict.OEAddHighlighting(disp, aroColor, aroStyle,
oechem.OEIsAromaticAtom(), oechem.OEIsAromaticBond() )
hstyle = oedepict.OEHighlightStyle_BallAndStick
hcolor = oechem.OEColor(oechem.OELightGreen)
oedepict.OEAddHighlighting(disp, hcolor, hstyle, atomBondSet)
#ofs = oechem.oeosstream()
img = oedepict.OEImage(width, height)
oedepict.OERenderMolecule(img, disp)
#oedepict.OERenderMolecule(ofs, 'png', disp)
#ofs.flush()
#return Image(data = "".join(ofs.str()))
return Image(oedepict.OEWriteImageToString("png",img))
In [3]:
def getMolParamIDToAtomIndex( oemol, ff):
"""Take an OEMol and a SMIRNOFF forcefield object and return a dictionary,
keyed by parameter ID, where each entry is a tuple of
( smirks, [[atom1, ... atomN], [atom1, ... atomN]) giving the SMIRKS
corresponding to that parameter ID and a list of the atom groups in that
molecule that parameter is applied to.
Parameters
----------
oemol : OEMol
OpenEye OEMol with the molecule to investigate.
ff : ForceField
SMIRNOFF ForceField object (obtained from an ffxml via ForceField(ffxml)) containing FF of interest.
Returns
-------
param_usage : dictionary
Dictionary, keyed by parameter ID, where each entry is a tuple of
( smirks, [[atom1, ... atomN], [atom1, ... atomN]) giving the SMIRKS
corresponding to that parameter ID and a list of the atom groups in
that molecule that parameter is applied to.
"""
labels = ff.labelMolecules([oemol])
param_usage = {}
for mol_entry in range(len(labels)):
for force in labels[mol_entry].keys():
for (atom_indices, pid, smirks) in labels[mol_entry][force]:
if not pid in param_usage:
param_usage[pid] = (smirks, [atom_indices])
else:
param_usage[pid][1].append( atom_indices )
return param_usage
In [4]:
def labels_to_pidDict(labels):
"""
This method takes a set of SMIRNOFF forcefield labels and returns
a dictionary with information for each molecule at each force type
in the form:
{ force_type: {mol_index: {(indice tuple): pid, ...}, ... } }
"""
force_type_dict = dict()
for idx, mol_dict in enumerate(labels):
for force_type, label_set in mol_dict.items():
if not force_type in force_type_dict:
force_type_dict[force_type] = dict()
force_type_dict[force_type][idx] = dict()
for (indices, pid, smirks) in label_set:
force_type_dict[force_type][idx][tuple(indices)] = {'pid': pid, 'smirks':smirks}
return force_type_dict
In [5]:
# Input and output info
#infile = 'example.frcmod' # smirnoffish frcmod file to convert
infile = 'smirnoffishFrcmod.parm99Frosst.txt' # smirnoffish frcmod file to convert
ffxmlFile = 'smirnoff99FrosstFrcmod.offxml'
template = 'template.offxml' # Template FFXML file without parameters (but with remainder of contents)
In [6]:
# Convert
# Already converted
convert_frcmod_to_ffxml( infile, template, ffxmlFile)
In [7]:
# Load SMIRNOFF FFXML
test_ff = ForceField(ffxmlFile) # We will use this below to access details of parameters
In [8]:
ref_ff = ForceField('test_forcefields/smirnoff99Frosst.offxml')
Here we will generate a list of molecules. We will label all molecules given. Here we don't care rather the assigned parameters are generic or not just that the typing doesn't change between the two forcefields.
Currently this section expects a relative or absolute path to a single file. The utils.read_molecules function will access /openforcefield/data/molecules/[your path]
if there is no file at the relative path.
In [9]:
molecule_file = "DrugBank_tripos.mol2"
molecules = utils.read_molecules(molecule_file)
In [10]:
init = time.time()
test_labels = test_ff.labelMolecules(molecules)
ref_labels = ref_ff.labelMolecules(molecules)
t = (time.time() - init) / 60.0
print("Typed %i molecules with test and reference force fields in %.2f minutes" % (len(molecules), t))
In [11]:
# Make dictionary by molecule and tuple indices
init = time.time()
test_dict = labels_to_pidDict(test_labels)
ref_dict = labels_to_pidDict(ref_labels)
t = (time.time() - init) / 60.0
print("created indices tuple to pid dictionaries in %.2f minutes" % t)
In [12]:
# Make a dictionary to store mismatches:
mismatch = dict()
# This will have embedded dictionaries with this form:
# force_type: {mol_idx:{(index tuple): {test_pid, test_smirks, ref_pid, ref_smirks}}}
mismatch_count = dict()
# loop through force types
for force_type, test_mol_dict in test_dict.items():
if force_type not in mismatch:
mismatch[force_type] = dict()
if force_type not in mismatch_count:
mismatch_count[force_type] = 0
# loop through molecules in each force type
for mol_idx, test_tuple_dict in test_mol_dict.items():
if not mol_idx in mismatch[force_type]:
mismatch[force_type][mol_idx] = dict()
# loop through all atom indice tuples in this molecule
for indice_tuple, test_info in test_tuple_dict.items():
# compare pid assignment
test_pid = test_info['pid']
ref_pid = ref_dict[force_type][mol_idx][indice_tuple]['pid']
# if they don't match store info in mismatch dictionary and update count
if test_pid != ref_pid:
test_smirks = test_info['smirks']
ref_smirks = ref_dict[force_type][mol_idx][indice_tuple]['smirks']
mismatch[force_type][mol_idx][indice_tuple] = {'test_pid': test_pid, 'test_smirks': test_smirks,
'ref_pid': ref_pid, 'ref_smirks': ref_smirks}
mismatch_count[force_type] +=1
print("%-35s %s" % ("Force Type", "Number mismatches"))
print("-"*55)
for force_type, count in mismatch_count.items():
print("%-35s %i" % (force_type, count))
In [13]:
ForceType = "PeriodicTorsionGenerator"
for mol_idx, tuple_dict in mismatch[ForceType].items():
# only visualize molecules with mismatch indices
keys = [k for k in tuple_dict.keys()]
if len(keys) == 0:
continue
mol = OEMol(molecules[mol_idx])
print("Looking at molecule %i" % mol_idx)
for indice_tuple, pid_info in tuple_dict.items():
test_pid = pid_info['test_pid']
test_smirks = pid_info['test_smirks']
ref_pid = pid_info['ref_pid']
ref_smirks = pid_info['ref_smirks']
print("%-10s %-40s %-40s" % ('', 'test force field', 'reference forcefield'))
print("%-10s %-40s %-40s" % ('pid'))
print("%-10s %-30s %-10s %-30s" % (test_pid, test_smirks, ref_pid, ref_smirks))
display(depictAtomByIdx(mol, indice_tuple, supH = False))
print("\n")
print("\n")
print("-"*100)
print("\n")
In [ ]:
# loop through force types
for force_type, test_mol_dict in test_dict.items():
# loop through molecules in each force type
for mol_idx, test_tuple_dict in test_mol_dict.items():
# loop through all atom indice tuples in this molecule
for indice_tuple, test_info in test_tuple_dict.items():
# compare pid assignment
test_pid = test_info['pid']
test_smirks = test_info['smirks']
# Check for 'R'
if 'R' in test_smirks:
print("Found 'R' in %s (%s)" % )