In [1]:
from simtk.openmm.app import *
from simtk.openmm import *
from simtk.unit import *
from sys import stdout
import pandas as pd
In [2]:
# real test case
In [26]:
AWSEM_xml = "/Users/weilu/openmmawsem/awsem_heme.xml"
pre = "/Users/weilu/Research/server/may_week3_2020/hemoglobin/setups/debug_displacement_of_N/"
fileLocation = "/Users/weilu/Research/server/may_week3_2020/hemoglobin/setups/four_chains/5x2r-openmmawsem.pdb"
fileLocation = "/Users/weilu/Research/server/may_week3_2020/hemoglobin/setups/debug_displacement_of_N/5x2r-openmmawsem.pdb"
pdb = PDBFile(fileLocation)
# pdb = PDBFile("/Users/weilu/Research/server/jan_2020/include_small_molecular/my_ABCDE_with_ligand/my_ABCDE_with_ligand-openmmawsem.pdb")
In [27]:
def compute_dis(pos1, pos2):
dis = pos1 - pos2
dis = dis.value_in_unit(nanometer)
r = (dis[0]**2 + dis[1]**2 + dis[2]**2)**0.5
return r
In [28]:
res_list = list(pdb.topology.residues())
atom_list = list(pdb.topology.atoms())
protein_resNames = ["NGP", "IGL", "IPR", "NTER", "CTER"]
DNA_resNames = ["DA", "DC", "DT", "DG"]
protein_res_list = []
DNA_res_list = []
ligand_res_list = []
for res in res_list:
if res.name in protein_resNames:
protein_res_list.append(res)
elif res.name in DNA_resNames:
DNA_res_list.append(res)
else:
ligand_res_list.append(res)
protein_atom_list = []
DNA_atom_list = []
ligand_atom_list = []
for atom in atom_list:
if atom.residue.name in protein_resNames:
protein_atom_list.append(atom)
elif atom.residue.name in DNA_resNames:
DNA_atom_list.append(atom)
else:
ligand_atom_list.append(atom)
In [29]:
forcefield = ForceField(AWSEM_xml)
[templates, names] = forcefield.generateTemplatesForUnmatchedResidues(pdb.topology)
for a in templates:
for a1 in a.atoms:
if a1.element.symbol == "C":
a1.type = "CA"
else:
a1.type = a1.element.symbol
# a1.type = a1.element.symbol
forcefield.registerResidueTemplate(a)
system = forcefield.createSystem(pdb.topology)
In [30]:
info = []
for ligand_atom in ligand_atom_list:
pos1 = pdb.positions[ligand_atom.index]
for protein_atom in protein_atom_list:
if protein_atom.name != "CB":
continue
protein_chain = protein_atom.residue.chain.id
pos2 = pdb.positions[protein_atom.index]
r = compute_dis(pos1, pos2)
if r < 0.65:
# print(ligand_atom, protein_atom, "in contact", r)
info.append([ligand_atom.index, protein_atom.index, r, protein_chain])
In [31]:
data = pd.DataFrame(info, columns=["Ligand_atom_index", "Protein_atom_index", "r", "Protein_chain"])
In [32]:
data.to_csv(f"{pre}/het_protein_bonds.csv", index=False)
In [33]:
data
Out[33]:
In [34]:
# data = pd.read_csv(f"{pre}/het_protein_bonds.csv")
In [35]:
res_list = list(pdb.topology.residues())
atom_list = list(pdb.topology.atoms())
protein_resNames = ["NGP", "IGL", "IPR", "NTER", "CTER"]
DNA_resNames = ["DA", "DC", "DT", "DG"]
protein_res_list = []
DNA_res_list = []
ligand_res_list = []
for res in res_list:
if res.name in protein_resNames:
protein_res_list.append(res)
elif res.name in DNA_resNames:
DNA_res_list.append(res)
else:
ligand_res_list.append(res)
protein_atom_list = []
DNA_atom_list = []
ligand_atom_list = []
for atom in atom_list:
if atom.residue.name in protein_resNames:
protein_atom_list.append(atom)
elif atom.residue.name in DNA_resNames:
DNA_atom_list.append(atom)
else:
ligand_atom_list.append(atom)
ligand_res_list
Out[35]:
In [36]:
info = []
for res in ligand_res_list:
atoms = list(res.atoms())
n_atoms = len(atoms)
for i in range(n_atoms):
atom1 = atoms[i]
pos1 = pdb.positions[atom1.index]
for j in range(i+1, n_atoms):
atom2 = atoms[j]
pos2 = pdb.positions[atom2.index]
dis = pos1 - pos2
dis = dis.value_in_unit(nanometer)
r = (dis[0]**2 + dis[1]**2 + dis[2]**2)**0.5
# print(atom1.index, atom2.index, round(r, 3))
info.append([res.name, atom1.index, atom2.index, atom1.name, atom2.name, atom1.element.symbol, atom2.element.symbol, round(r, 3)])
data = pd.DataFrame(info, columns=["Name", "Atom1", "Atom2", "Name1", "Name2", "Symbol1", "Symbol2", "r"])
data
Out[36]:
In [37]:
data.to_csv(f"{pre}/het_frag.csv", index=False)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
info = []
for res in ligand_res_list:
atoms = list(res.atoms())
n_atoms = len(atoms)
for i in range(n_atoms):
atom1 = atoms[i]
pos1 = pdb.positions[atom1.index]
for j in range(i+1, n_atoms):
atom2 = atoms[j]
pos2 = pdb.positions[atom2.index]
r = compute_dis(pos1, pos2)
# print(atom1.index, atom2.index, round(r, 3))
info.append([res.name, atom1.index, atom2.index, atom1.name, atom2.name, atom1.element.symbol, atom2.element.symbol, round(r, 3)])
data = pd.DataFrame(info, columns=["Name", "Atom1", "Atom2", "Name1", "Name2", "Symbol1", "Symbol2", "r"])
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [33]:
data.dtypes
Out[33]:
In [34]:
protein_atom_list[0]
Out[34]:
In [23]:
res.
Out[23]:
In [123]:
atom.name
Out[123]:
In [114]:
d = pdb.positions[atom.index]
In [121]:
atom = ligand_atom_list[0]
In [ ]:
atom.index
In [91]:
a = list(pdb.getTopology().bonds())
In [100]:
a = ligand_atom_list[0]
In [103]:
ligand_atoms_index_list = [atom.index for atom in ligand_atom_list]
protein_atoms_index_list = [atom.index for atom in protein_atom_list]
In [106]:
a = protein_atom_list[0]
In [108]:
Out[108]:
In [96]:
atom1 = a[0].atom1
In [97]:
atom1.index
Out[97]:
In [98]:
atom1.id
Out[98]:
In [128]:
pdb.getTopology()
Out[128]:
In [4]:
forcefield = ForceField(AWSEM_xml)
In [5]:
[templates, names] = forcefield.generateTemplatesForUnmatchedResidues(pdb.topology)
In [6]:
for a in templates:
for a1 in a.atoms:
if a1.element.symbol == "C":
a1.type = "CA"
else:
a1.type = a1.element.symbol
# a1.type = a1.element.symbol
forcefield.registerResidueTemplate(a)
In [7]:
system = forcefield.createSystem(pdb.topology)
In [8]:
a = templates[0]
In [61]:
names
Out[61]:
In [35]:
In [36]:
Out[36]:
In [ ]:
In [ ]:
In [15]:
positions = pdb.positions
In [46]:
atoms = list(res.atoms())
In [48]:
atom = atoms[0]
In [49]:
atom.index
Out[49]:
In [68]:
atom1.name
Out[68]:
In [69]:
atom.element.symbol
Out[69]:
In [37]:
In [82]:
In [38]:
In [39]:
data
Out[39]:
In [40]:
data.to_csv(f"{pre}/het_frag.csv", index=False)
In [41]:
for i, line in data.iterrows():
atom1 = line["Atom1"]
atom2 = line["Atom2"]
r = float(line["r"])
print(atom1, atom2, r)
In [16]:
len(positions)
Out[16]:
In [52]:
pos1 = positions[3548]
In [53]:
pos2 = positions[3551]
In [54]:
(pos1[0]**2 + pos1[1]**2 + pos1[2]**2)**0.5
Out[54]:
In [55]:
dis = pos1 - pos2
dis = dis.value_in_unit(nanometer)
r = (dis[0]**2 + dis[1]**2 + dis[2]**2)**0.5
In [56]:
r
Out[56]:
In [17]:
res = ligand_res_list[0]
In [18]:
b = list(res.bonds())[0]
In [19]:
b.atom1
Out[19]:
In [20]:
list(res.bonds())
Out[20]:
In [160]:
b.type
In [161]:
atom1 = b.atom1
In [162]:
atom1.id
Out[162]:
In [163]:
atom1.index
Out[163]:
In [168]:
atom1.element.symbol
Out[168]:
In [ ]:
# initial testing.
In [5]:
origin_pdb = PDBFile("/Users/weilu/Research/server/jan_2020/include_small_molecular/6ud8_F_complete/6ud8_F_complete-openmmawsem.pdb")
In [6]:
origin_pdb.getTopology()
Out[6]:
In [64]:
pdb = PDBFile("/Users/weilu/Research/server/jan_2020/include_small_molecular/6ud8_F_complete-openmmawsem.pdb")
In [65]:
pdb.getTopology()
Out[65]:
In [66]:
forcefield = ForceField("/Users/weilu/openmmawsem/awsem.xml")
In [67]:
forcefield.generateTemplatesForUnmatchedResidues(pdb.topology)
Out[67]:
In [68]:
[template, ffxml] = forcefield.generateTemplatesForUnmatchedResidues(pdb.topology)
In [69]:
a = template[0]
In [70]:
a.name
Out[70]:
In [71]:
a.bonds
Out[71]:
In [56]:
a
Out[56]:
In [86]:
for a1 in a.atoms:
a1.type = a1.element.symbol
In [72]:
a1 = a.atoms[0]
In [73]:
a1.element.symbol
Out[73]:
In [74]:
a1.name
Out[74]:
In [75]:
a1.parameters
Out[75]:
In [76]:
a1.type
In [77]:
a1.externalBonds
Out[77]:
In [78]:
a.externalBonds
Out[78]:
In [80]:
a1.bondedTo
Out[80]:
In [82]:
a.atoms[3].name
Out[82]:
In [21]:
olc = a[1]
In [40]:
forcefield.getGenerators()
Out[40]:
In [84]:
forcefield.registerResidueTemplate(a)
In [89]:
system = forcefield.createSystem(pdb.topology)
In [90]:
system.getNumParticles()
Out[90]:
In [126]:
system.
Out[126]:
In [95]:
b = list(pdb.topology.atoms())
In [96]:
b[-1]
Out[96]:
In [97]:
b[0]
Out[97]:
In [103]:
b0 = b[-1]
In [104]:
b0.id
Out[104]:
In [105]:
b0.index
Out[105]:
In [106]:
b0.element
Out[106]:
In [107]:
pdb.topology.getNumResidues()
Out[107]:
In [111]:
res_list = list(pdb.topology.residues())
protein_resNames = ["NGP", "IGL", "IPR", "NTER", "CTER"]
DNA_resNames = ["DA", "DC", "DT", "DG"]
protein_res_list = []
DNA_res_list = []
ligand_res_list = []
for res in res_list:
if res.name in protein_resNames:
protein_res_list.append(res)
elif res.name in DNA_resNames:
DNA_res_list.append(res)
else:
ligand_res_list.append(res)
In [112]:
res = res_list[0]
In [114]:
In [ ]:
nres = len(protein_res_list)
residues = protein_res_list
natoms
In [117]:
atom_list = list(pdb.topology.atoms())
In [124]:
protein_atom_list = []
DNA_atom_list = []
ligand_atom_list = []
for atom in atom_list:
if atom.residue.name in protein_resNames:
protein_atom_list.append(atom)
elif atom.residue.name in DNA_resNames:
DNA_atom_list.append(atom)
else:
ligand_atom_list.append(atom)
In [119]:
atom = atom_list[0]
In [125]:
ligand_atom_list
Out[125]:
In [120]:
atom.index
Out[120]:
In [121]:
atom.name
Out[121]:
In [123]:
atom.residue.name
Out[123]:
In [116]:
DNA_res_list
Out[116]:
In [115]:
ligand_res_list
Out[115]:
In [ ]:
for res in res_list:
if