In [3]:
from simtk.openmm.app import *
from simtk.openmm import *
from simtk.unit import *
from sys import stdout
import pandas as pd

In [2]:
pre = "/Users/weilu/Research/server/jun_week1_2020/protein_DNA_benchmark/"
fileLocation = f"{pre}/DNAProtein_Platform_OpenCL_date_20200226_pdb_1a36_repetition_0_clean.pdb"
pdb = PDBFile(fileLocation)

In [4]:
res_list = list(pdb.topology.residues())

In [6]:
res_list = list(pdb.topology.residues())
atom_list = list(pdb.topology.atoms())
protein_resNames = ["NGP", "IGL", "IPR", "NTER", "CTER"]
DNA_resNames = ["DA", "DC", "DT", "DG"]
protein_res_list = []
DNA_res_list = []
ligand_res_list = []
for res in res_list:
    if res.name in protein_resNames:
        protein_res_list.append(res)
    elif res.name in DNA_resNames:
        DNA_res_list.append(res)
    else:
        ligand_res_list.append(res)

protein_atom_list = []
DNA_atom_list = []
ligand_atom_list = []
for atom in atom_list:
    if atom.residue.name in protein_resNames:
        protein_atom_list.append(atom)
    elif atom.residue.name in DNA_resNames:
        DNA_atom_list.append(atom)
    else:
        ligand_atom_list.append(atom)

In [38]:
a = DNA_atom_list[0]

In [45]:
a.index


Out[45]:
0

In [51]:
a = (atom1_pos - atom2_pos)

In [53]:
def compute_dis(pos1, pos2):
    dis = pos1 - pos2
    dis = dis.value_in_unit(nanometer)
    r = (dis[0]**2 + dis[1]**2 + dis[2]**2)**0.5
    return r

In [4]:
import sys

sys.path.append("/projects/pw8/wl45/open3spn2")
sys.path.append("/projects/pw8/wl45/openawsem")
sys.path.append("/Users/weilu/open3spn2")
sys.path.append("/Users/weilu/openmmawsem")
sys.path.append("C:/Users/luwei/Documents/GitHub/open3spn2")
sys.path.append("C:/Users/luwei/Documents/GitHub/openawsem")

import open3SPN2

In [60]:
dna=open3SPN2.DNA.fromCoarsePDB(fileLocation)

In [85]:
bonds_list = []
interacting_atom = ["A", "C", "G", "T"]
a = data.query("resname in @DNA_resNames").query("name in @interacting_atom").reset_index(drop=True)
n_atom = len(a)
for i in range(n_atom):
    dna_res_1 = a.iloc[i]
    chainID_1 = dna_res_1["chainID"]
    for j in range(i+1, n_atom):
        dna_res_2 = a.iloc[j]
        chainID_2 = dna_res_2["chainID"]
        if chainID_1 == chainID_2:
            continue
        atom1_pos = dna_res_1[["x", "y", "z"]].values
        atom2_pos = dna_res_2[["x", "y", "z"]].values
        dis = atom1_pos - atom2_pos
        r = (dis[0]**2 + dis[1]**2 + dis[2]**2)**0.5
        if r > 6.5:
            continue
        # print(i, j, r)
        bonds_list.append([dna_res_1["serial"], dna_res_2["serial"], r])

In [ ]:
for (atom1, atom2, r) in bonds_list:
    print(atom1, atom2, r)

In [72]:
b = a.iloc[1][["x", "y", "z"]].values

In [93]:
from open3SPN2 import function_terms

In [95]:
function_terms.dna_constraint_term()


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-95-f5d7af583eb9> in <module>()
----> 1 function_terms.dna_constraint_term()

TypeError: dna_constraint_term() missing 1 required positional argument: 'dna'

In [56]:
dna_res_n = len(DNA_res_list)
for i in range(dna_res_n):
    dna_res_1 = DNA_res_list[i]
    chainID_1 = dna_res_1.chain.id
    for j in range(i+1, dna_res_n):
        dna_res_2 = DNA_res_list[j]
        chainID_2 = dna_res_2.chain.id
        if chainID_1 == chainID_2:
            continue
        atom_1 = get_interacting_DNA_atom(dna_res_1)
        atom_2 = get_interacting_DNA_atom(dna_res_2)
        atom1_pos = pdb.positions[atom_1.index]
        atom2_pos = pdb.positions[atom_2.index]
        r = compute_dis(atom1_pos, atom2_pos)
        if r > 0.65:
            continue
        print(i, dna_res_1, j, dna_res_2, r)


1 <Residue 1 (DA) of chain 0> 41 <Residue 41 (DT) of chain 1> 0.6469376708771873
1 <Residue 1 (DA) of chain 0> 42 <Residue 42 (DT) of chain 1> 0.5491159258298746
2 <Residue 2 (DA) of chain 0> 40 <Residue 40 (DT) of chain 1> 0.6349142776784908
2 <Residue 2 (DA) of chain 0> 41 <Residue 41 (DT) of chain 1> 0.5734785785014116
2 <Residue 2 (DA) of chain 0> 42 <Residue 42 (DT) of chain 1> 0.6200130724428318
3 <Residue 3 (DA) of chain 0> 40 <Residue 40 (DT) of chain 1> 0.5904393025536154
4 <Residue 4 (DA) of chain 0> 39 <Residue 39 (DT) of chain 1> 0.5754847174339213
4 <Residue 4 (DA) of chain 0> 40 <Residue 40 (DT) of chain 1> 0.5720431714477502
5 <Residue 5 (DG) of chain 0> 38 <Residue 38 (DC) of chain 1> 0.5760794563252537
5 <Residue 5 (DG) of chain 0> 39 <Residue 39 (DT) of chain 1> 0.5544115168356443
6 <Residue 6 (DA) of chain 0> 36 <Residue 36 (DG) of chain 1> 0.6052784483194491
6 <Residue 6 (DA) of chain 0> 37 <Residue 37 (DT) of chain 1> 0.5796950922683405
6 <Residue 6 (DA) of chain 0> 38 <Residue 38 (DC) of chain 1> 0.5980582078694345
7 <Residue 7 (DC) of chain 0> 36 <Residue 36 (DG) of chain 1> 0.5328443393712653
7 <Residue 7 (DC) of chain 0> 37 <Residue 37 (DT) of chain 1> 0.6420498189393095
8 <Residue 8 (DT) of chain 0> 35 <Residue 35 (DA) of chain 1> 0.5820032474136202
8 <Residue 8 (DT) of chain 0> 36 <Residue 36 (DG) of chain 1> 0.5415202489288831
9 <Residue 9 (DT) of chain 0> 34 <Residue 34 (DA) of chain 1> 0.5892325008008298
9 <Residue 9 (DT) of chain 0> 35 <Residue 35 (DA) of chain 1> 0.6020993771795484
10 <Residue 10 (DA) of chain 0> 32 <Residue 32 (DC) of chain 1> 0.5970426282938265
10 <Residue 10 (DA) of chain 0> 33 <Residue 33 (DT) of chain 1> 0.5811134656846285
10 <Residue 10 (DA) of chain 0> 34 <Residue 34 (DA) of chain 1> 0.5754623966863512
11 <Residue 11 (DG) of chain 0> 32 <Residue 32 (DC) of chain 1> 0.5413916235037259
12 <Residue 12 (DA) of chain 0> 31 <Residue 31 (DT) of chain 1> 0.6100143768797585
12 <Residue 12 (DA) of chain 0> 32 <Residue 32 (DC) of chain 1> 0.6040196106087947
13 <Residue 13 (DA) of chain 0> 30 <Residue 30 (DT) of chain 1> 0.5981789782331036
13 <Residue 13 (DA) of chain 0> 31 <Residue 31 (DT) of chain 1> 0.5980634163698698
14 <Residue 14 (DA) of chain 0> 29 <Residue 29 (DT) of chain 1> 0.564754158904562
14 <Residue 14 (DA) of chain 0> 30 <Residue 30 (DT) of chain 1> 0.5818033860334605
15 <Residue 15 (DA) of chain 0> 28 <Residue 28 (DT) of chain 1> 0.5716752399745855
15 <Residue 15 (DA) of chain 0> 29 <Residue 29 (DT) of chain 1> 0.6047832752317145
16 <Residue 16 (DA) of chain 0> 27 <Residue 27 (DT) of chain 1> 0.5801131441365558
16 <Residue 16 (DA) of chain 0> 28 <Residue 28 (DT) of chain 1> 0.5990289225070856
17 <Residue 17 (DT) of chain 0> 26 <Residue 26 (DA) of chain 1> 0.5664125263445361
17 <Residue 17 (DT) of chain 0> 27 <Residue 27 (DT) of chain 1> 0.6074839421746061
18 <Residue 18 (DT) of chain 0> 24 <Residue 24 (DA) of chain 1> 0.6222450722986884
18 <Residue 18 (DT) of chain 0> 25 <Residue 25 (DA) of chain 1> 0.5631799534784598
18 <Residue 18 (DT) of chain 0> 26 <Residue 26 (DA) of chain 1> 0.5786104561792851
19 <Residue 19 (DT) of chain 0> 24 <Residue 24 (DA) of chain 1> 0.5728446386237723
19 <Residue 19 (DT) of chain 0> 25 <Residue 25 (DA) of chain 1> 0.6313309512450664
20 <Residue 20 (DT) of chain 0> 23 <Residue 23 (DA) of chain 1> 0.5983505410710346
20 <Residue 20 (DT) of chain 0> 24 <Residue 24 (DA) of chain 1> 0.5919594918573399
21 <Residue 21 (DT) of chain 0> 22 <Residue 22 (DA) of chain 1> 0.5847357522847391

In [34]:
def get_interacting_DNA_atom(DNA_res):
    for atom in DNA_res.atoms():
        # print(atom)
        if atom.name in ["A", "T", "C", "G"]:
            return atom

In [12]:
list(a.atoms())


Out[12]:
[<Atom 127 (T) of chain 1 residue 43 (DT)>,
 <Atom 128 (P) of chain 1 residue 43 (DT)>,
 <Atom 129 (S) of chain 1 residue 43 (DT)>]

In [29]:
for atom in a.atoms():
    print(atom)
    if atom.name not in ["A", "T", "C", "G"]:
        continue
    for


<Atom 127 (T) of chain 1 residue 43 (DT)>
<Atom 128 (P) of chain 1 residue 43 (DT)>
<Atom 129 (S) of chain 1 residue 43 (DT)>

In [ ]:


In [36]:
atom - atom


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-36-82e2e0582e9e> in <module>()
----> 1 atom - atom

TypeError: unsupported operand type(s) for -: 'Atom' and 'Atom'

In [ ]:


In [30]:
atom.id


Out[30]:
'129'

In [31]:
atom.index


Out[31]:
129

In [33]:



Out[33]:
'S'

In [ ]:


In [17]:
dna_name = a.name[1]

In [18]:
a[dna_name]


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-18-b308eada0088> in <module>()
----> 1 a[dna_name]

TypeError: 'Residue' object is not subscriptable

In [28]:
a


Out[28]:
<Residue 43 (DT) of chain 1>

In [25]:
list(a.external_bonds())


Out[25]:
[]

In [10]:
a.chain.id


Out[10]:
'B'

In [5]:
res_list


Out[5]:
[<Residue 0 (DA) of chain 0>,
 <Residue 1 (DA) of chain 0>,
 <Residue 2 (DA) of chain 0>,
 <Residue 3 (DA) of chain 0>,
 <Residue 4 (DA) of chain 0>,
 <Residue 5 (DG) of chain 0>,
 <Residue 6 (DA) of chain 0>,
 <Residue 7 (DC) of chain 0>,
 <Residue 8 (DT) of chain 0>,
 <Residue 9 (DT) of chain 0>,
 <Residue 10 (DA) of chain 0>,
 <Residue 11 (DG) of chain 0>,
 <Residue 12 (DA) of chain 0>,
 <Residue 13 (DA) of chain 0>,
 <Residue 14 (DA) of chain 0>,
 <Residue 15 (DA) of chain 0>,
 <Residue 16 (DA) of chain 0>,
 <Residue 17 (DT) of chain 0>,
 <Residue 18 (DT) of chain 0>,
 <Residue 19 (DT) of chain 0>,
 <Residue 20 (DT) of chain 0>,
 <Residue 21 (DT) of chain 0>,
 <Residue 22 (DA) of chain 1>,
 <Residue 23 (DA) of chain 1>,
 <Residue 24 (DA) of chain 1>,
 <Residue 25 (DA) of chain 1>,
 <Residue 26 (DA) of chain 1>,
 <Residue 27 (DT) of chain 1>,
 <Residue 28 (DT) of chain 1>,
 <Residue 29 (DT) of chain 1>,
 <Residue 30 (DT) of chain 1>,
 <Residue 31 (DT) of chain 1>,
 <Residue 32 (DC) of chain 1>,
 <Residue 33 (DT) of chain 1>,
 <Residue 34 (DA) of chain 1>,
 <Residue 35 (DA) of chain 1>,
 <Residue 36 (DG) of chain 1>,
 <Residue 37 (DT) of chain 1>,
 <Residue 38 (DC) of chain 1>,
 <Residue 39 (DT) of chain 1>,
 <Residue 40 (DT) of chain 1>,
 <Residue 41 (DT) of chain 1>,
 <Residue 42 (DT) of chain 1>,
 <Residue 43 (DT) of chain 1>,
 <Residue 44 (NGP) of chain 2>,
 <Residue 45 (NGP) of chain 2>,
 <Residue 46 (NGP) of chain 2>,
 <Residue 47 (NGP) of chain 2>,
 <Residue 48 (NGP) of chain 2>,
 <Residue 49 (NGP) of chain 2>,
 <Residue 50 (NGP) of chain 2>,
 <Residue 51 (NGP) of chain 2>,
 <Residue 52 (NGP) of chain 2>,
 <Residue 53 (IGL) of chain 2>,
 <Residue 54 (IPR) of chain 2>,
 <Residue 55 (NGP) of chain 2>,
 <Residue 56 (NGP) of chain 2>,
 <Residue 57 (NGP) of chain 2>,
 <Residue 58 (IPR) of chain 2>,
 <Residue 59 (IPR) of chain 2>,
 <Residue 60 (NGP) of chain 2>,
 <Residue 61 (NGP) of chain 2>,
 <Residue 62 (IPR) of chain 2>,
 <Residue 63 (NGP) of chain 2>,
 <Residue 64 (IPR) of chain 2>,
 <Residue 65 (NGP) of chain 2>,
 <Residue 66 (NGP) of chain 2>,
 <Residue 67 (NGP) of chain 2>,
 <Residue 68 (NGP) of chain 2>,
 <Residue 69 (NGP) of chain 2>,
 <Residue 70 (NGP) of chain 2>,
 <Residue 71 (NGP) of chain 2>,
 <Residue 72 (NGP) of chain 2>,
 <Residue 73 (IGL) of chain 2>,
 <Residue 74 (NGP) of chain 2>,
 <Residue 75 (NGP) of chain 2>,
 <Residue 76 (NGP) of chain 2>,
 <Residue 77 (NGP) of chain 2>,
 <Residue 78 (NGP) of chain 2>,
 <Residue 79 (NGP) of chain 2>,
 <Residue 80 (IPR) of chain 2>,
 <Residue 81 (NGP) of chain 2>,
 <Residue 82 (NGP) of chain 2>,
 <Residue 83 (NGP) of chain 2>,
 <Residue 84 (NGP) of chain 2>,
 <Residue 85 (NGP) of chain 2>,
 <Residue 86 (NGP) of chain 2>,
 <Residue 87 (NGP) of chain 2>,
 <Residue 88 (NGP) of chain 2>,
 <Residue 89 (NGP) of chain 2>,
 <Residue 90 (NGP) of chain 2>,
 <Residue 91 (NGP) of chain 2>,
 <Residue 92 (NGP) of chain 2>,
 <Residue 93 (NGP) of chain 2>,
 <Residue 94 (NGP) of chain 2>,
 <Residue 95 (NGP) of chain 2>,
 <Residue 96 (NGP) of chain 2>,
 <Residue 97 (NGP) of chain 2>,
 <Residue 98 (NGP) of chain 2>,
 <Residue 99 (NGP) of chain 2>,
 <Residue 100 (NGP) of chain 2>,
 <Residue 101 (NGP) of chain 2>,
 <Residue 102 (NGP) of chain 2>,
 <Residue 103 (NGP) of chain 2>,
 <Residue 104 (NGP) of chain 2>,
 <Residue 105 (NGP) of chain 2>,
 <Residue 106 (NGP) of chain 2>,
 <Residue 107 (NGP) of chain 2>,
 <Residue 108 (NGP) of chain 2>,
 <Residue 109 (NGP) of chain 2>,
 <Residue 110 (NGP) of chain 2>,
 <Residue 111 (NGP) of chain 2>,
 <Residue 112 (NGP) of chain 2>,
 <Residue 113 (NGP) of chain 2>,
 <Residue 114 (NGP) of chain 2>,
 <Residue 115 (NGP) of chain 2>,
 <Residue 116 (NGP) of chain 2>,
 <Residue 117 (NGP) of chain 2>,
 <Residue 118 (NGP) of chain 2>,
 <Residue 119 (NGP) of chain 2>,
 <Residue 120 (NGP) of chain 2>,
 <Residue 121 (NGP) of chain 2>,
 <Residue 122 (NGP) of chain 2>,
 <Residue 123 (NGP) of chain 2>,
 <Residue 124 (NGP) of chain 2>,
 <Residue 125 (NGP) of chain 2>,
 <Residue 126 (NGP) of chain 2>,
 <Residue 127 (NGP) of chain 2>,
 <Residue 128 (NGP) of chain 2>,
 <Residue 129 (NGP) of chain 2>,
 <Residue 130 (NGP) of chain 2>,
 <Residue 131 (NGP) of chain 2>,
 <Residue 132 (NGP) of chain 2>,
 <Residue 133 (NGP) of chain 2>,
 <Residue 134 (NGP) of chain 2>,
 <Residue 135 (NGP) of chain 2>,
 <Residue 136 (NGP) of chain 2>,
 <Residue 137 (NGP) of chain 2>,
 <Residue 138 (NGP) of chain 2>,
 <Residue 139 (NGP) of chain 2>,
 <Residue 140 (NGP) of chain 2>,
 <Residue 141 (NGP) of chain 2>,
 <Residue 142 (NGP) of chain 2>,
 <Residue 143 (NGP) of chain 2>,
 <Residue 144 (NGP) of chain 2>,
 <Residue 145 (NGP) of chain 2>,
 <Residue 146 (NGP) of chain 2>,
 <Residue 147 (NGP) of chain 2>,
 <Residue 148 (NGP) of chain 2>,
 <Residue 149 (NGP) of chain 2>,
 <Residue 150 (NGP) of chain 2>,
 <Residue 151 (NGP) of chain 2>,
 <Residue 152 (NGP) of chain 2>,
 <Residue 153 (NGP) of chain 2>,
 <Residue 154 (NGP) of chain 2>,
 <Residue 155 (NGP) of chain 2>,
 <Residue 156 (NGP) of chain 2>,
 <Residue 157 (NGP) of chain 2>,
 <Residue 158 (NGP) of chain 2>,
 <Residue 159 (NGP) of chain 2>,
 <Residue 160 (NGP) of chain 2>,
 <Residue 161 (NGP) of chain 2>,
 <Residue 162 (NGP) of chain 2>,
 <Residue 163 (NGP) of chain 2>,
 <Residue 164 (NGP) of chain 2>,
 <Residue 165 (NGP) of chain 2>,
 <Residue 166 (NGP) of chain 2>,
 <Residue 167 (NGP) of chain 2>,
 <Residue 168 (IGL) of chain 2>,
 <Residue 169 (NGP) of chain 2>,
 <Residue 170 (NGP) of chain 2>,
 <Residue 171 (NGP) of chain 2>,
 <Residue 172 (NGP) of chain 2>,
 <Residue 173 (NGP) of chain 2>,
 <Residue 174 (NGP) of chain 2>,
 <Residue 175 (NGP) of chain 2>,
 <Residue 176 (NGP) of chain 2>,
 <Residue 177 (NGP) of chain 2>,
 <Residue 178 (NGP) of chain 2>,
 <Residue 179 (NGP) of chain 2>,
 <Residue 180 (NGP) of chain 2>,
 <Residue 181 (NGP) of chain 2>,
 <Residue 182 (NGP) of chain 2>,
 <Residue 183 (NGP) of chain 2>,
 <Residue 184 (NGP) of chain 2>,
 <Residue 185 (NGP) of chain 2>,
 <Residue 186 (IPR) of chain 2>,
 <Residue 187 (IPR) of chain 2>,
 <Residue 188 (IGL) of chain 2>,
 <Residue 189 (NGP) of chain 2>,
 <Residue 190 (NGP) of chain 2>,
 <Residue 191 (NGP) of chain 2>,
 <Residue 192 (IGL) of chain 2>,
 <Residue 193 (NGP) of chain 2>,
 <Residue 194 (IGL) of chain 2>,
 <Residue 195 (NGP) of chain 2>,
 <Residue 196 (NGP) of chain 2>,
 <Residue 197 (IPR) of chain 2>,
 <Residue 198 (NGP) of chain 2>,
 <Residue 199 (NGP) of chain 2>,
 <Residue 200 (IGL) of chain 2>,
 <Residue 201 (NGP) of chain 2>,
 <Residue 202 (NGP) of chain 2>,
 <Residue 203 (NGP) of chain 2>,
 <Residue 204 (NGP) of chain 2>,
 <Residue 205 (NGP) of chain 2>,
 <Residue 206 (NGP) of chain 2>,
 <Residue 207 (NGP) of chain 2>,
 <Residue 208 (IPR) of chain 2>,
 <Residue 209 (NGP) of chain 2>,
 <Residue 210 (NGP) of chain 2>,
 <Residue 211 (NGP) of chain 2>,
 <Residue 212 (NGP) of chain 2>,
 <Residue 213 (NGP) of chain 2>,
 <Residue 214 (NGP) of chain 2>,
 <Residue 215 (NGP) of chain 2>,
 <Residue 216 (NGP) of chain 2>,
 <Residue 217 (NGP) of chain 2>,
 <Residue 218 (NGP) of chain 2>,
 <Residue 219 (NGP) of chain 2>,
 <Residue 220 (NGP) of chain 2>,
 <Residue 221 (NGP) of chain 2>,
 <Residue 222 (IPR) of chain 2>,
 <Residue 223 (NGP) of chain 2>,
 <Residue 224 (IPR) of chain 2>,
 <Residue 225 (IPR) of chain 2>,
 <Residue 226 (IPR) of chain 2>,
 <Residue 227 (IGL) of chain 2>,
 <Residue 228 (NGP) of chain 2>,
 <Residue 229 (NGP) of chain 2>,
 <Residue 230 (NGP) of chain 2>,
 <Residue 231 (NGP) of chain 2>,
 <Residue 232 (NGP) of chain 2>,
 <Residue 233 (NGP) of chain 2>,
 <Residue 234 (NGP) of chain 2>,
 <Residue 235 (NGP) of chain 2>,
 <Residue 236 (NGP) of chain 2>,
 <Residue 237 (NGP) of chain 2>,
 <Residue 238 (NGP) of chain 2>,
 <Residue 239 (NGP) of chain 2>,
 <Residue 240 (NGP) of chain 2>,
 <Residue 241 (NGP) of chain 2>,
 <Residue 242 (NGP) of chain 2>,
 <Residue 243 (NGP) of chain 2>,
 <Residue 244 (NGP) of chain 2>,
 <Residue 245 (NGP) of chain 2>,
 <Residue 246 (NGP) of chain 2>,
 <Residue 247 (NGP) of chain 2>,
 <Residue 248 (NGP) of chain 2>,
 <Residue 249 (NGP) of chain 2>,
 <Residue 250 (NGP) of chain 2>,
 <Residue 251 (IGL) of chain 2>,
 <Residue 252 (NGP) of chain 2>,
 <Residue 253 (NGP) of chain 2>,
 <Residue 254 (NGP) of chain 2>,
 <Residue 255 (NGP) of chain 2>,
 <Residue 256 (NGP) of chain 2>,
 <Residue 257 (NGP) of chain 2>,
 <Residue 258 (NGP) of chain 2>,
 <Residue 259 (NGP) of chain 2>,
 <Residue 260 (IPR) of chain 2>,
 <Residue 261 (NGP) of chain 2>,
 <Residue 262 (NGP) of chain 2>,
 <Residue 263 (NGP) of chain 2>,
 <Residue 264 (NGP) of chain 2>,
 <Residue 265 (NGP) of chain 2>,
 <Residue 266 (IGL) of chain 2>,
 <Residue 267 (NGP) of chain 2>,
 <Residue 268 (NGP) of chain 2>,
 <Residue 269 (NGP) of chain 2>,
 <Residue 270 (NGP) of chain 2>,
 <Residue 271 (NGP) of chain 2>,
 <Residue 272 (NGP) of chain 2>,
 <Residue 273 (NGP) of chain 2>,
 <Residue 274 (NGP) of chain 2>,
 <Residue 275 (NGP) of chain 2>,
 <Residue 276 (NGP) of chain 2>,
 <Residue 277 (NGP) of chain 2>,
 <Residue 278 (NGP) of chain 2>,
 <Residue 279 (NGP) of chain 2>,
 <Residue 280 (NGP) of chain 2>,
 <Residue 281 (NGP) of chain 2>,
 <Residue 282 (NGP) of chain 2>,
 <Residue 283 (NGP) of chain 2>,
 <Residue 284 (NGP) of chain 2>,
 <Residue 285 (NGP) of chain 2>,
 <Residue 286 (NGP) of chain 2>,
 <Residue 287 (NGP) of chain 2>,
 <Residue 288 (NGP) of chain 2>,
 <Residue 289 (NGP) of chain 2>,
 <Residue 290 (NGP) of chain 2>,
 <Residue 291 (NGP) of chain 2>,
 <Residue 292 (NGP) of chain 2>,
 <Residue 293 (NGP) of chain 2>,
 <Residue 294 (NGP) of chain 2>,
 <Residue 295 (NGP) of chain 2>,
 <Residue 296 (NGP) of chain 2>,
 <Residue 297 (NGP) of chain 2>,
 <Residue 298 (NGP) of chain 2>,
 <Residue 299 (NGP) of chain 2>,
 <Residue 300 (NGP) of chain 2>,
 <Residue 301 (NGP) of chain 2>,
 <Residue 302 (NGP) of chain 2>,
 <Residue 303 (NGP) of chain 2>,
 <Residue 304 (NGP) of chain 2>,
 <Residue 305 (NGP) of chain 2>,
 <Residue 306 (NGP) of chain 2>,
 <Residue 307 (NGP) of chain 2>,
 <Residue 308 (NGP) of chain 2>,
 <Residue 309 (NGP) of chain 2>,
 <Residue 310 (NGP) of chain 2>,
 <Residue 311 (NGP) of chain 2>,
 <Residue 312 (NGP) of chain 2>,
 <Residue 313 (NGP) of chain 2>,
 <Residue 314 (NGP) of chain 2>,
 <Residue 315 (NGP) of chain 2>,
 <Residue 316 (NGP) of chain 2>,
 <Residue 317 (NGP) of chain 2>,
 <Residue 318 (NGP) of chain 2>,
 <Residue 319 (IGL) of chain 2>,
 <Residue 320 (NGP) of chain 2>,
 <Residue 321 (NGP) of chain 2>,
 <Residue 322 (NGP) of chain 2>,
 <Residue 323 (NGP) of chain 2>,
 <Residue 324 (NGP) of chain 2>,
 <Residue 325 (IGL) of chain 2>,
 <Residue 326 (NGP) of chain 2>,
 <Residue 327 (NGP) of chain 2>,
 <Residue 328 (NGP) of chain 2>,
 <Residue 329 (NGP) of chain 2>,
 <Residue 330 (NGP) of chain 2>,
 <Residue 331 (NGP) of chain 2>,
 <Residue 332 (IGL) of chain 2>,
 <Residue 333 (NGP) of chain 2>,
 <Residue 334 (NGP) of chain 2>,
 <Residue 335 (NGP) of chain 2>,
 <Residue 336 (NGP) of chain 2>,
 <Residue 337 (NGP) of chain 2>,
 <Residue 338 (NGP) of chain 2>,
 <Residue 339 (NGP) of chain 2>,
 <Residue 340 (NGP) of chain 2>,
 <Residue 341 (NGP) of chain 2>,
 <Residue 342 (NGP) of chain 2>,
 <Residue 343 (NGP) of chain 2>,
 <Residue 344 (NGP) of chain 2>,
 <Residue 345 (IPR) of chain 2>,
 <Residue 346 (NGP) of chain 2>,
 <Residue 347 (NGP) of chain 2>,
 <Residue 348 (NGP) of chain 2>,
 <Residue 349 (IGL) of chain 2>,
 <Residue 350 (NGP) of chain 2>,
 <Residue 351 (NGP) of chain 2>,
 <Residue 352 (NGP) of chain 2>,
 <Residue 353 (NGP) of chain 2>,
 <Residue 354 (NGP) of chain 2>,
 <Residue 355 (NGP) of chain 2>,
 <Residue 356 (NGP) of chain 2>,
 <Residue 357 (NGP) of chain 2>,
 <Residue 358 (NGP) of chain 2>,
 <Residue 359 (NGP) of chain 2>,
 <Residue 360 (IGL) of chain 2>,
 <Residue 361 (NGP) of chain 2>,
 <Residue 362 (NGP) of chain 2>,
 <Residue 363 (NGP) of chain 2>,
 <Residue 364 (NGP) of chain 2>,
 <Residue 365 (NGP) of chain 2>,
 <Residue 366 (NGP) of chain 2>,
 <Residue 367 (NGP) of chain 2>,
 <Residue 368 (NGP) of chain 2>,
 <Residue 369 (NGP) of chain 2>,
 <Residue 370 (NGP) of chain 2>,
 <Residue 371 (IPR) of chain 2>,
 <Residue 372 (NGP) of chain 2>,
 <Residue 373 (NGP) of chain 2>,
 <Residue 374 (NGP) of chain 2>,
 <Residue 375 (NGP) of chain 2>,
 <Residue 376 (NGP) of chain 2>,
 <Residue 377 (NGP) of chain 2>,
 <Residue 378 (NGP) of chain 2>,
 <Residue 379 (NGP) of chain 2>,
 <Residue 380 (NGP) of chain 2>,
 <Residue 381 (NGP) of chain 2>,
 <Residue 382 (NGP) of chain 2>,
 <Residue 383 (NGP) of chain 2>,
 <Residue 384 (NGP) of chain 2>,
 <Residue 385 (NGP) of chain 2>,
 <Residue 386 (NGP) of chain 2>,
 <Residue 387 (NGP) of chain 2>,
 <Residue 388 (NGP) of chain 2>,
 <Residue 389 (IPR) of chain 2>,
 <Residue 390 (NGP) of chain 2>,
 <Residue 391 (NGP) of chain 2>,
 <Residue 392 (NGP) of chain 2>,
 <Residue 393 (NGP) of chain 2>,
 <Residue 394 (NGP) of chain 2>,
 <Residue 395 (NGP) of chain 2>,
 <Residue 396 (NGP) of chain 2>,
 <Residue 397 (NGP) of chain 2>,
 <Residue 398 (NGP) of chain 2>,
 <Residue 399 (NGP) of chain 2>,
 <Residue 400 (IGL) of chain 2>,
 <Residue 401 (NGP) of chain 2>,
 <Residue 402 (NGP) of chain 2>,
 <Residue 403 (NGP) of chain 2>,
 <Residue 404 (NGP) of chain 2>,
 <Residue 405 (NGP) of chain 2>,
 <Residue 406 (NGP) of chain 2>,
 <Residue 407 (NGP) of chain 2>,
 <Residue 408 (NGP) of chain 2>,
 <Residue 409 (NGP) of chain 2>,
 <Residue 410 (NGP) of chain 2>,
 <Residue 411 (NGP) of chain 2>,
 <Residue 412 (IGL) of chain 2>,
 <Residue 413 (NGP) of chain 2>,
 <Residue 414 (NGP) of chain 2>,
 <Residue 415 (NGP) of chain 2>,
 <Residue 416 (NGP) of chain 2>,
 <Residue 417 (NGP) of chain 2>,
 <Residue 418 (NGP) of chain 2>,
 <Residue 419 (NGP) of chain 2>,
 <Residue 420 (NGP) of chain 2>,
 <Residue 421 (NGP) of chain 2>,
 <Residue 422 (NGP) of chain 2>,
 <Residue 423 (NGP) of chain 2>,
 <Residue 424 (NGP) of chain 2>,
 <Residue 425 (NGP) of chain 2>,
 <Residue 426 (NGP) of chain 2>,
 <Residue 427 (NGP) of chain 2>,
 <Residue 428 (NGP) of chain 2>,
 <Residue 429 (NGP) of chain 2>,
 <Residue 430 (NGP) of chain 2>,
 <Residue 431 (NGP) of chain 2>,
 <Residue 432 (NGP) of chain 2>,
 <Residue 433 (NGP) of chain 2>,
 <Residue 434 (NGP) of chain 2>,
 <Residue 435 (NGP) of chain 2>,
 <Residue 436 (NGP) of chain 2>,
 <Residue 437 (IPR) of chain 2>,
 <Residue 438 (NGP) of chain 2>,
 <Residue 439 (NGP) of chain 2>,
 <Residue 440 (NGP) of chain 2>,
 <Residue 441 (NGP) of chain 2>,
 <Residue 442 (IPR) of chain 2>,
 <Residue 443 (NGP) of chain 2>,
 <Residue 444 (NGP) of chain 2>,
 <Residue 445 (NGP) of chain 2>,
 <Residue 446 (NGP) of chain 2>,
 <Residue 447 (NGP) of chain 2>,
 <Residue 448 (NGP) of chain 2>,
 <Residue 449 (NGP) of chain 2>,
 <Residue 450 (NGP) of chain 2>,
 <Residue 451 (NGP) of chain 2>,
 <Residue 452 (NGP) of chain 2>,
 <Residue 453 (NGP) of chain 2>,
 <Residue 454 (NGP) of chain 2>,
 <Residue 455 (NGP) of chain 2>,
 <Residue 456 (NGP) of chain 2>,
 <Residue 457 (NGP) of chain 2>,
 <Residue 458 (NGP) of chain 2>,
 <Residue 459 (NGP) of chain 2>,
 <Residue 460 (NGP) of chain 2>,
 <Residue 461 (NGP) of chain 2>,
 <Residue 462 (NGP) of chain 2>,
 <Residue 463 (NGP) of chain 2>,
 <Residue 464 (NGP) of chain 2>,
 <Residue 465 (IPR) of chain 2>,
 <Residue 466 (IPR) of chain 2>,
 <Residue 467 (NGP) of chain 2>,
 <Residue 468 (NGP) of chain 2>,
 <Residue 469 (NGP) of chain 2>,
 <Residue 470 (NGP) of chain 2>,
 <Residue 471 (NGP) of chain 2>,
 <Residue 472 (NGP) of chain 2>,
 <Residue 473 (NGP) of chain 2>,
 <Residue 474 (NGP) of chain 2>,
 <Residue 475 (NGP) of chain 2>,
 <Residue 476 (NGP) of chain 2>,
 <Residue 477 (NGP) of chain 2>,
 <Residue 478 (NGP) of chain 2>,
 <Residue 479 (NGP) of chain 2>,
 <Residue 480 (NGP) of chain 2>,
 <Residue 481 (NGP) of chain 2>,
 <Residue 482 (NGP) of chain 2>,
 <Residue 483 (NGP) of chain 2>,
 <Residue 484 (NGP) of chain 2>,
 <Residue 485 (NGP) of chain 2>,
 <Residue 486 (NGP) of chain 2>,
 <Residue 487 (NGP) of chain 2>,
 <Residue 488 (NGP) of chain 2>,
 <Residue 489 (NGP) of chain 2>,
 <Residue 490 (NGP) of chain 2>,
 <Residue 491 (NGP) of chain 2>,
 <Residue 492 (NGP) of chain 2>,
 <Residue 493 (NGP) of chain 2>,
 <Residue 494 (NGP) of chain 2>,
 <Residue 495 (NGP) of chain 2>,
 <Residue 496 (NGP) of chain 2>,
 <Residue 497 (NGP) of chain 2>,
 <Residue 498 (NGP) of chain 2>,
 <Residue 499 (NGP) of chain 2>,
 <Residue 500 (NGP) of chain 2>,
 <Residue 501 (NGP) of chain 2>,
 <Residue 502 (NGP) of chain 2>,
 <Residue 503 (NGP) of chain 2>,
 <Residue 504 (NGP) of chain 2>,
 <Residue 505 (NGP) of chain 2>,
 <Residue 506 (NGP) of chain 2>,
 <Residue 507 (NGP) of chain 2>,
 <Residue 508 (NGP) of chain 2>,
 <Residue 509 (NGP) of chain 2>,
 <Residue 510 (NGP) of chain 2>,
 <Residue 511 (NGP) of chain 2>,
 <Residue 512 (NGP) of chain 2>,
 <Residue 513 (NGP) of chain 2>,
 <Residue 514 (NGP) of chain 2>,
 <Residue 515 (NGP) of chain 2>,
 <Residue 516 (NGP) of chain 2>,
 <Residue 517 (NGP) of chain 2>,
 <Residue 518 (NGP) of chain 2>,
 <Residue 519 (NGP) of chain 2>,
 <Residue 520 (NGP) of chain 2>,
 <Residue 521 (NGP) of chain 2>,
 <Residue 522 (NGP) of chain 2>,
 <Residue 523 (NGP) of chain 2>,
 <Residue 524 (NGP) of chain 2>,
 <Residue 525 (NGP) of chain 2>,
 <Residue 526 (NGP) of chain 2>,
 <Residue 527 (NGP) of chain 2>,
 <Residue 528 (NGP) of chain 2>,
 <Residue 529 (NGP) of chain 2>,
 <Residue 530 (NGP) of chain 2>,
 <Residue 531 (NGP) of chain 2>,
 <Residue 532 (NGP) of chain 2>,
 <Residue 533 (NGP) of chain 2>,
 <Residue 534 (NGP) of chain 2>,
 <Residue 535 (NGP) of chain 2>,
 <Residue 536 (NGP) of chain 2>,
 <Residue 537 (NGP) of chain 2>,
 <Residue 538 (NGP) of chain 2>,
 <Residue 539 (NGP) of chain 2>,
 <Residue 540 (NGP) of chain 2>,
 <Residue 541 (NGP) of chain 2>,
 <Residue 542 (NGP) of chain 2>,
 <Residue 543 (NGP) of chain 2>,
 <Residue 544 (NGP) of chain 2>,
 <Residue 545 (NGP) of chain 2>,
 <Residue 546 (IGL) of chain 2>,
 <Residue 547 (NGP) of chain 2>,
 <Residue 548 (NGP) of chain 2>,
 <Residue 549 (NGP) of chain 2>,
 <Residue 550 (NGP) of chain 2>,
 <Residue 551 (NGP) of chain 2>,
 <Residue 552 (NGP) of chain 2>,
 <Residue 553 (NGP) of chain 2>,
 <Residue 554 (NGP) of chain 2>,
 <Residue 555 (IPR) of chain 2>,
 <Residue 556 (NGP) of chain 2>,
 <Residue 557 (NGP) of chain 2>,
 <Residue 558 (NGP) of chain 2>,
 <Residue 559 (NGP) of chain 2>,
 <Residue 560 (NGP) of chain 2>,
 <Residue 561 (NGP) of chain 2>,
 <Residue 562 (NGP) of chain 2>,
 <Residue 563 (NGP) of chain 2>,
 <Residue 564 (NGP) of chain 2>,
 <Residue 565 (NGP) of chain 2>,
 <Residue 566 (IGL) of chain 2>,
 <Residue 567 (NGP) of chain 2>,
 <Residue 568 (IPR) of chain 2>,
 <Residue 569 (NGP) of chain 2>,
 <Residue 570 (NGP) of chain 2>,
 <Residue 571 (NGP) of chain 2>,
 <Residue 572 (NGP) of chain 2>,
 <Residue 573 (NGP) of chain 2>,
 <Residue 574 (NGP) of chain 2>,
 <Residue 575 (NGP) of chain 2>,
 <Residue 576 (NGP) of chain 2>,
 <Residue 577 (NGP) of chain 2>,
 <Residue 578 (NGP) of chain 2>,
 <Residue 579 (NGP) of chain 2>,
 <Residue 580 (NGP) of chain 2>,
 <Residue 581 (NGP) of chain 2>,
 <Residue 582 (NGP) of chain 2>,
 <Residue 583 (NGP) of chain 2>,
 <Residue 584 (NGP) of chain 2>,
 <Residue 585 (NGP) of chain 2>,
 <Residue 586 (NGP) of chain 2>,
 <Residue 587 (NGP) of chain 2>,
 <Residue 588 (NGP) of chain 2>,
 <Residue 589 (NGP) of chain 2>,
 <Residue 590 (NGP) of chain 2>,
 <Residue 591 (NGP) of chain 2>,
 <Residue 592 (NGP) of chain 2>,
 <Residue 593 (NGP) of chain 2>,
 <Residue 594 (NGP) of chain 2>]

In [5]:
import os
import argparse
import sys
import openmmawsem
import helperFunctions.myFunctions

In [6]:
from simtk.openmm.app import *
from simtk.openmm import *
from simtk.unit import *
from sys import stdout
from pdbfixer import *

In [7]:
chains_to_simulate = "B"

In [8]:
removeHeterogens = False

In [15]:
pdb_filename = "/Users/weilu/Research/server/jun_week1_2020/no_DNA_protein_DNA_benchmark/cleaned_pdbs/1a36.pdb"


fixer = PDBFixer(filename=pdb_filename)
# we only want pdb chains, ligands or DNA chain will be ignored here.
fixer.removeHeterogens(keepWater=False)
# remove unwanted chains
chains = list(fixer.topology.chains())

In [16]:
chains


Out[16]:
[<Chain 0>, <Chain 1>, <Chain 2>]

In [17]:
for c in chains:
    print(c.id)


A
B
C

In [21]:
a = list(c.residues())

In [22]:
b = a[0]

In [23]:
b.id


Out[23]:
'1'

In [27]:
import numpy as np

In [29]:
if np.alltrue([a.name in dnaResidues for a in c.residues()]) is True:
    # this chain is a DNA chain


Out[29]:
False

In [26]:
b.name in dnaResidues


Out[26]:
False

In [ ]:
proteinResidues = ['ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR', 'TYR', 'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER', 'TRP', 'VAL']
rnaResidues = ['A', 'G', 'C', 'U', 'I']
dnaResidues = ['DA', 'DG', 'DC', 'DT', 'DI']

In [25]:
dnaResidues = ['DA', 'DG', 'DC', 'DT', 'DI']

In [49]:
chains = list(fixer.topology.chains())
a = ""

proteinResidues = ['ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR', 'TYR', 'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER', 'TRP', 'VAL']
rnaResidues = ['A', 'G', 'C', 'U', 'I']
dnaResidues = ['DA', 'DG', 'DC', 'DT', 'DI']
for c in chains:
    # print([a.name in dnaResidues for a in c.residues()])
    if True and np.alltrue([a.name in dnaResidues for a in c.residues()]):
        continue
    if c.id in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789':
        a += c.id

In [50]:
a


Out[50]:
'C'

In [ ]:


In [ ]:


In [12]:
pdb_filename = "/Users/weilu/Research/server/jun_week1_2020/no_DNA_protein_DNA_benchmark/cleaned_pdbs/1a36.pdb"
fixer = PDBFixer(filename=pdb_filename)

# remove unwanted chains
chains = list(fixer.topology.chains())
chains_to_remove = [i for i, x in enumerate(chains) if x.id not in chains_to_simulate]
fixer.removeChains(chains_to_remove)

#Identify Missing Residues
fixer.findMissingResidues()
fixer.missingResidues = {}

#Replace Nonstandard Residues
fixer.findNonstandardResidues()
fixer.replaceNonstandardResidues()

#Remove Heterogens
if removeHeterogens:
    fixer.removeHeterogens(keepWater=False)

#Add Missing Heavy Atoms
fixer.findMissingAtoms()
fixer.addMissingAtoms()

#Add Missing Hydrogens
fixer.addMissingHydrogens(7.0)
# PDBFile.writeFile(fixer.topology, fixer.positions, open(cleaned_pdb_filename, 'w'), keepIds=keepIds)

#Read sequence
# structure = PDBParser().get_structure('X', cleaned_pdb_filename)

In [13]:
fixer.topology


Out[13]:
<Topology; 1 chains, 22 residues, 700 atoms, 1246 bonds>

In [ ]: