In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
# from small_script.myFunctions import *
sys.path.insert(0, "/Users/weilu/openmmawsem")
from helperFunctions.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
plt.rcParams['figure.figsize'] = np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
In [3]:
from simtk.openmm.app import *
from simtk.openmm import *
from simtk.unit import *
import numpy as np
se_map_1_letter = {'A': 0, 'P': 1, 'K': 2, 'N': 3, 'R': 4,
'F': 5, 'D': 6, 'Q': 7, 'E': 8, 'G': 9,
'I': 10, 'H': 11, 'L': 12, 'C': 13, 'M': 14,
'S': 15, 'T': 16, 'Y': 17, 'V': 18, 'W': 19}
def isChainStart(residueId, chain_starts, n=2):
# return true if residue is near chain starts.
# n=0 means always return False
# n=1 means only return True if residue is the the first residue of a chain.
# n=2 means return True if residue is the first or the one nearest to the first residue of a chain.
atBegin = False
for i in range(n):
if (residueId-i) in chain_starts:
atBegin = True
return atBegin
def isChainEnd(residueId, chain_ends, n=2):
# return true if residue is near chain ends.
# n=0 means always return False
# n=1 means only return True if residue is the the last residue of a chain.
# n=2 means return True if residue is the last or the one nearest to the last residue of a chain.
atEnd = False
for i in range(n):
if (residueId+i) in chain_ends:
atEnd = True
return atEnd
def isChainEdge(residueId, chain_starts, chain_ends, n=2):
# n is how far away from the two ends count as in chain edge.
return (isChainStart(residueId, chain_starts, n) or isChainEnd(residueId, chain_ends, n))
# atBegin = False
# atEnd = False
# for i in range(n):
# if (residueId-i) in chain_starts:
# atBegin = True
# for i in range(n):
# if (residueId+i) in chain_ends:
# atEnd = True
# return (atBegin or atEnd)
def inWhichChain(residueId, chain_ends):
chain_table = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
for i, end_of_chain_resId in enumerate(chain_ends):
if end_of_chain_resId < residueId:
pass
else:
return chain_table[i]
def read_beta_parameters():
### directly copied from Nick Schafer's
# os.chdir(parameter_directory)
in_anti_HB = open("anti_HB", 'r').readlines()
in_anti_NHB = open("anti_NHB", 'r').readlines()
in_para_HB = open("para_HB", 'r').readlines()
in_para_one = open("para_one", 'r').readlines()
in_anti_one = open("anti_one", 'r').readlines()
p_par = np.zeros((20))
p_anti = np.zeros((20))
p_antihb = np.zeros((20,20,2))
p_antinhb = np.zeros((20,20,2))
p_parhb = np.zeros((20,20,2))
for i in range(20):
p_par[i] = float(in_para_one[i].strip())
p_anti[i] = float(in_anti_one[i].strip())
for j in range(20):
p_antihb[i][j][0] = float(in_anti_HB[i].strip().split()[j])
p_antinhb[i][j][0] = float(in_anti_NHB[i].strip().split()[j])
p_parhb[i][j][0] = float(in_para_HB[i].strip().split()[j])
for i in range(20):
for j in range(20):
p_antihb[i][j][1] = float(in_anti_HB[i+21].strip().split()[j])
p_antinhb[i][j][1] = float(in_anti_NHB[i+21].strip().split()[j])
p_parhb[i][j][1] = float(in_para_HB[i+21].strip().split()[j])
return p_par, p_anti, p_antihb, p_antinhb, p_parhb
def get_lambda_by_index(i, j, lambda_i):
lambda_table = [[1.37, 1.36, 1.17],
[3.89, 3.50, 3.52],
[0.00, 3.47, 3.62]]
if abs(j-i) >= 4 and abs(j-i) < 18:
return lambda_table[lambda_i][0]
elif abs(j-i) >= 18 and abs(j-i) < 45:
return lambda_table[lambda_i][1]
elif abs(j-i) >= 45:
return lambda_table[lambda_i][2]
else:
return 0
def get_alpha_by_index(i, j, alpha_i):
alpha_table = [[1.30, 1.30, 1.30],
[1.32, 1.32, 1.32],
[1.22, 1.22, 1.22],
[0.00, 0.33, 0.33],
[0.00, 1.01, 1.01]]
if abs(j-i) >= 4 and abs(j-i) < 18:
return alpha_table[alpha_i][0]
elif abs(j-i) >= 18 and abs(j-i) < 45:
return alpha_table[alpha_i][1]
elif abs(j-i) >= 45:
return alpha_table[alpha_i][2]
else:
return 0
def get_Lambda_2(i, j, p_par, p_anti, p_antihb, p_antinhb, p_parhb, a):
Lambda = get_lambda_by_index(i, j, 1)
Lambda += -0.5*get_alpha_by_index(i, j, 0)*p_antihb[a[i], a[j]][0]
Lambda += -0.25*get_alpha_by_index(i, j, 1)*(p_antinhb[a[i+1], a[j-1]][0] + p_antinhb[a[i-1], a[j+1]][0])
Lambda += -get_alpha_by_index(i, j, 2)*(p_anti[a[i]] + p_anti[a[j]])
return Lambda
def get_Lambda_3(i, j, p_par, p_anti, p_antihb, p_antinhb, p_parhb, a):
Lambda = get_lambda_by_index(i, j, 2)
Lambda += -get_alpha_by_index(i, j, 3)*p_parhb[a[i+1], a[j]][0]
Lambda += -get_alpha_by_index(i, j, 4)*p_par[a[i+1]]
Lambda += -get_alpha_by_index(i, j, 3)*p_par[a[j]]
return Lambda
def pap_term_1(oa, k_pap=4.184, dis_i_to_i4=1.2, forceGroup=28):
print("pap_1 term ON")
# dis_i_to_i4 should be in nm, it disfavor hydrogen bond when ca_i and ca_i+4 are 1.2 nm apart away.
nres, ca = oa.nres, oa.ca
# r0 = 2.0 # nm
r0 = 0.8 # nm
eta_pap = 70 # nm^-1
gamma_aph = 1.0
gamma_ap = 0.4
gamma_p = 0.4
gamma_1 = np.zeros((nres, nres))
gamma_2 = np.zeros((nres, nres))
for i in range(nres):
for j in range(nres):
resId1 = i
chain1 = inWhichChain(resId1, oa.chain_ends)
resId2 = j
chain2 = inWhichChain(resId2, oa.chain_ends)
gamma_1[i][j] = get_pap_gamma_APH(i, j, chain1, chain2, gamma_aph)
gamma_2[i][j] = get_pap_gamma_AP(i, j, chain1, chain2, gamma_ap)
constraint_i_and_i4 = f"0.5*(1+tanh({eta_pap}*(distance(a1,a2)-{dis_i_to_i4})))"
constraint_i_and_i4 = 1
pap_function = f"-{k_pap}*(gamma_1(donor_idx,acceptor_idx)+gamma_2(donor_idx,acceptor_idx))\
*0.5*(1+tanh({eta_pap}*({r0}-distance(a1,d1))))\
*0.5*(1+tanh({eta_pap}*({r0}-distance(a2,d2))))\
*{constraint_i_and_i4}"
# pap_function = f"-{k_pap}*(gamma_1(donor_idx,acceptor_idx))\
# *0.5*(1+tanh({eta_pap}*({r0}-distance(a1,d1))))\
# *0.5*(1+tanh({eta_pap}*({r0}-distance(a2,d2))))"
# pap_function = f"-{k_pap}*distance(a1,d1)"
pap = CustomHbondForce(pap_function)
pap.addPerDonorParameter("donor_idx")
pap.addPerAcceptorParameter("acceptor_idx")
pap.addTabulatedFunction("gamma_1", Discrete2DFunction(nres, nres, gamma_1.T.flatten()))
pap.addTabulatedFunction("gamma_2", Discrete2DFunction(nres, nres, gamma_2.T.flatten()))
# print(ca)
# count = 0;
i = 0
# pap.addAcceptor(ca[0], ca[4], -1, [0])
# pap.addAcceptor(ca[20], ca[8], -1, [4])
# pap.addDonor(ca[20], ca[0], -1, [4])
for i in range(nres):
if not isChainEnd(i, oa.chain_ends, n=4):
pap.addAcceptor(ca[i], ca[i+4], -1, [i])
# if i > 13 and not isChainStart(i, oa.chain_starts, n=4):
if not isChainStart(i, oa.chain_starts, n=4):
pap.addDonor(oa.n[i], oa.n[i-4], -1, [i])
pap.setNonbondedMethod(CustomHbondForce.CutoffNonPeriodic)
pap.setCutoffDistance(1.0)
# print(count)
pap.setForceGroup(forceGroup)
return pap
def pap_term_2(oa, k_pap=4.184, dis_i_to_i4=1.2, forceGroup=28):
print("pap_2 term ON")
nres, ca = oa.nres, oa.ca
# r0 = 2.0 # nm
r0 = 0.8 # nm
eta_pap = 70 # nm^-1
gamma_aph = 1.0
gamma_ap = 0.4
gamma_p = 0.4
gamma_3 = np.zeros((nres, nres))
for i in range(nres):
for j in range(nres):
resId1 = i
chain1 = inWhichChain(resId1, oa.chain_ends)
resId2 = j
chain2 = inWhichChain(resId2, oa.chain_ends)
gamma_3[i][j] = get_pap_gamma_P(i, j, chain1, chain2, gamma_p)
# gamma_3[j][i] = gamma_3[i][j]
# constraint_i_and_i4 = f"0.5*(1+tanh({eta_pap}*(distance(a1,a2)-{dis_i_to_i4})))"
constraint_i_and_i4 = 1
pap_function = f"-{k_pap}*gamma_3(donor_idx,acceptor_idx)\
*0.5*(1+tanh({eta_pap}*({r0}-distance(a1,d1))))\
*0.5*(1+tanh({eta_pap}*({r0}-distance(a2,d2))))\
*{constraint_i_and_i4}"
pap = CustomHbondForce(pap_function)
pap.addPerDonorParameter("donor_idx")
pap.addPerAcceptorParameter("acceptor_idx")
pap.addTabulatedFunction("gamma_3", Discrete2DFunction(nres, nres, gamma_3.T.flatten()))
# print(oa.n)
# count = 0;
for i in range(nres):
if not isChainEnd(i, oa.chain_ends, n=13):
pap.addAcceptor(ca[i], ca[i+4], -1, [i])
if not isChainEnd(i, oa.chain_ends, n=4):
# pap.addDonor(ca[i], ca[i+4], -1, [i])
if oa.n[i] != -1 and oa.n[i+4] != -1:
pap.addDonor(oa.n[i], oa.n[i+4], -1, [i])
pap.setNonbondedMethod(CustomHbondForce.CutoffNonPeriodic)
pap.setCutoffDistance(1.0)
# print(count)
pap.setForceGroup(forceGroup)
return pap
In [55]:
parser = PDBParser()
structure = parser.get_structure("X", "/Users/weilu/Research/server/nov_2019/beta_pap_term_energy/openMM_chainA/chainA-cleaned.pdb")
all_res = list(structure.get_residues())
chains = [res.get_full_id()[2] for res in all_res]
n = len(all_res)
In [56]:
def get_lambda_by_index(i, j, lambda_i):
lambda_table = [[1.37, 1.36, 1.17],
[3.89, 3.50, 3.52],
[0.00, 3.47, 3.62]]
if abs(j-i) >= 4 and abs(j-i) < 18:
return lambda_table[lambda_i][0]
elif abs(j-i) >= 18 and abs(j-i) < 45:
return lambda_table[lambda_i][1]
elif abs(j-i) >= 45:
return lambda_table[lambda_i][2]
else:
return 0
def get_alpha_by_index(i, j, alpha_i):
alpha_table = [[1.30, 1.30, 1.30],
[1.32, 1.32, 1.32],
[1.22, 1.22, 1.22],
[0.00, 0.33, 0.33],
[0.00, 1.01, 1.01]]
if abs(j-i) >= 4 and abs(j-i) < 18:
return alpha_table[alpha_i][0]
elif abs(j-i) >= 18 and abs(j-i) < 45:
return alpha_table[alpha_i][1]
elif abs(j-i) >= 45:
return alpha_table[alpha_i][2]
else:
return 0
In [57]:
nres = n
lambda_1 = np.zeros((nres, nres))
for i in range(nres):
for j in range(nres):
lambda_1[i][j] = get_lambda_by_index(i, j, 0)
In [63]:
r_ON = .298 * 10
sigma_NO = .068 * 10
r_OH = .206 * 10
sigma_HO = .076 * 10
eta_beta_1 = 10.0 * 0.1
eta_beta_2 = 5.0 * 0.1
# r_HB_c = 0.4
r_HB_c = 1.2 * 10
V_beta1 = 0
for i in range(n):
for j in range(n):
# if abs(i-j) < 2:
# continue
chain_i = chains[i]
chain_j = chains[j]
if all_res[j].get_resname() == "PRO":
continue
r_Oi_Nj = all_res[i]["O"] - all_res[j]["N"]
r_Oi_Hj = all_res[i]["O"] - all_res[j]["H"]
# if all_res[i].get_resname() == "PRO":
# continue
# r_Oi_Nj = all_res[j]["O"] - all_res[i]["N"]
# r_Oi_Hj = all_res[j]["O"] - all_res[i]["H"]
theta_ij = np.exp(-(r_Oi_Nj-r_ON)**2/(2*sigma_NO**2)-(r_Oi_Hj-r_OH)**2/(2*sigma_HO**2))
if i < 2 or i >= n-2:
nu_i = 1
else:
r_CAim2_CAip2 = all_res[i-2]["CA"] - all_res[i+2]["CA"]
nu_i = 0.5*(1+tanh(eta_beta_1*(r_CAim2_CAip2-r_HB_c)))
if j < 2 or j >= n-2:
nu_j = 1
else:
r_CAjm2_CAjp2 = all_res[j-2]["CA"] - all_res[j+2]["CA"]
nu_j = 0.5*(1+tanh(eta_beta_2*(r_CAjm2_CAjp2-r_HB_c)))
V_ij = lambda_1[i][j] * theta_ij * nu_i * nu_j
V_beta1 += V_ij
# if abs(V_ij) > 1e-3:
# print(i, j, V_ij,lambda_1[i][j], theta_ij, nu_i, nu_j, r_Oi_Nj, r_Oi_Hj)
V_beta1
Out[63]:
In [ ]:
In [60]:
res = all_res[9]
In [61]:
res.get_resname()
Out[61]:
In [ ]:
res.get_atoms()
In [51]:
j
Out[51]:
In [52]:
i
Out[52]:
In [47]:
i
Out[47]:
In [ ]:
In [62]:
list(res.get_atoms())
Out[62]:
In [10]:
res = all_res[0]
In [12]:
res.get_resname()
Out[12]:
In [16]:
# chains = pd.read_csv(io.StringIO(a), sep=" ", names=["res", "chain"])
all_res = list(structure.get_residues())
chains = [res.get_full_id()[2] for res in all_res]
n = len(all_res)
In [ ]:
# APH term
In [46]:
def get_pap_gamma_APH(donor_idx, acceptor_idx, chain_i, chain_j, gamma_APH):
# if chain_i == chain_j and abs(j-i) < 13 or abs(j-i) > 16:
# if abs(j-i) < 13 or abs(j-i) > 16:
# if i-j < 13 or i-j > 16:
# if (donor_idx - acceptor_idx >= 13 and donor_idx - acceptor_idx <= 16) or chain_i != chain_j:
if (donor_idx - acceptor_idx >= 13 and donor_idx - acceptor_idx <= 16):
return gamma_APH
else:
return 0
# if donor_idx - acceptor_idx < 13 or donor_idx - acceptor_idx > 16:
# return 0
# else:
# return gamma_APH
In [48]:
eta = 7
r0 = 8
a_ = 0
for i in range(n-4):
for j in range(4, n):
chain_i = chains.iloc[i][1]
chain_j = chains.iloc[j][1]
gamma = get_pap_gamma_APH(j, i, chain_i, chain_j, 1)
dis = all_res[i]["CA"] - all_res[j]["CA"]
dis_p4 = all_res[i+4]["CA"] - all_res[j-4]["CA"]
rho1 = 0.5*(1+np.tanh(eta*(r0-dis)))
rho2 = 0.5*(1+np.tanh(eta*(r0-dis_p4)))
show = -gamma * rho1 * rho2
a_ += show
# if show > 1e-6:
# print(i, j, show, rho1, rho2)
# if i == 0:
# print(i, j, show, rho1, rho2)
# break
print(a_)
In [ ]:
# P term
In [44]:
def get_pap_gamma_AP(donor_idx, acceptor_idx, chain_i, chain_j, gamma_AP):
if (donor_idx - acceptor_idx >= 17) or chain_i != chain_j:
# if (donor_idx - acceptor_idx >= 17):
return gamma_AP
else:
return 0
In [45]:
eta = 7
r0 = 8
a_ = 0
for i in range(n-4):
for j in range(4, n):
chain_i = chains.iloc[i][1]
chain_j = chains.iloc[j][1]
gamma = get_pap_gamma_AP(j, i, chain_i, chain_j, 0.4)
dis = all_res[i]["CA"] - all_res[j]["CA"]
dis_p4 = all_res[i+4]["CA"] - all_res[j-4]["CA"]
rho1 = 0.5*(1+np.tanh(eta*(r0-dis)))
rho2 = 0.5*(1+np.tanh(eta*(r0-dis_p4)))
show = gamma * rho1 * rho2
a_ += show
# if show > 1e-6:
# print(i, j, show, rho1, rho2)
# if i == 0:
# print(i, j, show, rho1, rho2)
# break
print(a_)
In [ ]:
# AP term
In [54]:
print(a_)
In [60]:
def get_pap_gamma_P(donor_idx, acceptor_idx, chain_i, chain_j, gamma_P):
if (donor_idx - acceptor_idx >= 9) or chain_i != chain_j:
return gamma_P
else:
return 0
In [61]:
eta = 7
r0 = 8
a_ = 0
for i in range(n-4):
for j in range(n-4):
chain_i = chains.iloc[i][1]
chain_j = chains.iloc[j][1]
gamma = get_pap_gamma_P(j, i, chain_i, chain_j, 0.4)
dis = all_res[i]["CA"] - all_res[j]["CA"]
dis_p4 = all_res[i+4]["CA"] - all_res[j+4]["CA"]
rho1 = 0.5*(1+np.tanh(eta*(r0-dis)))
rho2 = 0.5*(1+np.tanh(eta*(r0-dis_p4)))
show = gamma * rho1 * rho2
a_ += show
# if show > 1e-6:
# print(i, j, show)
In [62]:
print(a_)
In [56]:
eta = 7
r0 = 8
a_ = 0
for i in range(n-4):
for j in range(i+9, n-4):
gamma = 0.4
dis = all_res[i]["CA"] - all_res[j]["CA"]
dis_p4 = all_res[i+4]["CA"] - all_res[j+4]["CA"]
rho1 = 0.5*(1+np.tanh(eta*(r0-dis)))
rho2 = 0.5*(1+np.tanh(eta*(r0-dis_p4)))
show = gamma * rho1 * rho2
a_ += show
# if show > 1e-6:
# print(i, j, show)
In [57]:
print(a_)
In [30]:
-22.983010/22.350501756
Out[30]:
In [7]:
from pdbfixer import PDBFixer
In [2]:
pdbfixer.absolute_import
Out[2]:
In [64]:
fixer = PDBFixer(filename="/Users/weilu/Research/server/nov_2019/benchmark/20191025_benchmark/original/6d3r.pdb")
In [8]:
fixer.findMissingResidues()
# add missing residues in the middle of a chain, not ones at the start or end of the chain.
chains = list(fixer.topology.chains())
keys = fixer.missingResidues.keys()
In [67]:
keys
Out[67]:
In [9]:
fixer = PDBFixer(pdbid='1VII')
fixer.findMissingResidues()
missing_residues = fixer.missingResidues
missing_residues
Out[9]:
In [69]:
In [11]:
fixer = pdbfixer.PDBFixer(pdbid="5uak")
fixer.findMissingResidues()
keys = fixer.missingResidues.keys()
keys
Out[11]:
In [71]:
6D3R
Out[71]:
In [72]:
fixer.missingResidues.keys()
Out[72]:
In [4]:
fixer = pdbfixer.PDBFixer(filename="/Users/weilu/Research/server/nov_2019/benchmark/20191025_benchmark/original/6d3r.pdb")
fixer.findMissingResidues()
keys = fixer.missingResidues.keys()
keys
Out[4]:
In [73]:
fixer = PDBFixer(filename="/Users/weilu/Research/server/nov_2019/benchmark/20191025_benchmark/original/6jdq.pdb")
fixer.findMissingResidues()
keys = fixer.missingResidues.keys()
keys
Out[73]:
In [74]:
fixer.missingResidues
Out[74]:
In [ ]: