In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
sys.path.insert(0,'/Users/weilu/Research/opt_server/')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
from small_script.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
plt.rcParams['figure.figsize'] = np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
In [3]:
pre = "/Users/weilu/Research/server/may_week2_2020/develop_continuous_fitted_potential/"
In [108]:
def planar_angle(a, b, c):
ba = a - b
bc = c - b
cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
angle = np.arccos(cosine_angle)
return angle
def get_dihedral_angle(p0, p1, p2, p3):
b0 = -1.0*(p1 - p0)
b1 = p2 - p1
b2 = p3 - p2
# normalize b1 so that it does not influence magnitude of vector
# rejections that come next
b1 /= np.linalg.norm(b1)
# vector rejections
# v = projection of b0 onto plane perpendicular to b1
# = b0 minus component that aligns with b1
# w = projection of b2 onto plane perpendicular to b1
# = b2 minus component that aligns with b1
v = b0 - np.dot(b0, b1)*b1
w = b2 - np.dot(b2, b1)*b1
# angle between v and w in a plane is the torsion angle
# v and w may not be normalized but that's fine since tan is y/x
x = np.dot(v, w)
y = np.dot(np.cross(b1, v), w)
dihedral = np.arctan2(y, x)
return dihedral
In [180]:
theta = np.load("/Users/weilu/Research/server/may_week2_2020/develop_angle_input_potential/T0958/theta.npz")
omega = np.load("/Users/weilu/Research/server/may_week2_2020/develop_angle_input_potential/T0958/omega.npz")
phi = np.load("/Users/weilu/Research/server/may_week2_2020/develop_angle_input_potential/T0958/phi.npz")
thetaspline = theta["thetaspline"]
omegaspline = omega["omegaspline"]
phispline = phi["phispline"]
omega = "-3.53429174 -3.27249235 -3.01069296 -2.74889357 -2.48709418 -2.2252948\
-1.96349541 -1.70169602 -1.43989663 -1.17809725 -0.91629786 -0.65449847\
-0.39269908 -0.13089969 0.13089969 0.39269908 0.65449847 0.91629786\
1.17809725 1.43989663 1.70169602 1.96349541 2.2252948 2.48709418\
2.74889357 3.01069296 3.27249235 3.53429174"
theta = "-3.53429174 -3.27249235 -3.01069296 -2.74889357 -2.48709418 -2.2252948\
-1.96349541 -1.70169602 -1.43989663 -1.17809725 -0.91629786 -0.65449847\
-0.39269908 -0.13089969 0.13089969 0.39269908 0.65449847 0.91629786\
1.17809725 1.43989663 1.70169602 1.96349541 2.2252948 2.48709418\
2.74889357 3.01069296 3.27249235 3.53429174"
phi = "-0.39269908 -0.13089969 0.13089969 0.39269908 0.65449847 0.91629786\
1.17809725 1.43989663 1.70169602 1.96349541 2.2252948 2.48709418\
2.74889357 3.01069296 3.27249235 3.53429174"
omega_x = [float(a) for a in omega.split()]
theta_x = [float(a) for a in theta.split()]
phi_x = [float(a) for a in phi.split()]
# spline fit
from scipy.interpolate import interp1d
x = omega_x
spline = omegaspline
num_of_points = 100
n = spline.shape[0]
interaction_list = []
index_list = []
xnew = np.linspace(min(x), max(x), num=num_of_points, endpoint=True)
for i in range(n):
for j in range(i+1, n):
if np.alltrue(spline[i][j] == 0):
continue
y = spline[i][j]
f = interp1d(x, y, kind='cubic')
ynew = f(xnew)
interaction_list.append(ynew)
index_list.append([i, j])
index_array = np.array(index_list)
interaction_array = np.array(interaction_list)
In [203]:
In [202]:
def get_omega(res1, res2):
p0 = res1["CA"].get_coord()
p1 = res1["CB"].get_coord()
p2 = res2["CB"].get_coord()
p3 = res2["CA"].get_coord()
angle = get_dihedral_angle(p0, p1, p2, p3)
return angle
def get_theta(res1, res2):
p0 = res1["N"].get_coord()
p1 = res1["CA"].get_coord()
p2 = res1["CB"].get_coord()
p3 = res2["CB"].get_coord()
angle = get_dihedral_angle(p0, p1, p2, p3)
return angle
def get_phi(res1, res2):
a = res1["CA"].get_coord()
b = res1["CB"].get_coord()
c = res2["CB"].get_coord()
angle = planar_angle(a, b, c)
return angle
def compute_ml_energy(s, index_array, interaction_array, x, get_function=get_omega, num_of_points=100):
all_res = list(s.get_residues())
angle_max = max(x)
angle_min = min(x)
d_angle = (angle_max-angle_min)/(num_of_points-1)
max_angle_index_1 = num_of_points - 2
energy = 0.0
for index_pair, interaction in zip(index_array, interaction_array):
i,j = index_pair
res1 = all_res[i]
res2 = all_res[j]
if res1.resname == "GLY" or res2.resname == "GLY":
continue
# r = get_r(all_res, i, j)
# if r > 20:
# continue
angle = get_function(res1, res2)
if angle < -np.pi or angle > np.pi:
print(angle, i, j)
# print(i,j,interaction, r)
angle_index_1 = int(min(max_angle_index_1, np.floor((angle-angle_min)/d_angle)))
angle_index_2 = angle_index_1 + 1
angle_1 = angle_min + d_angle * angle_index_1
angle_2 = angle_min + d_angle * angle_index_2
v1 = interaction[angle_index_1]
v2 = interaction[angle_index_2]
e = ((v2-v1)*angle+v1*angle_2-v2*angle_1)/(angle_2-angle_1)
energy += e
return energy
def compute_ml(s, x, spline, get_function):
num_of_points = 100
n = spline.shape[0]
interaction_list = []
index_list = []
xnew = np.linspace(min(x), max(x), num=num_of_points, endpoint=True)
for i in range(n):
for j in range(i+1, n):
if np.alltrue(spline[i][j] == 0):
continue
y = spline[i][j]
f = interp1d(x, y, kind='cubic')
ynew = f(xnew)
interaction_list.append(ynew)
index_list.append([i, j])
index_array = np.array(index_list)
interaction_array = np.array(interaction_list)
energy = compute_ml_energy(s, index_array, interaction_array, x, get_function=get_function, num_of_points=num_of_points)
return energy
In [209]:
pdbFile = "/Users/weilu/Research/server/may_week2_2020/develop_continuous_fitted_potential/6btc.pdb"
s = parser.get_structure("X", pdbFile)
all_res = list(s.get_residues())
# compute_ml_omega_energy(s, index_array, interaction_array, x)
In [206]:
compute_ml(s, x, spline, get_omega)
Out[206]:
In [207]:
compute_ml(s, theta_x, thetaspline, get_theta)
Out[207]:
In [210]:
compute_ml(s, phi_x, phispline, get_phi)
Out[210]:
In [208]:
pdbFile = "/Users/weilu/Research/server/may_week2_2020/develop_continuous_fitted_potential/T0958_init.pdb"
s = parser.get_structure("X", pdbFile)
all_res = list(s.get_residues())
compute_ml(s, theta_x, thetaspline, get_theta)
Out[208]:
In [194]:
pdbFile = "/Users/weilu/Research/server/may_week2_2020/develop_continuous_fitted_potential/T0958_init.pdb"
s = parser.get_structure("X", pdbFile)
all_res = list(s.get_residues())
compute_ml_omega_energy(s, index_array, interaction_array, x)
Out[194]:
In [94]:
res1 = all_res[0]
res2 = all_res[1]
In [ ]:
p0 = res1["CA"].co
In [195]:
pdbFile = "/Users/weilu/Research/server/may_week2_2020/develop_angle_input_potential/6msp.pdb"
s = parser.get_structure("X", pdbFile)[0]
all_res = list(s.get_residues())[20:]
compute_ml_omega_energy(s, index_array, interaction_array, x)
Out[195]:
In [176]:
n = len(all_res)
table = np.zeros((n,n))
for i in range(n):
for j in range(n):
res1 = all_res[i]
res2 = all_res[j]
if res1.resname == "GLY" or res2.resname == "GLY":
continue
if i == j:
continue
r = get_r(all_res, i, j)
table[i][j] = r
In [177]:
table[table==0] = 20
table[table>20] = 20
plt.imshow(table, cmap="hot")
plt.colorbar()
Out[177]:
In [178]:
n = len(all_res)
table = np.zeros((n,n))
for i in range(n):
for j in range(n):
res1 = all_res[i]
res2 = all_res[j]
if res1.resname == "GLY" or res2.resname == "GLY":
continue
if i == j:
continue
r = get_r(all_res, i, j)
if r > 20:
continue
angle = get_omega(res1, res2)
table[i][j] = angle
In [179]:
table[table==0] = np.pi
table = table / 3.14 * 180
plt.imshow(table, cmap="hot")
plt.colorbar()
Out[179]:
In [198]:
n = len(all_res)
table = np.zeros((n,n))
for i in range(n):
for j in range(n):
res1 = all_res[i]
res2 = all_res[j]
if res1.resname == "GLY" or res2.resname == "GLY":
continue
if i == j:
continue
r = get_r(all_res, i, j)
if r > 20:
continue
angle = get_omega(res1, res2)
table[i][j] = angle
table[table==0] = np.pi
table = table / 3.14 * 180
plt.imshow(table, cmap="hot")
plt.colorbar()
Out[198]:
In [201]:
n = len(all_res)
table = np.zeros((n,n))
for i in range(n):
for j in range(n):
res1 = all_res[i]
res2 = all_res[j]
if res1.resname == "GLY" or res2.resname == "GLY":
continue
if i == j:
continue
r = get_r(all_res, i, j)
if r > 20:
continue
angle = get_phi(res1, res2)
table[i][j] = angle
table[table==0] = np.pi
table = table / 3.14 * 180
plt.imshow(table, cmap="hot")
plt.colorbar()
Out[201]:
In [197]:
In [200]:
In [111]:
get_omega(res1, res2)
Out[111]:
In [112]:
get_omega(res2, res1)
Out[112]:
In [121]:
plt.plot(xnew, interaction_array[0])
Out[121]:
In [123]:
plt.plot(omega_x, omegaspline[0][1])
Out[123]:
In [75]:
index_array.shape
Out[75]:
In [76]:
interaction_array.shape
Out[76]:
In [92]:
f(-np.pi)
Out[92]:
In [93]:
f(np.pi)
Out[93]:
In [85]:
plt.plot(xnew, ynew)
plt.plot(x, y, '-o')
Out[85]:
In [79]:
f(3.15)
Out[79]:
In [31]:
def get_cb(res):
try:
cb = res["CB"]
except:
cb = res["CA"]
return cb
def get_r(all_res, i, j):
res1 = all_res[i]
res2 = all_res[j]
try:
cb1 = res1["CB"]
except:
cb1 = res1["CA"]
try:
cb2 = res2["CB"]
except:
cb2 = res2["CA"]
r = cb1 - cb2
return r
def compute_machine_learning_energy(s, index_array, interaction_array, num_of_points=100):
x = [0.0, 2.0, 3.5, 4.25, 4.75, 5.25, 5.75, 6.25, 6.75, 7.25, 7.75, 8.25, 8.75, 9.25, 9.75, 10.25, 10.75, 11.25, 11.75, 12.25, 12.75, 13.25, 13.75, 14.25, 14.75, 15.25, 15.75, 16.25, 16.75, 17.25, 17.75, 18.25, 18.75, 19.25, 19.75]
all_res = list(s.get_residues())
r_max = max(x)
r_min = min(x)
dr = (r_max-r_min)/(num_of_points-1)
max_r_index_1 = num_of_points - 2
energy = 0.0
for index_pair, interaction in zip(index_array, interaction_array):
i,j = index_pair
r = get_r(all_res, i, j)
r = min(r, max(x))
# print(i,j,interaction, r)
r_index_1 = int(min(max_r_index_1, np.floor(r/dr)))
r_index_2 = r_index_1 + 1
r_1 = r_min + dr * r_index_1
r_2 = r_min + dr * r_index_2
v1 = interaction[r_index_1]
v2 = interaction[r_index_2]
e = ((v2-v1)*r+v1*r_2-v2*r_1)/(r_2-r_1)
energy += e
return energy
In [7]:
parser = PDBParser()
pdbFile = "/Users/weilu/Research/server/may_week2_2020/develop_continuous_fitted_potential/6btc.pdb"
s = parser.get_structure("X", pdbFile)
all_res = list(s.get_residues())
data = np.load(f"{pre}/setups/ml_data.npz")
index_array = data["index_array"]
interaction_array = data["interaction_array"]
print(compute_machine_learning_energy(s, index_array, interaction_array))
In [35]:
a = get_cb(all_res[0]).get_coord()
b = get_cb(all_res[1]).get_coord()
c = get_cb(all_res[2]).get_coord()
ba = a - b
bc = c - b
cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
angle = np.arccos(cosine_angle)
print(np.degrees(angle))
In [199]:
In [36]:
angle
Out[36]:
In [98]:
"""Praxeolitic formula
1 sqrt, 1 cross product"""
p0 = get_cb(all_res[0]).get_coord()
p1 = get_cb(all_res[1]).get_coord()
p2 = get_cb(all_res[2]).get_coord()
p3 = get_cb(all_res[8]).get_coord()
b0 = -1.0*(p1 - p0)
b1 = p2 - p1
b2 = p3 - p2
# normalize b1 so that it does not influence magnitude of vector
# rejections that come next
b1 /= np.linalg.norm(b1)
# vector rejections
# v = projection of b0 onto plane perpendicular to b1
# = b0 minus component that aligns with b1
# w = projection of b2 onto plane perpendicular to b1
# = b2 minus component that aligns with b1
v = b0 - np.dot(b0, b1)*b1
w = b2 - np.dot(b2, b1)*b1
# angle between v and w in a plane is the torsion angle
# v and w may not be normalized but that's fine since tan is y/x
x = np.dot(v, w)
y = np.dot(np.cross(b1, v), w)
dihedral = np.arctan2(y, x)
print(dihedral)
In [99]:
In [104]:
get_dihedral_angle(p2, p3, p0, p1)
Out[104]:
In [63]:
parser = PDBParser()
pdbFile = "/Users/weilu/Research/server/may_week2_2020/develop_continuous_fitted_potential/T0958_init.pdb"
s = parser.get_structure("X", pdbFile)
all_res = list(s.get_residues())
data = np.load(f"{pre}/setups/ml_data.npz")
index_array = data["index_array"]
interaction_array = data["interaction_array"]
print(compute_machine_learning_energy(s, index_array, interaction_array))
In [17]:
parser = PDBParser()
data = np.load(f"{pre}/setups/ml_data.npz")
index_array = data["index_array"]
interaction_array = data["interaction_array"]
info_ = []
for i in range(1, 100):
n = str(i).zfill(4)
pdbFile = f"/Users/weilu/Research/server/may_week2_2020/develop_continuous_fitted_potential/Archive/T0958_init_{n}.pdb"
s = parser.get_structure("X", pdbFile)
energy = compute_machine_learning_energy(s, index_array, interaction_array)
q = compute_Q_from_two_pdb(pdbFileNative, pdbFile)
info_.append([i, energy, q])
In [18]:
info = pd.DataFrame(info_, columns=["frame", "energy", "Q"])
In [20]:
sp = np.loadtxt("/Users/weilu/Research/server/may_week2_2020/develop_continuous_fitted_potential/spline_potential")
In [43]:
info["sp"] = sp
info["energy_norm"] = info["energy"] / info["energy"].min()
info["sp_norm"] = info["sp"] / info["sp"].min()
info["sp_over_energy"] = info["sp"] / info["energy"]
In [58]:
sns.scatterplot("energy_norm", "Q", data=info)
sns.scatterplot("sp_norm", "Q", data=info)
Out[58]:
In [60]:
sns.scatterplot("energy", "sp", data=info, hue="Q")
Out[60]:
In [48]:
# spline fit
a = np.load(f"{pre}/dist.npz")
distspline = a['distspline']
from scipy.interpolate import interp1d
num_of_points = 100
n = distspline.shape[0]
interaction_list = []
index_list = []
x = [0.0, 2.0, 3.5, 4.25, 4.75, 5.25, 5.75, 6.25, 6.75, 7.25, 7.75, 8.25, 8.75, 9.25, 9.75, 10.25, 10.75, 11.25, 11.75, 12.25, 12.75, 13.25, 13.75, 14.25, 14.75, 15.25, 15.75, 16.25, 16.75, 17.25, 17.75, 18.25, 18.75, 19.25, 19.75]
xnew = np.linspace(min(x), max(x), num=num_of_points, endpoint=True)
for i in range(n):
for j in range(i+1, n):
if np.alltrue(distspline[i][j] == 0):
continue
y = distspline[i][j]
f = interp1d(x, y)
ynew = f(xnew)
interaction_list.append(ynew)
index_list.append([i, j])
index_array = np.array(index_list)
interaction_array = np.array(interaction_list)
In [101]:
np.savez(f"{pre}/setups/ml_data.npz", index_array=index_array, interaction_array=interaction_array)
In [51]:
compute_machine_learning_energy(s, index_array, interaction_array, num_of_points=200)
Out[51]:
In [71]:
index_array.shape
Out[71]:
In [70]:
interaction_array.shape
Out[70]:
In [65]:
ynew
Out[65]:
In [61]:
info
Out[61]:
In [95]:
r_max = max(x)
r_min = min(x)
dr = (r_max-r_min)/(num_of_points-1)
max_r_index_1 = num_of_points - 2
energy = 0.0
for index_pair, interaction in zip(index_array, interaction_array):
i,j = index_pair
r = get_r(all_res, i, j)
# print(i,j,interaction, r)
r_index_1 = int(min(max_r_index_1, np.floor(r/dr)))
r_index_2 = r_index_1 + 1
r_1 = r_min + dr * r_index_1
r_2 = r_min + dr * r_index_2
v1 = interaction[r_index_1]
v2 = interaction[r_index_2]
e = ((v2-v1)*r+v1*r_2-v2*r_1)/(r_2-r_1)
energy += e
print(energy)
In [ ]:
In [ ]:
In [57]:
from scipy.interpolate import interp1d
y = distspline[40,48]
f = interp1d(x, y)
f2 = interp1d(x, y, kind='cubic')
xnew = np.linspace(min(x), max(x), num=100, endpoint=True)
plt.plot(x, y, 'o', xnew, f(xnew), '-', xnew, f2(xnew), '--')
plt.legend(['data', 'linear', 'cubic'], loc='best')
plt.show()
In [60]:
20/100
Out[60]:
In [62]:
from scipy.interpolate import interp1d
y = distspline[41,54]
f = interp1d(x, y)
f2 = interp1d(x, y, kind='cubic')
xnew = np.linspace(min(x), max(x), num=100, endpoint=True)
plt.plot(x, y, 'o', xnew, f(xnew), '-', xnew, f2(xnew), '--')
plt.legend(['data', 'linear', 'cubic'], loc='best')
plt.show()
In [6]:
In [9]:
a.values()
Out[9]:
In [14]:
In [15]:
distspline.shape
Out[15]:
In [16]:
distspline[0][0]
Out[16]:
In [50]:
x = "0.000 2.000 3.500 4.250 4.750 5.250 5.750 6.250 6.750 7.250 7.750 8.250 8.750 9.250 9.750 10.250 10.750 11.250 11.750 12.250 12.750 13.250 13.750 14.250 14.750 15.250 15.750 16.250 16.750 17.250 17.750 18.250 18.750 19.250 19.750"
x = [float(r) for r in x.split()]
In [59]:
print(x)
In [23]:
len(x)
Out[23]:
In [38]:
distspline.min()
Out[38]:
In [39]:
distspline.max()
Out[39]:
In [26]:
dis_sum = np.sum(distspline, axis=2)
In [36]:
np.unravel_index(dis_sum.argmin(), dis_sum.shape)
Out[36]:
In [55]:
plt.plot(x, distspline[40,48], 'o-')
Out[55]:
In [32]:
np.unravel_index(dis_sum.argmax(), dis_sum.shape)
Out[32]:
In [35]:
plt.plot(x, distspline[41,54])
Out[35]:
In [27]:
plt.imshow(dis_sum)
Out[27]:
In [10]:
In [12]:
In [62]:
theta_sum = thetaspline.sum(axis=2)
In [63]:
plt.imshow(theta_sum ==0, origin=0)
Out[63]:
In [ ]:
In [58]:
theta_sum.shape
Out[58]:
In [18]:
plt.plot(thetaspline[0][1])
Out[18]:
In [15]:
theta["thetaspline"].shape
Out[15]:
In [17]:
omega["omegaspline"].shape
Out[17]:
In [19]:
omega["omegaspline"].max()
Out[19]:
In [ ]:
omega[""]
In [22]:
In [26]:
In [27]:
omega
Out[27]:
In [52]:
thetaspline[-5][61]
Out[52]:
In [72]:
len(theta_x)
Out[72]:
In [65]:
len(phi_x)
Out[65]:
In [71]:
anglestep = 10
nbins = 28
np.linspace(-np.pi-1.5*anglestep, np.pi+1.5*anglestep, nbins)
Out[71]:
In [66]:
thetaspline.shape
Out[66]:
In [53]:
plt.plot(thetaspline[-5][61])
Out[53]:
In [43]:
seq = "MNKKSKQQEKLYNFIIAKSFQQPVGSTFTYGELRKKYNVVCSTNDQREVGRRFAYWIKYTPGLPFKIVGTKNGSLLYQKIGINPC"
In [64]:
seq[80]
Out[64]:
In [55]:
seq[61]
Out[55]:
In [50]:
len(seq)
Out[50]:
In [ ]: