In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
sys.path.insert(0,'/Users/weilu/Research/opt_server/')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
from Bio.PDB.Polypeptide import d1_to_index
from Bio.PDB.Polypeptide import dindex_to_1
from Bio.PDB.Polypeptide import aa3
In [3]:
plt.rcParams['figure.figsize'] = [16.18033, 10] #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
In [4]:
import plotly.express as px
In [5]:
import pickle
gmm_dic = {}
res_type_list = ['GLY', 'ALA', 'VAL', 'CYS', 'PRO', 'LEU', 'ILE', 'MET', 'TRP', 'PHE', 'SER', 'THR', 'TYR', 'GLN', 'ASN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU']
for res_type in res_type_list[1:]:
gmm_dic[res_type] = pickle.load(open(f"/Users/weilu/opt/parameters/side_chain/gmm_{res_type}.pkl", mode='rb'))
In [6]:
parser = PDBParser()
# pre = "/Users/weilu/Research/server/may_week1_2020/evaluation_simulation/run1/1r69/T_300/"
pre = "/Users/weilu/Research/server/may_week1_2020/evaluation_simulation/setups/1r69/"
pdbFile = f"{pre}/cbd-openmmawsem.pdb"
structure = parser.get_structure("X", pdbFile)
seq = read_fasta(f"{pre}/crystal_structure.fasta")
data_crystal_native = get_predicted_side_chain_state(structure[0], seq, gmm_dic)
In [8]:
parser = PDBParser()
# pre = "/Users/weilu/Research/server/may_week1_2020/evaluation_simulation/setups/1r69/"
# pdbFile = f"{pre}/cbd-openmmawsem.pdb"
pre = "/Users/weilu/Research/server/may_week1_2020/evaluation_simulation/run1/1r69/T_300/"
pdbFile = f"{pre}/movie.pdb"
structure = parser.get_structure("X", pdbFile)
seq = read_fasta(f"{pre}/crystal_structure.fasta")
data = get_predicted_side_chain_state(structure[0], seq, gmm_dic)
In [10]:
In [11]:
len(models)
Out[11]:
In [45]:
data_submodes = []
for submode in [2, 3, 4, 5, 1]:
pre = f"/Users/weilu/Research/server/may_week1_2020/evaluation_simulation/run1/1r69/T_300_subMode{submode}/"
pdbFile = f"{pre}/movie.pdb"
structure = parser.get_structure("X", pdbFile)
models = list(structure.get_models())
all_data = []
for model in models:
data_ = get_predicted_side_chain_state(model, seq, gmm_dic, verbose=False)
all_data.append(data_)
data = pd.concat(all_data).reset_index(drop=True).assign(subMode=submode)
data_submodes.append(data)
In [46]:
data = pd.concat(data_submodes).reset_index(drop=True)
In [16]:
data = pd.concat(all_data).reset_index(drop=True)
In [19]:
data_res = data.query("index==6")
In [27]:
data.dtypes
Out[27]:
In [97]:
convert = {1:"k_0.1", 2:"k_0.2", 3:"k_0.4", 4:"k_0.8", 5:"k_0.05"}
data["k"] = data["subMode"].apply(lambda x: convert[x])
In [60]:
data_database = pd.read_csv("/Users/weilu/Research/data/survey_represent_x_com_complete.csv", index_col=0)
res_type_list = ['GLY', 'ALA', 'VAL', 'CYS', 'PRO', 'LEU', 'ILE', 'MET', 'TRP', 'PHE', 'SER', 'THR', 'TYR', 'GLN', 'ASN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU']
# import plotly.express as px
In [118]:
sampled = pd.DataFrame(gmm_dic["ALA"].sample(5000)[0], columns=["r1", "r2", "r3"])
# fig = px.scatter_3d(sampled, x='r1', y='r2', z='r3', opacity=0.01)
# fig.show()
sns.jointplot("r1", "r2", kind="kde", data=sampled, xlim=[1, 5], ylim=[1, 5])
Out[118]:
In [120]:
gmm = gmm_dic["ALA"]
In [121]:
gmm.means_
Out[121]:
In [119]:
res = "LEU"
sampled = pd.DataFrame(gmm_dic[res].sample(5000)[0], columns=["r1", "r2", "r3"])
# fig = px.scatter_3d(sampled, x='r1', y='r2', z='r3', opacity=0.01)
# fig.show()
sns.jointplot("r1", "r2", kind="kde", data=sampled, xlim=[1, 5], ylim=[1, 5])
Out[119]:
In [70]:
sampled = pd.DataFrame(gmm_dic[res].sample(5000)[0], columns=["r1", "r2", "r3"])
# fig = px.scatter_3d(sampled, x='r1', y='r2', z='r3', opacity=0.01)
# fig.show()
sns.jointplot("r1", "r2", kind="kde", data=sampled, xlim=[1, 5], ylim=[1, 5])
Out[70]:
In [68]:
# data_res = data.query(f"index==32 and k=='k_0.1'")
res = "GLU"
data_res = data_database.query(f"ResName == '{res}'")
sns.jointplot("r1", "r2", kind="kde", data=data_res, xlim=[1, 5], ylim=[1, 5])
data_res = data.query(f"index==32 and k=='k_0.1'")
sns.jointplot("r1", "r2", kind="kde", data=data_res, xlim=[1, 5], ylim=[1, 5])
Out[68]:
In [98]:
In [99]:
In [100]:
d["k"].unique()
Out[100]:
In [103]:
res = "GLU"
selected_database = data_database.query(f"ResName == '{res}'").assign(k="database")
n = len(selected_database)
sampled = pd.DataFrame(gmm_dic[res].sample(n)[0], columns=["r1", "r2", "r3"]).assign(k="sampled")
selected_simulation = data.query(f"index==32")
d = pd.concat([selected_database, sampled, selected_simulation], sort=False)
d['k'] = pd.Categorical(d['k'], ["database", "sampled", "k_0.05", "k_0.1", "k_0.2", "k_0.4", "k_0.8"])
In [108]:
g = sns.FacetGrid(d, col="k", col_wrap=4)
g.map(sns.kdeplot, "r1", "r2")
Out[108]:
In [109]:
g = sns.FacetGrid(d, col="k", col_wrap=4)
g.map(sns.kdeplot, "r1", "r3")
Out[109]:
In [110]:
res = "LEU"
selected_database = data_database.query(f"ResName == '{res}'").assign(k="database")
n = len(selected_database)
sampled = pd.DataFrame(gmm_dic[res].sample(n)[0], columns=["r1", "r2", "r3"]).assign(k="sampled")
selected_simulation = data.query(f"index==15")
d = pd.concat([selected_database, sampled, selected_simulation], sort=False)
d['k'] = pd.Categorical(d['k'], ["database", "sampled", "k_0.05", "k_0.1", "k_0.2", "k_0.4", "k_0.8"])
In [111]:
g = sns.FacetGrid(d, col="k", col_wrap=4)
g.map(sns.kdeplot, "r1", "r2")
Out[111]:
In [112]:
g = sns.FacetGrid(d, col="k", col_wrap=4)
g.map(sns.kdeplot, "r1", "r3")
Out[112]:
In [113]:
res = "LEU"
index = 48 # leu buried inside.
selected_database = data_database.query(f"ResName == '{res}'").assign(k="database")
n = len(selected_database)
sampled = pd.DataFrame(gmm_dic[res].sample(n)[0], columns=["r1", "r2", "r3"]).assign(k="sampled")
selected_simulation = data.query(f"index=={index}")
d = pd.concat([selected_database, sampled, selected_simulation], sort=False)
d['k'] = pd.Categorical(d['k'], ["database", "sampled", "k_0.05", "k_0.1", "k_0.2", "k_0.4", "k_0.8"])
In [114]:
g = sns.FacetGrid(d, col="k", col_wrap=4)
g.map(sns.kdeplot, "r1", "r2")
Out[114]:
In [115]:
g = sns.FacetGrid(d, col="k", col_wrap=4)
g.map(sns.kdeplot, "r1", "r3")
Out[115]:
In [37]:
res = "VAL"
data_res = data.query(f"index==6 and k!='k_0.1'")
print(res, data_res.shape)
fig = px.scatter_3d(data_res, x='r1', y='r2', z='r3', opacity=0.1, color="k")
fig.show()
In [48]:
res = "VAL"
data_res = data.query(f"index==6")
print(res, data_res.shape)
fig = px.scatter_3d(data_res, x='r1', y='r2', z='r3', opacity=0.1, color="k")
fig.show()
In [22]:
res = "VAL"
data_res = data.query(f"index==6")
print(res, data_res.shape)
fig = px.scatter_3d(data_res, x='r1', y='r2', z='r3', opacity=0.1)
fig.show()
In [21]:
res = "VAL"
data_res = data.query(f"ResName == '{res}'")
print(res, data_res.shape)
fig = px.scatter_3d(data_res, x='r1', y='r2', z='r3', opacity=0.1)
fig.show()
In [7]:
data_crystal_native
Out[7]:
In [ ]:
all_data = []
for model in models:
data_ = get_predicted_side_chain_state(model, seq, gmm_dic, verbose=False)
all_data.append(data_)
In [20]:
a = pd.read_csv("/Users/weilu/Research/server/mar_2020/side_chain_states/1bgf_iteration_0_stronger_exclude_volume.csv", index_col=0)
In [5]:
data = pd.read_csv("/Users/weilu/Research/server/mar_2020/side_chain_states/survey_represent_x_com_complete.csv", index_col=0)
res_type_list = ['GLY', 'ALA', 'VAL', 'CYS', 'PRO', 'LEU', 'ILE', 'MET', 'TRP', 'PHE', 'SER', 'THR', 'TYR', 'GLN', 'ASN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU']
In [30]:
import pickle
gmm_dic = {}
res_type_list = ['GLY', 'ALA', 'VAL', 'CYS', 'PRO', 'LEU', 'ILE', 'MET', 'TRP', 'PHE', 'SER', 'THR', 'TYR', 'GLN', 'ASN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU']
for res_type in res_type_list[1:]:
gmm_dic[res_type] = pickle.load(open(f"/Users/weilu/opt/parameters/side_chain/gmm_{res_type}.pkl", mode='rb'))
In [45]:
parser = PDBParser()
pre = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/1bgf/"
pdbFile = f"{pre}/cbd-openmmawsem.pdb"
structure = parser.get_structure("X", pdbFile)
seq = read_fasta(f"{pre}/crystal_structure.fasta")
data_crystal_native = get_predicted_side_chain_state(structure[0], seq, gmm_dic)
In [40]:
gmm_dic["VAL"].score(np.array([2.48, 1.58, 2.56]).reshape(1, -1))
Out[40]:
In [41]:
gmm_dic["VAL"].score(np.array([2.48, 1.9, 2.56]).reshape(1, -1))
Out[41]:
In [46]:
data_crystal_native.query("ResName == 'VAL'")
Out[46]:
In [44]:
data_crystal_native.query("ResName == 'VAL'")
Out[44]:
In [22]:
a.head()
Out[22]:
In [9]:
b = a.head().reset_index(drop=True)
In [11]:
a= a.rename(columns={"cbd_n":"r1", "cbd_ca":"r2", "cbd_c":"r3", "ResType":"ResName"})
In [21]:
a.query(f"Frame==-1 and ResName == '{res}'")
Out[21]:
In [18]:
a.query(f"Frame=='native' and ResName == '{res}'")
Out[18]:
In [14]:
res = "VAL"
a.query(f"ResName == '{res}' and r2 < 1.59")
Out[14]:
In [52]:
import plotly.express as px
res = "VAL"
a_data = a.query(f"ResName == '{res}' and (Frame > 1980)").reset_index(drop=True)
# print(res, data_res.shape)
fig = px.scatter_3d(a_data, x='r1', y='r2', z='r3', opacity=0.1, color="index")
fig.show()
In [54]:
a = np.random.randint(5, size=(4,2,1))
In [ ]:
In [50]:
# res_type_list = ['GLN', 'ASN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU']
res = "VAL"
data_res = data.query(f"ResName == '{res}'").reset_index(drop=True)
data_res = data_res[["r1", "r2", "r3"]]
data_res["source"] = "database"
# gmm = gmm_dic[res]
# a = gmm.sample(10000)[0]
# a_data = pd.DataFrame(a, columns=["r1", "r2", "r3"])
a_data = a.query(f"ResName == '{res}' and (Frame > 1980)").reset_index(drop=True)
a_data = a_data[["r1", "r2", "r3"]]
a_data["source"] = "sampled"
combined = pd.concat([a_data, data_res])
fig = px.scatter_3d(combined, x='r1', y='r2', z='r3', color="source", opacity=0.1)
fig.show()
In [23]:
# res_type_list = ['GLN', 'ASN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU']
res = "VAL"
data_res = data.query(f"ResName == '{res}'").reset_index(drop=True)
data_res = data_res[["r1", "r2", "r3"]]
data_res["source"] = "database"
# gmm = gmm_dic[res]
# a = gmm.sample(10000)[0]
# a_data = pd.DataFrame(a, columns=["r1", "r2", "r3"])
a_data = a.query(f"ResName == '{res}' and Frame != 1").reset_index(drop=True)
a_data = a_data[["r1", "r2", "r3"]]
a_data["source"] = "sampled"
combined = pd.concat([a_data, data_res])
fig = px.scatter_3d(combined, x='r1', y='r2', z='r3', color="source", opacity=0.1)
fig.show()
In [15]:
# res_type_list = ['GLN', 'ASN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU']
res = "VAL"
data_res = data.query(f"ResName == '{res}'").reset_index(drop=True)
data_res = data_res[["r1", "r2", "r3"]]
data_res["source"] = "database"
# gmm = gmm_dic[res]
# a = gmm.sample(10000)[0]
# a_data = pd.DataFrame(a, columns=["r1", "r2", "r3"])
a_data = a.query(f"ResName == '{res}'").reset_index(drop=True)
a_data = a_data[["r1", "r2", "r3"]]
a_data["source"] = "sampled"
combined = pd.concat([a_data, data_res])
fig = px.scatter_3d(combined, x='r1', y='r2', z='r3', color="source", opacity=0.1)
fig.show()
In [ ]:
import plotly.express as px
res = "VAL"
data_res = a.query(f"ResName == '{res}'")
print(res, data_res.shape)
fig = px.scatter_3d(data_res, x='r1', y='r2', z='r3', opacity=0.1, color="Frame")
fig.show()
In [6]:
import plotly.express as px
res = "VAL"
data_res = data.query(f"ResName == '{res}'")
print(res, data_res.shape)
fig = px.scatter_3d(data_res, x='r1', y='r2', z='r3', opacity=0.1)
fig.show()
In [ ]:
In [ ]:
In [ ]:
# res_type_list = ['GLN', 'ASN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU']
res = "HIS"
data_res = data.query(f"ResName == '{res}'")
data_res = data_res[["r1", "r2", "r3"]]
data_res["source"] = "database"
gmm = gmm_dic[res]
a = gmm.sample(10000)[0]
a_data = pd.DataFrame(a, columns=["r1", "r2", "r3"])
a_data["source"] = "sampled"
combined = pd.concat([a_data, data_res])
fig = px.scatter_3d(combined, x='r1', y='r2', z='r3', color="source", opacity=0.1)
fig.show()
In [4]:
def dis(a, b):
return ((a[0]-b[0])**2 + (a[1]-b[1])**2 + (a[2]-b[2])**2)**0.5
def compute_side_chain_energy_for_x(x, means, precisions_chol, log_det, weights):
n_features = 3
n_components, _ = means.shape
mean_dot_precisions_chol = np.zeros((3,3))
log_prob = np.zeros(3)
for i in range(n_components):
mean_dot_precisions_chol[i] = np.dot(means[i], precisions_chol[i])
y = np.dot(x, precisions_chol[i]) - mean_dot_precisions_chol[i]
log_prob[i] = np.sum(np.square(y))
log_gaussian_prob = -.5 * (n_features * np.log(2 * np.pi) + log_prob) + log_det
c = np.max(log_gaussian_prob + np.log(weights))
score = np.log(np.sum(np.exp(log_gaussian_prob + np.log(weights) - c))) + c
kt = 1
E_side_chain = -score*kt
# print(E_side_chain)
return E_side_chain
def read_fasta(fastaFile):
seq = ""
with open(fastaFile, "r") as f:
for line in f:
if line[0] == ">":
pass
else:
# print(line)
seq += line.strip()
return seq
def compute_side_chain_energy(structure, seq):
E_side_chain_energy = 0
# parser = PDBParser()
# pdbFile = "/Users/weilu/Research/server/feb_2020/compare_side_chain_with_and_without/native/256_cbd_submode_7_debug/crystal_structure.pdb"
# fastaFile = "/Users/weilu/Research/server/feb_2020/compare_side_chain_with_and_without/native/256_cbd_submode_7_debug/crystal_structure.fasta"
# structure = parser.get_structure("x", pdbFile)
print(seq)
means_dic = {}
precisions_chol_dic = {}
log_det_dic = {}
weights_dic = {}
res_type_list = ['GLY', 'ALA', 'VAL', 'CYS', 'PRO', 'LEU', 'ILE', 'MET', 'TRP', 'PHE', 'SER', 'THR', 'TYR', 'GLN', 'ASN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU']
for res_type in res_type_list:
if res_type == "GLY":
continue
means = np.loadtxt(f"/Users/weilu/opt/parameters/side_chain/{res_type}_means.txt")
precisions_chol = np.loadtxt(f"/Users/weilu/opt/parameters/side_chain/{res_type}_precisions_chol.txt").reshape(3,3,3)
log_det = np.loadtxt(f"/Users/weilu/opt/parameters/side_chain/{res_type}_log_det.txt")
weights = np.loadtxt(f"/Users/weilu/opt/parameters/side_chain/{res_type}_weights.txt")
means_dic[res_type] = means
precisions_chol_dic[res_type] = precisions_chol
log_det_dic[res_type] = log_det
weights_dic[res_type] = weights
for res in structure.get_residues():
if res.get_full_id()[1] != 0:
continue
# x_com = get_side_chain_center_of_mass(res)
# resname = res.resname
resname = one_to_three(seq[res.id[1]-1])
if resname == "GLY":
continue
try:
n = res["N"].get_coord()
ca = res["CA"].get_coord()
c = res["C"].get_coord()
except:
continue
x_com = res["CB"].get_coord()
x = np.array([dis(x_com, n), dis(x_com, ca), dis(x_com, c)])
r_ca_com = dis(x_com, ca)
# resname = "TYR"
if resname == "GLY":
side_chain_energy = 0
else:
side_chain_energy = compute_side_chain_energy_for_x(x, means_dic[resname],
precisions_chol_dic[resname],
log_det_dic[resname],
weights_dic[resname])
if abs(side_chain_energy) > 10:
print(res.id[1], resname, x_com, x, round(side_chain_energy,3), round(r_ca_com,3))
# print(res.id[1], resname, x_com, round(side_chain_energy,3), round(r_ca_com,3))
E_side_chain_energy += side_chain_energy
return E_side_chain_energy
In [ ]:
In [8]:
pre = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_1_stronger_exclude_withoutBurial_bugfix/1a32/0/"
pdbFile = f"{pre}/lastFrame.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
seq = read_fasta(f"{pre}/crystal_structure.fasta")
In [9]:
compute_side_chain_energy(structure, seq)
Out[9]:
In [10]:
pre = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_0_stronger_exclude_volume/1a32/0"
pdbFile = f"{pre}/lastFrame.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
seq = read_fasta(f"{pre}/crystal_structure.fasta")
compute_side_chain_energy(structure, seq)
Out[10]:
In [6]:
import pickle
In [9]:
g = pickle.load(open("/Users/weilu/opt/parameters/side_chain/gmm_TYR.pkl", mode='rb'))
In [15]:
a = g.predict_proba(np.array([5.17101707,4.06451709, 4.08804776]).reshape(1, -1))
In [18]:
a.round(3)
Out[18]:
In [14]:
g.score(np.array([5.17101707,4.06451709, 4.08804776]).reshape(1, -1))
Out[14]:
In [24]:
gmm_dic = {}
res_type_list = ['GLY', 'ALA', 'VAL', 'CYS', 'PRO', 'LEU', 'ILE', 'MET', 'TRP', 'PHE', 'SER', 'THR', 'TYR', 'GLN', 'ASN', 'LYS', 'ARG', 'HIS', 'ASP', 'GLU']
for res_type in res_type_list[1:]:
gmm_dic[res_type] = pickle.load(open(f"/Users/weilu/opt/parameters/side_chain/gmm_{res_type}.pkl", mode='rb'))
In [101]:
def get_predicted_side_chain_state(model, seq, gmm_dic=gmm_dic, verbose=True):
info_ = []
for res in model.get_residues():
# if res.get_full_id()[1] != 0:
# continue
# x_com = get_side_chain_center_of_mass(res)
# resname = res.resname
resname = one_to_three(seq[res.id[1]-1])
if resname == "GLY":
continue
try:
n = res["N"].get_coord()
ca = res["CA"].get_coord()
c = res["C"].get_coord()
except:
if verbose:
print("normal? ", res)
continue
x_com = res["CB"].get_coord()
x = np.array([dis(x_com, n), dis(x_com, ca), dis(x_com, c)])
r_ca_com = dis(x_com, ca)
# resname = "TYR"
if resname == "GLY":
prediction = [0, 0, 0]
else:
prediction = gmm_dic[resname].predict_proba(np.array(x).reshape(1, -1))[0].round(3)
info_.append([res.id[1], resname, np.argmax(prediction), prediction[0], prediction[1], prediction[2]])
data = pd.DataFrame(info_, columns=["index", "ResType", "inState", "state1", "state2", "state3"])
return data
In [76]:
pdb = "1a32"
pre = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_0_stronger_exclude_volume/{pdb}/0"
pdbFile = f"{pre}/movie.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
seq = read_fasta(f"{pre}/crystal_structure.fasta")
# data_lastFrame = get_predicted_side_chain_state(structure, seq)
In [86]:
data.to_csv("")
In [107]:
all_models = list(structure.get_models())
data_ = []
for i, model in enumerate(all_models):
# print(model)
data_oneFrame = get_predicted_side_chain_state(model, seq, verbose=False)
data_.append(data_oneFrame.assign(Frame=i))
In [109]:
data = pd.concat(data_)
In [114]:
a = data.query("index == 2").reset_index(drop=True)
plt.plot(a["inState"][-50:])
Out[114]:
In [147]:
data
Out[147]:
In [146]:
data.query("index == 3").hist("inState")
Out[146]:
In [96]:
get_predicted_side_chain_state(all_models[1], seq)
Out[96]:
In [148]:
gmm = gmm_dic["GLN"]
gmm.weights_
Out[148]:
In [120]:
gmm = gmm_dic["THR"]
gmm.weights_
Out[120]:
In [149]:
data_openMM_native
Out[149]:
In [136]:
a = data_openMM_native.merge(data_native, on="index")
In [138]:
# (data_native.values[1:-1, 2:] - data_openMM_native.values[:, 2:]).astype(float).round(2)
a[a["inState_x"]!=a["inState_y"]]
Out[138]:
In [ ]:
In [58]:
pdb = "1a32"
pre = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_0_stronger_exclude_volume/{pdb}/0"
pdbFile = f"{pre}/lastFrame.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
seq = read_fasta(f"{pre}/crystal_structure.fasta")
data_lastFrame = get_predicted_side_chain_state(structure, seq)
In [59]:
pre = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}"
pdbFile = f"{pre}/cbd.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
seq = read_fasta(f"{pre}/crystal_structure.fasta")
data_native = get_predicted_side_chain_state(structure, seq)
In [126]:
pre = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_native_new_4/{pdb}/0"
pdbFile = f"{pre}/native.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
seq = read_fasta(f"{pre}/crystal_structure.fasta")
data_openMM_native = get_predicted_side_chain_state(structure[0], seq)
In [63]:
data_combined = data_lastFrame.merge(data_native, on=["index"])
In [66]:
data_combined.columns
Out[66]:
In [68]:
np.sum(data_combined["inState_x"] == data_combined["inState_y"])
Out[68]:
In [69]:
data_combined
Out[69]:
In [ ]:
In [75]:
plt.plot(data_combined["index"], data_combined["inState_x"])
plt.plot(data_combined["index"], data_combined["inState_y"])
Out[75]:
In [ ]:
pre = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_0_stronger_exclude_volume/1a32/0"
pdbFile = f"{pre}/lastFrame.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
seq = read_fasta(f"{pre}/crystal_structure.fasta")
info_ = []
for res in structure.get_residues():
if res.get_full_id()[1] != 0:
continue
# x_com = get_side_chain_center_of_mass(res)
# resname = res.resname
resname = one_to_three(seq[res.id[1]-1])
if resname == "GLY":
continue
try:
n = res["N"].get_coord()
ca = res["CA"].get_coord()
c = res["C"].get_coord()
except:
print("normal? ", res)
continue
x_com = res["CB"].get_coord()
x = np.array([dis(x_com, n), dis(x_com, ca), dis(x_com, c)])
r_ca_com = dis(x_com, ca)
# resname = "TYR"
if resname == "GLY":
prediction = [0, 0, 0]
else:
prediction = gmm_dic[resname].predict_proba(np.array(x).reshape(1, -1))[0].round(3)
info_.append([res.id[1], resname, np.argmax(prediction), prediction[0], prediction[1], prediction[2]])