In [1]:
from pyCodeLib import *
import warnings
import glob
import re
import numpy as np
import pandas as pd
from Bio.PDB.Polypeptide import one_to_three
warnings.filterwarnings('ignore')
# sys.path.insert(0, MYHOME)
%load_ext autoreload
%autoreload 2
In [2]:
# plt.rcParams['figure.figsize'] = [16.18033, 10]
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10])
# def gamma_format_convertion_iteration_to_simulation(iteration_gamma, gamma_for_simulation):
# # gamma_location = "/Users/weilu/Research/server_backup/jan_2019/optimization/gammas_dec30/cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0_gamma"
# # gamma_for_simulation = "/Users/weilu/Research/server_backup/jan_2019/optimization/iteration_gamma.dat"
# gamma = iteration_gamma
# gamma = -gamma # caused by tradition.
# # convert gamma to gamma used by simulation
# with open(gamma_for_simulation, "w") as out:
# c = 0
# for i in range(20):
# for j in range(i, 20):
# out.write(f"{gamma[c]:<.5f} {gamma[c]:10.5f}\n")
# c += 1
# out.write("\n")
# for i in range(20):
# for j in range(i, 20):
# # protein, water
# out.write(f"{gamma[c]:<.5f} {gamma[c+210]:10.5f}\n")
# c += 1
res_type_map = {
'A': 0,
'C': 4,
'D': 3,
'E': 6,
'F': 13,
'G': 7,
'H': 8,
'I': 9,
'K': 11,
'L': 10,
'M': 12,
'N': 2,
'P': 14,
'Q': 5,
'R': 1,
'S': 15,
'T': 16,
'V': 19,
'W': 17,
'Y': 18
}
# res_type_map = gamma_se_map_1_letter = { 'A': 0, 'R': 1, 'N': 2, 'D': 3, 'C': 4,
# 'Q': 5, 'E': 6, 'G': 7, 'H': 8, 'I': 9,
# 'L': 10, 'K': 11, 'M': 12, 'F': 13, 'P': 14,
# 'S': 15, 'T': 16, 'W': 17, 'Y': 18, 'V': 19}
res_type_map_letters = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
inverse_res_type_map = dict(list(zip(list(range(20)), res_type_map_letters)))
def gamma_format_convertion_iteration_to_simulation(iteration_gamma, gamma_for_simulation, burial_gamma_for_simulation=None):
# gamma_location = "/Users/weilu/Research/server_backup/jan_2019/optimization/gammas_dec30/cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0_gamma"
# gamma_for_simulation = "/Users/weilu/Research/server_backup/jan_2019/optimization/iteration_gamma.dat"
gamma = iteration_gamma
gamma = -gamma # caused by tradition.
# convert gamma to gamma used by simulation
with open(gamma_for_simulation, "w") as out:
c = 0
for i in range(20):
for j in range(i, 20):
out.write(f"{gamma[c]:<.5f} {gamma[c]:10.5f}\n")
c += 1
out.write("\n")
for i in range(20):
for j in range(i, 20):
# protein, water
out.write(f"{gamma[c]:<.5f} {gamma[c+210]:10.5f}\n")
c += 1
if burial_gamma_for_simulation:
rhoGamma = pd.DataFrame(gamma[630:].reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
np.savetxt(burial_gamma_for_simulation, g, fmt='%7.4f')
In [45]:
# pre = "/Users/weilu/Research/server_backup/feb_2019/jan_optimization/gammas/"
pre = "/Users/weilu/Research/server/feb_2019/optimization_iter1/gammas/"
# pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0"
# pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
pp = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
A_name = pp + "_A"
B_name = pp + "_B"
B_filtered_name = pp + "_B_filtered"
P_name = pp + "_P"
Gamma_name = pp + "_gamma"
Gamma_filtered_name = pp + "_gamma_filtered"
Lamb_name = pp + "_lamb"
Lamb_filtered_name = pp + "_lamb_filtered"
A = np.loadtxt(pre+A_name)
B = np.loadtxt(pre+B_name)
B_filtered = np.loadtxt(pre+B_filtered_name, dtype=complex, converters={
0: lambda s: complex(s.decode().replace('+-', '-'))})
Gamma = np.loadtxt(pre+Gamma_name)
Gamma_filtered = np.loadtxt(pre+Gamma_filtered_name, dtype=complex, converters={
0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb = np.loadtxt(pre+Lamb_name, dtype=complex, converters={
0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb_filtered = np.loadtxt(pre+Lamb_filtered_name, dtype=complex, converters={
0: lambda s: complex(s.decode().replace('+-', '-'))})
half_B_name = pp + "_half_B"
half_B = np.loadtxt(pre+half_B_name)
other_half_B_name = pp + "_other_half_B"
other_half_B = np.loadtxt(pre+other_half_B_name)
std_half_B_name = pp + "_std_half_B"
std_half_B = np.loadtxt(pre+std_half_B_name)
In [49]:
phi_i_decoy_reshaped = np.loadtxt("/Users/weilu/Research/server/feb_2019/optimization_iter1/phis/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_phi_decoy_all_summary.txt")
In [50]:
phi_i_decoy_reshaped.shape
Out[50]:
In [59]:
phi_i_protein_i_decoy = np.reshape(phi_i_decoy_reshaped, (6, num_decoys, total_phis))
In [ ]:
In [141]:
def get_filtered_gamma_B_lamb_P_and_lamb(A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, noise_iterations=10, relative_error_threshold=0.5, mode=1):
lamb, P = np.linalg.eig(B)
lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)
np.random.seed(10)
cutoff_modes = []
noisy_lamb_list = []
for i_noise in range(noise_iterations):
noisy_B = np.zeros((total_phis, total_phis))
for i in range(total_phis):
for j in range(i, total_phis):
# random_B_ij = np.random.normal(
# loc=half_B[i][j], scale=std_half_B[i][j])
if mode == 1:
random_B_ij = np.random.normal(
loc=half_B[i][j], scale=std_half_B[i][j] / float(num_decoys))
# random_B_ij = np.random.normal(
# loc=half_B[i][j], scale=std_half_B[i][j] / float(num_decoys)**0.5)
noisy_B[i][j] = noisy_B[j][i] = random_B_ij - \
other_half_B[i][j]
noisy_lamb, noisy_P = np.linalg.eig(noisy_B)
noisy_lamb, noisy_P = sort_eigenvalues_and_eigenvectors(
noisy_lamb, noisy_P)
noisy_lamb_list.append(noisy_lamb)
try:
cutoff_mode = np.where(
np.abs(lamb - noisy_lamb) / lamb > relative_error_threshold)[0][0]
except IndexError:
cutoff_mode = len(lamb)
cutoff_modes.append(cutoff_mode)
cutoff_mode = min(cutoff_modes)
# cutoff_mode = 10
print("min cutoff_mode", cutoff_mode)
filtered_lamb = np.copy(lamb)
filtered_B_inv, filtered_lamb, P = get_filtered_B_inv_lambda_and_P(
filtered_lamb, cutoff_mode, P)
filtered_gamma = np.dot(filtered_B_inv, A)
filtered_B = np.linalg.inv(filtered_B_inv)
return filtered_gamma, filtered_B, filtered_lamb, P, lamb, noisy_lamb_list
def get_filtered_B_inv_lambda_and_P(filtered_lamb, cutoff_mode, P, method='extend_all_after_first_noisy_mode'):
if method == 'zero_all_after_first_noisy_mode':
filtered_lamb_inv = 1 / filtered_lamb
# for "zeroing unreliable eigenvalues"
filtered_lamb_inv[cutoff_mode:] = 0.0
filtered_B_inv = np.dot(
P, np.dot(np.diag(filtered_lamb_inv), np.linalg.inv(P)))
filtered_lamb = 1 / filtered_lamb_inv
if method == 'extend_all_after_first_noisy_mode':
# for "extending lowest reliable eigenvalue"
filtered_lamb[cutoff_mode:] = filtered_lamb[cutoff_mode - 1]
filtered_B_inv = np.dot(
P, np.dot(np.diag(1 / filtered_lamb), np.linalg.inv(P)))
return filtered_B_inv, filtered_lamb, P
def sort_eigenvalues_and_eigenvectors(eigenvalues, eigenvectors):
idx = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]
return eigenvalues, eigenvectors
In [39]:
total_phis = 690
num_decoys = 6000
In [ ]:
plt.plot(lamb)
In [136]:
f"{datetime.date.today()}"
Out[136]:
In [129]:
plt.plot(lamb)
Out[129]:
In [147]:
plt.plot(np.log(lamb))
plt.plot(np.log(noisy_lamb_list[0]))
Out[147]:
In [142]:
filtered_gamma, filtered_B, filtered_lamb, P, lamb, noisy_lamb_list = get_filtered_gamma_B_lamb_P_and_lamb(A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys)
In [146]:
plt.plot(noisy_lamb_list[0])
Out[146]:
In [145]:
plot_contact_well(filtered_gamma[:210], inferBound=True, invert_sign=False)
In [111]:
rhoGamma = pd.DataFrame(filtered_gamma[630:].reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/iter_burial_gamma.dat", g, fmt='%7.4f')
rhoGamma
Out[111]:
In [113]:
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys)
In [114]:
plot_contact_well(filtered_gamma[:210], inferBound=True, invert_sign=False)
In [115]:
rhoGamma = pd.DataFrame(filtered_gamma[630:].reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/iter_burial_gamma.dat", g, fmt='%7.4f')
rhoGamma
Out[115]:
In [120]:
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys)
In [121]:
plot_contact_well(filtered_gamma[:210], inferBound=True, invert_sign=False)
In [122]:
rhoGamma = pd.DataFrame(filtered_gamma[630:].reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/iter_burial_gamma.dat", g, fmt='%7.4f')
rhoGamma
Out[122]:
In [ ]:
In [126]:
pre = "/Users/weilu/Research/server/feb_2019/optimization/gammas/"
pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
Gamma_name = pp + "_gamma"
Gamma = np.loadtxt(pre+Gamma_name)
iter_gamma = 0.5 * filtered_gamma + 0.5 * Gamma
gamma_for_simulation = "/Users/weilu/Research/server/feb_2019/optimization_iter1/iteration_gamma.dat"
burial_gamma_for_simulation = "/Users/weilu/Research/server/feb_2019/optimization_iter1/iteration_burial_gamma.dat"
gamma_format_convertion_iteration_to_simulation(iter_gamma, gamma_for_simulation, burial_gamma_for_simulation=burial_gamma_for_simulation)
In [125]:
rhoGamma = pd.DataFrame(Gamma[630:].reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/iter_burial_gamma.dat", g, fmt='%7.4f')
rhoGamma
Out[125]:
In [158]:
pdb_file = "/Users/weilu/Research/server/feb_2019/iterative_optimization_new_temp_range/all_simulations/1ctf/1ctf/crystal_structure.pdb"
chain_name = "A"
parser = PDBParser()
structure = parser.get_structure('X', pdb_file)
chain = list(structure[0][chain_name])
n = len(chain)
rg = 0.0
for i, residue_i in enumerate(chain):
# print(i, residue_i)
for j, residue_j in enumerate(chain[i+1:]):
# print(j)
r = residue_i["CA"] - residue_j["CA"]
rg += r**2
(rg/(n**2))**0.5
# Rg = Rg + rsq
Out[158]:
In [160]:
p = "1enh"
p = "1fc2"
def computeRg(p):
pdb_file = f"/Users/weilu/Research/server/feb_2019/iterative_optimization_new_temp_range/all_simulations/{p}/{p}/crystal_structure.pdb"
chain_name = "A"
parser = PDBParser()
structure = parser.get_structure('X', pdb_file)
chain = list(structure[0][chain_name])
n = len(chain)
rg = 0.0
for i, residue_i in enumerate(chain):
# print(i, residue_i)
for j, residue_j in enumerate(chain[i+1:]):
# print(j)
r = residue_i["CA"] - residue_j["CA"]
rg += r**2
return (rg/(n**2))**0.5
Out[160]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [102]:
plt.plot(filtered_lamb[:50])
Out[102]:
In [89]:
plt.plot(lamb[:50])
Out[89]:
In [83]:
plt.plot(Lamb[:50])
Out[83]:
In [81]:
plt.plot(Lamb_filtered)
Out[81]:
In [ ]:
std_half_B
In [148]:
plt.imshow(std_half_B, cmap="binary")
plt.colorbar()
Out[148]:
In [96]:
plt.imshow(np.log10(std_half_B+0.1), cmap="binary")
plt.colorbar()
Out[96]:
In [42]:
std_half_B
Out[42]:
In [35]:
plt.plot(filtered_lamb[340:360])
Out[35]:
In [33]:
plt.plot(Lamb[340:360])
Out[33]:
In [34]:
plt.plot(Lamb_filtered[340:360])
Out[34]:
In [24]:
plt.plot(Lamb_filtered[:10])
Out[24]:
In [103]:
plot_contact_well(filtered_gamma[:210], inferBound=True, invert_sign=False)
In [104]:
plot_contact_well(filtered_gamma[210:420], inferBound=True, invert_sign=False)
In [91]:
plot_contact_well(filtered_gamma[:210], inferBound=True, invert_sign=False)
In [92]:
plot_contact_well(filtered_gamma[210:420], inferBound=True, invert_sign=False)
In [47]:
plot_contact_well(Gamma[:210], inferBound=True, invert_sign=False)
In [46]:
plot_contact_well(Gamma_filtered[:210], inferBound=True, invert_sign=False)
In [18]:
plot_contact_well(Gamma_filtered[:210], inferBound=True, invert_sign=False)
In [15]:
plot_contact_well(Gamma_filtered[:210], inferBound=True, invert_sign=False)
In [105]:
rhoGamma = pd.DataFrame(filtered_gamma[630:].reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/iter_burial_gamma.dat", g, fmt='%7.4f')
rhoGamma
Out[105]:
In [93]:
rhoGamma = pd.DataFrame(filtered_gamma[630:].reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/iter_burial_gamma.dat", g, fmt='%7.4f')
rhoGamma
Out[93]:
In [48]:
rhoGamma = pd.DataFrame(Gamma[630:].reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/iter_burial_gamma.dat", g, fmt='%7.4f')
rhoGamma
Out[48]:
In [16]:
rhoGamma = pd.DataFrame(Gamma[630:].reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/iter_burial_gamma.dat", g, fmt='%7.4f')
rhoGamma
Out[16]:
In [17]:
rhoGamma = pd.DataFrame(Gamma_filtered[630:].astype(float).reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/iter_burial_gamma.dat", g, fmt='%7.4f')
rhoGamma
Out[17]:
In [7]:
Gamma_filtered[630:]
Out[7]:
In [ ]:
plot_contact_well(Gamma_filtered[630:], inferBound=True, invert_sign=False)