In [1]:
from pyCodeLib import *
import warnings
import glob
import re
import numpy as np
import pandas as pd
from Bio.PDB.Polypeptide import one_to_three
warnings.filterwarnings('ignore')
# sys.path.insert(0, MYHOME)
%load_ext autoreload
%autoreload 2
In [2]:
# plt.rcParams['figure.figsize'] = [16.18033, 10]
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10])
# def gamma_format_convertion_iteration_to_simulation(iteration_gamma, gamma_for_simulation):
# # gamma_location = "/Users/weilu/Research/server_backup/jan_2019/optimization/gammas_dec30/cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0_gamma"
# # gamma_for_simulation = "/Users/weilu/Research/server_backup/jan_2019/optimization/iteration_gamma.dat"
# gamma = iteration_gamma
# gamma = -gamma # caused by tradition.
# # convert gamma to gamma used by simulation
# with open(gamma_for_simulation, "w") as out:
# c = 0
# for i in range(20):
# for j in range(i, 20):
# out.write(f"{gamma[c]:<.5f} {gamma[c]:10.5f}\n")
# c += 1
# out.write("\n")
# for i in range(20):
# for j in range(i, 20):
# # protein, water
# out.write(f"{gamma[c]:<.5f} {gamma[c+210]:10.5f}\n")
# c += 1
res_type_map = {
'A': 0,
'C': 4,
'D': 3,
'E': 6,
'F': 13,
'G': 7,
'H': 8,
'I': 9,
'K': 11,
'L': 10,
'M': 12,
'N': 2,
'P': 14,
'Q': 5,
'R': 1,
'S': 15,
'T': 16,
'V': 19,
'W': 17,
'Y': 18
}
# res_type_map = gamma_se_map_1_letter = { 'A': 0, 'R': 1, 'N': 2, 'D': 3, 'C': 4,
# 'Q': 5, 'E': 6, 'G': 7, 'H': 8, 'I': 9,
# 'L': 10, 'K': 11, 'M': 12, 'F': 13, 'P': 14,
# 'S': 15, 'T': 16, 'W': 17, 'Y': 18, 'V': 19}
res_type_map_letters = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
inverse_res_type_map = dict(list(zip(list(range(20)), res_type_map_letters)))
def gamma_format_convertion_iteration_to_simulation(iteration_gamma, gamma_for_simulation, burial_gamma_for_simulation=None):
# gamma_location = "/Users/weilu/Research/server_backup/jan_2019/optimization/gammas_dec30/cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0_gamma"
# gamma_for_simulation = "/Users/weilu/Research/server_backup/jan_2019/optimization/iteration_gamma.dat"
gamma = iteration_gamma
gamma = -gamma # caused by tradition.
# convert gamma to gamma used by simulation
with open(gamma_for_simulation, "w") as out:
c = 0
for i in range(20):
for j in range(i, 20):
out.write(f"{gamma[c]:<.5f} {gamma[c]:10.5f}\n")
c += 1
out.write("\n")
for i in range(20):
for j in range(i, 20):
# protein, water
out.write(f"{gamma[c]:<.5f} {gamma[c+210]:10.5f}\n")
c += 1
if burial_gamma_for_simulation:
rhoGamma = pd.DataFrame(gamma[630:].reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
np.savetxt(burial_gamma_for_simulation, g, fmt='%7.4f')
In [74]:
# pre = "/Users/weilu/Research/server_backup/feb_2019/jan_optimization/gammas/"
# pre = "/Users/weilu/Research/server/feb_2019/optimization_with_biased_iter1/gammas/"
# pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/gammas/"
# pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter3/gammas/"
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter4/gammas/"
# pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter2_improved/gammas/"
# pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0"
# pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
pp = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
A_name = pp + "_A"
B_name = pp + "_B"
B_filtered_name = pp + "_B_filtered"
P_name = pp + "_P"
Gamma_name = pp + "_gamma"
Gamma_filtered_name = pp + "_gamma_filtered"
Lamb_name = pp + "_lamb"
Lamb_filtered_name = pp + "_lamb_filtered"
A = np.loadtxt(pre+A_name)
B = np.loadtxt(pre+B_name)
B_filtered = np.loadtxt(pre+B_filtered_name, dtype=complex, converters={
0: lambda s: complex(s.decode().replace('+-', '-'))})
Gamma = np.loadtxt(pre+Gamma_name)
Gamma_filtered = np.loadtxt(pre+Gamma_filtered_name, dtype=complex, converters={
0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb = np.loadtxt(pre+Lamb_name, dtype=complex, converters={
0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb_filtered = np.loadtxt(pre+Lamb_filtered_name, dtype=complex, converters={
0: lambda s: complex(s.decode().replace('+-', '-'))})
half_B_name = pp + "_half_B"
half_B = np.loadtxt(pre+half_B_name)
other_half_B_name = pp + "_other_half_B"
other_half_B = np.loadtxt(pre+other_half_B_name)
std_half_B_name = pp + "_std_half_B"
std_half_B = np.loadtxt(pre+std_half_B_name)
In [83]:
total_phis = 690
num_decoys = 2000
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, mode=2)
In [85]:
plt.plot(filtered_lamb)
plt.yscale('log')
In [78]:
plt.plot(Lamb)
plt.yscale('log')
In [77]:
plt.plot(Lamb_filtered)
plt.yscale('log')
In [73]:
a = plt.hist(std_half_B.flatten(), bins=100)
plt.yscale('log')
In [71]:
a = plt.hist(std_half_B.flatten(), bins=100)
plt.yscale('log')
In [31]:
g1 = Gamma_filtered
In [126]:
np.mean(Gamma_filtered2)
Out[126]:
In [127]:
np.std(Gamma_filtered2)
Out[127]:
In [128]:
np.std(Gamma_filtered)
Out[128]:
In [47]:
iter3_gamma = np.loadtxt("/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter3/iter3")
In [58]:
iter4_gamma = np.loadtxt("/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter4/iter4")
In [48]:
iter3_gamma.shape
Out[48]:
In [84]:
plot_contact_well(filtered_gamma[:210], inferBound=True, invert_sign=False)
In [59]:
plot_contact_well(iter4_gamma[:210], inferBound=True, invert_sign=False)
In [55]:
plot_contact_well(Gamma_filtered[:210], inferBound=True, invert_sign=False)
In [49]:
plot_contact_well(iter3_gamma[:210], inferBound=True, invert_sign=False)
In [43]:
iter2_gamma = np.loadtxt("/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/iter1_gamma.dat")
In [44]:
def mix_gammas_2(pre, Gamma, preGamma, alpha=None, iterGammaName=None):
if alpha is None:
alpha = np.std(preGamma)/np.std(Gamma)
percent = int(alpha*100)
iter_gamma = (alpha/ (1.0 + alpha) * Gamma + 1.0/(1 + alpha) * preGamma).astype(float)
# pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/"
gamma_for_simulation = pre + f"iteration_gamma_{percent}.dat"
burial_gamma_for_simulation = pre + f"iteration_burial_gamma_{percent}.dat"
gamma_format_convertion_iteration_to_simulation(iter_gamma, gamma_for_simulation, burial_gamma_for_simulation=burial_gamma_for_simulation)
if iterGammaName is not None:
np.savetxt(pre+iterGammaName, iter_gamma)
In [108]:
def mix_gammas_3(pre, Gamma, preGamma, alpha=None, iterGammaName=None, iteration="5"):
percent = int(alpha*100)
scale = np.std(preGamma)/np.std(Gamma)
iter_gamma = ((1- alpha)*preGamma + alpha*(scale*Gamma)).astype(float)
iter_gamma *= np.std(preGamma)/np.std(iter_gamma)
# pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/"
gamma_for_simulation = pre + f"iteration_{iteration}_gamma_{percent}.dat"
burial_gamma_for_simulation = pre + f"iteration_{iteration}_burial_gamma_{percent}.dat"
gamma_format_convertion_iteration_to_simulation(iter_gamma, gamma_for_simulation, burial_gamma_for_simulation=burial_gamma_for_simulation)
if iterGammaName is not None:
np.savetxt(pre+iterGammaName, iter_gamma)
In [29]:
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter2/"
mix_gammas_2(pre, Gamma_filtered, iter1, iterGammaName="iter2")
In [45]:
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter3/"
mix_gammas_2(pre, Gamma_filtered, iter2_gamma, iterGammaName="iter3")
In [57]:
iter3_gamma = np.loadtxt("/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter3/iter3")
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter4/"
mix_gammas_2(pre, Gamma_filtered, iter3_gamma, iterGammaName="iter4")
In [86]:
iter3_gamma = np.loadtxt("/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter3/iter3")
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter4/"
mix_gammas_2(pre, filtered_gamma, iter3_gamma, iterGammaName="iter4_2")
In [88]:
iter3_gamma = np.loadtxt("/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter3/iter3")
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter4/"
mix_gammas_2(pre, Gamma_filtered, iter3_gamma, iterGammaName="iter4_4", alpha=0.06)
In [106]:
iter3_gamma = np.loadtxt("/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter3/iter3")
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter4/"
mix_gammas_3(pre, Gamma_filtered, iter3_gamma, iterGammaName="iter4_thrid_gen", alpha=0.3)
In [109]:
iter4_gamma = np.loadtxt("/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter4/iter4_thrid_gen")
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter5/"
mix_gammas_3(pre, Gamma_filtered, iter4_gamma, iterGammaName="iter5_thrid_gen", alpha=0.3, iteration=5)
In [110]:
pre_i = 5
pre_iter_gamma = np.loadtxt(f"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter{pre_i}/iter{pre_i}_thrid_gen")
pre = f"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter{pre_i+1}/"
mix_gammas_3(pre, Gamma_filtered, pre_iter_gamma, iterGammaName=f"iter{pre_i+1}_thrid_gen", alpha=0.3, iteration=pre_i+1)
In [111]:
pre_i = 6
pre_iter_gamma = np.loadtxt(f"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter{pre_i}/iter{pre_i}_thrid_gen")
pre = f"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter{pre_i+1}/"
mix_gammas_3(pre, Gamma_filtered, pre_iter_gamma, iterGammaName=f"iter{pre_i+1}_thrid_gen", alpha=0.3, iteration=pre_i+1)
In [21]:
alpha = np.std(Gamma_filtered2)/np.std(Gamma_filtered)
In [89]:
preGamma = iter3_gamma
Gamma = Gamma_filtered
alpha = None
if alpha is None:
alpha = np.std(preGamma)/np.std(Gamma)
percent = int(alpha*100)
iter_gamma = (alpha/ (1.0 + alpha) * Gamma + 1.0/(1 + alpha) * preGamma).astype(float)
# pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/"
In [90]:
np.std(iter_gamma)
Out[90]:
In [91]:
np.std(preGamma)
Out[91]:
In [92]:
np.std(Gamma)
Out[92]:
In [93]:
alpha
Out[93]:
In [101]:
alpha = 0.05
scale = np.std(preGamma)/np.std(Gamma)
iter_gamma = (1- alpha)*preGamma + alpha*(scale*Gamma)
iter_gamma *= np.std(preGamma)/np.std(iter_gamma)
In [102]:
np.std(iter_gamma)
Out[102]:
In [103]:
np.std(scale*Gamma)
Out[103]:
In [104]:
np.std(preGamma)
Out[104]:
In [98]:
iter_gamma = (1- alpha)*preGamma + alpha*Gamma
In [99]:
iter_gamma *= np.std(preGamma)/np.std(iter_gamma)
In [100]:
np.std(iter_gamma)
Out[100]:
In [ ]:
In [ ]:
np.std()
In [22]:
alpha
Out[22]:
In [23]:
np.std(iter1)/np.std(Gamma_filtered)
Out[23]:
In [25]:
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter2/"
mix_gammas_2(pre, Gamma_filtered, iter1, alpha)
In [15]:
iter1 = 0.04*Gamma_filtered1 + 0.96*Gamma_filtered2
In [20]:
np.savetxt("/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/iter1_gamma.dat", iter1.astype(float))
In [9]:
alpha = np.std(Gamma_filtered2)/np.std(Gamma_filtered)
In [11]:
a = Gamma_filtered * (alpha)/(1 + alpha)
In [13]:
np.std(Gamma_filtered2/(1+alpha))
Out[13]:
In [144]:
alpha
Out[144]:
In [12]:
np.std(a)
Out[12]:
In [136]:
fig, ax1 = plt.subplots()
a = ax1.hist(a, bins=100, alpha=0.5)
b = ax1.hist(Gamma_filtered2, bins=100, alpha=0.5)
In [125]:
fig, ax1 = plt.subplots()
a = ax1.hist(Gamma_filtered, bins=100, alpha=0.5)
b = ax1.hist(Gamma_filtered2*10, bins=100, alpha=0.5)
In [118]:
fig, ax1 = plt.subplots()
a = ax1.hist(Gamma_filtered, bins=100, alpha=0.5)
b = ax1.hist(Gamma_filtered2*20, bins=100, alpha=0.5)
In [122]:
fig, ax1 = plt.subplots()
a = ax1.hist(Gamma_filtered/20, bins=100, alpha=0.5)
b = ax1.hist(Gamma_filtered2, bins=100, alpha=0.5)
In [119]:
1/21
Out[119]:
In [108]:
a = plt.hist(Gamma_filtered, bins=100)
In [107]:
a = plt.hist(Gamma_filtered2, bins=100)
In [106]:
Gamma_filtered2.shape
Out[106]:
In [6]:
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/gammas/"
pp = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
def get_gammas(pre, pp):
# pre = "/Users/weilu/Research/server_backup/feb_2019/jan_optimization/gammas/"
# pre = "/Users/weilu/Research/server/feb_2019/optimization_with_biased_iter1/gammas/"
# pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/gammas/"
# pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0"
# pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
# pp = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
A_name = pp + "_A"
B_name = pp + "_B"
B_filtered_name = pp + "_B_filtered"
P_name = pp + "_P"
Gamma_name = pp + "_gamma"
Gamma_filtered_name = pp + "_gamma_filtered"
Lamb_name = pp + "_lamb"
Lamb_filtered_name = pp + "_lamb_filtered"
A = np.loadtxt(pre+A_name)
B = np.loadtxt(pre+B_name)
B_filtered = np.loadtxt(pre+B_filtered_name, dtype=complex, converters={
0: lambda s: complex(s.decode().replace('+-', '-'))})
Gamma = np.loadtxt(pre+Gamma_name)
Gamma_filtered = np.loadtxt(pre+Gamma_filtered_name, dtype=complex, converters={
0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb = np.loadtxt(pre+Lamb_name, dtype=complex, converters={
0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb_filtered = np.loadtxt(pre+Lamb_filtered_name, dtype=complex, converters={
0: lambda s: complex(s.decode().replace('+-', '-'))})
half_B_name = pp + "_half_B"
half_B = np.loadtxt(pre+half_B_name)
other_half_B_name = pp + "_other_half_B"
other_half_B = np.loadtxt(pre+other_half_B_name)
std_half_B_name = pp + "_std_half_B"
std_half_B = np.loadtxt(pre+std_half_B_name)
return (A, B, B_filtered, Gamma, Gamma_filtered, Lamb, Lamb_filtered, half_B, other_half_B, std_half_B)
In [14]:
pre1 = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/gammas/"
pp1 = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
(A1, B1, B_filtered1, Gamma1, Gamma_filtered1, Lamb1, Lamb_filtered1, half_B1, other_half_B1, std_half_B1) = get_gammas(pre1, pp1)
In [7]:
pre2 = "/Volumes/Wei_backup/feb_2019/optimization/gammas/"
pp2 = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
(A2, B2, B_filtered2, Gamma2, Gamma_filtered2, Lamb2, Lamb_filtered2, half_B2, other_half_B2, std_half_B2) = get_gammas(pre2, pp2)
In [37]:
alpha = 0.04
A = alpha * A1 + (1 - alpha) * A2
B = alpha * B1 + (1 - alpha) * B2
B_filtered = alpha * B_filtered1 + (1 - alpha) * B_filtered2
half_B = alpha * half_B1 + (1 - alpha) * half_B2
other_half_B = alpha * other_half_B1 + (1 - alpha) * other_half_B2
std_half_B = alpha * std_half_B1 + (1 - alpha) * std_half_B2
In [39]:
total_phis = 690
num_decoys = 6000
filtered_gamma, filtered_B, filtered_lamb, P, lamb, noisy_lamb_list = get_filtered_gamma_B_lamb_P_and_lamb(A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, mode=2)
In [66]:
a.shape
Out[66]:
In [67]:
a[0].shape
Out[67]:
In [75]:
np.average(np.ones((3,4,1)) * np.ones((3,1,1)), axis=0).shape
Out[75]:
In [72]:
np.zeros((4,4))[:, 2]
Out[72]:
In [81]:
np.average(np.ones((3,4,1)) * np.ones((3,1,1)), axis=0).T + (np.zeros((4,4))[2])
Out[81]:
In [83]:
a = np.zeros((4,4))
In [90]:
a[1] += np.average(np.ones((3,4,1)) * np.ones((3,1,1)), axis=0).reshape(4)
In [91]:
a
Out[91]:
In [45]:
iter_gamma = filtered_gamma.astype(float)
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/"
gamma_for_simulation = pre + f"iteration_gamma_combined_on_B.dat"
burial_gamma_for_simulation = pre + f"iteration_burial_gamma_combined_on_B.dat"
gamma_format_convertion_iteration_to_simulation(iter_gamma, gamma_for_simulation, burial_gamma_for_simulation=burial_gamma_for_simulation)
In [53]:
plot_contact_well(iter3_gamma[:210], inferBound=True, invert_sign=False, vmin=-3, vmax=1)
In [52]:
plot_contact_well(iter3_gamma[:210], inferBound=False, invert_sign=False, vmin=-3, vmax=1)
In [40]:
plot_contact_well(filtered_gamma[:210], inferBound=True, invert_sign=False)
In [42]:
plot_contact_well(filtered_gamma[210:420], inferBound=True, invert_sign=False)
In [43]:
rhoGamma = pd.DataFrame(filtered_gamma[630:].astype(float).reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/iter_burial_gamma.dat", g, fmt='%7.4f')
rhoGamma
Out[43]:
In [24]:
plot_contact_well(Gamma[:210], inferBound=True, invert_sign=False)
In [25]:
plot_contact_well(Gamma_filtered[:210], inferBound=True, invert_sign=False)
In [27]:
def get_filtered_gamma_B_lamb_P_and_lamb(A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, noise_iterations=10, relative_error_threshold=0.5, mode=2):
lamb, P = np.linalg.eig(B)
lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)
np.random.seed(10)
cutoff_modes = []
noisy_lamb_list = []
for i_noise in range(noise_iterations):
noisy_B = np.zeros((total_phis, total_phis))
for i in range(total_phis):
for j in range(i, total_phis):
# random_B_ij = np.random.normal(
# loc=half_B[i][j], scale=std_half_B[i][j])
if mode == 1:
random_B_ij = np.random.normal(
loc=half_B[i][j], scale=std_half_B[i][j] / float(num_decoys))
elif mode == 2:
random_B_ij = np.random.normal(
loc=half_B[i][j], scale=std_half_B[i][j] / float(num_decoys)**0.5)
noisy_B[i][j] = noisy_B[j][i] = random_B_ij - \
other_half_B[i][j]
noisy_lamb, noisy_P = np.linalg.eig(noisy_B)
noisy_lamb, noisy_P = sort_eigenvalues_and_eigenvectors(
noisy_lamb, noisy_P)
noisy_lamb_list.append(noisy_lamb)
try:
cutoff_mode = np.where(
np.abs(lamb - noisy_lamb) / lamb > relative_error_threshold)[0][0]
except IndexError:
cutoff_mode = len(lamb)
cutoff_modes.append(cutoff_mode)
cutoff_mode = min(cutoff_modes)
# cutoff_mode = 10
print("min cutoff_mode", cutoff_mode)
filtered_lamb = np.copy(lamb)
filtered_B_inv, filtered_lamb, P = get_filtered_B_inv_lambda_and_P(
filtered_lamb, cutoff_mode, P)
filtered_gamma = np.dot(filtered_B_inv, A)
filtered_B = np.linalg.inv(filtered_B_inv)
return filtered_gamma, filtered_B, filtered_lamb, P, lamb, noisy_lamb_list
def get_filtered_B_inv_lambda_and_P(filtered_lamb, cutoff_mode, P, method='extend_all_after_first_noisy_mode'):
if method == 'zero_all_after_first_noisy_mode':
filtered_lamb_inv = 1 / filtered_lamb
# for "zeroing unreliable eigenvalues"
filtered_lamb_inv[cutoff_mode:] = 0.0
filtered_B_inv = np.dot(
P, np.dot(np.diag(filtered_lamb_inv), np.linalg.inv(P)))
filtered_lamb = 1 / filtered_lamb_inv
if method == 'extend_all_after_first_noisy_mode':
# for "extending lowest reliable eigenvalue"
filtered_lamb[cutoff_mode:] = filtered_lamb[cutoff_mode - 1]
filtered_B_inv = np.dot(
P, np.dot(np.diag(1 / filtered_lamb), np.linalg.inv(P)))
return filtered_B_inv, filtered_lamb, P
def sort_eigenvalues_and_eigenvectors(eigenvalues, eigenvectors):
idx = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[idx]
eigenvectors = eigenvectors[:, idx]
return eigenvalues, eigenvectors
In [10]:
total_phis = 690
num_decoys = 6000
In [11]:
import numpy as np
a = np.zeros((24000, 690, 2))
print("%d Mbytes" % (a.size * a.itemsize / 1024 / 1024))
In [12]:
126*690
Out[12]:
In [1]:
a = {"a":"out"}
In [2]:
a["a"]
Out[2]:
In [22]:
pwd
Out[22]:
In [38]:
len(burial_gamma)
Out[38]:
In [41]:
for i in range(3):
print(i)
In [48]:
In [59]:
# convert gamma to gamma used by optimization
def read_gamma(gammaFile):
# attention, there is a minus sign. from strength to energy
data = -np.loadtxt(gammaFile)
gamma_direct = data[:210]
gamma_mediated = data[210:]
return gamma_direct, gamma_mediated
gamma_direct, gamma_mediated = read_gamma("/Users/weilu/Research/server/feb_2019/optimization_iter1/iteration_gamma.dat")
# attention, there is a minus sign. from strength to energy
burial_gamma = -np.loadtxt("/Users/weilu/Research/server/feb_2019/optimization_iter1/iteration_burial_gamma.dat")
with open("/Users/weilu/Research/server/feb_2019/optimization_with_biased_iter1/previous_iter1.dat", "w") as out:
for x in gamma_direct:
out.write(f"{x[0]}\n")
for x in gamma_mediated:
out.write(f"{x[0]}\n") # protein
for x in gamma_mediated:
out.write(f"{x[1]}\n")
for i in range(3):
for j in range(len(burial_gamma)):
out.write(f"{burial_gamma[j][i]}\n")
# # convert gamma to gamma used by optimization
# def read_gamma(gammaFile):
# # attention, there is a minus sign. from strength to energy
# data = -np.loadtxt(gammaFile)
# gamma_direct = data[:210]
# gamma_mediated = data[210:]
# return gamma_direct, gamma_mediated
# gamma_direct, gamma_mediated = read_gamma("/Users/weilu/openmmawsem/parameters/gamma.dat")
# # attention, there is a minus sign. from strength to energy
# burial_gamma = -np.loadtxt("/Users/weilu/openmmawsem/parameters/burial_gamma.dat")
# with open("/Users/weilu/Research/server/feb_2019/original_gamma.dat", "w") as out:
# for x in gamma_direct:
# out.write(f"{x[0]}\n")
# for x in gamma_mediated:
# out.write(f"{x[0]}\n") # protein
# for x in gamma_mediated:
# out.write(f"{x[1]}\n")
# for i in range(3):
# for j in range(len(burial_gamma)):
# out.write(f"{burial_gamma[j][i]}\n")
In [ ]:
os.chdir("/Users/weilu/opt/notebook/Optimization")
In [7]:
Gamma[:10]
Out[7]:
In [24]:
os.chdir("/Users/weilu/Research/server/feb_2019/optimization_with_biased_iter1")
In [17]:
os.chdir("/Volumes/Wei_backup/feb_2019/optimization_with_biased_iter1")
In [4]:
# pre = "/Users/weilu/Research/server/feb_2019/optimization/gammas/"
pre = "/Volumes/Wei_backup/feb_2019/optimization/gammas/"
pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
Gamma_name = pp + "_gamma"
Gamma = np.loadtxt(pre+Gamma_name)
# iter_gamma = (0.2 * Gamma_filtered + 0.8 * Gamma).astype(float)
# iter_gamma = (0.02 * Gamma_filtered + 0.98 * Gamma).astype(float)
iter_gamma = (0.1 * Gamma_filtered + 0.9 * Gamma).astype(float)
# iter_gamma = (0.04 * Gamma_filtered + 0.96 * Gamma).astype(float)
# np.savetxt("gamma_iter1_combined_mar06.dat", iter_gamma, fmt='%1.5f')
In [5]:
def mix_gammas(newGamma, preGamma, alpha):
alpha = percent/100.0
iter_gamma = ((1-alpha) * newGamma + alpha * preGamma).astype(float)
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/"
gamma_for_simulation = pre + f"iteration_gamma_{percent}.dat"
burial_gamma_for_simulation = pre + f"iteration_burial_gamma_{percent}.dat"
gamma_format_convertion_iteration_to_simulation(iter_gamma, gamma_for_simulation, burial_gamma_for_simulation=burial_gamma_for_simulation)
In [6]:
percent = 98
mix_gammas(Gamma_filtered, Gamma, percent)
In [42]:
percent = 0
mix_gammas(Gamma_filtered, Gamma, percent)
In [40]:
percent = 96
mix_gammas(Gamma_filtered, Gamma, percent)
In [41]:
percent = 90
mix_gammas(Gamma_filtered, Gamma, percent)
In [33]:
percent = 0
iter_gamma = ((1-percent/100.0) * Gamma_filtered + percent/100.0 * Gamma).astype(float)
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/"
gamma_for_simulation = pre + f"iteration_gamma_{percent}.dat"
burial_gamma_for_simulation = pre + f"iteration_burial_gamma_{percent}.dat"
gamma_format_convertion_iteration_to_simulation(Gamma_filtered.astype(float), gamma_for_simulation, burial_gamma_for_simulation=burial_gamma_for_simulation)
In [34]:
percent = 90
iter_gamma = ((1-percent/100.0) * Gamma_filtered + percent/100.0 * Gamma).astype(float)
pre = "/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter1/"
gamma_for_simulation = pre + f"iteration_gamma_{percent}.dat"
burial_gamma_for_simulation = pre + f"iteration_burial_gamma_{percent}.dat"
gamma_format_convertion_iteration_to_simulation(Gamma_filtered.astype(float), gamma_for_simulation, burial_gamma_for_simulation=burial_gamma_for_simulation)
In [18]:
gamma_file_name = "gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
In [27]:
gamma_file_name = "gamma_iter1_combined_mar06.dat"
validate_hamiltonian_wei("phi_list.txt", "proteins_name_list.txt", gamma_file_name, "lammps", 6000)
Out[27]:
In [22]:
gamma_file_name = "new_iter1_gamma.dat"
validate_hamiltonian_wei("phi_list.txt", "proteins_name_list.txt", gamma_file_name, "lammps", 6000)
Out[22]:
In [ ]:
In [47]:
gamma_file_name = "gamma_iter1_origin_filter.dat"
validate_hamiltonian_wei("phi_list.txt", "proteins_name_list.txt", gamma_file_name, "lammps", 6000)
Out[47]:
In [49]:
gamma_file_name = "original_gamma.dat"
validate_hamiltonian_wei("phi_list.txt", "proteins_name_list.txt", gamma_file_name, "lammps", 6000)
Out[49]:
In [52]:
gamma_file_name = "gamma_iter_0.dat"
validate_hamiltonian_wei("phi_list.txt", "proteins_name_list.txt", gamma_file_name, "lammps", 6000)
Out[52]:
In [53]:
gamma_file_name = "gamma_iter1_new_filter.dat"
validate_hamiltonian_wei("phi_list.txt", "proteins_name_list.txt", gamma_file_name, "lammps", 6000)
Out[53]:
In [58]:
gamma_file_name = "gamma_iter1_combined.dat"
validate_hamiltonian_wei("phi_list.txt", "proteins_name_list.txt", gamma_file_name, "lammps", 6000)
Out[58]:
In [60]:
gamma_file_name = "previous_iter1.dat"
validate_hamiltonian_wei("phi_list.txt", "proteins_name_list.txt", gamma_file_name, "lammps", 6000)
Out[60]:
In [77]:
# 98 percent
gamma_file_name = "gamma_iter1_combined.dat"
validate_hamiltonian_wei("phi_list.txt", "proteins_name_list.txt", gamma_file_name, "lammps", 6000)
Out[77]:
In [73]:
# 96 percent
gamma_file_name = "gamma_iter1_combined.dat"
validate_hamiltonian_wei("phi_list.txt", "proteins_name_list.txt", gamma_file_name, "lammps", 6000)
Out[73]:
In [76]:
np.savetxt("gamma_iter1_combined.dat", iter_gamma, fmt='%1.5f')
In [78]:
pre = "/Users/weilu/Research/server/feb_2019/optimization/gammas/"
pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
Gamma_name = pp + "_gamma"
Gamma = np.loadtxt(pre+Gamma_name)
# iter_gamma = (0.2 * Gamma_filtered + 0.8 * Gamma).astype(float)
iter_gamma = (0.02 * Gamma_filtered + 0.98 * Gamma).astype(float)
# iter_gamma = (0.04 * Gamma_filtered + 0.96 * Gamma).astype(float)
In [79]:
gamma_for_simulation = "/Users/weilu/Research/server/feb_2019/optimization_with_biased_iter1/iteration_gamma.dat"
burial_gamma_for_simulation = "/Users/weilu/Research/server/feb_2019/optimization_with_biased_iter1/iteration_burial_gamma.dat"
gamma_format_convertion_iteration_to_simulation(iter_gamma, gamma_for_simulation, burial_gamma_for_simulation=burial_gamma_for_simulation)
In [26]:
z_score, e_native, e_mg, e_mg_std = evaluate_hamiltonian_wei("1fc2", "phi_list.txt", "proteins_name_list.txt", None, "lammps", 6000)
In [27]:
z_score
Out[27]:
In [28]:
e_native
Out[28]:
In [29]:
e_mg
Out[29]:
In [17]:
plt.plot(lamb[:100])
Out[17]:
In [21]:
filtered_gamma, filtered_B, filtered_lamb, P, lamb, noisy_lamb_list = get_filtered_gamma_B_lamb_P_and_lamb(A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, mode=2)
In [19]:
plot_contact_well(filtered_gamma[:210], inferBound=True, invert_sign=False)
In [15]:
rhoGamma = pd.DataFrame(filtered_gamma[630:].astype(float).reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/iter_burial_gamma.dat", g, fmt='%7.4f')
rhoGamma
Out[15]:
In [4]:
plot_contact_well(Gamma_filtered[:210], inferBound=True, invert_sign=False)
In [7]:
rhoGamma = pd.DataFrame(Gamma_filtered[630:].astype(float).reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/iter_burial_gamma.dat", g, fmt='%7.4f')
rhoGamma
Out[7]:
In [ ]: