In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
# from small_script.myFunctions import *
sys.path.insert(0, "/Users/weilu/openmmawsem")
from helperFunctions.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
# Notebook-wide matplotlib defaults: wide golden-ratio figures, white background,
# 100 dpi and a large font so plots stay readable when exported.
plt.rcParams.update({
    'figure.figsize': np.array([16.18033, 10]),  # golden ratio
    'figure.facecolor': 'w',
    'figure.dpi': 100,
    'font.size': 22,
})
In [103]:
# Load every artefact written by one optimization run: the A vector, the B matrix
# (full, filtered, and the half/other-half/std variants), the gammas, the eigenvalue
# spectrum, and the decoy phi summary (A_prime).
#
# Other runs previously inspected with this cell:
# pre = "/Users/weilu/Research/server_backup/feb_2019/jan_optimization/gammas/"
# pre = "/Users/weilu/Research/server/april_2019/optimization_test/gammas/"
pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4_duplicate/optimization/gammas/"
# pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0"
# pp = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
pp = "protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"


def _parse_complex(token):
    """Parse one text field such as ``1.5+-2e-3j`` into a Python complex.

    The files contain ``+-`` where the imaginary part is negative, which
    ``complex()`` rejects, so it is rewritten to ``-`` first.  np.loadtxt passes
    converters ``bytes`` under the legacy ``encoding='bytes'`` default and ``str``
    otherwise (the default since NumPy 2.0), so both are accepted here.
    """
    if isinstance(token, bytes):
        token = token.decode()
    return complex(token.replace('+-', '-'))


def _load_complex(path):
    """Read a complex-valued text file, applying _parse_complex to column 0."""
    return np.loadtxt(path, dtype=complex, converters={0: _parse_complex})


# File names share the parameter string `pp` and differ only in their suffix.
A_name = pp + "_A"
B_name = pp + "_B"
B_filtered_name = pp + "_B_filtered"
P_name = pp + "_P"  # name only; the P file itself is not loaded in this cell
Gamma_name = pp + "_gamma"
Gamma_filtered_name = pp + "_gamma_filtered"
Lamb_name = pp + "_lamb"
Lamb_filtered_name = pp + "_lamb_filtered"
half_B_name = pp + "_half_B"
other_half_B_name = pp + "_other_half_B"
std_half_B_name = pp + "_std_half_B"

# Real-valued outputs.
A = np.loadtxt(pre+A_name)
B = np.loadtxt(pre+B_name)
Gamma = np.loadtxt(pre+Gamma_name)
half_B = np.loadtxt(pre+half_B_name)
other_half_B = np.loadtxt(pre+other_half_B_name)
std_half_B = np.loadtxt(pre+std_half_B_name)

# Outputs stored as complex numbers.
B_filtered = _load_complex(pre+B_filtered_name)
Gamma_filtered = _load_complex(pre+Gamma_filtered_name)
Lamb = _load_complex(pre+Lamb_name)
Lamb_filtered = _load_complex(pre+Lamb_filtered_name)

# Decoy phi summary lives in the sibling "phis" directory of the same run.
# pre = "/Users/weilu/Research/server/april_2019/"
location = pre + "../../phis/" + pp + "_phi_decoy_summary.txt"
A_prime = np.loadtxt(location)
In [104]:
# Eigenvalue spectrum as loaded from disk, drawn on a logarithmic y-axis.
ax = plt.gca()
ax.plot(Lamb)
ax.set_yscale("log")
In [105]:
# Recompute the filtered solution from B instead of relying on the files on disk:
# diagonalize B, sort the modes, build the filtered inverse, then gamma = B_inv . A.
lamb, P = sort_eigenvalues_and_eigenvectors(*np.linalg.eig(B))

# NOTE(review): cutoff_mode is presumably the eigenmode index at which filtering
# starts — confirm in get_filtered_B_inv_lambda_and_P.
cutoff_mode = 100
filtered_lamb = np.copy(lamb)
filtered_B_inv, filtered_lamb, P = get_filtered_B_inv_lambda_and_P(
    filtered_lamb, cutoff_mode, P)

filtered_gamma = np.dot(filtered_B_inv, A)
filtered_B = np.linalg.inv(filtered_B_inv)

# Plot gamma in three consecutive chunks: [0, 210), [210, 420) and everything from 420 on.
for _lo, _hi in ((0, 210), (210, 420), (420, None)):
    plot_contact_well(filtered_gamma[_lo:_hi], inferBound=True)
In [106]:
# Quick look at every filtered gamma coefficient, in index order.
plt.plot(filtered_gamma)
Out[106]:
In [107]:
# Largest absolute deviation between the eigenvalues recomputed here (lamb) and the
# ones loaded from disk (Lamb); the author observed it to be about 1e-5.
# The magnitude is taken first so that negative (or complex) deviations are not
# hidden, which a plain signed max() would allow.
np.max(np.abs(lamb - Lamb))
Out[107]:
In [108]:
# Persist the filtered gamma computed with cutoff_mode = 100 for trial 4.
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/saved_gammas/"
np.savetxt(save_gamma_pre + "/trial_4_cutoff100", filtered_gamma)
In [8]:
# os.chdir('/Users/weilu/opt/notebook/Optimization')
In [110]:
# Load the reference ("original") gamma; later cells use it to set the target value
# of A_prime . gamma for the constrained solution.
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/original_gamma"
original_gamma = np.loadtxt(gamma_file_name)
In [111]:
# Display A_prime . original_gamma, i.e. the value the constrained gamma must reproduce.
np.dot(A_prime, original_gamma)
Out[111]:
In [112]:
# We want to impose an additional constraint so that A' . gamma = constant (-562.23),
# the constant being the value obtained with the original gamma.
#
# With a Lagrange multiplier lambda_2 the solution has the form
#     gamma_new = B_inv . (A - lambda_2 * A')
# and requiring A' . gamma_new = c gives
#     lambda_2 = (A' . B_inv . A - c) / (A' . B_inv . A')
c = A_prime.dot(original_gamma)
B_inv = filtered_B_inv
_Aprime_B_inv = A_prime.dot(B_inv)  # shared left factor of numerator and denominator
lambda_2 = (_Aprime_B_inv.dot(A) - c) / _Aprime_B_inv.dot(A_prime)
gamma_new = B_inv.dot(A - lambda_2 * A_prime)
In [113]:
# Sanity check: this should equal np.dot(A_prime, original_gamma) if the constraint holds.
np.dot(A_prime, gamma_new)
Out[113]:
In [114]:
# Persist the gamma obtained after imposing the A' . gamma constraint.
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/saved_gammas/"
np.savetxt(save_gamma_pre + "/trial_4_cutoff100_impose_Aprime_constraint", gamma_new)
In [119]:
# Score one protein by hand: read its native and decoy phis, dot them with gamma to
# get energies, and compute the z-score of the native against the decoys.
# NOTE(review): this appears to be an unrolled copy of evaluate_hamiltonian_wei, whose
# signature the author recorded as
#   evaluate_hamiltonian_wei(protein, hamiltonian, training_set_file, gamma_file_name,
#       test_decoy_method, num_decoys, use_filtered_gammas=True, outputDecoy=False, **kwargs)
# The helper's filtered-gamma branch (complex-valued load) is intentionally not used
# here; gamma is always read as a plain real-valued file.
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_specific_test/optimization/")

# ---- inputs ----
protein = "2BNQ"  # alternative tried by the author: "4FTV"
hamiltonian = "phi_list.txt"
training_set_file = "protein_list"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint"
test_decoy_method = "shuffle"
num_decoys = 10
use_filtered_gammas = False  # kept for parity with the helper; not read below

# ---- Hamiltonian description: total number of phi_i and the full parameter string ----
phi_list = read_phi_list(hamiltonian)
training_set = read_column_from_file(training_set_file, 1)
total_phis, full_parameters_string, num_phis = get_total_phis_and_parameter_string(
    phi_list, training_set, mode=0)

# ---- gammas and phis (native and decoys) ----
gamma = np.loadtxt(gamma_file_name)
phi_native = read_native_phi(protein, phi_list, total_phis, mode=0)
phi_i_decoy = read_decoy_phis(
    protein, phi_list, total_phis, num_phis, num_decoys, test_decoy_method, mode=0)

# ---- energies are dot products of gamma with the phi vectors ----
e_native = np.dot(gamma, phi_native)
e_decoy = np.zeros(num_decoys)
for i_decoy in range(num_decoys):
    e_decoy[i_decoy] = gamma.dot(phi_i_decoy[i_decoy])

# ---- z-score: gap between mean decoy energy and native energy, in decoy std units ----
e_mg = np.mean(e_decoy)
e_mg_std = np.std(e_decoy)
z_score = (e_mg - e_native) / e_mg_std
In [120]:
# Total native energy of `protein` under the loaded gamma (same quantity as e_native).
np.dot(gamma, phi_native)
Out[120]:
In [ ]:
# Enumerate a running index over 2 blocks of the 210 unordered pairs (j <= k) that can
# be formed from 20 items, giving 420 consecutive indices.
# NOTE(review): the original cell was never executed and relied on `the_list` and a
# counter `c` that no cell defines for this purpose (`c` elsewhere holds the
# constraint target). Both are now initialised locally; starting at 0 is an
# assumption — confirm the intended first index.
the_list = []
pair_index = 0
for i in range(2):
    for j in range(20):
        for k in range(j, 20):
            the_list.append(pair_index)
            pair_index += 1
In [94]:
# Peek at the first ten gamma coefficients.
gamma[:10]
Out[94]:
In [89]:
# Native-energy contribution of coefficients 0..209.
# NOTE(review): presumably the direct pairwise-contact term — confirm against phi_list.txt.
np.dot(gamma[:210], phi_native[:210])
Out[89]:
In [91]:
# Same expression as the previous cell (coefficients 0..209), re-evaluated after a later run.
np.dot(gamma[:210], phi_native[:210])
Out[91]:
In [83]:
# Native-energy contribution of coefficients 210..419.
# NOTE(review): presumably one half of the density-mediated contact term — confirm.
np.dot(gamma[210:420], phi_native[210:420])
Out[83]:
In [84]:
# Native-energy contribution of coefficients 420..629.
# NOTE(review): presumably the other half of the density-mediated contact term — confirm.
np.dot(gamma[420:630], phi_native[420:630])
Out[84]:
In [92]:
# Native-energy contribution of coefficients 210..629, i.e. the sum of the
# [210:420] and [420:630] contributions.
np.dot(gamma[210:630], phi_native[210:630])
Out[92]:
In [93]:
# Native-energy contribution of all remaining coefficients (index 630 onward).
# NOTE(review): presumably the burial term — confirm against phi_list.txt.
np.dot(gamma[630:], phi_native[630:])
Out[93]:
In [87]:
# Total native energy: gamma dotted with the full native phi vector.
np.dot(gamma, phi_native)
Out[87]:
In [90]:
# Total native energy again (same expression, evaluated in a later execution).
np.dot(gamma, phi_native)
Out[90]:
In [96]:
# Ratio of two hand-copied numbers.
# NOTE(review): presumably two energies taken from earlier cell outputs; their
# provenance is not recorded in the notebook — confirm or replace with variables.
-608.0675497/594.140
Out[96]:
In [81]:
# Number of entries in the native phi vector (should match the length of gamma for the dot products above).
len(phi_native)
Out[81]:
In [72]:
def output_E_decoy(hamiltonian, training_set_file, gamma_file_name, training_decoy_method, num_decoys, test_set_file=None, test_decoy_method=None, use_filtered_gammas=False, **kwargs):
    """Collect native and decoy energies for every protein in a test set.

    Parameters
    ----------
    hamiltonian, training_set_file, gamma_file_name, num_decoys, use_filtered_gammas
        Forwarded unchanged to ``evaluate_hamiltonian_wei``.
    training_decoy_method
        Used as the decoy method when ``test_decoy_method`` is not given.
    test_set_file
        File whose column 1 lists the proteins to evaluate; defaults to
        ``training_set_file``.
    test_decoy_method
        Decoy method passed to ``evaluate_hamiltonian_wei``; defaults to
        ``training_decoy_method``.
    **kwargs
        Extra keyword arguments forwarded to ``evaluate_hamiltonian_wei``.

    Returns
    -------
    (names, e_natives, e_decoys)
        Three parallel lists: protein name, the native energy and the decoy
        energies returned by ``evaluate_hamiltonian_wei(..., outputDecoy=True)``.
    """
    if test_set_file is None:
        test_set_file = training_set_file
    if test_decoy_method is None:
        test_decoy_method = training_decoy_method
    test_set = read_column_from_file(test_set_file, 1)

    names = []
    e_natives = []
    e_decoys = []
    for protein in test_set:
        # Progress log: one line per protein with the exact arguments used.
        print(protein, hamiltonian, training_set_file, gamma_file_name, test_decoy_method, num_decoys, use_filtered_gammas)
        e_native, e_decoy = evaluate_hamiltonian_wei(
            protein, hamiltonian, training_set_file, gamma_file_name, test_decoy_method, num_decoys, use_filtered_gammas, outputDecoy=True, **kwargs)
        names.append(protein)
        e_natives.append(e_native)
        e_decoys.append(e_decoy)
    return names, e_natives, e_decoys
In [121]:
# Collect native and decoy energies for every protein listed in "protein_list" of the
# trial_4 run, using the constrained trial_4 gamma and 1000 "shuffle" decoys per protein.
# The relative file names below are resolved against the directory set by os.chdir.
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint"
names, e_natives, e_decoys = output_E_decoy("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)
In [124]:
# One figure per protein: histogram of its decoy energies (50 bins) with the native
# energy marked by a red vertical line from y=0 to y=100, saved as <name>.png.
for name, e_native, e_decoy in zip(names, e_natives, e_decoys):
    fig = plt.figure()
    ax = fig.gca()
    ax.hist(e_decoy, bins=50)
    ax.vlines(e_native, ymin=0, ymax=100, color="red")
    ax.set_title(name)
    fig.savefig(f"/Users/weilu/Dropbox/Optimization_Xfunnel/updated_figures/{name}.png")
In [125]:
# Validate the constrained trial_4 gamma on "protein_list" of the trial_4_duplicate run
# and display the result.
# NOTE(review): validate_hamiltonian_wei is defined outside this notebook; its positional
# arguments presumably follow the order used by output_E_decoy (hamiltonian, training
# set file, gamma file, decoy method, number of decoys) — confirm.
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4_duplicate/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint"
data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)
data
Out[125]:
In [31]:
# Validate the constrained trial_3 gamma on "protein_list" of the trial_4 run and
# display the result.
# NOTE(review): no cell in this notebook as saved writes the trial_3 gamma file; it
# presumably comes from an earlier execution with different settings — confirm.
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_3_cutoff100_impose_Aprime_constraint"
data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)
data
Out[31]:
In [16]:
# Validate the constrained trial_2 gamma on "protein_list_tiny" of the original
# peptide_optimization run and display the result.
# NOTE(review): no cell in this notebook as saved writes the trial_2 gamma file — confirm its origin.
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_cutoff100_impose_Aprime_constraint"
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_tiny", gamma_file_name, "shuffle", 1000, mode=0)
data
Out[16]:
In [39]:
# Mix the original gamma with the constrained, re-optimized gamma so that the result
# does not overfit too much: mixed = alpha * original + (1 - alpha) * new.
# Note: this cell overwrites the notebook-level names original_gamma and gamma_new.
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/original_gamma"
original_gamma = np.loadtxt(gamma_file_name)
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_3_cutoff100_impose_Aprime_constraint"
gamma_new = np.loadtxt(gamma_file_name)

alpha = 0.5  # weight given to the original gamma
# Round rather than truncate: binary floats make e.g. 0.29 * 100 == 28.999999999999996,
# which int() alone would turn into a file labelled 28 instead of 29.
alpha_percent = int(round(alpha * 100))
mixed_gamma = alpha*original_gamma + (1-alpha)*gamma_new

# The output file name carries the weight as a percentage suffix.
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/saved_gammas/"
np.savetxt(f"{save_gamma_pre}/trial_3_mixed_original_and_cutoff100_impose_Aprime_constraint_{alpha_percent}", mixed_gamma)
In [41]:
# Validate the 50 % mixed trial_3 gamma (the file written by the mixing cell with
# alpha = 0.5) on "protein_list" of the trial_4 run and display the result.
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_3_mixed_original_and_cutoff100_impose_Aprime_constraint_50"
data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)
data
Out[41]:
In [38]:
# Validate the un-suffixed mixed gamma file on "protein_list" of the trial_4 run and
# display the result.
# NOTE(review): no cell in this notebook as saved writes a mixed file without a trial
# prefix and percentage suffix — confirm which mixing weight produced it.
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/mixed_original_and_cutoff100_impose_Aprime_constraint"
data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)
data
Out[38]:
In [36]:
# Validate the trial_3 mixed gamma with suffix 80 on "protein_list" of the trial_4 run
# and display the result.
# NOTE(review): the mixing cell as saved uses alpha = 0.5; the "_80" file presumably
# came from running it with alpha = 0.8 — confirm.
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_3_mixed_original_and_cutoff100_impose_Aprime_constraint_80"
data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)
data
Out[36]:
In [23]:
# Validate the trial_2 mixed gamma with suffix 80 on "protein_list_small" of the
# trial_2 run and display the result.
# NOTE(review): the trial_2 mixed files are not written by any cell in this notebook as saved.
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_mixed_original_and_cutoff100_impose_Aprime_constraint_80"
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data
Out[23]:
In [18]:
# Validate the trial_2 mixed gamma with suffix 95 on "protein_list_small" of the
# trial_2 run and display the result.
# NOTE(review): the trial_2 mixed files are not written by any cell in this notebook as saved.
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_mixed_original_and_cutoff100_impose_Aprime_constraint_95"
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data
Out[18]:
In [21]:
# Validate the trial_2 mixed gamma with suffix 90 on "protein_list_small" of the
# trial_2 run and display the result.
# NOTE(review): the trial_2 mixed files are not written by any cell in this notebook as saved.
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_mixed_original_and_cutoff100_impose_Aprime_constraint_90"
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data
Out[21]:
In [20]:
# Validate the un-suffixed mixed gamma file on "protein_list_small" of the trial_2 run
# and display the result.
# NOTE(review): no cell in this notebook as saved writes a mixed file without a trial
# prefix and percentage suffix — confirm which mixing weight produced it.
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/mixed_original_and_cutoff100_impose_Aprime_constraint"
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data
Out[20]:
In [ ]: