In [ ]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
from small_script.myFunctions import *
sys.path.insert(0, "/Users/weilu/openmmawsem")
from helperFunctions.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
# Notebook-wide matplotlib defaults: golden-ratio figure size, white figure
# background, 100 dpi and a larger base font.
plt.rcParams.update({
    'figure.figsize': np.array([16.18033, 10]),  # golden ratio
    'figure.facecolor': 'w',
    'figure.dpi': 100,
    'font.size': 22,
})
In [146]:
pwd
Out[146]:
In [147]:
# Folder of the optimization run; the validation cells chdir into `pre`.
pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization/"
# Prefix used when composing the saved gamma file names.
trial_name = "iter2"
# Folder with the saved gamma files; relative, so it resolves against the current working directory.
save_gamma_pre = "saved_gammas"
In [165]:
In [187]:
# Read the validation table of iterations 2, 3 and 4 and tag every row with
# the iteration it belongs to.
d_ = []
for i in range(2, 5):
    iteration_csv = f"/Users/weilu/Research/data/optimization_iter{i}.csv"
    d = pd.read_csv(iteration_csv, index_col=0)
    d = d.assign(iteration=f"iter_{i}")
    d_.append(d)
In [188]:
# Add the iter4 validation table obtained with the cutoff-600 gamma.
# The label is written out explicitly; it used to be built from `i`, which is
# only the leftover loop variable of the cell that loads iterations 2-4.
d = pd.read_csv("/Users/weilu/Research/data/optimization_iter4_600.csv", index_col=0)
d_.append(d.assign(iteration="iter_4_600"))
In [189]:
# Add the iter4 validation table obtained with the cutoff-500 gamma.
# The label is written out explicitly; it used to be built from `i`, which is
# only the leftover loop variable of the cell that loads iterations 2-4.
d = pd.read_csv("/Users/weilu/Research/data/optimization_iter4_500_iter4.csv", index_col=0)
d_.append(d.assign(iteration="iter_4_500_iter4"))
In [190]:
# Stack the per-iteration tables into one long frame; the `iteration` column tells them apart.
data = pd.concat(d_)
In [191]:
# Z score of every protein, one line per optimization iteration.
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so the positional form raises a TypeError there.
sns.lineplot(data=data, x="Protein", y="Z_scores", hue="iteration")
Out[191]:
In [183]:
# Validation run for the iter4 gamma at cutoff 500, executed from inside the
# optimization_iter4 folder. The table is saved to CSV and displayed.
trial_name = "iter4"
cutoff = 500
pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter4/"
os.chdir(f"{pre}")
gamma_pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter4/saved_gammas"
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "openMM", 500, mode=0)
output_csv = f"/Users/weilu/Research/data/optimization_{trial_name}_{cutoff}_{trial_name}.csv"
data.to_csv(output_csv)
data
Out[183]:
In [177]:
# Validation run for the iter4 gamma at cutoff 600. `pre` is whatever an
# earlier cell set it to; the table is saved to CSV and displayed.
trial_name = "iter4"
cutoff = 600
os.chdir(f"{pre}")
gamma_pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter4/saved_gammas"
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "openMM", 500, mode=0)
output_csv = f"/Users/weilu/Research/data/optimization_{trial_name}_{cutoff}.csv"
data.to_csv(output_csv)
data
Out[177]:
In [164]:
# Validation run for the iter4 gamma at cutoff 400. `pre` is whatever an
# earlier cell set it to; the table is saved to CSV and displayed.
trial_name = "iter4"
cutoff = 400
os.chdir(f"{pre}")
gamma_pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter4/saved_gammas"
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "openMM", 500, mode=0)
output_csv = "/Users/weilu/Research/data/optimization_iter4.csv"
data.to_csv(output_csv)
data
Out[164]:
In [168]:
# Validation run for the iter3 gamma at cutoff 400. `pre` is whatever an
# earlier cell set it to; the table is saved to CSV and displayed.
trial_name = "iter3"
cutoff = 400
os.chdir(f"{pre}")
gamma_pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter3/saved_gammas"
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "openMM", 500, mode=0)
output_csv = "/Users/weilu/Research/data/optimization_iter3.csv"
data.to_csv(output_csv)
data
Out[168]:
In [169]:
# Validation run for the iter2 gamma at cutoff 400. `pre` is whatever an
# earlier cell set it to; the table is saved to CSV and displayed.
cutoff = 400
os.chdir(f"{pre}")
gamma_pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter2/saved_gammas"
trial_name = "iter2"
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "openMM", 500, mode=0)
output_csv = "/Users/weilu/Research/data/optimization_iter2.csv"
data.to_csv(output_csv)
data
Out[169]:
In [152]:
# Baseline validation run with the original gamma file. `cutoff` is set but
# does not enter the file name here. The table is displayed, not saved.
cutoff = 100
os.chdir(f"{pre}")
gamma_file_name = "/Users/weilu/opt/parameters/original_gamma"
print(gamma_file_name)

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "openMM", 500, mode=0)
data
Out[152]:
In [158]:
# Scratch check: the even integers 0, 2, ..., 10.
list(range(0, 11, 2))
Out[158]:
In [149]:
# Validation run for the cutoff-100 gamma of the current trial. The file is
# looked up under `save_gamma_pre` using `trial_name`, both set in earlier
# cells. The table is displayed, not saved.
cutoff = 100
os.chdir(f"{pre}")
gamma_file_name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "openMM", 500, mode=0)
data
Out[149]:
In [181]:
pwd
Out[181]:
In [182]:
# Render and save the three-panel gamma figure for every cutoff of iter4.
cutoff_list = [100, 200, 300, 400, 500, 600]
cutoff_list += [10, 20, 30, 40, 50, 80]
save_gamma_pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter4/saved_gammas"
trial_name = "iter4"
# Create the output folder in-process instead of shelling out to `mkdir -p`:
# no shell quoting issues, and a failure raises instead of passing silently.
os.makedirs(f"{save_gamma_pre}/figures", exist_ok=True)
for cutoff_i in cutoff_list:
    name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff_i}_impose_Aprime_constraint"
    filtered_gamma = np.loadtxt(name)
    figureName = f"{save_gamma_pre}/figures/{trial_name}_cutoff{cutoff_i}"
    title = f"{trial_name}_cutoff{cutoff_i}"
    show_together(filtered_gamma, figureName, title=title)
In [151]:
# Render and save the three-panel gamma figure for every cutoff of the current
# trial; `save_gamma_pre` and `trial_name` come from earlier cells.
cutoff_list = [100, 200, 300, 400, 500]
cutoff_list += [10, 20, 30, 40, 50, 80]
# Create the output folder in-process instead of shelling out to `mkdir -p`:
# no shell quoting issues, and a failure raises instead of passing silently.
os.makedirs(f"{save_gamma_pre}/figures", exist_ok=True)
for cutoff_i in cutoff_list:
    name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff_i}_impose_Aprime_constraint"
    filtered_gamma = np.loadtxt(name)
    figureName = f"{save_gamma_pre}/figures/{trial_name}_cutoff{cutoff_i}"
    title = f"{trial_name}_cutoff{cutoff_i}"
    show_together(filtered_gamma, figureName, title=title)
In [4]:
# PDB list: `pre` is the awsem_contact_term folder and `databaseFolder` is its
# database/dompdb/ subfolder, which is searched for *.pdb files.
pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/awsem_contact_term/"
databaseFolder = pre + "database/dompdb/"
In [13]:
# `glob` is not imported explicitly anywhere in this notebook; import it here
# so the cell does not depend on a star import happening to expose it.
import glob

# Every .pdb file in the database folder, reduced to its bare name: the last
# path component cut at the first dot.
pdbFolderList = glob.glob(databaseFolder+"*.pdb")
pdbNames = [pdb_path.split("/")[-1].split(".")[0] for pdb_path in pdbFolderList]
In [14]:
# Number of .pdb entries found in the database folder.
len(pdbNames)
Out[14]:
In [21]:
# Keep only the PDB names that have a matching .seq file under database/S20_seq.
filtered_pdbNames = [
    pdb_name
    for pdb_name in pdbNames
    if os.path.exists(pre+f"/database/S20_seq/{pdb_name}.seq")
]
In [22]:
import shutil

# Copy the filtered alignment file of every selected PDB into {pre}/alignments
# and print the names that have none. The folder creation and the copy are
# done in-process rather than through `mkdir`/`cp` shell commands, so paths
# need no shell quoting and a failed copy raises instead of passing silently.
alignment_source = "/Users/weilu/Research/optimization/mediated_term/multisequenceanddcafrustratometry"
alignment_dir = f"{pre}/alignments"
os.makedirs(alignment_dir, exist_ok=True)
for pdb in filtered_pdbNames:
    source_file = f"{alignment_source}/{pdb}_filtered_0.05.seqs"
    if os.path.exists(source_file):
        shutil.copy(source_file, alignment_dir)
    else:
        # No alignment available for this entry; report it.
        print(pdb)
In [23]:
# Write the selected PDB names to {pre}/protein_list, one name per line.
with open(f"{pre}/protein_list", "w") as out:
    out.writelines(pdb_name + "\n" for pdb_name in filtered_pdbNames)
In [24]:
# Information about the alignments: for every selected PDB, record the number
# of lines in its filtered alignment file as a [pdb, line_count] pair.
info = []
for pdb in filtered_pdbNames:
    alignment_file = f"{pre}/alignments/{pdb}_filtered_0.05.seqs"
    with open(alignment_file) as f:
        n_lines = len(f.readlines())
    info.append([pdb, n_lines])
In [3]:
# Point `pre` at the gammas folder of awsem_contact_term and name the trial.
# `pre` is passed to get_filtered_gamma and `trial_name` prefixes the saved gamma files.
pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/awsem_contact_term/gammas/"
trial_name = "trial_1_multiseq"
In [4]:
# Identifier handed to get_filtered_gamma as its third argument.
# NOTE(review): presumably it encodes the protein list plus the phi terms and their well parameters — confirm.
pp = f"protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
# Alternative identifier that additionally ends in phi_debye_huckel_well0 (currently disabled):
# pp = f"protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0phi_debye_huckel_well0"
In [87]:
# Re-derive the gamma for every cutoff under the additional constraint
#     A' . gamma = A' . original_gamma   (a constant, noted as -562.23),
# save it, and convert it to the simulation format.
gamma_file_name = "/Users/weilu/opt/parameters/original_gamma"
original_gamma = np.loadtxt(gamma_file_name)
# a = list(original_gamma)
# a.append(1)
# original_gamma_deybe = np.array(a)

# The output folder does not depend on the cutoff, so it is set once here and
# created up front; np.savetxt does not create missing folders.
save_gamma_pre = "/Users/weilu/Research/server/dec_2019/saved_gammas/"
os.makedirs(save_gamma_pre, exist_ok=True)

cutoff_list = [100, 200, 300, 400, 500, 600]
cutoff_list += [10, 20, 30, 40, 50, 80]
for cutoff_i in cutoff_list:
    A, A_prime, filtered_gamma, filtered_B_inv = get_filtered_gamma(pre, cutoff_i, pp)
    # Right-hand side of the constraint.
    c = np.dot(A_prime, original_gamma)
    B_inv = filtered_B_inv
    # lambda_2 is chosen so that A' . gamma_new == c: substituting
    # gamma_new = B_inv (A - lambda_2 A') into the constraint and solving for
    # lambda_2 gives the expression below.
    # NOTE(review): assumes A and A_prime are 1-D and B_inv is a square matrix — confirm.
    lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime))
    gamma_new = B_inv.dot(A - A_prime * lambda_2)
    name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff_i}_impose_Aprime_constraint"
    np.savetxt(name, gamma_new)
    # Hand the saved gamma to the external converter script; its exit status is not checked.
    cmd = f"convert_to_simulation_format.py {name} {save_gamma_pre}/Dec11_{trial_name}_cutoff{cutoff_i}_impose_Aprime_constraint"
    os.system(cmd)
In [50]:
def plot_contact_all(gammas, ax, invert_sign=True, fix_colorbar=True, inferBound=False,
                     vmin=-0.3, vmax=0.3, fix_confidence_colorbar=True, confidence_vmin=0,
                     confidence_vmax=1.0, plot_confidence=False, confidence_lower=None, confidence_upper=None):
    """Draw a symmetric 20 x 20 residue-pair gamma matrix as a heat map on ``ax``.

    The first 210 entries of ``gammas`` are read as the upper triangle
    (diagonal included) of a 20 x 20 matrix, row by row, and mirrored into the
    lower triangle. Rows and columns are reordered through the
    ``hydrophobicity_map`` / ``inverse_res_type_map`` lookups, and the ticks
    are labelled with ``hydrophobicity_letters``. Those three names are read
    from the enclosing namespace and are not defined in this function.

    Parameters
    ----------
    gammas : sequence of float
        At least 210 values, indexable by integer position.
    ax : matplotlib Axes
        Axes that receives the heat map and its colorbar.
    invert_sign : bool
        Multiply the matrix by -1 before plotting (AWSEM sign convention).
    fix_colorbar : bool
        If true, use the "bwr" colormap limited to ``vmin``/``vmax``;
        otherwise use "RdBu_r" with automatic limits.
    inferBound : bool
        If true, replace ``vmin``/``vmax`` with the minimum/maximum of the
        matrix after the optional sign inversion.
    vmin, vmax : float
        Color limits used when ``fix_colorbar`` is true and ``inferBound`` is false.
    fix_confidence_colorbar, confidence_vmin, confidence_vmax, plot_confidence,
    confidence_lower, confidence_upper
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    None
    """
    size = 20
    interaction_matrix = np.zeros((size, size))
    i_content = 0
    for i in range(size):
        # The row index depends only on i, so look it up once per row.
        index1 = hydrophobicity_map[inverse_res_type_map[i]]
        for j in range(i, size):
            index2 = hydrophobicity_map[inverse_res_type_map[j]]
            interaction_matrix[index1][index2] = gammas[i_content]
            interaction_matrix[index2][index1] = gammas[i_content]
            i_content += 1

    # The minus sign is here to be consistent with the way AWSEM thinks about gammas
    if invert_sign:
        interaction_matrix *= -1

    if inferBound:
        vmin = np.min(interaction_matrix)
        vmax = np.max(interaction_matrix)

    if fix_colorbar:
        mesh = ax.pcolor(interaction_matrix, vmin=vmin, vmax=vmax, cmap="bwr")
    else:
        mesh = ax.pcolor(interaction_matrix, cmap="RdBu_r")

    # Attach the colorbar explicitly to `ax` (and to its own figure) instead of
    # relying on pyplot's notion of the current axes, which is only correct
    # when the caller has just made `ax` current.
    ax.figure.colorbar(mesh, ax=ax, fraction=0.046, pad=0.04)

    # put the major ticks at the middle of each cell
    ax.set_yticks(np.arange(interaction_matrix.shape[0]) + 0.5, minor=False)
    ax.set_xticks(np.arange(interaction_matrix.shape[1]) + 0.5, minor=False)
    ax.set_xticklabels(hydrophobicity_letters)
    ax.set_yticklabels(hydrophobicity_letters)
In [83]:
def show_together(filtered_gamma, figureName, title="test"):
    """Plot three slices of ``filtered_gamma`` side by side and save the figure.

    The slices are entries [0:210], [210:420] and [420:], shown as panels
    titled 'Direct', 'High density(protein)' and 'Low density(water)'. Each
    panel is drawn by ``plot_contact_all`` with color limits inferred from the
    data. The figure is saved to ``figureName``; nothing is returned.
    """
    fig = plt.figure()
    panels = (
        (filtered_gamma[:210], 'Direct'),
        (filtered_gamma[210:420], 'High density(protein)'),
        (filtered_gamma[420:], 'Low density(water)'),
    )
    for column, (gamma_slice, panel_title) in enumerate(panels, start=1):
        panel = plt.subplot(1, 3, column)
        panel.set_aspect('equal')
        plot_contact_all(gamma_slice, panel, inferBound=True)
        panel.title.set_text(panel_title)
    fig.suptitle(title, fontsize=20, y=0.75)
    fig.tight_layout()
    plt.savefig(figureName)
In [88]:
# Draw and save the three-panel gamma figure for every cutoff, reading the
# constrained gammas from `save_gamma_pre` (set in an earlier cell).
cutoff_list = [100, 200, 300, 400, 500, 600] + [10, 20, 30, 40, 50, 80]
for cutoff_i in cutoff_list:
    title = f"{trial_name}_cutoff{cutoff_i}"
    figureName = f"/Users/weilu/Research/server/dec_2019/saved_gammas/figures/{trial_name}_cutoff{cutoff_i}"
    name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff_i}_impose_Aprime_constraint"
    filtered_gamma = np.loadtxt(name)
    show_together(filtered_gamma, figureName, title=title)
In [80]:
# Preview of the cutoff-400 gamma as three side-by-side panels; the figure is
# only displayed, not written to disk.
cutoff_i = 400
name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff_i}_impose_Aprime_constraint"
filtered_gamma = np.loadtxt(name)

fig = plt.figure()
ax1 = plt.subplot(1, 3, 1)
ax1.set_aspect('equal')
plot_contact_all(filtered_gamma[:210], ax1, inferBound=True)
ax1.title.set_text('Direct')

ax2 = plt.subplot(1, 3, 2)
ax2.set_aspect('equal')
plot_contact_all(filtered_gamma[210:420], ax2, inferBound=True)
ax2.title.set_text('High density(protein)')

ax3 = plt.subplot(1, 3, 3)
ax3.set_aspect('equal')
plot_contact_all(filtered_gamma[420:], ax3, inferBound=True)
ax3.title.set_text('Low density(water)')

fig.suptitle(f"{trial_name}_cutoff{cutoff_i}", fontsize=20, y=0.75)
fig.tight_layout()
In [202]:
# Load the Final_2Sm table from the original_pdbs folder.
a = pd.read_csv("/Users/weilu/Research/server/dec_2019/iterative_optimization/original_pdbs/Final_2Sm.csv")
In [203]:
# Keep only the rows whose Class is 'α'. This rebinds `a`, so the unfiltered
# table is no longer reachable under that name afterwards.
a = a.query("Class == 'α'")
In [204]:
# Unique PDB identifiers as strings, leaving out entries that contain "(" and
# the string form of missing values ("nan").
pdb_list = [
    pdb_id
    for pdb_id in map(str, a.PDB.unique())
    if "(" not in pdb_id and pdb_id != "nan"
]
In [205]:
# Number of PDB identifiers kept in pdb_list.
len(pdb_list)
Out[205]:
In [206]:
# Rows of `a` whose PDB is in pdb_list, re-indexed from 0 and cut to the first 23 columns.
second_test_test = a.query("PDB in @pdb_list").reset_index(drop=True).iloc[:, :23]
In [207]:
# Save the second test set next to the source table.
second_test_test.to_csv("/Users/weilu/Research/server/dec_2019/iterative_optimization/original_pdbs/second_test_set.csv")
In [208]:
# Lower-cased PDB identifiers of the second test set as a plain list.
second_test_test.PDB.str.lower().to_list()
Out[208]:
In [ ]:
# Randomly select 20 PDB identifiers. A dedicated generator with a fixed seed
# makes the draw repeatable after a kernel restart without touching the global
# random state.
# NOTE(review): a first_test_set.csv that was produced from an earlier unseeded
# draw will not match this selection — keep that file if it is the reference.
randomly_selected = random.Random(20191211).sample(pdb_list, 20)
In [128]:
# Rows of `a` for the randomly selected PDBs, re-indexed from 0 and cut to the first 23 columns.
first_test_test = a.query("PDB in @randomly_selected").reset_index(drop=True).iloc[:, :23]
In [129]:
# Save the first test set next to the source table.
first_test_test.to_csv("/Users/weilu/Research/server/dec_2019/iterative_optimization/original_pdbs/first_test_set.csv")
In [137]:
# Reload the saved first test set, using the first CSV column as the index.
d = pd.read_csv("/Users/weilu/Research/server/dec_2019/iterative_optimization/original_pdbs/first_test_set.csv", index_col=0)
In [142]:
# Lower-cased PDB identifiers of the reloaded table as a plain list.
d.PDB.str.lower().to_list()
Out[142]:
In [111]:
# Sanity check: size of the random selection.
len(randomly_selected)
Out[111]:
In [139]:
# Number of rows currently in `a`.
len(a)
Out[139]:
In [145]:
# Display `d` ordered by its Lpdb column (ascending, the sort_values default).
d.sort_values("Lpdb")
Out[145]:
In [ ]: