In [1]:
import os
import sys
import glob
import shutil
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
from small_script.myFunctions import *
sys.path.insert(0, "/Users/weilu/openmmawsem")
from helperFunctions.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
# Notebook-wide matplotlib defaults, applied in a single update call.
plt.rcParams.update({
    'figure.figsize': np.array([16.18033, 10]),  # golden ratio
    'figure.facecolor': 'w',
    'figure.dpi': 100,
    'font.size': 22,
})
In [ ]:
# Make the notebook's own folder the working directory; later cells chdir again
# into the individual optimization folders.
os.chdir('/Users/weilu/opt/notebook/Optimization')
In [ ]:
# Validate the iter0 / cutoff-600 gamma inside the three_well_optimization_iter0
# folder and store the resulting table as CSV.
trial_name = "iter0"
cutoff = 600
pre = "/Users/weilu/Research/server/feb_2020/cath_dataset_shuffle_optimization/three_well_optimization_iter0/"
gamma_pre = f"{pre}/saved_gammas"
# validate_hamiltonian_wei receives relative file names, so run from `pre`.
os.chdir(pre)
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei(
    "phi_list.txt",
    "protein_list",
    gamma_file_name,
    "shuffle",
    1000,
    mode=0,
)
dataFile = f"/Users/weilu/Research/data/optimization_2020_{trial_name}_{cutoff}_{trial_name}.csv"
data.to_csv(dataFile)
print(dataFile)
In [8]:
# Same validation as the three-well run, but inside the standard
# optimization_iter0 folder; the CSV name carries an extra "cath" tag.
trial_name = "iter0"
cutoff = 600
pre = "/Users/weilu/Research/server/feb_2020/cath_dataset_shuffle_optimization/optimization_iter0/"
gamma_pre = f"{pre}/saved_gammas"
# validate_hamiltonian_wei receives relative file names, so run from `pre`.
os.chdir(pre)
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei(
    "phi_list.txt",
    "protein_list",
    gamma_file_name,
    "shuffle",
    1000,
    mode=0,
)
dataFile = f"/Users/weilu/Research/data/optimization_2020_cath_{trial_name}_{cutoff}_{trial_name}.csv"
data.to_csv(dataFile)
print(dataFile)
In [29]:
# Load both cached validation tables and rank each by Z-score. The second
# reset_index() turns the 0..N-1 rank into an "index" column for plotting.
data_standard = (
    pd.read_csv("/Users/weilu/Research/data/optimization_2020_cath_iter0_600_iter0.csv", index_col=0)
    .sort_values("Z_scores")
    .reset_index(drop=True)
    .reset_index()
)
data_three_well = (
    pd.read_csv('/Users/weilu/Research/data/optimization_2020_iter0_600_iter0.csv', index_col=0)
    .sort_values("Z_scores")
    .reset_index(drop=True)
    .reset_index()
)
In [32]:
# Stack both ranked tables, tagging every row with the Hamiltonian it came from.
data = pd.concat(
    [
        data_standard.assign(Hamiltonian="standard"),
        data_three_well.assign(Hamiltonian="three_well"),
    ]
)
In [35]:
# Ranked Z-scores, one line per Hamiltonian.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", hue="Hamiltonian", data=data)
Out[35]:
In [91]:
# Validate the iter2 / cutoff-400 gamma inside mass_specific_decoys/optimization_iter2
# and plot the Z-scores in ascending order.
cutoff = 400
i = 2
pre = f"/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter{i}/"
os.chdir(pre)  # validate_hamiltonian_wei is given relative file names
gamma_pre = f"{pre}/saved_gammas"
trial_name = f"iter{i}"
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_complete", gamma_file_name, "openMM", 50, mode=0)
# After sorting, the fresh 0..N-1 index becomes the "index" column used as the rank axis.
data = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data)
plt.title("optimization_gamma_cutoff400")
Out[91]:
In [90]:
# Validate the iter2 / cutoff-300 gamma inside mass_specific_decoys/optimization_iter2
# and plot the Z-scores in ascending order.
cutoff = 300
i = 2
pre = f"/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter{i}/"
os.chdir(pre)  # validate_hamiltonian_wei is given relative file names
gamma_pre = f"{pre}/saved_gammas"
trial_name = f"iter{i}"
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_complete", gamma_file_name, "openMM", 50, mode=0)
# After sorting, the fresh 0..N-1 index becomes the "index" column used as the rank axis.
data = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data)
plt.title("optimization_gamma_cutoff300")
Out[90]:
In [88]:
# Score the optimization_iter1/iter_1_30 gamma on the iter2 folder's
# "protein_list_complete" and plot the Z-scores in ascending order.
# cutoff, gamma_pre and trial_name do not influence the gamma used here; they are
# still assigned so the notebook-level variables keep the same values as before.
cutoff = 300
i = 2
pre = f"/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter{i}/"
os.chdir(pre)  # validate_hamiltonian_wei is given relative file names
gamma_pre = f"{pre}/saved_gammas"
trial_name = f"iter{i}"
gamma_file_name = "/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter1/iter_1_30"
print(gamma_file_name)
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_complete", gamma_file_name, "openMM", 50, mode=0)
data = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data)
# NOTE(review): the title says cutoff300 but the gamma plotted is iter_1_30 — confirm the label.
plt.title("optimization_gamma_cutoff300")
Out[88]:
In [89]:
# Score the optimization_iter1/iter_1_30 gamma on the iter2 folder's
# "protein_list" and plot the Z-scores in ascending order.
# cutoff, gamma_pre and trial_name do not influence the gamma used here; they are
# still assigned so the notebook-level variables keep the same values as before.
cutoff = 300
i = 2
pre = f"/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter{i}/"
os.chdir(pre)  # validate_hamiltonian_wei is given relative file names
gamma_pre = f"{pre}/saved_gammas"
trial_name = f"iter{i}"
gamma_file_name = "/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter1/iter_1_30"
print(gamma_file_name)
data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "openMM", 50, mode=0)
data = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data)
# NOTE(review): the title says cutoff300 but the gamma plotted is iter_1_30 — confirm the label.
plt.title("optimization_gamma_cutoff300")
Out[89]:
In [75]:
# Validate the iter1 / cutoff-100 gamma inside mass_specific_decoys/optimization_iter1
# and write the resulting table to CSV.
i = 1
cutoff = 100
trial_name = f"iter{i}"
pre = f"/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter{i}/"
gamma_pre = f"{pre}/saved_gammas"
# validate_hamiltonian_wei receives relative file names, so run from `pre`.
os.chdir(pre)
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei(
    "phi_list.txt",
    "protein_list_complete",
    gamma_file_name,
    "openMM",
    50,
    mode=0,
)
# NOTE(review): "deocys" in the file name looks like a typo for "decoys"; it is kept
# so that already-written files are still addressed by the same path.
dataFile = f"/Users/weilu/Research/data/optimization_2020_mass_specific_deocys_{trial_name}_{cutoff}_{trial_name}.csv"
data.to_csv(dataFile)
print(dataFile)
In [76]:
# Rank the current `data` table by Z-score and plot it.
# NOTE: `data` is re-bound in place, so re-running this cell adds another "index" column level.
data = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data)
plt.title("optimization_gamma_cutoff100")
Out[76]:
In [84]:
# Validate the iter1 / cutoff-300 gamma inside mass_specific_decoys/optimization_iter1
# and plot the Z-scores in ascending order.
cutoff = 300
i = 1
pre = f"/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter{i}/"
os.chdir(pre)  # validate_hamiltonian_wei is given relative file names
gamma_pre = f"{pre}/saved_gammas"
trial_name = f"iter{i}"
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_complete", gamma_file_name, "openMM", 50, mode=0)
data = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data)
plt.title("optimization_gamma_cutoff300")
Out[84]:
In [77]:
# Baseline: score the original gamma file on the iter1 folder's
# "protein_list_complete" and plot the Z-scores in ascending order.
# cutoff, gamma_pre and trial_name do not influence the gamma used here; they are
# still assigned so the notebook-level variables keep the same values as before.
cutoff = 100
i = 1
pre = f"/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter{i}/"
os.chdir(pre)  # validate_hamiltonian_wei is given relative file names
gamma_pre = f"{pre}/saved_gammas"
trial_name = f"iter{i}"
gamma_file_name = "/Users/weilu/opt/parameters/original_gamma"
print(gamma_file_name)
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_complete", gamma_file_name, "openMM", 50, mode=0)
data = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data)
plt.title("original_gamma")
Out[77]:
In [79]:
# Score the optimization_iter0/iter_0_30 gamma on the iter1 folder's
# "protein_list_complete" and plot the Z-scores in ascending order.
# cutoff, gamma_pre and trial_name do not influence the gamma used here; they are
# still assigned so the notebook-level variables keep the same values as before.
cutoff = 100
i = 1
pre = f"/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter{i}/"
os.chdir(pre)  # validate_hamiltonian_wei is given relative file names
gamma_pre = f"{pre}/saved_gammas"
trial_name = f"iter{i}"
gamma_file_name = "/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter0/iter_0_30"
print(gamma_file_name)
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_complete", gamma_file_name, "openMM", 50, mode=0)
data = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data)
plt.title("iter_0_30")
Out[79]:
In [81]:
# Score the optimization_iter1/iter_1_30 gamma on the iter1 folder's
# "protein_list_complete" and plot the Z-scores in ascending order.
# cutoff, gamma_pre and trial_name do not influence the gamma used here; they are
# still assigned so the notebook-level variables keep the same values as before.
cutoff = 100
i = 1
pre = f"/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter{i}/"
os.chdir(pre)  # validate_hamiltonian_wei is given relative file names
gamma_pre = f"{pre}/saved_gammas"
trial_name = f"iter{i}"
gamma_file_name = "/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter1/iter_1_30"
print(gamma_file_name)
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_complete", gamma_file_name, "openMM", 50, mode=0)
data = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data)
plt.title("iter_1_30")
Out[81]:
In [ ]:
In [73]:
# Validate the iter0 / cutoff-100 gamma inside mass_specific_decoys/optimization_iter0
# and write the resulting table to CSV.
trial_name = "iter0"
cutoff = 100
pre = "/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter0/"
gamma_pre = f"{pre}/saved_gammas"
# validate_hamiltonian_wei receives relative file names, so run from `pre`.
os.chdir(pre)
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei(
    "phi_list.txt",
    "protein_list",
    gamma_file_name,
    "openMM",
    50,
    mode=0,
)
# NOTE(review): "deocys" in the file name looks like a typo for "decoys"; it is kept
# so that already-written files are still addressed by the same path.
dataFile = f"/Users/weilu/Research/data/optimization_2020_mass_specific_deocys_{trial_name}_{cutoff}_{trial_name}.csv"
data.to_csv(dataFile)
print(dataFile)
In [74]:
# Rank the current `data` table by Z-score and plot it.
# NOTE: `data` is re-bound in place, so re-running this cell adds another "index" column level.
data = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data)
plt.title("optimization_gamma_cutoff100")
Out[74]:
In [67]:
# Halve the default figure size (same aspect ratio) for the plots that follow.
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10]) #golden ratio
In [69]:
# Baseline: score the original gamma file on the iter0 folder's "protein_list".
# The earlier version assigned gamma_file_name twice; only the last assignment
# (the original gamma) ever took effect, so the overwritten one is removed.
cutoff = 100
pre = "/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter0/"
os.chdir(pre)  # validate_hamiltonian_wei is given relative file names
# gamma_pre and trial_name are kept so the notebook-level variables stay defined as before.
gamma_pre = f"{pre}/saved_gammas"
trial_name = "iter0"
gamma_file_name = "/Users/weilu/opt/parameters/original_gamma"
print(gamma_file_name)
data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "openMM", 50, mode=0)
In [70]:
# Rank the current `data` table by Z-score and plot it.
# NOTE: `data` is re-bound in place, so re-running this cell adds another "index" column level.
data = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data)
plt.title("original_gamma")
Out[70]:
In [62]:
# Score the iter_0_30 gamma (stored directly under `pre`) on the iter0 folder's
# "protein_list". The earlier version assigned gamma_file_name four times in a row;
# only the last value was ever used, so the three overwritten assignments are removed.
cutoff = 100
pre = "/Users/weilu/Research/server/feb_2020/mass_specific_decoys/optimization_iter0/"
os.chdir(pre)  # validate_hamiltonian_wei is given relative file names
# gamma_pre is kept so the notebook-level variable stays defined as before.
gamma_pre = f"{pre}/saved_gammas"
trial_name = "iter0"
gamma_file_name = f"{pre}/iter_0_30"
print(gamma_file_name)
data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "openMM", 50, mode=0)
# The CSV path is only computed and printed; writing is intentionally disabled in this cell.
dataFile = f"/Users/weilu/Research/data/optimization_2020_mass_specific_deocys_{trial_name}_{cutoff}_{trial_name}.csv"
# data.to_csv(dataFile)
print(dataFile)
In [68]:
# Rank the current `data` table by Z-score and plot it.
# NOTE: `data` is re-bound in place, so re-running this cell adds another "index" column level.
data = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data)
plt.title("mixed_iter_0_30")
Out[68]:
In [60]:
# Keep a ranked copy of `data` under its own name (leaving `data` untouched) and plot it.
data_specific_decoys = data.sort_values("Z_scores").reset_index(drop=True).reset_index()
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="index", y="Z_scores", data=data_specific_decoys)
Out[60]:
In [39]:
# Display whatever table is currently bound to `data` (depends on which cell ran last).
data
Out[39]:
In [11]:
# Path of the cached three-well iter0 / cutoff-600 validation table.
# (Previously bound to the misspelled name `dataFiel`, so `dataFile` was never updated.)
# dataFile = f"/Users/weilu/Research/data/optimization_2020_{trial_name}_{cutoff}_{trial_name}.csv"
dataFile = '/Users/weilu/Research/data/optimization_2020_iter0_600_iter0.csv'
In [146]:
# Show the kernel's current working directory (IPython automagic for %pwd).
pwd
Out[146]:
In [147]:
# Settings for the dec_2019 multi-density optimization cells below; several of them
# read `pre`, `trial_name` and `save_gamma_pre` without redefining them.
pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization/"
trial_name = "iter2"
# Relative folder name: resolved against the current working directory when used.
save_gamma_pre = "saved_gammas"
In [165]:
In [187]:
# Gather the cached validation tables of iterations 2-4, each tagged with an
# "iteration" label. A plain for loop is used on purpose: following cells read the
# loop variable `i` after the loop has finished.
d_ = []
for i in (2, 3, 4):
    csv_path = f"/Users/weilu/Research/data/optimization_iter{i}.csv"
    d = pd.read_csv(csv_path, index_col=0)
    d_.append(d.assign(iteration=f"iter_{i}"))
In [188]:
# Add the iter4 / cutoff-600 validation table to the comparison list `d_`.
d = pd.read_csv("/Users/weilu/Research/data/optimization_iter4_600.csv", index_col=0)
# The label is spelled out instead of being built from the loop variable `i`, which
# only happened to still be 4 after the collecting loop had finished.
d_.append(d.assign(iteration="iter_4_600"))
In [189]:
# Add the iter4 / cutoff-500 validation table to the comparison list `d_`.
d = pd.read_csv("/Users/weilu/Research/data/optimization_iter4_500_iter4.csv", index_col=0)
# The label is spelled out instead of being built from the loop variable `i`, which
# only happened to still be 4 after the collecting loop had finished.
d_.append(d.assign(iteration="iter_4_500_iter4"))
In [190]:
# Stack all labelled tables collected in `d_` into one long frame for plotting.
data = pd.concat(d_)
In [191]:
# Z-score per protein, one line per iteration label.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data` positionally.
sns.lineplot(x="Protein", y="Z_scores", data=data, hue="iteration")
Out[191]:
In [183]:
# Validate the iter4 / cutoff-500 gamma from inside optimization_iter4, cache the
# table as CSV and display it.
trial_name = "iter4"
cutoff = 500
pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter4/"
gamma_pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter4/saved_gammas"
# validate_hamiltonian_wei receives relative file names, so run from `pre`.
os.chdir(pre)
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei(
    "phi_list.txt",
    "protein_list",
    gamma_file_name,
    "openMM",
    500,
    mode=0,
)
data.to_csv(f"/Users/weilu/Research/data/optimization_{trial_name}_{cutoff}_{trial_name}.csv")
data
Out[183]:
In [177]:
# Validate the iter4 / cutoff-600 gamma, cache the table as CSV and display it.
# `pre` is not set here: the working directory comes from whichever cell defined it last.
trial_name = "iter4"
cutoff = 600
gamma_pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter4/saved_gammas"
os.chdir(pre)
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei(
    "phi_list.txt",
    "protein_list",
    gamma_file_name,
    "openMM",
    500,
    mode=0,
)
data.to_csv(f"/Users/weilu/Research/data/optimization_{trial_name}_{cutoff}.csv")
data
Out[177]:
In [164]:
# Validate the iter4 / cutoff-400 gamma, cache the table as CSV and display it.
# `pre` is not set here: the working directory comes from whichever cell defined it last.
trial_name = "iter4"
cutoff = 400
gamma_pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter4/saved_gammas"
os.chdir(pre)
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei(
    "phi_list.txt",
    "protein_list",
    gamma_file_name,
    "openMM",
    500,
    mode=0,
)
data.to_csv("/Users/weilu/Research/data/optimization_iter4.csv")
data
Out[164]:
In [168]:
# Validate the iter3 / cutoff-400 gamma, cache the table as CSV and display it.
# `pre` is not set here: the working directory comes from whichever cell defined it last.
trial_name = "iter3"
cutoff = 400
gamma_pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter3/saved_gammas"
os.chdir(pre)
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei(
    "phi_list.txt",
    "protein_list",
    gamma_file_name,
    "openMM",
    500,
    mode=0,
)
data.to_csv("/Users/weilu/Research/data/optimization_iter3.csv")
data
Out[168]:
In [169]:
# Validate the iter2 / cutoff-400 gamma, cache the table as CSV and display it.
# `pre` is not set here: the working directory comes from whichever cell defined it last.
trial_name = "iter2"
cutoff = 400
gamma_pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter2/saved_gammas"
os.chdir(pre)
gamma_file_name = f"{gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei(
    "phi_list.txt",
    "protein_list",
    gamma_file_name,
    "openMM",
    500,
    mode=0,
)
data.to_csv("/Users/weilu/Research/data/optimization_iter2.csv")
data
Out[169]:
In [152]:
# Baseline: validate the original gamma file and display the table (nothing is saved).
# `pre` is not set here: the working directory comes from whichever cell defined it last.
cutoff = 100
os.chdir(pre)
gamma_file_name = "/Users/weilu/opt/parameters/original_gamma"
print(gamma_file_name)
data = validate_hamiltonian_wei(
    "phi_list.txt",
    "protein_list",
    gamma_file_name,
    "openMM",
    500,
    mode=0,
)
data
Out[152]:
In [158]:
# Scratch check: the even numbers from 0 to 10 inclusive.
list(range(0, 11, 2))
Out[158]:
In [149]:
# Validate the cutoff-100 gamma of the current trial and display the table.
# `pre`, `save_gamma_pre` and `trial_name` are not set here; they come from earlier cells.
cutoff = 100
os.chdir(pre)
gamma_file_name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
print(gamma_file_name)
data = validate_hamiltonian_wei(
    "phi_list.txt",
    "protein_list",
    gamma_file_name,
    "openMM",
    500,
    mode=0,
)
data
Out[149]:
In [181]:
# Show the kernel's current working directory (IPython automagic for %pwd).
pwd
Out[181]:
In [182]:
# Render and save one three-panel gamma figure per cutoff for the iter4 trial.
cutoff_list = [100, 200, 300, 400, 500, 600]
cutoff_list += [10, 20, 30, 40, 50, 80]
save_gamma_pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization_iter4/saved_gammas"
trial_name = "iter4"
# Create the output folder without going through a shell; a no-op if it already exists.
os.makedirs(f"{save_gamma_pre}/figures", exist_ok=True)
for cutoff_i in cutoff_list:
    name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff_i}_impose_Aprime_constraint"
    filtered_gamma = np.loadtxt(name)
    figureName = f"{save_gamma_pre}/figures/{trial_name}_cutoff{cutoff_i}"
    title = f"{trial_name}_cutoff{cutoff_i}"
    show_together(filtered_gamma, figureName, title=title)
In [151]:
# Render and save one three-panel gamma figure per cutoff for the current trial.
# `save_gamma_pre` and `trial_name` are not set here; they come from earlier cells.
cutoff_list = [100, 200, 300, 400, 500]
cutoff_list += [10, 20, 30, 40, 50, 80]
# Create the output folder without going through a shell; a no-op if it already exists.
os.makedirs(f"{save_gamma_pre}/figures", exist_ok=True)
for cutoff_i in cutoff_list:
    name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff_i}_impose_Aprime_constraint"
    filtered_gamma = np.loadtxt(name)
    figureName = f"{save_gamma_pre}/figures/{trial_name}_cutoff{cutoff_i}"
    title = f"{trial_name}_cutoff{cutoff_i}"
    show_together(filtered_gamma, figureName, title=title)
In [4]:
# pdb list
# Root of the awsem_contact_term study and the folder that is globbed for *.pdb files.
pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/awsem_contact_term/"
databaseFolder = pre + "database/dompdb/"
In [13]:
# List every *.pdb file in the database folder and derive its name (the file name
# up to the first dot). glob returns matches in arbitrary order, so the list is
# sorted to keep everything derived from it (e.g. the protein list) reproducible.
pdbFolderList = sorted(glob.glob(databaseFolder+"*.pdb"))
pdbNames = [a.split("/")[-1].split(".")[0] for a in pdbFolderList]
In [14]:
# Number of PDB files found in the database folder.
len(pdbNames)
Out[14]:
In [21]:
# Keep only the PDB names that have a matching .seq file under database/S20_seq.
filtered_pdbNames = [
    pdb_name
    for pdb_name in pdbNames
    if os.path.exists(pre+f"/database/S20_seq/{pdb_name}.seq")
]
In [22]:
# Copy the filtered alignment of every selected PDB into {pre}/alignments and print
# the names for which no alignment file exists. The standard library is used instead
# of shelling out to mkdir/cp, so paths need no quoting and a failed copy raises
# instead of being silently ignored.
alignment_source = "/Users/weilu/Research/optimization/mediated_term/multisequenceanddcafrustratometry"
alignment_target = f"{pre}/alignments"
os.makedirs(alignment_target, exist_ok=True)
for pdb in filtered_pdbNames:
    seqs_file = f"{alignment_source}/{pdb}_filtered_0.05.seqs"
    if os.path.exists(seqs_file):
        shutil.copy(seqs_file, alignment_target)
    else:
        print(pdb)
In [23]:
# Write the selected PDB names to {pre}/protein_list, one per line.
with open(f"{pre}/protein_list", "w") as out:
    out.writelines(pdb_name+"\n" for pdb_name in filtered_pdbNames)
In [24]:
# information about alignments
# For each selected PDB, record [name, number of lines in its alignment file].
info = []
for pdb in filtered_pdbNames:
    alignment_path = f"{pre}/alignments/{pdb}_filtered_0.05.seqs"
    with open(alignment_path) as f:
        line_count = len(f.readlines())
    info.append([pdb, line_count])
In [3]:
# Folder passed to get_filtered_gamma below, and the trial label used in the
# names of the saved gamma files and figures.
pre = "/Users/weilu/Research/server/dec_2019/multiDensityOptimization/awsem_contact_term/gammas/"
trial_name = "trial_1_multiseq"
In [4]:
# Name of the phi set handed to get_filtered_gamma (plain literal; it has no placeholders).
pp = "protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
# Alternative with the Debye-Huckel term appended:
# pp = f"protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0phi_debye_huckel_well0"
In [87]:
# For every cutoff, compute a gamma vector that satisfies the extra constraint
# A' . gamma = c, where c = A' . original_gamma, then save it and convert it to
# simulation format.
gamma_file_name = "/Users/weilu/opt/parameters/original_gamma"
original_gamma = np.loadtxt(gamma_file_name)
# Output folder is the same for every cutoff, so it is set once before the loop.
save_gamma_pre = "/Users/weilu/Research/server/dec_2019/saved_gammas/"
# we want to impose an additional constraint so that A' * gamma = constant (-562.23)
cutoff_list = [100, 200, 300, 400, 500, 600]
cutoff_list += [10, 20, 30, 40, 50, 80]
for cutoff_i in cutoff_list:
    A, A_prime, filtered_gamma, filtered_B_inv = get_filtered_gamma(pre, cutoff_i, pp)
    c = np.dot(A_prime, original_gamma)
    B_inv = filtered_B_inv
    # lambda_2 is chosen so that A_prime . gamma_new equals c for
    # gamma_new = B_inv . (A - lambda_2 * A_prime).
    lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
    gamma_new = B_inv.dot(A-A_prime*lambda_2)
    # impose A'gamma
    name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff_i}_impose_Aprime_constraint"
    np.savetxt(name, gamma_new)
    cmd = f"convert_to_simulation_format.py {name} {save_gamma_pre}/Dec11_{trial_name}_cutoff{cutoff_i}_impose_Aprime_constraint"
    exit_status = os.system(cmd)
    if exit_status != 0:
        # Surface a failed conversion instead of silently continuing.
        print(f"WARNING: command failed with status {exit_status}: {cmd}")
In [50]:
def plot_contact_all(gammas, ax, invert_sign=True, fix_colorbar=True, inferBound=False,
                    vmin=-0.3, vmax=0.3, fix_confidence_colorbar=True, confidence_vmin=0,
                    confidence_vmax=1.0, plot_confidence=False, confidence_lower=None, confidence_upper=None):
    """Draw a symmetric 20x20 residue-pair heat map of ``gammas`` on ``ax``.

    ``gammas`` is read sequentially as the upper triangle (diagonal included) of a
    20x20 matrix, i.e. its first 210 values are used. Rows and columns are placed
    according to ``hydrophobicity_map`` and labelled with ``hydrophobicity_letters``.

    Parameters
    ----------
    gammas : indexable sequence of numbers, at least 210 long.
    ax : matplotlib Axes to draw on.
    invert_sign : if true, the matrix is multiplied by -1 before plotting.
    fix_colorbar : if true, plot with the "bwr" colormap limited to [vmin, vmax];
        otherwise use "RdBu_r" with automatic limits.
    inferBound : if true, vmin/vmax are replaced by the matrix minimum/maximum.
    vmin, vmax : color limits used when ``fix_colorbar`` is true and
        ``inferBound`` is false.
    fix_confidence_colorbar, confidence_vmin, confidence_vmax, plot_confidence,
    confidence_lower, confidence_upper : accepted for call compatibility; not used
        by this function.
    """
    size = 20
    interaction_matrix = np.zeros((size, size))
    i_content = 0
    for i in range(size):
        index1 = hydrophobicity_map[inverse_res_type_map[i]]
        for j in range(i, size):
            index2 = hydrophobicity_map[inverse_res_type_map[j]]
            # Fill both halves so the matrix is symmetric.
            interaction_matrix[index1][index2] = gammas[i_content]
            interaction_matrix[index2][index1] = gammas[i_content]
            i_content += 1

    # The minus sign is here to be consistent with the way AWSEM thinks about gammas
    if invert_sign:
        interaction_matrix *= -1

    if inferBound:
        vmin = np.min(interaction_matrix)
        vmax = np.max(interaction_matrix)

    if fix_colorbar:
        mappable = ax.pcolor(interaction_matrix, vmin=vmin,
                             vmax=vmax, cmap="bwr")
    else:
        mappable = ax.pcolor(interaction_matrix, cmap="RdBu_r")

    # Attach the colorbar to the figure that owns `ax`, not to whatever figure
    # happens to be current in pyplot.
    ax.figure.colorbar(mappable, ax=ax, fraction=0.046, pad=0.04)

    # put the major ticks at the middle of each cell
    ax.set_yticks(np.arange(interaction_matrix.shape[0]) + 0.5, minor=False)
    ax.set_xticks(np.arange(interaction_matrix.shape[1]) + 0.5, minor=False)

    ax.set_xticklabels(hydrophobicity_letters)
    ax.set_yticklabels(hydrophobicity_letters)
In [83]:
def show_together(filtered_gamma, figureName, title="test"):
    """Plot three consecutive slices of ``filtered_gamma`` side by side and save the figure.

    The slices [:210], [210:420] and [420:] are drawn with ``plot_contact_all``
    in panels titled 'Direct', 'High density(protein)' and 'Low density(water)'.
    ``title`` becomes the figure's super-title and the result is written to
    ``figureName``.
    """
    fig = plt.figure()
    panels = [
        (slice(None, 210), 'Direct'),
        (slice(210, 420), 'High density(protein)'),
        (slice(420, None), 'Low density(water)'),
    ]
    for position, (gamma_slice, panel_title) in enumerate(panels, start=1):
        panel_ax = plt.subplot(1, 3, position)
        panel_ax.set_aspect('equal')
        plot_contact_all(filtered_gamma[gamma_slice], panel_ax, inferBound=True)
        panel_ax.title.set_text(panel_title)
    fig.suptitle(title, fontsize=20, y=0.75)
    fig.tight_layout()
    plt.savefig(figureName)
In [88]:
# Render and save one three-panel gamma figure per cutoff into saved_gammas/figures.
# `save_gamma_pre` and `trial_name` are not set here; they come from earlier cells.
cutoff_list = [100, 200, 300, 400, 500, 600] + [10, 20, 30, 40, 50, 80]
for cutoff_i in cutoff_list:
    title = f"{trial_name}_cutoff{cutoff_i}"
    figureName = f"/Users/weilu/Research/server/dec_2019/saved_gammas/figures/{trial_name}_cutoff{cutoff_i}"
    name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff_i}_impose_Aprime_constraint"
    filtered_gamma = np.loadtxt(name)
    show_together(filtered_gamma, figureName, title=title)
In [80]:
# Preview of the three-panel gamma figure for a single cutoff; the figure is only
# displayed here, not saved.
# `save_gamma_pre` and `trial_name` are not set here; they come from earlier cells.
cutoff_i = 400
name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff_i}_impose_Aprime_constraint"
filtered_gamma = np.loadtxt(name)
fig = plt.figure()
ax1 = plt.subplot(1, 3, 1)
ax1.set_aspect('equal')
plot_contact_all(filtered_gamma[:210], ax1, inferBound=True)
ax1.title.set_text('Direct')
ax2 = plt.subplot(1, 3, 2)
ax2.set_aspect('equal')
plot_contact_all(filtered_gamma[210:420], ax2, inferBound=True)
ax2.title.set_text('High density(protein)')
ax3 = plt.subplot(1, 3, 3)
ax3.set_aspect('equal')
plot_contact_all(filtered_gamma[420:], ax3, inferBound=True)
ax3.title.set_text('Low density(water)')
fig.suptitle(f"{trial_name}_cutoff{cutoff_i}", fontsize=20, y=0.75)
fig.tight_layout()
In [202]:
# Load the Final_2Sm table; later cells use its "Class" and "PDB" columns.
a = pd.read_csv("/Users/weilu/Research/server/dec_2019/iterative_optimization/original_pdbs/Final_2Sm.csv")
In [203]:
# Keep only the rows whose Class column equals 'α'. `a` is re-bound, so the
# unfiltered table is no longer available after this cell.
a = a.query("Class == 'α'")
In [204]:
# Unique PDB identifiers as strings, skipping missing values ("nan") and any
# entry that contains an opening parenthesis.
pdb_list = [
    pdb_id
    for pdb_id in map(str, a.PDB.unique())
    if "(" not in pdb_id and pdb_id != "nan"
]
In [205]:
# Number of usable PDB identifiers.
len(pdb_list)
Out[205]:
In [206]:
# Rows of `a` whose PDB is in pdb_list, re-indexed from 0, first 23 columns only.
# NOTE(review): the variable name looks like a typo for "second_test_set"; it is
# left unchanged because following cells refer to it.
second_test_test = a.query("PDB in @pdb_list").reset_index(drop=True).iloc[:, :23]
In [207]:
# Persist the second test set next to the original table.
second_test_test.to_csv("/Users/weilu/Research/server/dec_2019/iterative_optimization/original_pdbs/second_test_set.csv")
In [208]:
# Lower-cased PDB identifiers of the second test set, as a plain list.
second_test_test.PDB.str.lower().to_list()
Out[208]:
In [ ]:
# randomly select 20 out
# A dedicated, seeded generator makes the draw repeatable across kernel restarts;
# change SAMPLE_SEED to obtain a different selection.
SAMPLE_SEED = 0
randomly_selected = random.Random(SAMPLE_SEED).sample(pdb_list, 20)
In [128]:
# Rows of `a` whose PDB was randomly selected, re-indexed from 0, first 23 columns only.
# NOTE(review): the variable name looks like a typo for "first_test_set"; it is
# left unchanged because the following cell refers to it.
first_test_test = a.query("PDB in @randomly_selected").reset_index(drop=True).iloc[:, :23]
In [129]:
# Persist the first test set next to the original table.
first_test_test.to_csv("/Users/weilu/Research/server/dec_2019/iterative_optimization/original_pdbs/first_test_set.csv")
In [137]:
# Reload the saved first test set; the first CSV column is used as the index.
d = pd.read_csv("/Users/weilu/Research/server/dec_2019/iterative_optimization/original_pdbs/first_test_set.csv", index_col=0)
In [142]:
# Lower-cased PDB identifiers of the first test set, as a plain list.
d.PDB.str.lower().to_list()
Out[142]:
In [111]:
# Sanity check: number of randomly selected PDB identifiers.
len(randomly_selected)
Out[111]:
In [139]:
# Number of rows currently in `a`.
len(a)
Out[139]:
In [145]:
# Display the first test set ordered by its "Lpdb" column (ascending); `d` itself is not modified.
d.sort_values("Lpdb")
Out[145]:
In [ ]: