In [2]:
from pyCodeLib import *
import warnings
import glob
import re
import numpy as np
import pandas as pd
from Bio.PDB.Polypeptide import one_to_three
warnings.filterwarnings('ignore')
# sys.path.insert(0, MYHOME)
%load_ext autoreload
%autoreload 2
In [ ]:
In [337]:
pre = "/Users/weilu/Research/server/april_2019/optimization_with_frag_iter2/gammas/"
location=pre + "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma = np.loadtxt(location)
plot_contact_well(gamma[:210], inferBound=True, invert_sign=False)
In [328]:
gamma = np.loadtxt("/Users/weilu/Research/server/april_2019/optimization_with_frag_iter1/gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered")
plot_contact_well(gamma[:210], inferBound=True, invert_sign=False)
In [250]:
gamma = np.loadtxt("/Users/weilu/Research/server/april_2019/complete_gammas/saved_gammas/optimization_restart_iter1/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered")
plot_contact_well(gamma[:210], inferBound=True, invert_sign=False)
In [234]:
plot_contact_well(gamma[210:420], inferBound=True, invert_sign=False)
In [108]:
with open("/Users/weilu/Research/database/queriedPDB.dat") as f:
a = f.readlines()
a = [i.strip() for i in a]
pdb_list = a[1:-1]
In [109]:
len(pdb_list)
Out[109]:
In [114]:
from Bio.PDB.PDBParser import PDBParser
data_raw = []
for protein in pdb_list:
pre = "/Users/weilu/Research/server/april_2019/setup_test_set/cleaned_pdbs/" + protein.lower()
name = protein
problematic = 0
pdbFileLocation = pre + ".pdb"
structure = PDBParser().get_structure(name, pdbFileLocation)
seq = ""
for r in structure.get_residues():
_, _, chain, (_, resId, _) = r.get_full_id()
try:
resName = three_to_one(r.get_resname())
except:
problematic = 2
# assert chain == "A"
# if chain != chainName:
# print(i, name, length, len(seq1), chain, chainName)
# problematic = 3
seq += resName
length = len(seq)
data_raw.append([name, length, seq, problematic])
In [133]:
data = pd.DataFrame(data_raw, columns=["Name", "Length", "Seq", "Problematic"]).drop("Problematic", axis=1)
In [127]:
data.sort_values("Length").reset_index(drop=True).to_csv("/Users/weilu/Research/library/test_set_info.csv")
In [130]:
data = pd.read_csv("/Users/weilu/Research/library/test_set_info.csv", index_col=0)
In [125]:
data.sort_values("Length").hist("Length", bins=50)
Out[125]:
In [ ]:
In [ ]:
In [3]:
os.chdir("/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter7/")
In [ ]:
"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter7/gammas/"
In [6]:
dataset = {"old":("1R69, 1UTG, 3ICB, 256BA, 4CPV, 1CCR, 2MHR, 1MBA, 2FHA".split(", "), 40),
"new":("1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", "), 80),
"test":(['1A2J', '1A3H', '1A5Y', '1A8Q', '1AGY', '1AKZ', '1AUZ', '1B1A', '1B31', '1B8X', '1BCO', '1BN6', '1BOH', '1BOI'], 40)}
new_simulation_list = ["bias_2","bias_old_gamma", "iter1_with_bias_96percent", "new_iter2_10", "new_iter1_90", "new_iter2_8", "old_new_iter2_8"]
old_protein_simulation_list = ["iter6_30", "iter5_30", "single", "new_iter3_10", "iter4_30", "iter4_6", "iter4_13"]
simulation_location_list_dic = {}
for p in dataset["new"][0]:
name = p.lower()[:4]
simulation_location_list_dic[name] = new_simulation_list
for p in dataset["old"][0]:
name = p.lower()[:4]
simulation_location_list_dic[name] = old_protein_simulation_list
In [39]:
dataset = {"old":("1R69, 1UTG, 3ICB, 256BA, 4CPV, 1CCR, 2MHR, 1MBA, 2FHA".split(", "), 40),
"new":("1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", "), 80),
"test":(['1A2J', '1A3H', '1A5Y', '1A8Q', '1AGY', '1AKZ', '1AUZ', '1B1A', '1B31', '1B8X', '1BCO', '1BN6', '1BOH', '1BOI'], 40)}
new_simulation_list = ["new_iter2_8","bias_old_gamma"]
old_protein_simulation_list = ["iter6_30", "iter5_30"]
simulation_location_list_dic = {}
for p in dataset["new"][0]:
name = p.lower()[:4]
simulation_location_list_dic[name] = new_simulation_list
for p in dataset["old"][0]:
name = p.lower()[:4]
simulation_location_list_dic[name] = old_protein_simulation_list
In [60]:
gamma_file_name = "iter7_thrid_gen"
gamma_file_name = "gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma_file_name = "test"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
"phi_list.txt", "proteins_name_list.txt", gamma_file_name,
"lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
In [56]:
pre_i = 7
address = f"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter{pre_i}/iter{pre_i}_thrid_gen"
pre_iter_gamma = np.loadtxt(address)
In [57]:
np.std(pre_iter_gamma)
Out[57]:
In [82]:
address = f"/Users/weilu/Research/server/april_2019/optimization_mult_seq/gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma = np.loadtxt(address)
np.std(gamma)
Out[82]:
In [223]:
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/iter0_gamma"
gamma = np.loadtxt(address)
np.std(gamma)
Out[223]:
In [225]:
iter0_normalized = gamma * np.std(original_gamma) / np.std(gamma)
In [226]:
np.std(iter0_normalized)
Out[226]:
In [227]:
np.savetxt("/Users/weilu/Research/server/april_2019/complete_gammas/iter0_normalized_gamma", iter0_normalized)
In [107]:
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/iter4_gamma"
gamma = np.loadtxt(address)
np.std(gamma)
Out[107]:
In [86]:
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/iter7_gamma"
iter7 = np.loadtxt(address)
np.std(iter7)
Out[86]:
In [224]:
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/original_gamma.dat"
original_gamma = np.loadtxt(address)
np.std(original_gamma)
Out[224]:
In [90]:
np.mean(original_gamma)
Out[90]:
In [91]:
np.mean(iter7)
Out[91]:
In [95]:
len(original_gamma)
Out[95]:
In [102]:
iter7_normalized = iter7 * np.std(original_gamma) / np.std(iter7)
In [106]:
plt.hist(iter7_normalized.flatten(), bins=100, range=(-3,3))
a = plt.hist(original_gamma.flatten(), bins=100, range=(-3,3), alpha=0.3)
In [101]:
plt.hist(iter7.flatten(), bins=100, range=(-8,8))
plt.hist(original_gamma.flatten(), bins=100, range=(-8,8), alpha=0.3)
Out[101]:
In [83]:
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/iter0_gamma"
gamma = np.loadtxt(address)
np.std(gamma)
Out[83]:
In [80]:
pre_i = 7
address = f"/Users/weilu/Research/server/april_2019/optimization_mult_seq/gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma = np.loadtxt(address)
np.std(gamma)
multiSeq = gamma * np.std(original_gamma) /np.std(gamma)
In [81]:
address = "/Users/weilu/Research/server/april_2019/fix_gamma_mix_error/multiSeq"
np.savetxt(address, multiSeq)
In [ ]:
In [73]:
address = "/Volumes/Wei_backup/feb_2019/original_gamma.dat"
original_gamma = np.loadtxt(address)
np.std(original_gamma)
Out[73]:
In [74]:
address = "/Users/weilu/Research/server/april_2019/fix_gamma_mix_error/t_7_2"
gamma = np.loadtxt(address)
np.std(gamma)
Out[74]:
In [76]:
normalized = gamma * np.std(original_gamma) /np.std(gamma)
In [77]:
address = "/Users/weilu/Research/server/april_2019/fix_gamma_mix_error/t_7_normalized"
np.savetxt(address, normalized)
In [59]:
np.savetxt("test", gamma/np.std(gamma)*np.std(pre_iter_gamma))
In [58]:
np.std(gamma/np.std(gamma)*np.std(pre_iter_gamma))
Out[58]:
In [54]:
np.std(gamma)
Out[54]:
In [ ]:
In [ ]:
address = f"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter7/gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma = np.loadtxt(address)
In [66]:
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t_5"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
"phi_list.txt", "proteins_name_list.txt", gamma_file_name,
"lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds],
index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data
Out[66]:
In [67]:
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t_7"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
"phi_list.txt", "proteins_name_list.txt", gamma_file_name,
"lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds],
index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data
Out[67]:
In [65]:
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t_6"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
"phi_list.txt", "proteins_name_list.txt", gamma_file_name,
"lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds],
index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data
Out[65]:
In [68]:
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t_7_2"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
"phi_list.txt", "proteins_name_list.txt", gamma_file_name,
"lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds],
index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data
Out[68]:
In [62]:
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
"phi_list.txt", "proteins_name_list.txt", gamma_file_name,
"lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds],
index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data
In [64]:
Out[64]:
In [63]:
# normalized with std.
In [61]:
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds],
index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data
Out[61]:
In [ ]:
# with more decoys.
In [41]:
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds],
index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data
Out[41]:
In [20]:
nameList = dataset["new"][0] + dataset["old"][0]
In [37]:
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds],
index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
In [38]:
data
Out[38]:
In [28]:
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds],
index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
In [35]:
data
Out[35]:
In [138]:
pd.concat([data[:4],data[:4]]).reset_index(drop=False)
Out[138]:
In [172]:
Q = pd.read_csv(f"/Users/weilu/Research/server/april_2019/database/iter4_30_3icb_11/wham.dat")[" Qw"]
In [151]:
len(Q)
Out[151]:
In [162]:
a = pd.concat([Q, Q]).reset_index()
a["Rank"] = a["index"].rank(ascending=False)
a.query(f"Rank < {200*30}")
In [169]:
pd.read_csv("/Users/weilu/Research/server/april_2019/database/Q_bias_2_1ctf", index_col=0).head()
Out[169]:
In [177]:
a = pd.read_csv("/Users/weilu/Research/server/april_2019/database/Q_iter4_13_2fha", index_col=0).query(f"Rank < {200*30}")
In [200]:
simulation_location = "test"
name = "a"
sampled = a.sample(10).assign(Name=f"{simulation_location}_{name}_")
sampled["Location"] = sampled["Name"] + sampled["Run"].astype(str) + \
"/frame" + sampled["index"].astype(str) + " " + sampled[" Qw"].round(3).astype(str)
In [ ]:
with open(f"decoys/lammps/{name}_{simulation_location}.decoys", "w") as out:
for i, item in sampled.iterrows():
f.write(f"{simulation_location}_{name}_{item['Run']}/frame{item['index']} {np.round(item[' Qw'], 3)}\n")
In [221]:
np.round(0.4124, 3)
Out[221]:
In [222]:
for i, item in sampled.iterrows():
print(f"{simulation_location}_{name}_{item['Run']}/frame{item['index']} {np.round(item[' Qw'], 3)}\n")
In [206]:
a = sampled["Location"].values
In [205]:
a.values
Out[205]:
In [150]:
pd.concat([Q, Q]).reset_index(drop=False)
Out[150]:
In [ ]:
data.to_csv()
In [230]:
HFscales = pd.read_csv("~/opt/small_script/Whole_residue_HFscales.txt", sep="\t")
In [231]:
HFscales
Out[231]:
In [ ]:
In [236]:
In [242]:
direct = membrane_gamma[:210][:,0]
protein = membrane_gamma[210:][:,0]
water = membrane_gamma[210:][:,1]
In [246]:
In [247]:
def simulation_to_iteration_gamma(membrane_gamma):
direct = membrane_gamma[:210][:,0]
protein = membrane_gamma[210:][:,0]
water = membrane_gamma[210:][:,1]
return np.concatenate([direct, protein, water])
In [260]:
membrane_gamma = np.loadtxt("/Users/weilu/openmmawsem/parameters/membrane_gamma_rescaled.dat")
membrane_gamma_formated = simulation_to_iteration_gamma(membrane_gamma)
np.std(membrane_gamma_formated)
Out[260]:
In [263]:
np.mean(membrane_gamma_formated)
Out[263]:
In [264]:
np.mean(water_gamma_formated)
Out[264]:
In [251]:
membrane_gamma = np.loadtxt("/Users/weilu/opt/parameters/membrane/gamma.dat")
membrane_gamma_formated = simulation_to_iteration_gamma(membrane_gamma)
water_gamma = np.loadtxt("/Users/weilu/opt/parameters/globular_parameters/gamma.dat")
water_gamma_formated = simulation_to_iteration_gamma(water_gamma)
ratio = np.std(water_gamma_formated)/np.std(membrane_gamma_formated)
membrane_gamma_formated_rescaled = membrane_gamma_formated * ratio
In [252]:
np.std(membrane_gamma_formated)
Out[252]:
In [253]:
np.std(water_gamma_formated)
Out[253]:
In [255]:
ratio = np.std(water_gamma_formated)/np.std(membrane_gamma_formated)
In [257]:
np.std(membrane_gamma_formated * ratio)
Out[257]:
In [258]:
membrane_gamma_formated_rescaled = membrane_gamma_formated * ratio
In [259]:
gamma = membrane_gamma_formated_rescaled
with open("/Users/weilu/openmmawsem/parameters/membrane_gamma_rescaled.dat", "w") as out:
c = 0
for i in range(20):
for j in range(i, 20):
out.write(f"{gamma[c]:<.5f} {gamma[c]:10.5f}\n")
c += 1
out.write("\n")
for i in range(20):
for j in range(i, 20):
# protein, water
out.write(f"{gamma[c]:<.5f} {gamma[c+210]:10.5f}\n")
c += 1
In [ ]:
In [272]:
from simtk.openmm.app import *
from simtk.openmm import *
def read_trajectory_pdb_positions(pdb_trajectory_filename):
import uuid, os
pdb_trajectory_contents = open(pdb_trajectory_filename).read().split("MODEL")[1:]
pdb_trajectory_contents = ['\n'.join(x.split('\n')[1:]) for x in pdb_trajectory_contents]
pdb_trajectory = []
for i, pdb_contents in enumerate(pdb_trajectory_contents):
temporary_file_name = str(uuid.uuid4())
temporary_pdb_file = open(temporary_file_name, 'w')
temporary_pdb_file.write(pdb_contents)
temporary_pdb_file.close()
pdb = PDBFile(temporary_file_name)
pdb_trajectory.append(pdb)
os.remove(temporary_file_name)
return pdb_trajectory
In [267]:
import mdtraj as md
location = "/Users/weilu/Research/server/april_2019/complete_2xov/teaser_simulation_annealing/test.pdb"
pdb_trajectory = md.load(location)
In [305]:
location = "/Users/weilu/Research/server/april_2019/complete_2xov/teaser_simulation_annealing/test.pdb"
# location = "/Users/weilu/Research/server/april_2019/complete_2xov/teaser_simulation_annealing/origianl.pdb"
location = "/Users/weilu/Research/server/april_2019/complete_2xov/smaller.pdb"
a = md.iterload(location, chunk=20)
In [323]:
%%time
location = "/Users/weilu/Research/server/april_2019/complete_2xov/movie.dcd"
# a = md.iterload(location, chunk=20)
# a = md.load(location, top="/Users/weilu/Research/server/april_2019/complete_2xov/complete_2xov-openmmawsem.pdb")
a = md.load(location, top="/Users/weilu/Research/server/april_2019/complete_2xov/native.pdb")
# for traj in a:
# print(traj.openmm_positions)
In [324]:
%%time
location = "/Users/weilu/Research/server/april_2019/complete_2xov/movie.dcd"
# a = md.iterload(location, chunk=20)
a = md.load(location, top="/Users/weilu/Research/server/april_2019/complete_2xov/complete_2xov-openmmawsem.pdb")
a = md.load(location, top="/Users/weilu/Research/server/april_2019/complete_2xov/complete_2xov-openmmawsem.pdb")
# for traj in a:
# print(traj.openmm_positions)
In [325]:
%%time
a = md.iterload(location, chunk=20)
# a = md.load(location)
# for traj in a:
# print(traj.openmm_positions)
In [ ]:
In [326]:
location = "/Users/weilu/Research/server/april_2019/complete_2xov/movie.dcd"
f = md.open(location)
f.seek(0)
In [313]:
f.positions.shape
Out[313]:
In [296]:
for frame,traj in enumerate(a):
print(md.rmsd(traj, traj))
In [290]:
START = 0
STOP = 3
STRIDE = 1
with md.open(location) as f:
f.seek(START)
print(f)
t = f.read_as_traj(
topology, n_frames=STOP-START, stride=STRIDE
)
print(t)
In [280]:
a = PDBFile(location)
In [273]:
a = read_trajectory_pdb_positions(location)
In [287]:
len(pdb_trajectory)
Out[287]:
In [288]:
# pdb_trajectory.openmm_positions(0)