In [2]:
from pyCodeLib import *
import warnings
import glob
import re
import numpy as np
import pandas as pd
from Bio.PDB.Polypeptide import one_to_three

warnings.filterwarnings('ignore')


# sys.path.insert(0, MYHOME)
%load_ext autoreload
%autoreload 2

In [ ]:


In [337]:
pre = "/Users/weilu/Research/server/april_2019/optimization_with_frag_iter2/gammas/"
location=pre + "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma = np.loadtxt(location)
plot_contact_well(gamma[:210], inferBound=True, invert_sign=False)



In [328]:
gamma = np.loadtxt("/Users/weilu/Research/server/april_2019/optimization_with_frag_iter1/gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered")

plot_contact_well(gamma[:210], inferBound=True, invert_sign=False)



In [250]:
gamma = np.loadtxt("/Users/weilu/Research/server/april_2019/complete_gammas/saved_gammas/optimization_restart_iter1/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered")
plot_contact_well(gamma[:210], inferBound=True, invert_sign=False)



In [234]:
plot_contact_well(gamma[210:420], inferBound=True, invert_sign=False)


actually this part is all about alpha beta protein. not what I want.


In [108]:
with open("/Users/weilu/Research/database/queriedPDB.dat") as f:
    a = f.readlines()
a = [i.strip() for i in a]
pdb_list = a[1:-1]

In [109]:
len(pdb_list)


Out[109]:
456

In [114]:
from Bio.PDB.PDBParser import PDBParser
data_raw = []
for protein in pdb_list:
    pre = "/Users/weilu/Research/server/april_2019/setup_test_set/cleaned_pdbs/" + protein.lower()
    name = protein
    problematic = 0
    pdbFileLocation = pre + ".pdb"
    structure = PDBParser().get_structure(name, pdbFileLocation)
    seq = ""
    for r in structure.get_residues():
        _, _, chain, (_, resId, _) = r.get_full_id()
        try:
            resName = three_to_one(r.get_resname())
        except:
            problematic = 2
        # assert chain == "A"
#         if chain != chainName:
#             print(i, name, length, len(seq1), chain, chainName)
#             problematic = 3
        seq += resName
    length = len(seq)
    data_raw.append([name, length, seq, problematic])

In [133]:
data = pd.DataFrame(data_raw, columns=["Name", "Length", "Seq", "Problematic"]).drop("Problematic", axis=1)

In [127]:
data.sort_values("Length").reset_index(drop=True).to_csv("/Users/weilu/Research/library/test_set_info.csv")

In [130]:
data = pd.read_csv("/Users/weilu/Research/library/test_set_info.csv", index_col=0)

In [125]:
data.sort_values("Length").hist("Length", bins=50)


Out[125]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a207db470>]],
      dtype=object)

In [ ]:


In [ ]:


In [3]:
os.chdir("/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter7/")

In [ ]:
"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter7/gammas/"

In [6]:
dataset = {"old":("1R69, 1UTG, 3ICB, 256BA, 4CPV, 1CCR, 2MHR, 1MBA, 2FHA".split(", "), 40),
            "new":("1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", "), 80),
            "test":(['1A2J', '1A3H', '1A5Y', '1A8Q', '1AGY', '1AKZ', '1AUZ', '1B1A', '1B31', '1B8X', '1BCO', '1BN6', '1BOH', '1BOI'], 40)}

new_simulation_list = ["bias_2","bias_old_gamma", "iter1_with_bias_96percent", "new_iter2_10", "new_iter1_90", "new_iter2_8", "old_new_iter2_8"]
old_protein_simulation_list = ["iter6_30", "iter5_30", "single", "new_iter3_10", "iter4_30", "iter4_6", "iter4_13"]
simulation_location_list_dic = {}
for p in dataset["new"][0]:
    name = p.lower()[:4]
    simulation_location_list_dic[name] = new_simulation_list
for p in dataset["old"][0]:
    name = p.lower()[:4]
    simulation_location_list_dic[name] = old_protein_simulation_list

In [39]:
dataset = {"old":("1R69, 1UTG, 3ICB, 256BA, 4CPV, 1CCR, 2MHR, 1MBA, 2FHA".split(", "), 40),
            "new":("1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", "), 80),
            "test":(['1A2J', '1A3H', '1A5Y', '1A8Q', '1AGY', '1AKZ', '1AUZ', '1B1A', '1B31', '1B8X', '1BCO', '1BN6', '1BOH', '1BOI'], 40)}

new_simulation_list = ["new_iter2_8","bias_old_gamma"]
old_protein_simulation_list = ["iter6_30", "iter5_30"]
simulation_location_list_dic = {}
for p in dataset["new"][0]:
    name = p.lower()[:4]
    simulation_location_list_dic[name] = new_simulation_list
for p in dataset["old"][0]:
    name = p.lower()[:4]
    simulation_location_list_dic[name] = old_protein_simulation_list

In [60]:
gamma_file_name = "iter7_thrid_gen"
gamma_file_name = "gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma_file_name = "test"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
    "phi_list.txt", "proteins_name_list.txt", gamma_file_name, 
    "lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)


0 (10.888644803657137+0j)

In [56]:
pre_i = 7
address = f"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter{pre_i}/iter{pre_i}_thrid_gen"
pre_iter_gamma = np.loadtxt(address)

In [57]:
np.std(pre_iter_gamma)


Out[57]:
1.1584179955828737

In [82]:
address = f"/Users/weilu/Research/server/april_2019/optimization_mult_seq/gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma = np.loadtxt(address)
np.std(gamma)


Out[82]:
0.046736615865453135

In [223]:
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/iter0_gamma"
gamma = np.loadtxt(address)
np.std(gamma)


Out[223]:
0.6487425261519498

In [225]:
iter0_normalized = gamma * np.std(original_gamma) / np.std(gamma)

In [226]:
np.std(iter0_normalized)


Out[226]:
0.43622242366014413

In [227]:
np.savetxt("/Users/weilu/Research/server/april_2019/complete_gammas/iter0_normalized_gamma", iter0_normalized)

In [107]:
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/iter4_gamma"
gamma = np.loadtxt(address)
np.std(gamma)


Out[107]:
1.1584179955828737

In [86]:
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/iter7_gamma"
iter7 = np.loadtxt(address)
np.std(iter7)


Out[86]:
1.1584179955828737

In [224]:
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/original_gamma.dat"
original_gamma = np.loadtxt(address)
np.std(original_gamma)


Out[224]:
0.4362224236601442

In [90]:
np.mean(original_gamma)


Out[90]:
-0.11829680821739127

In [91]:
np.mean(iter7)


Out[91]:
0.10688544456383424

In [95]:
len(original_gamma)


Out[95]:
690

In [102]:
iter7_normalized = iter7 * np.std(original_gamma) / np.std(iter7)

In [106]:
plt.hist(iter7_normalized.flatten(), bins=100, range=(-3,3))
a = plt.hist(original_gamma.flatten(), bins=100, range=(-3,3), alpha=0.3)



In [101]:
plt.hist(iter7.flatten(), bins=100, range=(-8,8))
plt.hist(original_gamma.flatten(), bins=100, range=(-8,8), alpha=0.3)


Out[101]:
(array([  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,  26.,
         39.,  35.,  37.,  65.,  72., 124., 141.,  53.,  37.,  32.,  18.,
          7.,   4.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.]),
 array([-8.  , -7.84, -7.68, -7.52, -7.36, -7.2 , -7.04, -6.88, -6.72,
        -6.56, -6.4 , -6.24, -6.08, -5.92, -5.76, -5.6 , -5.44, -5.28,
        -5.12, -4.96, -4.8 , -4.64, -4.48, -4.32, -4.16, -4.  , -3.84,
        -3.68, -3.52, -3.36, -3.2 , -3.04, -2.88, -2.72, -2.56, -2.4 ,
        -2.24, -2.08, -1.92, -1.76, -1.6 , -1.44, -1.28, -1.12, -0.96,
        -0.8 , -0.64, -0.48, -0.32, -0.16,  0.  ,  0.16,  0.32,  0.48,
         0.64,  0.8 ,  0.96,  1.12,  1.28,  1.44,  1.6 ,  1.76,  1.92,
         2.08,  2.24,  2.4 ,  2.56,  2.72,  2.88,  3.04,  3.2 ,  3.36,
         3.52,  3.68,  3.84,  4.  ,  4.16,  4.32,  4.48,  4.64,  4.8 ,
         4.96,  5.12,  5.28,  5.44,  5.6 ,  5.76,  5.92,  6.08,  6.24,
         6.4 ,  6.56,  6.72,  6.88,  7.04,  7.2 ,  7.36,  7.52,  7.68,
         7.84,  8.  ]),
 <a list of 100 Patch objects>)

In [83]:
address = f"/Users/weilu/Research/server/april_2019/complete_gammas/iter0_gamma"
gamma = np.loadtxt(address)
np.std(gamma)


Out[83]:
0.6487425261519498

In [80]:
pre_i = 7
address = f"/Users/weilu/Research/server/april_2019/optimization_mult_seq/gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma = np.loadtxt(address)
np.std(gamma)
multiSeq = gamma * np.std(original_gamma) /np.std(gamma)

In [81]:
address = "/Users/weilu/Research/server/april_2019/fix_gamma_mix_error/multiSeq"
np.savetxt(address, multiSeq)

In [ ]:


In [73]:
address = "/Volumes/Wei_backup/feb_2019/original_gamma.dat"
original_gamma = np.loadtxt(address)
np.std(original_gamma)


Out[73]:
0.4362224236601442

In [74]:
address = "/Users/weilu/Research/server/april_2019/fix_gamma_mix_error/t_7_2"
gamma = np.loadtxt(address)
np.std(gamma)


Out[74]:
1.1584179955828737

In [76]:
normalized = gamma * np.std(original_gamma) /np.std(gamma)

In [77]:
address = "/Users/weilu/Research/server/april_2019/fix_gamma_mix_error/t_7_normalized"
np.savetxt(address, normalized)

In [59]:
np.savetxt("test", gamma/np.std(gamma)*np.std(pre_iter_gamma))

In [58]:
np.std(gamma/np.std(gamma)*np.std(pre_iter_gamma))


Out[58]:
1.1584179955828737

In [54]:
np.std(gamma)


Out[54]:
4.486000166595644

In [ ]:


In [ ]:
address = f"/Users/weilu/Research/server/march_2019/optimization_weighted_by_q_iter7/gammas/proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_gamma_filtered"
gamma = np.loadtxt(address)

In [66]:
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t_5"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
    "phi_list.txt", "proteins_name_list.txt", gamma_file_name, 
    "lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data


0 (12.902492168806434+0j)
Out[66]:
Name Z E_Natives E_mgs E_mg_stds
0 1FC2C 12.902492 -72.939507 -14.5936 4.52206
1 1ENH 10.936765 -94.788777 -19.966 6.8414
2 2GB1 12.988132 -131.775597 -27.7624 8.00833
3 2CRO 10.867731 -130.273058 -25.1145 9.67622
4 1CTF 10.445175 -123.937394 -28.4003 9.14653
5 4ICB 12.425854 -167.259641 -46.5205 9.71677
6 1R69 4.628182 -114.374358 -40.9413 15.8665
7 1UTG 4.358094 -126.820263 -59.8845 15.3589
8 3ICB 3.778402 -161.489415 -113.374 12.7343
9 256BA 2.836460 -159.889357 -78.9694 28.5285
10 4CPV 3.097858 -156.959441 -106.82 16.1851
11 1CCR 1.543357 -237.494016 -181.156 36.5032
12 2MHR 3.376231 -226.974582 -135.286 27.1569
13 1MBA 1.707582 -226.946629 -189.144 22.1382
14 2FHA 3.059179 -307.398832 -218.884 28.9341

In [67]:
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t_7"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
    "phi_list.txt", "proteins_name_list.txt", gamma_file_name, 
    "lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data


0 (15.394139633453651+0j)
Out[67]:
Name Z E_Natives E_mgs E_mg_stds
0 1FC2C 15.394140 -67.082927 -12.7573 3.52898
1 1ENH 14.454072 -82.326354 -16.2223 4.57339
2 2GB1 17.995654 -120.965843 -22.645 5.46359
3 2CRO 14.485697 -111.494256 -19.7966 6.33022
4 1CTF 11.557301 -111.826330 -24.7091 7.53785
5 4ICB 13.055911 -137.497174 -39.0883 7.53749
6 1R69 6.178104 -92.994433 -30.7364 10.0772
7 1UTG 5.467374 -97.024359 -45.5508 9.41469
8 3ICB 5.426244 -130.648535 -82.8196 8.81438
9 256BA 4.438673 -123.890840 -54.9674 15.5279
10 4CPV 3.335522 -127.115741 -86.5347 12.1663
11 1CCR 1.310163 -146.866948 -118.83 21.3993
12 2MHR 3.643060 -174.225559 -102.152 19.7838
13 1MBA 2.275258 -192.369138 -152.473 17.5348
14 2FHA 3.693267 -217.992171 -157.959 16.2549

In [65]:
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t_6"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
    "phi_list.txt", "proteins_name_list.txt", gamma_file_name, 
    "lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data


0 (15.557881500459525+0j)
Out[65]:
Name Z E_Natives E_mgs E_mg_stds
0 1FC2C 15.557882 -66.409632 -12.584 3.4597
1 1ENH 14.481983 -82.012968 -16.1626 4.54706
2 2GB1 18.132325 -120.221625 -22.5164 5.38845
3 2CRO 14.629681 -110.247560 -19.5473 6.19974
4 1CTF 11.518154 -111.089092 -24.6283 7.50648
5 4ICB 13.167126 -136.222312 -38.5971 7.41431
6 1R69 6.047398 -92.728155 -30.8776 10.2276
7 1UTG 5.227171 -95.725888 -45.4891 9.61071
8 3ICB 5.297855 -129.444069 -82.6104 8.84012
9 256BA 4.179782 -122.787016 -55.5428 16.088
10 4CPV 3.476587 -128.232387 -86.4684 12.0129
11 1CCR 1.257704 -147.560831 -120.253 21.7125
12 2MHR 3.628087 -175.459441 -103.038 19.9612
13 1MBA 2.233051 -191.310877 -152.205 17.5125
14 2FHA 3.719959 -219.850967 -158.76 16.4226

In [68]:
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t_7_2"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
    "phi_list.txt", "proteins_name_list.txt", gamma_file_name, 
    "lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data


0 (14.95306946470842+0j)
Out[68]:
Name Z E_Natives E_mgs E_mg_stds
0 1FC2C 14.953069 -59.898056 -11.3384 3.24747
1 1ENH 13.864714 -70.647750 -13.9516 4.08924
2 2GB1 17.531352 -106.238755 -19.8175 4.92953
3 2CRO 14.272086 -96.801886 -17.1511 5.58088
4 1CTF 11.259620 -92.898240 -20.598 6.4212
5 4ICB 12.790158 -124.714568 -35.3586 6.98631
6 1R69 6.966446 -85.374952 -26.9794 8.38241
7 1UTG 6.731735 -94.339031 -41.1663 7.89882
8 3ICB 7.425543 -119.708526 -68.3466 6.91692
9 256BA 5.390923 -118.059756 -48.3401 12.9328
10 4CPV 5.108738 -123.618639 -73.8877 9.73448
11 1CCR 2.465812 -145.468158 -102.744 17.3265
12 2MHR 5.887625 -185.002005 -92.0686 15.7845
13 1MBA 3.995153 -184.497248 -129.042 13.8807
14 2FHA 6.311264 -224.000715 -139.049 13.4603

In [62]:
gamma_file_name = "/Users/weilu/Research/server/march_2019/fix_gamma_mix_error/t"
z_scores, e_natives, e_mgs, e_mg_stds = validate_hamiltonian_wei(
    "phi_list.txt", "proteins_name_list.txt", gamma_file_name, 
    "lammps", len(new_simulation_list)*6000, mode=2, simulation_location_list_dic=simulation_location_list_dic)
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data


0 (15.881770605078113+0j)

In [64]:



Out[64]:
Name Z E_Natives E_mgs E_mg_stds
0 1FC2C 15.881771 -69.026462 -13.1503 3.51826
1 1ENH 15.042489 -88.963180 -17.4922 4.75127
2 2GB1 18.434727 -127.254795 -23.9252 5.60516
3 2CRO 14.679367 -118.191521 -21.0059 6.62056
4 1CTF 11.772371 -124.534577 -27.5061 8.24205
5 4ICB 13.373861 -139.045679 -39.5249 7.44144
6 1R69 5.286558 -93.297144 -32.6843 11.4655
7 1UTG 4.003590 -89.440327 -46.6201 10.6955
8 3ICB 3.539106 -130.550986 -93.1623 10.5644
9 256BA 3.295272 -117.900011 -58.654 17.9791
10 4CPV 1.819500 -119.969886 -94.2593 14.1306
11 1CCR 0.210996 -134.562463 -129.278 25.0473
12 2MHR 1.620543 -143.789168 -105.903 23.3788
13 1MBA 0.711789 -182.177810 -167.432 20.7172
14 2FHA 1.192157 -189.992927 -167.524 18.8475

In [63]:
# normalized with std.

In [61]:
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data


Out[61]:
Name Z E_Natives E_mgs E_mg_stds
0 1FC2C 10.888645 -27.679025 -5.20934 2.06359
1 1ENH 9.251952 -24.048072 -4.82697 2.07752
2 2GB1 12.410122 -43.414874 -7.88703 2.86281
3 2CRO 10.234838 -37.913786 -6.68482 3.05124
4 1CTF 7.645952 -24.047627 -5.33915 2.44685
5 4ICB 10.266856 -62.414439 -17.7518 4.35017
6 1R69 14.526236 -43.950599 -10.2149 2.3224
7 1UTG 18.617068 -64.288211 -19.3788 2.41227
8 3ICB 17.566746 -62.966443 -15.6378 2.69422
9 256BA 17.457549 -73.472311 -17.7937 3.18937
10 4CPV 14.822274 -77.716004 -23.531 3.65565
11 1CCR 13.619389 -99.237549 -32.6869 4.88646
12 2MHR 24.058968 -154.683226 -40.303 4.75416
13 1MBA 16.661417 -116.157490 -38.3161 4.67195
14 2FHA 17.594301 -170.013749 -53.5348 6.62026

In [ ]:
# with more decoys.

In [41]:
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)
data


Out[41]:
Name Z E_Natives E_mgs E_mg_stds
0 1FC2C 10.888645 -107.187656 -20.1733 7.99129
1 1ENH 9.251952 -93.126710 -18.6925 8.04524
2 2GB1 12.410122 -168.125094 -30.5427 11.0863
3 2CRO 10.234838 -146.822004 -25.8871 11.816
4 1CTF 7.645952 -93.124987 -20.676 9.47547
5 4ICB 10.266856 -241.701341 -68.7444 16.8461
6 1R69 14.526236 -170.199699 -39.5575 8.99353
7 1UTG 18.617068 -248.957567 -75.0448 9.34158
8 3ICB 17.566746 -243.838989 -60.5577 10.4334
9 256BA 17.457549 -284.523203 -68.9066 12.3509
10 4CPV 14.822274 -300.957002 -91.1242 14.1566
11 1CCR 13.619389 -384.299676 -126.581 18.9229
12 2MHR 24.058968 -599.014329 -156.074 18.4106
13 1MBA 16.661417 -449.822535 -148.38 18.0922
14 2FHA 17.594301 -658.382129 -207.315 25.6371

In [20]:
nameList = dataset["new"][0] + dataset["old"][0]

In [37]:
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)

In [38]:
data


Out[38]:
Name Z E_Natives E_mgs E_mg_stds
0 1FC2C 23.480789 -107.187656 -15.5339 3.90335
1 1ENH 26.479082 -93.126710 -13.9019 2.99198
2 2GB1 22.975176 -168.125094 -28.8205 6.06327
3 2CRO 31.285547 -146.822004 -18.9095 4.08855
4 1CTF 11.639435 -93.124987 -18.7614 6.38894
5 4ICB 18.548103 -241.701341 -63.6676 9.59849
6 1R69 14.591790 -170.199699 -43.0636 8.71285
7 1UTG 20.877930 -248.957567 -78.7209 8.15391
8 3ICB 19.381229 -243.838989 -58.0876 9.58409
9 256BA 17.424797 -284.523203 -72.1624 12.1873
10 4CPV 14.375748 -300.957002 -86.6941 14.9045
11 1CCR 13.228477 -384.299676 -127.827 19.3879
12 2MHR 22.098569 -599.014329 -156.493 20.0249
13 1MBA 16.496307 -449.822535 -144.696 18.4967
14 2FHA 16.351727 -658.382129 -209.959 27.4236

iter7 gamma


In [28]:
data = pd.DataFrame([nameList, z_scores, e_natives, e_mgs, e_mg_stds], 
                    index=["Name", "Z", "E_Natives", "E_mgs", "E_mg_stds"]).T
data.Z = data.Z.astype(float)
data.E_Natives = data.E_Natives.astype(float)

In [35]:
data


Out[35]:
Name Z E_Natives E_mgs E_mg_stds
0 1FC2C 20.736046 -54.027982 -8.9613 2.17335
1 1ENH 25.163685 -75.813521 -12.4091 2.51968
2 2GB1 20.575525 -99.521555 -19.1208 3.90759
3 2CRO 27.249896 -105.345429 -14.488 3.33423
4 1CTF 13.052314 -98.729705 -19.4564 6.0735
5 4ICB 20.471473 -134.771453 -34.2875 4.90849
6 1R69 2.991517 -95.344303 -52.5771 14.2962
7 1UTG 3.541324 -108.984502 -68.9202 11.3134
8 3ICB 1.407566 -130.398896 -118.733 8.28805
9 256BA 1.246208 -137.257527 -107.214 24.1078
10 4CPV 0.786581 -126.445572 -109.126 22.0186
11 1CCR 0.699880 -243.915832 -215.168 41.0761
12 2MHR 1.438503 -181.273917 -139.694 28.905
13 1MBA -0.685222 -179.977666 -193.978 20.432
14 2FHA 1.296879 -277.381832 -247.049 23.3892

In [138]:
pd.concat([data[:4],data[:4]]).reset_index(drop=False)


Out[138]:
index Name Length Seq
0 0 1A2J 188 AQYEDGKQYTTLEKPVAGAPQVLEFFSFFCPHCYQFEEVLHISDNV...
1 1 1A3H 300 SVVEEHGQLSISNGELVNERGEQVQLKGMSSHGLQWYGQFVNYESM...
2 2 1A5Y 284 EMEKEFEQIDKSGSWAAIYQDIRHEASDFPCRVAKLPKNKNRNRYR...
3 3 1A8Q 274 PICTTRDGVEIFYKDWGQGRPVVFIHGWPLNGDAWQDQLKAVVDAG...
4 0 1A2J 188 AQYEDGKQYTTLEKPVAGAPQVLEFFSFFCPHCYQFEEVLHISDNV...
5 1 1A3H 300 SVVEEHGQLSISNGELVNERGEQVQLKGMSSHGLQWYGQFVNYESM...
6 2 1A5Y 284 EMEKEFEQIDKSGSWAAIYQDIRHEASDFPCRVAKLPKNKNRNRYR...
7 3 1A8Q 274 PICTTRDGVEIFYKDWGQGRPVVFIHGWPLNGDAWQDQLKAVVDAG...

In [172]:
Q = pd.read_csv(f"/Users/weilu/Research/server/april_2019/database/iter4_30_3icb_11/wham.dat")[" Qw"]

In [151]:
len(Q)


Out[151]:
1000

In [162]:
a = pd.concat([Q, Q]).reset_index()
a["Rank"] = a["index"].rank(ascending=False)
a.query(f"Rank < {200*30}")

In [169]:
pd.read_csv("/Users/weilu/Research/server/april_2019/database/Q_bias_2_1ctf", index_col=0).head()


Out[169]:
index Qw Rank
0 0 0.261345 59985.5
1 1 0.291708 59955.5
2 2 0.282458 59925.5
3 3 0.218886 59895.5
4 4 0.262429 59865.5

In [177]:
a = pd.read_csv("/Users/weilu/Research/server/april_2019/database/Q_iter4_13_2fha", index_col=0).query(f"Rank < {200*30}")

In [200]:
simulation_location = "test"
name = "a"
sampled = a.sample(10).assign(Name=f"{simulation_location}_{name}_")
sampled["Location"] = sampled["Name"] + sampled["Run"].astype(str) + \
            "/frame" + sampled["index"].astype(str) + " " + sampled[" Qw"].round(3).astype(str)

In [ ]:
with open(f"decoys/lammps/{name}_{simulation_location}.decoys", "w") as out:
    for i, item in sampled.iterrows():
        f.write(f"{simulation_location}_{name}_{item['Run']}/frame{item['index']} {np.round(item[' Qw'], 3)}\n")

In [221]:
np.round(0.4124, 3)


Out[221]:
0.412

In [222]:
for i, item in sampled.iterrows():
    print(f"{simulation_location}_{name}_{item['Run']}/frame{item['index']} {np.round(item[' Qw'], 3)}\n")


test_a_0/frame376 0.234

test_a_10/frame316 0.357

test_a_5/frame436 0.36

test_a_11/frame421 0.363

test_a_9/frame330 0.353

test_a_7/frame282 0.412

test_a_19/frame366 0.292

test_a_19/frame310 0.269

test_a_15/frame271 0.291

test_a_10/frame326 0.367


In [206]:
a = sampled["Location"].values

In [205]:
a.values


Out[205]:
array(['test_a_0/frame376 0.234', 'test_a_10/frame316 0.357',
       'test_a_5/frame436 0.36', 'test_a_11/frame421 0.363',
       'test_a_9/frame330 0.353', 'test_a_7/frame282 0.412',
       'test_a_19/frame366 0.292', 'test_a_19/frame310 0.269',
       'test_a_15/frame271 0.291', 'test_a_10/frame326 0.367'],
      dtype=object)

In [150]:
pd.concat([Q, Q]).reset_index(drop=False)


Out[150]:
index Qw
0 0 0.060984
1 1 0.137520
2 2 0.222031
3 3 0.197840
4 4 0.273562
5 5 0.266006
6 6 0.286569
7 7 0.218554
8 8 0.260241
9 9 0.332594
10 10 0.398317
11 11 0.350271
12 12 0.416690
13 13 0.361991
14 14 0.379512
15 15 0.366766
16 16 0.351632
17 17 0.339836
18 18 0.291605
19 19 0.262566
20 20 0.265882
21 21 0.266080
22 22 0.264690
23 23 0.251368
24 24 0.259581
25 25 0.271665
26 26 0.428057
27 27 0.433507
28 28 0.410504
29 29 0.325923
... ... ...
1970 970 0.767218
1971 971 0.779492
1972 972 0.771003
1973 973 0.771163
1974 974 0.735662
1975 975 0.728089
1976 976 0.773779
1977 977 0.804048
1978 978 0.762598
1979 979 0.805954
1980 980 0.799129
1981 981 0.873564
1982 982 0.857443
1983 983 0.831532
1984 984 0.733035
1985 985 0.728545
1986 986 0.787008
1987 987 0.750969
1988 988 0.878250
1989 989 0.811806
1990 990 0.829812
1991 991 0.836699
1992 992 0.834538
1993 993 0.790569
1994 994 0.825448
1995 995 0.779203
1996 996 0.750317
1997 997 0.795165
1998 998 0.741362
1999 999 0.723373

2000 rows × 2 columns


In [ ]:
data.to_csv()

In [230]:
HFscales = pd.read_csv("~/opt/small_script/Whole_residue_HFscales.txt", sep="\t")

In [231]:
HFscales


Out[231]:
AA DGwif DGwoct Oct-IF
0 Ala 0.17 0.50 0.33
1 Arg+ 0.81 1.81 1.00
2 Asn 0.42 0.85 0.43
3 Asp- 1.23 3.64 2.41
4 Asp0 -0.07 0.43 0.50
5 Cys -0.24 -0.02 0.22
6 Gln 0.58 0.77 0.19
7 Glu- 2.02 3.63 1.61
8 Glu0 -0.01 0.11 0.12
9 Gly 0.01 1.15 1.14
10 His+ 0.96 2.33 1.37
11 His0 0.17 0.11 -0.06
12 Ile -0.31 -1.12 -0.81
13 Leu -0.56 -1.25 -0.69
14 Lys+ 0.99 2.80 1.81
15 Met -0.23 -0.67 -0.44
16 Phe -1.13 -1.71 -0.58
17 Pro 0.45 0.14 -0.31
18 Ser 0.13 0.46 0.33
19 Thr 0.14 0.25 0.11
20 Trp -1.85 -2.09 -0.24
21 Tyr -0.94 -0.71 0.23
22 Val 0.07 -0.46 -0.53

In [ ]:


In [236]:


In [242]:
direct = membrane_gamma[:210][:,0]
protein = membrane_gamma[210:][:,0]
water = membrane_gamma[210:][:,1]

In [246]:


In [247]:
def simulation_to_iteration_gamma(membrane_gamma):
    direct = membrane_gamma[:210][:,0]
    protein = membrane_gamma[210:][:,0]
    water = membrane_gamma[210:][:,1]
    return np.concatenate([direct, protein, water])

In [260]:
membrane_gamma = np.loadtxt("/Users/weilu/openmmawsem/parameters/membrane_gamma_rescaled.dat")
membrane_gamma_formated = simulation_to_iteration_gamma(membrane_gamma)
np.std(membrane_gamma_formated)


Out[260]:
0.40883106885391807

In [263]:
np.mean(membrane_gamma_formated)


Out[263]:
-0.01062296825396825

In [264]:
np.mean(water_gamma_formated)


Out[264]:
0.06386869841269842

In [251]:
membrane_gamma = np.loadtxt("/Users/weilu/opt/parameters/membrane/gamma.dat")
membrane_gamma_formated = simulation_to_iteration_gamma(membrane_gamma)
water_gamma = np.loadtxt("/Users/weilu/opt/parameters/globular_parameters/gamma.dat")
water_gamma_formated = simulation_to_iteration_gamma(water_gamma)
ratio = np.std(water_gamma_formated)/np.std(membrane_gamma_formated)
membrane_gamma_formated_rescaled = membrane_gamma_formated * ratio

In [252]:
np.std(membrane_gamma_formated)


Out[252]:
0.0035361816815055996

In [253]:
np.std(water_gamma_formated)


Out[253]:
0.40883096718693446

In [255]:
ratio = np.std(water_gamma_formated)/np.std(membrane_gamma_formated)

In [257]:
np.std(membrane_gamma_formated * ratio)


Out[257]:
0.4088309671869345

In [258]:
membrane_gamma_formated_rescaled = membrane_gamma_formated * ratio

In [259]:
gamma = membrane_gamma_formated_rescaled
with open("/Users/weilu/openmmawsem/parameters/membrane_gamma_rescaled.dat", "w") as out:
    c = 0
    for i in range(20):
        for j in range(i, 20):
            out.write(f"{gamma[c]:<.5f} {gamma[c]:10.5f}\n")
            c += 1
    out.write("\n")
    for i in range(20):
        for j in range(i, 20):
            # protein, water
            out.write(f"{gamma[c]:<.5f} {gamma[c+210]:10.5f}\n")
            c += 1

In [ ]:


In [272]:
from simtk.openmm.app import *
from simtk.openmm import *
def read_trajectory_pdb_positions(pdb_trajectory_filename):
    import uuid, os
    pdb_trajectory_contents = open(pdb_trajectory_filename).read().split("MODEL")[1:]
    pdb_trajectory_contents = ['\n'.join(x.split('\n')[1:]) for x in pdb_trajectory_contents]
    pdb_trajectory = []
    for i, pdb_contents in enumerate(pdb_trajectory_contents):
        temporary_file_name = str(uuid.uuid4())
        temporary_pdb_file = open(temporary_file_name, 'w')
        temporary_pdb_file.write(pdb_contents)
        temporary_pdb_file.close()
        pdb = PDBFile(temporary_file_name)
        pdb_trajectory.append(pdb)
        os.remove(temporary_file_name)
    return pdb_trajectory

In [267]:
import mdtraj as md
location = "/Users/weilu/Research/server/april_2019/complete_2xov/teaser_simulation_annealing/test.pdb"
pdb_trajectory = md.load(location)

In [305]:
location = "/Users/weilu/Research/server/april_2019/complete_2xov/teaser_simulation_annealing/test.pdb"
# location = "/Users/weilu/Research/server/april_2019/complete_2xov/teaser_simulation_annealing/origianl.pdb"
location = "/Users/weilu/Research/server/april_2019/complete_2xov/smaller.pdb"
a = md.iterload(location, chunk=20)

In [323]:
%%time
location = "/Users/weilu/Research/server/april_2019/complete_2xov/movie.dcd"
# a = md.iterload(location, chunk=20)
# a = md.load(location, top="/Users/weilu/Research/server/april_2019/complete_2xov/complete_2xov-openmmawsem.pdb")
a = md.load(location, top="/Users/weilu/Research/server/april_2019/complete_2xov/native.pdb")
# for traj in a:
#     print(traj.openmm_positions)


CPU times: user 102 ms, sys: 15.1 ms, total: 117 ms
Wall time: 166 ms

In [324]:
%%time
location = "/Users/weilu/Research/server/april_2019/complete_2xov/movie.dcd"
# a = md.iterload(location, chunk=20)
a = md.load(location, top="/Users/weilu/Research/server/april_2019/complete_2xov/complete_2xov-openmmawsem.pdb")
a = md.load(location, top="/Users/weilu/Research/server/april_2019/complete_2xov/complete_2xov-openmmawsem.pdb")
# for traj in a:
#     print(traj.openmm_positions)


CPU times: user 202 ms, sys: 2.84 ms, total: 204 ms
Wall time: 131 ms

In [325]:
%%time
a = md.iterload(location, chunk=20)
# a = md.load(location)
# for traj in a:
#     print(traj.openmm_positions)


CPU times: user 14 µs, sys: 4 µs, total: 18 µs
Wall time: 20 µs

In [ ]:


In [326]:
location = "/Users/weilu/Research/server/april_2019/complete_2xov/movie.dcd"
f = md.open(location)
f.seek(0)

In [313]:
f.positions.shape


Out[313]:
(200, 1620, 3)

In [296]:
for frame,traj in enumerate(a):
    print(md.rmsd(traj, traj))

In [290]:
START = 0
STOP = 3
STRIDE = 1
with md.open(location) as f:
    f.seek(START)
    print(f)
    t = f.read_as_traj(
        topology, n_frames=STOP-START, stride=STRIDE
    )
    print(t)


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-290-bee2b0d4ff15> in <module>
      3 STRIDE = 1
      4 with md.open(location) as f:
----> 5     f.seek(START)
      6     print(f)
      7     t = f.read_as_traj(

AttributeError: 'PDBTrajectoryFile' object has no attribute 'seek'

In [280]:
a = PDBFile(location)

In [273]:
a = read_trajectory_pdb_positions(location)

In [287]:
len(pdb_trajectory)


Out[287]:
6

In [288]:
# pdb_trajectory.openmm_positions(0)