In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
# from small_script.myFunctions import *
sys.path.insert(0, "/Users/weilu/openmmawsem")
from helperFunctions.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2

In [2]:
plt.rcParams['figure.figsize'] = np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})

In [3]:
# pre = "/Users/weilu/Research/server_backup/feb_2019/jan_optimization/gammas/"
# pre = "/Users/weilu/Research/server/april_2019/optimization_test/gammas/"
pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/gammas/"
# pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0"
# pp = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
pp = f"protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"

A_name = pp + "_A"
B_name = pp + "_B"
B_filtered_name = pp + "_B_filtered"
P_name = pp + "_P"
Gamma_name = pp + "_gamma"
Gamma_filtered_name = pp + "_gamma_filtered"
Lamb_name = pp + "_lamb"
Lamb_filtered_name = pp + "_lamb_filtered"

A = np.loadtxt(pre+A_name)
B = np.loadtxt(pre+B_name)
B_filtered = np.loadtxt(pre+B_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Gamma = np.loadtxt(pre+Gamma_name)
Gamma_filtered = np.loadtxt(pre+Gamma_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb = np.loadtxt(pre+Lamb_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb_filtered = np.loadtxt(pre+Lamb_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})

half_B_name = pp + "_half_B"
half_B = np.loadtxt(pre+half_B_name)
other_half_B_name = pp + "_other_half_B"
other_half_B = np.loadtxt(pre+other_half_B_name)
std_half_B_name = pp + "_std_half_B"
std_half_B = np.loadtxt(pre+std_half_B_name)


# pre = "/Users/weilu/Research/server/april_2019/"
location = pre + "../../phis/protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_phi_decoy_summary.txt"
A_prime = np.loadtxt(location)

In [4]:
plt.plot(Lamb)
plt.yscale("log")


/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/numpy/core/numeric.py:501: ComplexWarning: Casting complex values to real discards the imaginary part
  return array(a, dtype, copy=False, order=order)

In [5]:
lamb, P = np.linalg.eig(B)
lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)
filtered_lamb = np.copy(lamb)
cutoff_mode = 100
filtered_B_inv, filtered_lamb, P = get_filtered_B_inv_lambda_and_P(filtered_lamb, 
                                                                   cutoff_mode, P)
filtered_gamma = np.dot(filtered_B_inv, A)
filtered_B = np.linalg.inv(filtered_B_inv)
plot_contact_well(filtered_gamma[:210], inferBound=True)
plot_contact_well(filtered_gamma[210:420], inferBound=True)
plot_contact_well(filtered_gamma[420:], inferBound=True)



In [6]:
# maximum difference between loaded and computed is 1e-5.
max(lamb-Lamb)


Out[6]:
(1.5403101476740844e-05+0j)

In [6]:
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/saved_gammas/"
np.savetxt(f"{save_gamma_pre}/trial_2_cutoff100", filtered_gamma)

In [107]:
os.chdir('/Users/weilu/opt/notebook/Optimization')


Out[107]:
'/Users/weilu/opt/notebook/Optimization'

In [7]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_cutoff100"
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 13.88620325483512
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[7]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2_1 13.886203 -94.019049 27.959737 8.784171
1 6BJ8_1 13.109063 -135.224251 -11.309544 9.452598
2 2JCC_1 14.473569 -126.328556 -5.647334 8.338042
3 1AO7_1 13.484683 -157.616608 -29.399574 9.508346
4 1LP9_1 15.189795 -102.871840 16.266549 7.843318
5 3QDJ_1 12.256285 -143.183898 -11.627734 10.733771
6 3GSN_1 13.845688 -112.105952 6.630968 8.575733
7 1QRN_1 15.372893 -166.795586 -39.143784 8.303694
8 3PWP_1 14.576827 -157.595679 -29.193631 8.808642
9 5W1W_1 13.829302 -133.125454 -9.536536 8.936743
10 1QSE_1 14.182267 -156.575700 -28.558021 9.026602
11 4EUP_1 11.888598 -127.724902 -9.191554 9.970339
12 5TEZ_1 8.898732 -68.902758 -0.690305 7.665413
13 3D39_1 13.003862 -163.361364 -36.729181 9.738044
14 6EQA_1 8.989483 -104.018517 0.080684 11.580109
15 2BNR_1 14.177915 -132.611510 -17.027458 8.152401
16 6BJ2_1 15.084397 -140.325692 -15.105126 8.301331
17 6BJ3_1 14.349541 -145.302981 -16.137351 9.001377
18 5NME_1 15.351991 -160.071183 -23.836531 8.874071
19 5MEN_1 13.868756 -127.572251 -20.185120 7.743098
20 2VLJ_1 12.512705 -103.622263 21.386925 9.990580
21 2VLK_1 14.961490 -127.411272 6.484844 8.949384
22 2J8U_1 14.638396 -104.604069 7.866990 7.683291
23 2GJ6_1 13.980958 -171.134457 -37.973391 9.524460
24 1OGA_1 13.957177 -114.875395 14.482985 9.268234
25 2F54_1 13.041641 -144.348750 -25.787389 9.090985
26 5D2L_1 12.408506 -88.800883 19.124483 8.697692
27 2F53_1 14.501475 -116.953364 -4.437979 7.758892
28 3QEQ_1 13.544127 -101.515583 25.433213 9.372977
29 1QSF_1 12.551245 -141.500546 -28.763369 8.982151
30 2UWE_1 10.635273 -78.771942 11.389924 8.477626
31 5EUO_1 14.693857 -108.771322 22.772826 8.952322
32 3H9S_1 13.867971 -178.290123 -43.146525 9.745016
33 3D3V_1 14.608471 -194.461663 -55.401156 9.519169
34 2BNQ_1 8.632441 -93.322389 -21.654547 8.302153
35 4FTV_1 13.339285 -154.522283 -34.990888 8.960855

In [9]:
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/original_gamma"
original_gamma = np.loadtxt(gamma_file_name)

In [10]:
np.dot(A_prime, original_gamma)


Out[10]:
-559.5053481482374

In [11]:
# we want to impose additional contraint so that A' * gamma = constnat.(-562.23)
c = np.dot(A_prime, original_gamma)
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)

In [12]:
np.dot(A_prime, gamma_new)


Out[12]:
-559.5053481482373

In [13]:
# impose A'gamma
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/saved_gammas/"
np.savetxt(f"{save_gamma_pre}/trial_2_cutoff100_impose_Aprime_constraint", gamma_new)

In [14]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_cutoff100_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 13.868354956091665
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[14]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2_1 13.868355 -639.603610 -517.600844 8.797205
1 6BJ8_1 13.214237 -685.448865 -561.149639 9.406463
2 2JCC_1 14.328149 -670.129342 -548.402985 8.495609
3 1AO7_1 13.420110 -706.276085 -577.483215 9.597005
4 1LP9_1 15.159616 -639.190366 -518.061142 7.990257
5 3QDJ_1 11.999585 -698.554403 -566.255907 11.025256
6 3GSN_1 13.539900 -668.190557 -547.916753 8.882917
7 1QRN_1 15.249483 -705.064741 -577.848954 8.342301
8 3PWP_1 14.563373 -708.209283 -578.403614 8.913160
9 5W1W_1 13.633887 -665.151214 -541.722978 9.053048
10 1QSE_1 14.258868 -699.035194 -570.507980 9.013844
11 4EUP_1 12.171524 -680.457008 -559.321984 9.952330
12 5TEZ_1 8.972845 -630.995906 -561.765500 7.715547
13 3D39_1 12.872897 -710.978823 -584.536451 9.822371
14 6EQA_1 8.854746 -647.236987 -541.684093 11.920487
15 2BNR_1 13.835622 -683.258022 -567.030134 8.400626
16 6BJ2_1 15.074987 -679.567911 -555.603226 8.223203
17 6BJ3_1 14.558686 -681.520082 -553.095466 8.821168
18 5NME_1 14.963629 -688.772647 -553.033452 9.071275
19 5MEN_1 13.770905 -657.886755 -550.909597 7.768346
20 2VLJ_1 12.191335 -657.706818 -533.557745 10.183386
21 2VLK_1 14.701481 -688.299758 -553.145843 9.193218
22 2J8U_1 14.461123 -637.903299 -524.842433 7.818263
23 2GJ6_1 14.074049 -730.396260 -596.124356 9.540389
24 1OGA_1 13.542821 -675.023627 -545.535286 9.561401
25 2F54_1 13.108876 -697.549102 -577.876933 9.129094
26 5D2L_1 12.227117 -621.731099 -513.040115 8.889339
27 2F53_1 13.951656 -654.727398 -543.899453 7.943712
28 3QEQ_1 13.435413 -642.105823 -515.393303 9.431234
29 1QSF_1 12.454222 -689.692347 -576.578774 9.082347
30 2UWE_1 10.474880 -617.348119 -526.298859 8.692153
31 5EUO_1 14.351202 -672.534868 -540.120126 9.226735
32 3H9S_1 13.832251 -730.933424 -595.895846 9.762516
33 3D3V_1 14.510918 -747.956451 -608.472809 9.612323
34 2BNQ_1 8.849451 -672.109604 -598.080169 8.365427
35 4FTV_1 13.110313 -704.688963 -585.345681 9.103008

In [16]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_cutoff100_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_tiny", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 8.849450826769093
210
420
60
210
420
60
210
420
60
210
420
60
Out[16]:
Protein Z_scores E_native E_mgs Std_mg
0 1 8.849451 -672.109604 -598.080169 8.365427
1 2 9.270943 -681.915179 -611.143850 7.633671
2 3 8.089534 -645.301463 -578.332365 8.278486
3 4 8.240793 -671.306528 -606.010473 7.923516
4 5 7.439003 -645.968912 -584.637773 8.244537

In [22]:
# mix gammas so that we don't overfitting too much.
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/original_gamma"
original_gamma = np.loadtxt(gamma_file_name)
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_cutoff100_impose_Aprime_constraint"
gamma_new = np.loadtxt(gamma_file_name)

alpha = 0.8
alpha_percent = int(alpha*100)
mixed_gamma = alpha*original_gamma + (1-alpha)*gamma_new
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/saved_gammas/"
np.savetxt(f"{save_gamma_pre}/trial_2_mixed_original_and_cutoff100_impose_Aprime_constraint_{alpha_percent}", mixed_gamma)

In [23]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_mixed_original_and_cutoff100_impose_Aprime_constraint_80"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 4.644053322015458
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[23]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2_1 4.644053 -580.211577 -550.187949 6.464962
1 6BJ8_1 3.644353 -584.591439 -557.302772 7.487932
2 2JCC_1 3.595958 -596.931478 -571.579622 7.050099
3 1AO7_1 3.888880 -585.161512 -555.292123 7.680717
4 1LP9_1 3.343277 -585.803512 -561.447871 7.284961
5 3QDJ_1 4.780158 -578.779196 -548.984082 6.233082
6 3GSN_1 3.760090 -570.671920 -541.173748 7.845071
7 1QRN_1 4.093077 -585.305336 -559.627225 6.273547
8 3PWP_1 3.828864 -598.379609 -570.653536 7.241332
9 5W1W_1 4.471712 -583.760125 -554.346778 6.577648
10 1QSE_1 3.941467 -587.442265 -557.383337 7.626330
11 4EUP_1 3.307241 -587.257802 -562.918506 7.359395
12 5TEZ_1 2.663639 -589.949987 -573.373806 6.223133
13 3D39_1 3.891425 -597.620093 -564.951230 8.395089
14 6EQA_1 3.498288 -576.752222 -551.491749 7.220811
15 2BNR_1 3.664455 -592.969503 -564.571906 7.749474
16 6BJ2_1 4.025273 -581.230367 -553.289479 6.941364
17 6BJ3_1 4.055735 -587.024766 -561.905861 6.193428
18 5NME_1 4.344556 -584.206642 -552.575455 7.280649
19 5MEN_1 3.507534 -572.311798 -548.902009 6.674145
20 2VLJ_1 3.874439 -565.150281 -538.248505 6.943399
21 2VLK_1 3.949252 -571.704749 -542.224054 7.464880
22 2J8U_1 3.157272 -587.168680 -566.377980 6.585020
23 2GJ6_1 3.607937 -602.686394 -572.920067 8.250236
24 1OGA_1 3.777555 -576.988638 -549.457315 7.288133
25 2F54_1 3.484796 -578.188988 -553.553477 7.069426
26 5D2L_1 3.517080 -571.254753 -545.610819 7.291258
27 2F53_1 3.384206 -580.644434 -553.906841 7.900698
28 3QEQ_1 3.560583 -569.717171 -539.269134 8.551420
29 1QSF_1 3.581838 -591.100737 -565.015043 7.282767
30 2UWE_1 3.562603 -592.517263 -568.650221 6.699327
31 5EUO_1 3.574224 -574.928123 -547.466165 7.683334
32 3H9S_1 4.244045 -590.901720 -560.055415 7.268139
33 3D3V_1 3.834411 -583.571643 -550.538364 8.614956
34 2BNQ_1 2.299543 -585.493175 -568.682132 7.310601
35 4FTV_1 3.185129 -572.264914 -550.970839 6.685468

In [18]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_mixed_original_and_cutoff100_impose_Aprime_constraint_95"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 1.721204322270767
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[18]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2_1 1.721204 -569.075571 -556.298031 7.423604
1 6BJ8_1 1.069227 -565.680672 -556.581484 8.510059
2 2JCC_1 0.915701 -583.206879 -575.925241 7.951984
3 1AO7_1 1.274523 -562.452529 -551.131293 8.882722
4 1LP9_1 0.729411 -575.793478 -569.582883 8.514531
5 3QDJ_1 1.521287 -556.321344 -545.745615 6.951831
6 3GSN_1 1.387840 -552.387176 -539.909435 8.990762
7 1QRN_1 0.933743 -562.850447 -556.210651 7.110943
8 3PWP_1 1.035444 -577.786546 -569.200396 8.292242
9 5W1W_1 1.573403 -568.499296 -556.713740 7.490488
10 1QSE_1 1.300462 -566.518591 -554.922466 8.916925
11 4EUP_1 0.734882 -569.782951 -563.592854 8.423249
12 5TEZ_1 0.937011 -582.253877 -575.550363 7.154146
13 3D39_1 1.566752 -576.365332 -561.279001 9.629050
14 6EQA_1 1.255480 -563.536329 -553.330684 8.128881
15 2BNR_1 1.359225 -576.040405 -564.110989 8.776630
16 6BJ2_1 1.242365 -562.792077 -552.855652 7.997991
17 6BJ3_1 0.818762 -569.306894 -563.557811 7.021681
18 5NME_1 1.459617 -564.600516 -552.489580 8.297337
19 5MEN_1 1.012511 -556.266493 -548.525586 7.645257
20 2VLJ_1 1.125352 -547.795930 -539.128022 7.702393
21 2VLK_1 1.153125 -549.843185 -540.176218 8.383274
22 2J8U_1 0.463167 -577.655939 -574.165895 7.535176
23 2GJ6_1 1.050535 -578.740794 -568.569263 9.682241
24 1OGA_1 1.020743 -558.607078 -550.192696 8.243393
25 2F54_1 0.842658 -555.808967 -548.992829 8.088854
26 5D2L_1 1.220933 -561.790438 -551.717826 8.249928
27 2F53_1 1.216688 -566.753878 -555.783226 9.016818
28 3QEQ_1 1.266109 -556.144298 -543.745853 9.792560
29 1QSF_1 1.147126 -572.614810 -562.846844 8.515162
30 2UWE_1 1.467751 -587.861478 -576.591101 7.678670
31 5EUO_1 0.907352 -556.626858 -548.843548 8.578053
32 3H9S_1 1.355323 -564.645776 -553.335335 8.345198
33 3D3V_1 1.307175 -552.749492 -539.675655 10.001600
34 2BNQ_1 0.725402 -569.252595 -563.169999 8.385135
35 4FTV_1 0.380957 -547.435405 -544.525557 7.638252

In [21]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_mixed_original_and_cutoff100_impose_Aprime_constraint_90"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 2.6207785744536953
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[21]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2_1 2.620779 -572.787573 -554.261337 7.068982
1 6BJ8_1 1.863768 -571.984261 -556.821913 8.135322
2 2JCC_1 1.745512 -587.781745 -574.476701 7.622429
3 1AO7_1 2.072237 -570.022190 -552.518237 8.446887
4 1LP9_1 1.517396 -579.130156 -566.871212 8.078936
5 3QDJ_1 2.551696 -563.807295 -546.825104 6.655257
6 3GSN_1 2.115572 -558.482091 -540.330873 8.579815
7 1QRN_1 1.909748 -570.335410 -557.349509 6.799800
8 3PWP_1 1.892032 -584.650900 -569.684776 7.910079
9 5W1W_1 2.470159 -573.586239 -555.924753 7.149938
10 1QSE_1 2.099299 -573.493149 -555.742757 8.455390
11 4EUP_1 1.524393 -575.607901 -563.368072 8.029312
12 5TEZ_1 1.466337 -584.819247 -574.824844 6.815899
13 3D39_1 2.280731 -583.450252 -562.503078 9.184414
14 6EQA_1 1.959666 -567.941627 -552.717705 7.768629
15 2BNR_1 2.071524 -581.683438 -564.264628 8.408694
16 6BJ2_1 2.092312 -568.938174 -553.000261 7.617370
17 6BJ3_1 1.819276 -575.212852 -563.007161 6.709091
18 5NME_1 2.349001 -571.135891 -552.518205 7.925789
19 5MEN_1 1.776993 -561.614928 -548.651060 7.295397
20 2VLJ_1 1.990751 -553.580713 -538.834850 7.407185
21 2VLK_1 2.022479 -557.130373 -540.858830 8.045344
22 2J8U_1 1.287186 -580.826853 -571.569924 7.191602
23 2GJ6_1 1.821009 -586.722661 -570.019531 9.172456
24 1OGA_1 1.874357 -564.734264 -549.947569 7.888941
25 2F54_1 1.653453 -563.268974 -550.513045 7.714722
26 5D2L_1 1.932154 -564.945209 -549.682157 7.899502
27 2F53_1 1.881925 -571.384064 -555.157764 8.622181
28 3QEQ_1 1.969746 -560.668589 -542.253613 9.348911
29 1QSF_1 1.884175 -578.776785 -563.569577 8.071018
30 2UWE_1 2.113484 -589.413407 -573.944141 7.319320
31 5EUO_1 1.738675 -562.727280 -548.384420 8.249302
32 3H9S_1 2.242435 -573.397757 -555.575362 7.947786
33 3D3V_1 2.074781 -563.023542 -543.296558 9.507985
34 2BNQ_1 1.207456 -574.666122 -565.007377 7.999254
35 4FTV_1 1.240708 -555.711908 -546.673984 7.284490

In [20]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/mixed_original_and_cutoff100_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 3.7941745445367365
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[20]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2_1 3.794175 -590.306113 -529.530818 16.018054
1 6BJ8_1 3.795499 -635.532599 -567.801255 17.845175
2 2JCC_1 3.420750 -594.460605 -539.662072 16.019449
3 1AO7_1 3.672911 -613.428707 -549.662733 17.361155
4 1LP9_1 3.353718 -588.521959 -534.184893 16.202037
5 3QDJ_1 3.985167 -609.107722 -540.568396 17.198608
6 3GSN_1 3.520095 -612.595721 -548.807714 18.121103
7 1QRN_1 3.844487 -611.860055 -552.349067 15.479565
8 3PWP_1 3.518534 -614.033381 -554.889086 16.809357
9 5W1W_1 3.880848 -568.954232 -506.888790 15.992753
10 1QSE_1 3.571491 -616.040396 -555.883546 16.843623
11 4EUP_1 3.324864 -608.524101 -552.250697 16.925026
12 5TEZ_1 3.445819 -625.287886 -575.396645 14.478777
13 3D39_1 3.442946 -621.879465 -559.812575 18.027263
14 6EQA_1 3.129962 -599.946883 -544.820516 17.612469
15 2BNR_1 3.836488 -625.687958 -563.232597 16.279306
16 6BJ2_1 3.632468 -610.668113 -546.639192 17.626836
17 6BJ3_1 3.588978 -634.616977 -575.855931 16.372642
18 5NME_1 3.409681 -598.888887 -539.297705 17.477053
19 5MEN_1 3.566577 -602.625581 -547.451531 15.469748
20 2VLJ_1 3.283920 -572.222209 -518.245806 16.436576
21 2VLK_1 3.702217 -583.564819 -521.267327 16.827076
22 2J8U_1 3.205174 -589.676272 -538.356814 16.011443
23 2GJ6_1 3.568047 -637.224815 -572.701014 18.083787
24 1OGA_1 4.106673 -595.084825 -529.367237 16.002636
25 2F54_1 3.470546 -609.394407 -552.864658 16.288429
26 5D2L_1 3.401990 -596.696291 -537.963414 17.264272
27 2F53_1 3.343720 -601.048037 -543.699280 17.151187
28 3QEQ_1 3.236888 -608.993371 -547.302625 19.058659
29 1QSF_1 3.408414 -616.479070 -558.495963 17.011754
30 2UWE_1 2.989514 -590.592990 -544.328874 15.475465
31 5EUO_1 3.411315 -595.286691 -535.244551 17.600880
32 3H9S_1 3.807791 -620.912205 -556.138229 17.010907
33 3D3V_1 3.242468 -599.627142 -539.080949 18.672870
34 2BNQ_1 5.049209 -684.547763 -595.058146 17.723492
35 4FTV_1 3.207755 -585.364824 -534.493197 15.858950

In [ ]: