In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
# from small_script.myFunctions import *
sys.path.insert(0, "/Users/weilu/openmmawsem")
from helperFunctions.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2

In [2]:
plt.rcParams['figure.figsize'] = np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})

In [3]:
# pre = "/Users/weilu/Research/server_backup/feb_2019/jan_optimization/gammas/"
# pre = "/Users/weilu/Research/server/april_2019/optimization_test/gammas/"
pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_6/optimization/gammas/"
trial_name = "trial_6"
cutoff = 500

In [4]:
# pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0"
# pp = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
pp = f"protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"

A_name = pp + "_A"
B_name = pp + "_B"
B_filtered_name = pp + "_B_filtered"
P_name = pp + "_P"
Gamma_name = pp + "_gamma"
Gamma_filtered_name = pp + "_gamma_filtered"
Lamb_name = pp + "_lamb"
Lamb_filtered_name = pp + "_lamb_filtered"

A = np.loadtxt(pre+A_name)
B = np.loadtxt(pre+B_name)
B_filtered = np.loadtxt(pre+B_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Gamma = np.loadtxt(pre+Gamma_name)
Gamma_filtered = np.loadtxt(pre+Gamma_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb = np.loadtxt(pre+Lamb_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb_filtered = np.loadtxt(pre+Lamb_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})

half_B_name = pp + "_half_B"
half_B = np.loadtxt(pre+half_B_name)
other_half_B_name = pp + "_other_half_B"
other_half_B = np.loadtxt(pre+other_half_B_name)
std_half_B_name = pp + "_std_half_B"
std_half_B = np.loadtxt(pre+std_half_B_name)


# pre = "/Users/weilu/Research/server/april_2019/"
location = pre + "../../phis/protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_phi_decoy_summary.txt"
A_prime = np.loadtxt(location)

In [5]:
plt.plot(Lamb)
plt.yscale("log")


/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/numpy/core/numeric.py:501: ComplexWarning: Casting complex values to real discards the imaginary part
  return array(a, dtype, copy=False, order=order)

In [6]:
lamb, P = np.linalg.eig(B)
lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)
filtered_lamb = np.copy(lamb)
cutoff_mode = cutoff
filtered_B_inv, filtered_lamb, P = get_filtered_B_inv_lambda_and_P(filtered_lamb, 
                                                                   cutoff_mode, P)
filtered_gamma = np.dot(filtered_B_inv, A)
filtered_B = np.linalg.inv(filtered_B_inv)
plot_contact_well(filtered_gamma[:210], inferBound=True)
plot_contact_well(filtered_gamma[210:420], inferBound=True)
plot_contact_well(filtered_gamma[420:], inferBound=True)



In [7]:
plt.plot(filtered_gamma)


Out[7]:
[<matplotlib.lines.Line2D at 0x1a1a09bcc0>]

In [8]:
# maximum difference between loaded and computed is 1e-5.
max(lamb-Lamb)


Out[8]:
(1.8839964817586596e-05+0j)

In [9]:
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/saved_gammas/"
np.savetxt(f"{save_gamma_pre}/{trial_name}_cutoff{cutoff}", filtered_gamma)

In [10]:
# os.chdir('/Users/weilu/opt/notebook/Optimization')

In [11]:
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/original_gamma"
original_gamma = np.loadtxt(gamma_file_name)

In [12]:
np.dot(A_prime, original_gamma)


Out[12]:
-552.0947006999806

In [13]:
# we want to impose additional contraint so that A' * gamma = constnat.(-562.23)
c = np.dot(A_prime, original_gamma)
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)

In [18]:
np.dot(A_prime, gamma_new)


Out[18]:
-552.0947006999791

In [15]:
# impose A'gamma
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/saved_gammas/"
np.savetxt(f"{save_gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint", gamma_new)

In [16]:
def output_E_decoy(hamiltonian, training_set_file, gamma_file_name, training_decoy_method, num_decoys, test_set_file=None, test_decoy_method=None, use_filtered_gammas=False, **kwargs):
    if test_set_file is None:
        test_set_file = training_set_file
    if test_decoy_method is None:
        test_decoy_method = training_decoy_method
    test_set = read_column_from_file(test_set_file, 1)
    z_scores = []
    e_natives = []
    e_mgs = []
    e_mg_stds = []
    e_decoys = []
    names = []
    for i, protein in enumerate(test_set):
#         print(i, protein)
        # print(protein, hamiltonian, training_set_file, gamma_file_name, test_decoy_method, num_decoys, use_filtered_gammas)
        e_native, e_decoy = evaluate_hamiltonian_wei(
            protein, hamiltonian, training_set_file, gamma_file_name, test_decoy_method, num_decoys, use_filtered_gammas, outputDecoy=True, **kwargs)
        # print(protein, z)
        e_natives.append(e_native)
        e_decoys.append(e_decoy)
        names.append(protein)
    return names, e_natives, e_decoys

do = os.system

In [25]:
# pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_5/optimization/gammas/"
os.chdir(f"{pre}/..")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"

names, e_natives, e_decoys = output_E_decoy("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)

In [ ]:


In [30]:
figure_folder = "/Users/weilu/Dropbox/Optimization_Xfunnel/Sep28_figures"
do(f"mkdir -p {figure_folder}")
for name, e_native, e_decoy in zip(names, e_natives, e_decoys):
    # print(name)
    plt.figure()
    _ = plt.hist(e_decoy, bins=50)
    plt.vlines(e_native, ymin=0, ymax=100, color="red")
    _ = plt.title(name)
    plt.savefig(f"{figure_folder}/{name}.png")
    plt.close()

In [20]:
os.chdir(f"{pre}/..")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = f"{save_gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 100, mode=0)
data


0 3.386869557854129
Out[20]:
Protein Z_scores E_native E_mgs Std_mg
0 5NMG_0 3.386870 -511.160837 -494.765797 4.840765
1 5NMG_1 2.866160 -511.160837 -494.751246 5.725288
2 5NMG_2 2.756145 -511.160837 -494.156164 6.169730
3 5NMG_3 2.719634 -511.160837 -494.746348 6.035552
4 5NMG_4 3.512124 -511.160837 -494.027657 4.878296
5 5NMG_5 3.039465 -511.160837 -494.452429 5.497154
6 5NMG_6 3.001962 -511.160837 -495.431597 5.239654
7 5NMG_7 2.414770 -511.160837 -495.887233 6.325076
8 5NMG_8 3.179537 -511.160837 -494.909619 5.111189
9 5NMG_9 2.980284 -511.160837 -494.388086 5.627904
10 1BD2_0 3.380408 -598.343597 -577.401151 6.195243
11 1BD2_1 3.525846 -598.343597 -577.546616 5.898437
12 1BD2_2 2.511344 -598.343597 -580.129901 7.252568
13 1BD2_3 2.812124 -598.343597 -579.043878 6.863040
14 1BD2_4 3.532375 -598.343597 -578.973153 5.483688
15 1BD2_5 3.779970 -598.343597 -578.893912 5.145460
16 1BD2_6 3.288839 -598.343597 -577.936950 6.204817
17 1BD2_7 3.481126 -598.343597 -578.683486 5.647630
18 1BD2_8 3.416244 -598.343597 -577.356860 6.143220
19 1BD2_9 2.770466 -598.343597 -578.931231 7.006896
20 6BJ8_0 2.940191 -561.103271 -535.400404 8.741903
21 6BJ8_1 3.397240 -561.103271 -534.801940 7.741969
22 6BJ8_2 3.182921 -561.103271 -535.087035 8.173699
23 5NMF_0 3.593481 -538.913667 -520.759506 5.051970
24 5NMF_1 3.074533 -538.913667 -520.826101 5.883029
25 5NMF_2 3.139033 -538.913667 -520.770840 5.779750
26 5NMF_3 3.191294 -538.913667 -520.827615 5.667310
27 5NMF_4 3.535482 -538.913667 -519.800174 5.406192
28 5NMF_5 3.505040 -538.913667 -520.897690 5.140020
29 5NMF_6 3.493950 -538.913667 -520.903023 5.154809
... ... ... ... ... ...
445 5C0A_2 6.370663 -484.833333 -438.857996 7.216727
446 5C0A_3 6.929091 -484.833333 -438.543979 6.680437
447 5C0A_4 7.789070 -484.833333 -438.549692 5.942127
448 MAGEA3_0 2.453686 -601.308346 -582.888401 7.507051
449 MAGEA3_1 2.919113 -601.308346 -580.895786 6.992727
450 2BNQ_0 5.892063 -612.748410 -578.198538 5.863799
451 2BNQ_1 6.038218 -612.748410 -578.515250 5.669415
452 2BNQ_2 6.226188 -612.748410 -578.208619 5.547502
453 2BNQ_3 6.346864 -612.748410 -578.320307 5.424427
454 2BNQ_4 4.741292 -612.748410 -579.030899 7.111460
455 2BNQ_5 6.675301 -612.748410 -578.493183 5.131638
456 2BNQ_6 6.518725 -612.748410 -578.902228 5.192148
457 2BNQ_7 6.274447 -612.748410 -578.789692 5.412225
458 2BNQ_8 7.724444 -612.748410 -577.443056 4.570601
459 2BNQ_9 6.712003 -612.748410 -578.162089 5.152906
460 4FTV_0 4.933209 -611.524859 -578.403780 6.713902
461 4FTV_1 5.591909 -611.524859 -578.785915 5.854699
462 4FTV_2 4.590048 -611.524859 -578.557195 7.182423
463 4FTV_3 4.883737 -611.524859 -579.489241 6.559653
464 4FTV_4 5.328331 -611.524859 -579.946490 5.926503
465 4FTV_5 5.268730 -611.524859 -577.871658 6.387346
466 4FTV_6 4.654778 -611.524859 -578.255143 7.147433
467 4FTV_7 4.647203 -611.524859 -578.894958 7.021407
468 4FTV_8 4.484368 -611.524859 -579.514203 7.138277
469 4FTV_9 4.085887 -611.524859 -578.899243 7.984953
470 5C08_0 4.805754 -517.541255 -484.411273 6.893816
471 5C08_1 6.395349 -517.541255 -482.481617 5.482052
472 5C08_2 5.722684 -517.541255 -482.834472 6.064773
473 5C08_3 5.977447 -517.541255 -483.006929 5.777438
474 5C08_4 6.265751 -517.541255 -483.158791 5.487365

475 rows × 5 columns


In [24]:
import pyemma

In [32]:



Out[32]:
PCA(dim=-1, mean=array([-1. , -1.5, -2.5,  1. ,  1.5,  2.5]), skip=0,
  stride=1, var_cutoff=0.95)

In [28]:
data = X.T # X.shape

In [30]:
data.shape


Out[30]:
(2, 6)

In [89]:
data = X
pca = pyemma.coordinates.pca(data)

In [100]:
pca.get_output()


Out[100]:
[array([[-1.],
        [ 1.]], dtype=float32)]

In [98]:
pyemma.coordinates.transform.pca.pca()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-98-6748433e9986> in <module>
----> 1 pyemma.coordinates.transform.pca.pca()

AttributeError: module 'pyemma.coordinates.transform.pca' has no attribute 'pca'

In [73]:
pca.fit(data)


Out[73]:
PCA(dim=-1, mean=array([-1. , -1.5, -2.5,  1. ,  1.5,  2.5]), skip=0,
  stride=1, var_cutoff=0.95)

In [72]:
pca.fit_transform(data)


Out[72]:
array([[-1.],
       [ 1.]])

In [90]:
pc1 = pca.eigenvectors[0]

In [47]:
data


Out[47]:
array([[-1, -2, -3,  1,  2,  3],
       [-1, -1, -2,  1,  1,  2]])

In [50]:
t_bar = np.mean(data, axis=0)

In [77]:
t_bar


Out[77]:
array([-1. , -1.5, -2.5,  1. ,  1.5,  2.5])

In [85]:
s = np.dot((data - t_bar).T, data - t_bar)

In [92]:
np.dot(s, pc1)


Out[92]:
array([0., 0., 0., 0., 0., 0.])

In [93]:
eigen1


Out[93]:
2.0

In [94]:
pca.eigenvalues


Out[94]:
array([ 2.00000000e+00,  2.12443557e-32,  1.21898706e-48,  0.00000000e+00,
       -1.07403546e-16, -4.62394046e-16])

In [96]:
pca.eigenvectors.round(3)


Out[96]:
array([[ 0.   , -0.   ,  0.   ,  1.   ,  0.   , -0.   ],
       [ 0.5  , -0.   ,  0.   ,  0.   ,  0.197, -0.843],
       [ 0.5  , -0.   ,  0.   ,  0.   ,  0.73 ,  0.467],
       [ 0.   , -1.   ,  0.   ,  0.   , -0.   , -0.   ],
       [-0.5  , -0.   ,  0.707,  0.   ,  0.463, -0.188],
       [-0.5  , -0.   , -0.707,  0.   ,  0.463, -0.188]])

In [91]:
eigen1 = pca.eigenvalues[0]

In [82]:
s = (np.outer((data[0] - t_bar),(data[0] - t_bar).T) + np.outer((data[1] - t_bar),(data[1] - t_bar)))/2

In [83]:
s


Out[83]:
array([[ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.25,  0.25,  0.  , -0.25, -0.25],
       [ 0.  ,  0.25,  0.25,  0.  , -0.25, -0.25],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  , -0.25, -0.25,  0.  ,  0.25,  0.25],
       [ 0.  , -0.25, -0.25,  0.  ,  0.25,  0.25]])

In [84]:
np.dot(s, pc1)


Out[84]:
array([0., 0., 0., 0., 0., 0.])

In [64]:
pca.eigenvalues


Out[64]:
array([ 2.00000000e+00,  2.12443557e-32,  1.21898706e-48,  0.00000000e+00,
       -1.07403546e-16, -4.62394046e-16])

In [ ]:


In [51]:
t_bar


Out[51]:
array([-1. , -1.5, -2.5,  1. ,  1.5,  2.5])

In [101]:
import numpy as np
from sklearn.decomposition import PCA
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]).T
# X = X
pca = PCA(n_components=2)
pca.fit(X)  


print(pca.explained_variance_ratio_)  

print(pca.singular_values_)


[1. 0.]
[1.41421356 0.        ]

In [102]:
pca.components_


Out[102]:
array([[ 0.  ,  0.5 ,  0.5 ,  0.  , -0.5 , -0.5 ],
       [ 0.5 ,  0.75, -0.25,  0.  ,  0.25,  0.25]])

In [103]:
pca.explained_variance_


Out[103]:
array([2., 0.])

In [104]:
pca.fit_transform(X)


Out[104]:
array([[-1.,  0.],
       [ 1.,  0.]])

In [105]:
pca.singular_values_


Out[105]:
array([1.41421356, 0.        ])

In [66]:
pca.components_


Out[66]:
array([[ 0.  ,  0.5 ,  0.5 ,  0.  , -0.5 , -0.5 ],
       [ 0.5 ,  0.75, -0.25,  0.  ,  0.25,  0.25]])

In [ ]:


In [38]:
X.shape


Out[38]:
(2, 6)

In [106]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [119]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_specific_test/optimization/")
# evaluate_hamiltonian_wei(protein, hamiltonian, training_set_file, gamma_file_name, test_decoy_method, num_decoys, use_filtered_gammas=True, outputDecoy=False, **kwargs)
protein = "2BNQ"
# protein = "4FTV"
hamiltonian = "phi_list.txt"
training_set_file =  "protein_list" 
gamma_file_name = f"/Users/weilu/Research/server/sep_2019/saved_gammas/{save_gamma_pre}/{trial_name}_cutoff{cutoff}_impose_Aprime_constraint"
test_decoy_method = "shuffle"
num_decoys = 10
use_filtered_gammas = False
phi_list = read_phi_list(hamiltonian)
training_set = read_column_from_file(training_set_file, 1)
# read in Hamiltonian
# Find out how many total phi_i there are and get full parameter string
total_phis, full_parameters_string, num_phis = get_total_phis_and_parameter_string(
    phi_list, training_set, mode=0)
# print(total_phis, full_parameters_string, num_phis)
# read in corresponding gammas
# if use_filtered_gammas:
#     gamma_file_name = "%s%s_%s_gamma_filtered" % (
#         gammas_directory, training_set_file.split('/')[-1].split('.')[0], full_parameters_string)
# else:
#     pass
#     gamma_file_name = "%s%s_%s_gamma" % (gammas_directory, training_set_file.split(
#         '/')[-1].split('.')[0], full_parameters_string)

# Need to filter out the complex number if in the "filtered" mode;
# if use_filtered_gammas:
#     gamma = np.loadtxt(gamma_file_name, dtype=complex, converters={
#                        0: lambda s: complex(s.decode().replace('+-', '-'))})
# else:
#     gamma = np.loadtxt(gamma_file_name)
# gamma = np.loadtxt(gamma_file_name, dtype=complex, converters={
#                             0: lambda s: complex(s.decode().replace('+-', '-'))})
gamma = np.loadtxt(gamma_file_name)
# read in corresponding phis (native and decoys)
phi_native = read_native_phi(protein, phi_list, total_phis, mode=0)
phi_i_decoy = read_decoy_phis(
    protein, phi_list, total_phis, num_phis, num_decoys, test_decoy_method, mode=0)
# perform dot products to get energies (native and decoys)
e_decoy = np.zeros(num_decoys)
# print(gamma.shape, phi_native.shape)
e_native = np.dot(gamma, phi_native)
for i_decoy in range(num_decoys):
    e_decoy[i_decoy] = np.dot(gamma, phi_i_decoy[i_decoy])
e_mg = np.average(e_decoy)
e_mg_std = np.std(e_decoy)
# calculate z-score
z_score = (e_mg - e_native) / e_mg_std

In [120]:
np.dot(gamma, phi_native)


Out[120]:
-605.7814622516905

In [ ]:
for i in range(2):
    for j in range(20):
        for k in range(j, 20):
            the_list.append(c)
            c += 1

In [94]:
gamma[:10]


Out[94]:
array([-0.11803464, -0.12873059,  0.03765517,  0.01365024,  0.00311885,
        0.1551497 , -0.32315511, -0.18640831, -0.2656861 , -0.20895585])

In [89]:
np.dot(gamma[:210], phi_native[:210])


Out[89]:
-47.428936288802966

In [91]:
np.dot(gamma[:210], phi_native[:210])


Out[91]:
-47.428936288802966

In [83]:
np.dot(gamma[210:420], phi_native[210:420])


Out[83]:
-462.8077411144142

In [84]:
np.dot(gamma[420:630], phi_native[420:630])


Out[84]:
-4.460372079743105

In [92]:
np.dot(gamma[210:630], phi_native[210:630])


Out[92]:
-400.90855931159706

In [93]:
np.dot(gamma[630:], phi_native[630:])


Out[93]:
-159.7300541800085

In [87]:
np.dot(gamma, phi_native)


Out[87]:
-674.7857324129609

In [90]:
np.dot(gamma, phi_native)


Out[90]:
-608.0675497804086

In [96]:
-608.0675497/594.140


Out[96]:
-1.0234415284276432

In [81]:
len(phi_native)


Out[81]:
690

In [72]:


In [121]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint"

names, e_natives, e_decoys = output_E_decoy("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)


1BD2 phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
2JCC phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
1AO7 phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
1LP9 phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
3QDJ phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
3GSN phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
1QRN phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
3PWP phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
5W1W phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
1QSE phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
4EUP phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
5TEZ phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
6EQA phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
2BNR phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
5NME phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
5MEN phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
2VLJ phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
2VLK phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
2J8U phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
2GJ6 phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
1OGA phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
2F54 phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
5D2L phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
2F53 phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
3QEQ phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
1QSF phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
2UWE phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
5EUO phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
3H9S phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
2BNQ phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False
4FTV phi_list.txt protein_list /Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint shuffle 1000 False

In [124]:
for name, e_native, e_decoy in zip(names, e_natives, e_decoys):
    # print(name)
    plt.figure()
    _ = plt.hist(e_decoy, bins=50)
    plt.vlines(e_native, ymin=0, ymax=100, color="red")
    _ = plt.title(name)
    plt.savefig(f"/Users/weilu/Dropbox/Optimization_Xfunnel/updated_figures/{name}.png")


/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/matplotlib/pyplot.py:514: RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
  max_open_warning, RuntimeWarning)

In [125]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4_duplicate/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_4_cutoff100_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)
data


0 3.508959112034475
Out[125]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2 3.508959 -590.408659 -576.119892 4.072082
1 2JCC 6.406843 -607.314444 -577.222713 4.696811
2 1AO7 5.530768 -592.327551 -569.008721 4.216201
3 1LP9 6.310993 -622.609479 -592.084044 4.836867
4 3QDJ 4.901757 -606.525572 -584.644281 4.463969
5 3GSN 1.854756 -580.017867 -572.453867 4.078164
6 1QRN 4.838191 -602.786617 -582.219358 4.251023
7 3PWP 6.154082 -600.250600 -574.765083 4.141238
8 5W1W 1.467168 -506.193262 -499.628928 4.474153
9 1QSE 5.029028 -592.782661 -572.004454 4.131655
10 4EUP 4.733706 -598.299506 -579.023477 4.072080
11 5TEZ 4.801798 -590.043113 -569.550266 4.267745
12 6EQA 5.317700 -609.461338 -582.560460 5.058743
13 2BNR 5.886354 -605.915674 -580.787821 4.268831
14 5NME 2.025101 -572.614638 -563.462993 4.519106
15 5MEN 1.400714 -573.492428 -568.229312 3.757452
16 2VLJ 6.392192 -588.158513 -553.730685 5.385919
17 2VLK 6.000510 -590.754338 -558.852347 5.316547
18 2J8U 5.742514 -600.006567 -572.937643 4.713776
19 2GJ6 5.055262 -591.966325 -570.389785 4.268135
20 1OGA 6.232665 -603.027687 -568.307955 5.570607
21 2F54 5.855629 -604.796866 -581.592717 3.962708
22 5D2L 0.613488 -556.479912 -553.936897 4.145171
23 2F53 6.270418 -606.695179 -581.914186 3.952048
24 3QEQ 4.695615 -584.904171 -564.268226 4.394726
25 1QSF 4.598997 -595.936966 -576.289423 4.272136
26 2UWE 6.245330 -614.389154 -583.038066 5.019925
27 5EUO 6.012745 -603.272150 -570.879977 5.387252
28 3H9S 3.426876 -582.740959 -569.387684 3.896632
29 2BNQ 5.394414 -605.781462 -583.339660 4.160193
30 4FTV 5.242073 -594.290703 -570.873764 4.467114

In [31]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_3_cutoff100_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 11.93699656742965
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[31]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2 11.936997 -664.061736 -575.776354 7.395946
1 2JCC 12.304886 -634.274622 -545.990733 7.174702
2 1AO7 12.016467 -674.865247 -572.358923 8.530488
3 1LP9 12.089640 -651.642492 -563.736447 7.271188
4 3QDJ 10.340876 -687.301314 -600.915062 8.353862
5 3GSN 11.114498 -674.278587 -590.125101 7.571506
6 1QRN 12.077819 -686.270903 -586.175261 8.287559
7 3PWP 10.945414 -673.525681 -582.026367 8.359602
8 5W1W 5.443101 -551.073543 -501.203245 9.162111
9 1QSE 12.577687 -682.868237 -580.147438 8.166907
10 4EUP 8.338386 -639.867494 -574.113597 7.885687
11 5TEZ 11.601056 -635.136262 -549.678592 7.366370
12 6EQA 5.923826 -645.349495 -593.739195 8.712326
13 2BNR 10.243296 -676.153398 -598.732239 7.558227
14 5NME 11.698059 -639.378261 -551.095570 7.546781
15 5MEN 5.683667 -619.355198 -571.667462 8.390311
16 2VLJ 12.229199 -636.100898 -546.028566 7.365350
17 2VLK 11.993582 -644.320681 -550.893999 7.789723
18 2J8U 10.779683 -641.573320 -555.647796 7.971062
19 2GJ6 12.016548 -683.625107 -587.915054 7.964854
20 1OGA 12.360650 -653.798084 -560.109762 7.579563
21 2F54 9.774296 -670.744387 -596.732454 7.572098
22 5D2L 9.951962 -640.602159 -557.162193 8.384273
23 2F53 10.378353 -664.475665 -594.584493 6.734322
24 3QEQ 10.951320 -668.757885 -591.212055 7.080958
25 1QSF 11.585027 -680.655313 -580.797229 8.619582
26 2UWE 11.831632 -640.750135 -557.770066 7.013409
27 5EUO 11.674362 -640.556132 -549.582574 7.792593
28 3H9S 11.579434 -666.245108 -573.454369 8.013409
29 2BNQ 10.119691 -674.785732 -599.565672 7.433039
30 4FTV 12.081457 -662.719918 -564.566332 8.124317

In [16]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_cutoff100_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_tiny", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 8.849450826769093
210
420
60
210
420
60
210
420
60
210
420
60
Out[16]:
Protein Z_scores E_native E_mgs Std_mg
0 1 8.849451 -672.109604 -598.080169 8.365427
1 2 9.270943 -681.915179 -611.143850 7.633671
2 3 8.089534 -645.301463 -578.332365 8.278486
3 4 8.240793 -671.306528 -606.010473 7.923516
4 5 7.439003 -645.968912 -584.637773 8.244537

In [39]:
# mix gammas so that we don't overfitting too much.
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/original_gamma"
original_gamma = np.loadtxt(gamma_file_name)
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_3_cutoff100_impose_Aprime_constraint"
gamma_new = np.loadtxt(gamma_file_name)

alpha = 0.5
alpha_percent = int(alpha*100)
mixed_gamma = alpha*original_gamma + (1-alpha)*gamma_new
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/saved_gammas/"
np.savetxt(f"{save_gamma_pre}/trial_3_mixed_original_and_cutoff100_impose_Aprime_constraint_{alpha_percent}", mixed_gamma)

In [41]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_3_mixed_original_and_cutoff100_impose_Aprime_constraint_50"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)
data


0 6.823294613794194
Out[41]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2 6.823295 -621.412295 -574.576567 6.864093
1 2JCC 6.342357 -617.516039 -572.981643 7.021742
2 1AO7 7.975743 -616.342840 -563.312714 6.648926
3 1LP9 6.317982 -629.015696 -584.468087 7.050923
4 3QDJ 6.120892 -637.294333 -591.466530 7.487112
5 3GSN 6.509687 -624.507495 -580.107055 6.820672
6 1QRN 7.837941 -629.275159 -573.215709 7.152319
7 3PWP 7.342186 -618.907910 -573.103238 6.238561
8 5W1W 3.714787 -533.994620 -509.264413 6.657235
9 1QSE 7.879351 -618.050427 -568.862540 6.242632
10 4EUP 4.848733 -619.517958 -582.118969 7.713146
11 5TEZ 6.625828 -605.449427 -559.832670 6.884688
12 6EQA 4.295426 -622.999574 -589.881268 7.710133
13 2BNR 5.828085 -619.108393 -579.520836 6.792550
14 5NME 7.507025 -605.908701 -557.473238 6.452019
15 5MEN 4.064457 -603.214544 -578.370626 6.112482
16 2VLJ 8.219110 -601.873452 -553.918716 5.834541
17 2VLK 8.337624 -607.892532 -558.566048 5.916132
18 2J8U 5.948137 -622.357777 -578.590340 7.358175
19 2GJ6 7.797824 -624.546746 -575.817518 6.249080
20 1OGA 8.124380 -614.471479 -564.821239 6.111265
21 2F54 5.522862 -619.904277 -582.387186 6.793053
22 5D2L 6.409373 -605.793383 -561.362419 6.932186
23 2F53 5.661503 -622.394036 -586.055972 6.418448
24 3QEQ 5.700798 -619.747559 -577.057424 7.488448
25 1QSF 8.550891 -624.364970 -571.509378 6.181296
26 2UWE 6.116911 -626.559912 -584.166082 6.930594
27 5EUO 7.416441 -604.947452 -557.900501 6.343603
28 3H9S 8.262784 -612.181759 -566.861153 5.484908
29 2BNQ 5.660373 -620.626428 -582.223638 6.784498
30 4FTV 7.699585 -616.576241 -563.710585 6.866039

In [38]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/mixed_original_and_cutoff100_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 3.8751684860532514
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[38]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2 3.875168 -634.484887 -558.400163 19.633914
1 2JCC 2.789012 -604.568227 -552.849857 18.543620
2 1AO7 3.325161 -633.382103 -569.322420 19.265137
3 1LP9 2.752028 -611.365417 -558.915238 19.058741
4 3QDJ 3.556798 -651.956384 -578.535294 20.642471
5 3GSN 4.099164 -674.638570 -585.798746 21.672671
6 1QRN 3.722246 -650.179577 -574.708866 20.275583
7 3PWP 3.100221 -635.314643 -575.245667 19.375710
8 5W1W 2.574534 -497.246335 -456.227335 15.932592
9 1QSE 3.070116 -629.613946 -570.720402 19.182840
10 4EUP 3.705549 -670.956261 -592.859209 21.075706
11 5TEZ 3.040179 -634.559724 -574.838393 19.644017
12 6EQA 3.279192 -652.726083 -582.036954 21.556877
13 2BNR 4.594172 -678.492263 -583.008220 20.783734
14 5NME 4.014371 -623.444323 -544.722397 19.610028
15 5MEN 2.732767 -644.130077 -590.758301 19.530305
16 2VLJ 3.518903 -608.831211 -546.931083 17.590746
17 2VLK 3.573966 -613.508697 -550.603349 17.600992
18 2J8U 2.812700 -615.660370 -563.608351 18.506071
19 2GJ6 3.184480 -637.603125 -577.529337 18.864550
20 1OGA 3.518597 -613.355091 -549.670479 18.099433
21 2F54 4.567481 -675.984190 -583.659246 20.213537
22 5D2L 3.976258 -625.922203 -545.367606 20.258895
23 2F53 4.532867 -690.236839 -594.965638 21.017866
24 3QEQ 3.347464 -650.784350 -575.604745 22.458676
25 1QSF 3.639410 -648.187613 -577.029188 19.552186
26 2UWE 2.753325 -615.453749 -563.396357 18.907102
27 5EUO 3.220518 -613.993116 -554.306240 18.533316
28 3H9S 3.428455 -627.209029 -566.578896 17.684391
29 2BNQ 4.811515 -683.340061 -583.377189 20.775758
30 4FTV 3.375951 -623.478266 -558.257554 19.319210

In [36]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_3_mixed_original_and_cutoff100_impose_Aprime_constraint_80"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 2.5365697412845893
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[36]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2 2.536570 -595.822630 -573.856694 8.659701
1 2JCC 1.925393 -607.460889 -589.176189 9.496608
2 1AO7 2.894548 -581.229395 -557.884989 8.064957
3 1LP9 1.931806 -615.439619 -596.907071 9.593378
4 3QDJ 2.275837 -607.290144 -585.797411 9.443879
5 3GSN 2.414709 -594.644840 -574.096228 8.509769
6 1QRN 3.321020 -595.077712 -565.439977 8.924287
7 3PWP 2.432179 -586.137247 -567.749360 7.560253
8 5W1W 1.237414 -523.747267 -514.101114 7.795411
9 1QSE 2.232644 -579.159742 -562.091600 7.644812
10 4EUP 1.971088 -607.308237 -586.922192 10.342532
11 5TEZ 2.351310 -587.637327 -565.925117 9.234089
12 6EQA 2.097845 -609.589621 -587.566512 10.497969
13 2BNR 1.917661 -584.881391 -567.993994 8.806247
14 5NME 3.007987 -585.826966 -561.299838 8.154001
15 5MEN 1.582747 -593.530151 -582.392524 7.036896
16 2VLJ 3.219513 -581.336984 -558.652806 7.045841
17 2VLK 3.278042 -586.035643 -563.169278 6.975617
18 2J8U 1.923347 -610.828451 -592.355867 9.604397
19 2GJ6 2.645909 -589.099729 -568.558996 7.763204
20 1OGA 3.112154 -590.875516 -567.648125 7.463446
21 2F54 1.824307 -589.400212 -573.780025 8.562257
22 5D2L 2.466019 -584.908118 -563.882554 8.526116
23 2F53 1.882500 -597.145059 -580.938859 8.608873
24 3QEQ 2.150232 -590.341364 -568.564646 10.127614
25 1QSF 3.231772 -590.590765 -565.936668 7.628662
26 2UWE 1.886858 -618.045779 -600.003691 9.561973
27 5EUO 2.573511 -583.582244 -562.891257 8.039984
28 3H9S 2.635050 -579.743749 -562.905223 6.390211
29 2BNQ 1.832561 -588.130846 -571.818417 8.901439
30 4FTV 2.899512 -588.890035 -563.197137 8.861111

In [23]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_mixed_original_and_cutoff100_impose_Aprime_constraint_80"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 4.644053322015458
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[23]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2_1 4.644053 -580.211577 -550.187949 6.464962
1 6BJ8_1 3.644353 -584.591439 -557.302772 7.487932
2 2JCC_1 3.595958 -596.931478 -571.579622 7.050099
3 1AO7_1 3.888880 -585.161512 -555.292123 7.680717
4 1LP9_1 3.343277 -585.803512 -561.447871 7.284961
5 3QDJ_1 4.780158 -578.779196 -548.984082 6.233082
6 3GSN_1 3.760090 -570.671920 -541.173748 7.845071
7 1QRN_1 4.093077 -585.305336 -559.627225 6.273547
8 3PWP_1 3.828864 -598.379609 -570.653536 7.241332
9 5W1W_1 4.471712 -583.760125 -554.346778 6.577648
10 1QSE_1 3.941467 -587.442265 -557.383337 7.626330
11 4EUP_1 3.307241 -587.257802 -562.918506 7.359395
12 5TEZ_1 2.663639 -589.949987 -573.373806 6.223133
13 3D39_1 3.891425 -597.620093 -564.951230 8.395089
14 6EQA_1 3.498288 -576.752222 -551.491749 7.220811
15 2BNR_1 3.664455 -592.969503 -564.571906 7.749474
16 6BJ2_1 4.025273 -581.230367 -553.289479 6.941364
17 6BJ3_1 4.055735 -587.024766 -561.905861 6.193428
18 5NME_1 4.344556 -584.206642 -552.575455 7.280649
19 5MEN_1 3.507534 -572.311798 -548.902009 6.674145
20 2VLJ_1 3.874439 -565.150281 -538.248505 6.943399
21 2VLK_1 3.949252 -571.704749 -542.224054 7.464880
22 2J8U_1 3.157272 -587.168680 -566.377980 6.585020
23 2GJ6_1 3.607937 -602.686394 -572.920067 8.250236
24 1OGA_1 3.777555 -576.988638 -549.457315 7.288133
25 2F54_1 3.484796 -578.188988 -553.553477 7.069426
26 5D2L_1 3.517080 -571.254753 -545.610819 7.291258
27 2F53_1 3.384206 -580.644434 -553.906841 7.900698
28 3QEQ_1 3.560583 -569.717171 -539.269134 8.551420
29 1QSF_1 3.581838 -591.100737 -565.015043 7.282767
30 2UWE_1 3.562603 -592.517263 -568.650221 6.699327
31 5EUO_1 3.574224 -574.928123 -547.466165 7.683334
32 3H9S_1 4.244045 -590.901720 -560.055415 7.268139
33 3D3V_1 3.834411 -583.571643 -550.538364 8.614956
34 2BNQ_1 2.299543 -585.493175 -568.682132 7.310601
35 4FTV_1 3.185129 -572.264914 -550.970839 6.685468

In [18]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_mixed_original_and_cutoff100_impose_Aprime_constraint_95"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 1.721204322270767
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[18]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2_1 1.721204 -569.075571 -556.298031 7.423604
1 6BJ8_1 1.069227 -565.680672 -556.581484 8.510059
2 2JCC_1 0.915701 -583.206879 -575.925241 7.951984
3 1AO7_1 1.274523 -562.452529 -551.131293 8.882722
4 1LP9_1 0.729411 -575.793478 -569.582883 8.514531
5 3QDJ_1 1.521287 -556.321344 -545.745615 6.951831
6 3GSN_1 1.387840 -552.387176 -539.909435 8.990762
7 1QRN_1 0.933743 -562.850447 -556.210651 7.110943
8 3PWP_1 1.035444 -577.786546 -569.200396 8.292242
9 5W1W_1 1.573403 -568.499296 -556.713740 7.490488
10 1QSE_1 1.300462 -566.518591 -554.922466 8.916925
11 4EUP_1 0.734882 -569.782951 -563.592854 8.423249
12 5TEZ_1 0.937011 -582.253877 -575.550363 7.154146
13 3D39_1 1.566752 -576.365332 -561.279001 9.629050
14 6EQA_1 1.255480 -563.536329 -553.330684 8.128881
15 2BNR_1 1.359225 -576.040405 -564.110989 8.776630
16 6BJ2_1 1.242365 -562.792077 -552.855652 7.997991
17 6BJ3_1 0.818762 -569.306894 -563.557811 7.021681
18 5NME_1 1.459617 -564.600516 -552.489580 8.297337
19 5MEN_1 1.012511 -556.266493 -548.525586 7.645257
20 2VLJ_1 1.125352 -547.795930 -539.128022 7.702393
21 2VLK_1 1.153125 -549.843185 -540.176218 8.383274
22 2J8U_1 0.463167 -577.655939 -574.165895 7.535176
23 2GJ6_1 1.050535 -578.740794 -568.569263 9.682241
24 1OGA_1 1.020743 -558.607078 -550.192696 8.243393
25 2F54_1 0.842658 -555.808967 -548.992829 8.088854
26 5D2L_1 1.220933 -561.790438 -551.717826 8.249928
27 2F53_1 1.216688 -566.753878 -555.783226 9.016818
28 3QEQ_1 1.266109 -556.144298 -543.745853 9.792560
29 1QSF_1 1.147126 -572.614810 -562.846844 8.515162
30 2UWE_1 1.467751 -587.861478 -576.591101 7.678670
31 5EUO_1 0.907352 -556.626858 -548.843548 8.578053
32 3H9S_1 1.355323 -564.645776 -553.335335 8.345198
33 3D3V_1 1.307175 -552.749492 -539.675655 10.001600
34 2BNQ_1 0.725402 -569.252595 -563.169999 8.385135
35 4FTV_1 0.380957 -547.435405 -544.525557 7.638252

In [21]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/trial_2_mixed_original_and_cutoff100_impose_Aprime_constraint_90"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 2.6207785744536953
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[21]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2_1 2.620779 -572.787573 -554.261337 7.068982
1 6BJ8_1 1.863768 -571.984261 -556.821913 8.135322
2 2JCC_1 1.745512 -587.781745 -574.476701 7.622429
3 1AO7_1 2.072237 -570.022190 -552.518237 8.446887
4 1LP9_1 1.517396 -579.130156 -566.871212 8.078936
5 3QDJ_1 2.551696 -563.807295 -546.825104 6.655257
6 3GSN_1 2.115572 -558.482091 -540.330873 8.579815
7 1QRN_1 1.909748 -570.335410 -557.349509 6.799800
8 3PWP_1 1.892032 -584.650900 -569.684776 7.910079
9 5W1W_1 2.470159 -573.586239 -555.924753 7.149938
10 1QSE_1 2.099299 -573.493149 -555.742757 8.455390
11 4EUP_1 1.524393 -575.607901 -563.368072 8.029312
12 5TEZ_1 1.466337 -584.819247 -574.824844 6.815899
13 3D39_1 2.280731 -583.450252 -562.503078 9.184414
14 6EQA_1 1.959666 -567.941627 -552.717705 7.768629
15 2BNR_1 2.071524 -581.683438 -564.264628 8.408694
16 6BJ2_1 2.092312 -568.938174 -553.000261 7.617370
17 6BJ3_1 1.819276 -575.212852 -563.007161 6.709091
18 5NME_1 2.349001 -571.135891 -552.518205 7.925789
19 5MEN_1 1.776993 -561.614928 -548.651060 7.295397
20 2VLJ_1 1.990751 -553.580713 -538.834850 7.407185
21 2VLK_1 2.022479 -557.130373 -540.858830 8.045344
22 2J8U_1 1.287186 -580.826853 -571.569924 7.191602
23 2GJ6_1 1.821009 -586.722661 -570.019531 9.172456
24 1OGA_1 1.874357 -564.734264 -549.947569 7.888941
25 2F54_1 1.653453 -563.268974 -550.513045 7.714722
26 5D2L_1 1.932154 -564.945209 -549.682157 7.899502
27 2F53_1 1.881925 -571.384064 -555.157764 8.622181
28 3QEQ_1 1.969746 -560.668589 -542.253613 9.348911
29 1QSF_1 1.884175 -578.776785 -563.569577 8.071018
30 2UWE_1 2.113484 -589.413407 -573.944141 7.319320
31 5EUO_1 1.738675 -562.727280 -548.384420 8.249302
32 3H9S_1 2.242435 -573.397757 -555.575362 7.947786
33 3D3V_1 2.074781 -563.023542 -543.296558 9.507985
34 2BNQ_1 1.207456 -574.666122 -565.007377 7.999254
35 4FTV_1 1.240708 -555.711908 -546.673984 7.284490

In [20]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/saved_gammas/mixed_original_and_cutoff100_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_small", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 3.7941745445367365
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[20]:
Protein Z_scores E_native E_mgs Std_mg
0 1BD2_1 3.794175 -590.306113 -529.530818 16.018054
1 6BJ8_1 3.795499 -635.532599 -567.801255 17.845175
2 2JCC_1 3.420750 -594.460605 -539.662072 16.019449
3 1AO7_1 3.672911 -613.428707 -549.662733 17.361155
4 1LP9_1 3.353718 -588.521959 -534.184893 16.202037
5 3QDJ_1 3.985167 -609.107722 -540.568396 17.198608
6 3GSN_1 3.520095 -612.595721 -548.807714 18.121103
7 1QRN_1 3.844487 -611.860055 -552.349067 15.479565
8 3PWP_1 3.518534 -614.033381 -554.889086 16.809357
9 5W1W_1 3.880848 -568.954232 -506.888790 15.992753
10 1QSE_1 3.571491 -616.040396 -555.883546 16.843623
11 4EUP_1 3.324864 -608.524101 -552.250697 16.925026
12 5TEZ_1 3.445819 -625.287886 -575.396645 14.478777
13 3D39_1 3.442946 -621.879465 -559.812575 18.027263
14 6EQA_1 3.129962 -599.946883 -544.820516 17.612469
15 2BNR_1 3.836488 -625.687958 -563.232597 16.279306
16 6BJ2_1 3.632468 -610.668113 -546.639192 17.626836
17 6BJ3_1 3.588978 -634.616977 -575.855931 16.372642
18 5NME_1 3.409681 -598.888887 -539.297705 17.477053
19 5MEN_1 3.566577 -602.625581 -547.451531 15.469748
20 2VLJ_1 3.283920 -572.222209 -518.245806 16.436576
21 2VLK_1 3.702217 -583.564819 -521.267327 16.827076
22 2J8U_1 3.205174 -589.676272 -538.356814 16.011443
23 2GJ6_1 3.568047 -637.224815 -572.701014 18.083787
24 1OGA_1 4.106673 -595.084825 -529.367237 16.002636
25 2F54_1 3.470546 -609.394407 -552.864658 16.288429
26 5D2L_1 3.401990 -596.696291 -537.963414 17.264272
27 2F53_1 3.343720 -601.048037 -543.699280 17.151187
28 3QEQ_1 3.236888 -608.993371 -547.302625 19.058659
29 1QSF_1 3.408414 -616.479070 -558.495963 17.011754
30 2UWE_1 2.989514 -590.592990 -544.328874 15.475465
31 5EUO_1 3.411315 -595.286691 -535.244551 17.600880
32 3H9S_1 3.807791 -620.912205 -556.138229 17.010907
33 3D3V_1 3.242468 -599.627142 -539.080949 18.672870
34 2BNQ_1 5.049209 -684.547763 -595.058146 17.723492
35 4FTV_1 3.207755 -585.364824 -534.493197 15.858950

In [ ]: