In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
# from small_script.myFunctions import *
sys.path.insert(0, "/Users/weilu/openmmawsem")
from helperFunctions.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2

In [2]:
plt.rcParams['figure.figsize'] = np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})

In [3]:
# pre = "/Users/weilu/Research/server_backup/feb_2019/jan_optimization/gammas/"
# pre = "/Users/weilu/Research/server/april_2019/optimization_test/gammas/"
pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/gammas/"
# pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0"
# pp = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
pp = f"protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"

A_name = pp + "_A"
B_name = pp + "_B"
B_filtered_name = pp + "_B_filtered"
P_name = pp + "_P"
Gamma_name = pp + "_gamma"
Gamma_filtered_name = pp + "_gamma_filtered"
Lamb_name = pp + "_lamb"
Lamb_filtered_name = pp + "_lamb_filtered"

A = np.loadtxt(pre+A_name)
B = np.loadtxt(pre+B_name)
B_filtered = np.loadtxt(pre+B_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Gamma = np.loadtxt(pre+Gamma_name)
Gamma_filtered = np.loadtxt(pre+Gamma_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb = np.loadtxt(pre+Lamb_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb_filtered = np.loadtxt(pre+Lamb_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})

half_B_name = pp + "_half_B"
half_B = np.loadtxt(pre+half_B_name)
other_half_B_name = pp + "_other_half_B"
other_half_B = np.loadtxt(pre+other_half_B_name)
std_half_B_name = pp + "_std_half_B"
std_half_B = np.loadtxt(pre+std_half_B_name)


# pre = "/Users/weilu/Research/server/april_2019/"
location = pre + "../../phis/protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_phi_decoy_summary.txt"
A_prime = np.loadtxt(location)

In [4]:
lamb, P = np.linalg.eig(B)
lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)
filtered_lamb = np.copy(lamb)
cutoff_mode = 100
filtered_B_inv, filtered_lamb, P = get_filtered_B_inv_lambda_and_P(filtered_lamb, 
                                                                   cutoff_mode, P)
filtered_gamma = np.dot(filtered_B_inv, A)
filtered_B = np.linalg.inv(filtered_B_inv)
plot_contact_well(filtered_gamma[:210], inferBound=True)
plot_contact_well(filtered_gamma[210:420], inferBound=True)
plot_contact_well(filtered_gamma[420:], inferBound=True)



In [5]:
# maximum difference between loaded and computed is 1e-5.
max(lamb-Lamb)


Out[5]:
(1.5022809272030481e-05+0j)

In [6]:
plt.plot(Lamb)
plt.yscale("log")


/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/numpy/core/numeric.py:501: ComplexWarning: Casting complex values to real discards the imaginary part
  return array(a, dtype, copy=False, order=order)

In [7]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/original_gamma"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_tiny", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 0.28501951024346306
210
420
60
210
420
60
210
420
60
210
420
60
Out[7]:
Protein Z_scores E_native E_mgs Std_mg
0 1 0.285020 -563.839068 -561.332622 8.793945
1 2 0.107713 -567.748351 -566.811899 8.693935
2 3 -0.027309 -558.486254 -558.725908 8.775800
3 4 0.157535 -565.228535 -563.794115 9.105390
4 5 0.081485 -561.292942 -560.599687 8.507752

In [8]:
data


Out[8]:
Protein Z_scores E_native E_mgs Std_mg
0 1 0.285020 -563.839068 -561.332622 8.793945
1 2 0.107713 -567.748351 -566.811899 8.693935
2 3 -0.027309 -558.486254 -558.725908 8.775800
3 4 0.157535 -565.228535 -563.794115 9.105390
4 5 0.081485 -561.292942 -560.599687 8.507752

In [9]:
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/original_gamma"
original_gamma = np.loadtxt(gamma_file_name)

In [10]:
np.dot(A_prime, original_gamma)


Out[10]:
-562.2346343586077

In [11]:
# we want to impose additional contraint so that A' * gamma = constnat.(-562.23)
c = -562.23
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)

In [12]:
np.dot(A_prime, gamma_new)


Out[12]:
-562.2299999999993

In [13]:
plot_contact_well(filtered_gamma[:210], inferBound=True)
plot_contact_well(filtered_gamma[210:420], inferBound=True)
plot_contact_well(filtered_gamma[420:], inferBound=True)



In [14]:
# impose A'gamma
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/"
# np.savetxt(f"{save_gamma_pre}/cutoff100_impose_Aprime_constraint", gamma_new)

In [15]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_tiny", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 5.650668065384711
210
420
60
210
420
60
210
420
60
210
420
60
Out[15]:
Protein Z_scores E_native E_mgs Std_mg
0 1 5.650668 -2978.012973 -1235.843095 308.312196
1 2 5.615262 -2711.807657 -1040.237232 297.683400
2 3 5.751143 -2594.283031 -873.201081 299.259126
3 4 5.757088 -2705.576760 -995.833711 296.980539
4 5 5.541697 -2622.239457 -1031.635421 287.024707

In [16]:
data


Out[16]:
Protein Z_scores E_native E_mgs Std_mg
0 1 5.650668 -2978.012973 -1235.843095 308.312196
1 2 5.615262 -2711.807657 -1040.237232 297.683400
2 3 5.751143 -2594.283031 -873.201081 299.259126
3 4 5.757088 -2705.576760 -995.833711 296.980539
4 5 5.541697 -2622.239457 -1031.635421 287.024707

In [17]:
# mix gammas so that we don't overfitting too much.
alpha = 0.95
mixed_gamma = alpha*original_gamma + (1-alpha)*gamma_new
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/"
# np.savetxt(f"{save_gamma_pre}/mixed_original_and_cutoff100_impose_Aprime_constraint", mixed_gamma)

In [18]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/mixed_original_and_cutoff100_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_tiny", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 5.049209188880802
210
420
60
210
420
60
210
420
60
210
420
60
Out[18]:
Protein Z_scores E_native E_mgs Std_mg
0 1 5.049209 -684.547763 -595.058146 17.723492
1 2 5.009914 -674.951316 -590.483165 16.860201
2 3 5.052534 -660.276093 -574.449667 16.986807
3 4 5.001211 -672.245947 -585.396095 17.365764
4 5 4.809522 -664.340268 -584.151473 16.672924

In [19]:
data


Out[19]:
Protein Z_scores E_native E_mgs Std_mg
0 1 5.049209 -684.547763 -595.058146 17.723492
1 2 5.009914 -674.951316 -590.483165 16.860201
2 3 5.052534 -660.276093 -574.449667 16.986807
3 4 5.001211 -672.245947 -585.396095 17.365764
4 5 4.809522 -664.340268 -584.151473 16.672924

In [110]:
# with additional constraint
data


Out[110]:
Protein Z_scores E_native E_mgs Std_mg
0 1 5.651049 -3008.457623 -1266.073497 308.329333
1 2 5.615784 -2742.978825 -1071.181503 297.696134
2 3 5.751925 -2625.236556 -903.822166 299.276227
3 4 5.757630 -2736.832105 -1026.833311 296.997018
4 5 5.541917 -2652.731026 -1061.958377 287.043734
5 6 5.640894 -2646.236207 -977.280624 295.867223
6 7 5.643035 -2595.491454 -877.928416 304.368652
7 8 5.632280 -2636.063746 -922.999355 304.151143
8 9 5.713291 -2651.458445 -931.172575 301.102450
9 10 5.940028 -2726.245430 -934.732806 301.600012
10 11 5.186864 -2474.552891 -940.955401 295.669514
11 12 5.618313 -2438.236761 -834.689790 285.414311
12 13 5.545475 -2686.797129 -1045.745276 295.926273
13 14 5.807589 -2765.680560 -1005.084412 303.154383
14 15 5.567411 -2557.287188 -920.860669 293.929524
15 16 5.664767 -2679.415330 -958.113061 303.861087
16 17 5.482699 -2641.182109 -984.409576 302.181899
17 18 5.400914 -2516.119603 -864.716713 305.763597
18 19 5.273468 -2750.482608 -1125.884209 308.070205
19 20 5.457617 -2499.879560 -829.967892 305.978153
20 21 5.610868 -2392.805221 -732.599054 295.891162
21 22 5.556166 -2490.013054 -815.342712 301.407546
22 23 5.741183 -2881.309551 -1112.011923 308.176503
23 24 5.585379 -2843.972253 -1186.329512 296.782492
24 25 5.698058 -2612.012132 -984.198019 285.678767
25 26 5.543354 -2624.741693 -1025.815922 288.440148
26 27 5.612749 -2716.021004 -1073.312809 292.674447
27 28 5.728280 -2809.690563 -1082.632250 301.496852
28 29 5.615072 -2512.088602 -849.941550 296.015267
29 30 5.638543 -2519.054470 -894.606338 288.097127
... ... ... ... ... ...
60 61 5.623568 -2533.761646 -872.077912 295.485662
61 62 5.941724 -2590.890107 -911.704707 282.609115
62 63 5.777780 -2251.842494 -662.284140 275.115772
63 64 5.532760 -2587.606428 -981.009190 290.379002
64 65 5.296592 -2448.387540 -945.918421 283.667134
65 66 5.870407 -2550.911900 -848.036674 290.077886
66 67 5.641691 -2323.204171 -710.204636 285.907128
67 68 5.788094 -2571.548595 -901.093700 288.601884
68 69 5.721171 -2721.315860 -1052.910870 291.619471
69 70 5.655420 -2401.354575 -786.197606 285.594506
70 71 5.966686 -2543.944001 -851.778594 283.602242
71 72 6.094359 -2370.194470 -716.363328 271.370815
72 73 5.937591 -2258.559473 -633.811879 273.637488
73 74 5.732575 -2353.060601 -756.225264 278.554638
74 75 6.010518 -2277.832037 -593.495727 280.231451
75 76 5.859694 -2328.963756 -650.522601 286.438357
76 77 5.847136 -2315.332343 -628.166268 288.545715
77 78 5.561025 -2432.344838 -835.116055 287.218434
78 79 5.566768 -2163.885322 -577.958701 284.891827
79 80 5.671892 -2476.919001 -874.997801 282.431525
80 81 5.710937 -2441.512943 -766.554734 293.289569
81 82 5.768186 -2425.085329 -695.007288 299.934513
82 83 5.531390 -2415.371149 -710.795836 308.164009
83 84 5.692533 -2235.606668 -610.990893 285.394187
84 85 5.784387 -2465.342676 -748.534857 296.800293
85 86 5.838220 -2186.903698 -532.103421 283.442607
86 87 5.832363 -2376.181608 -725.208268 283.071081
87 88 5.815084 -2654.196399 -1042.439921 277.168213
88 89 6.061436 -2523.192030 -669.898491 305.751548
89 90 5.606781 -2293.793233 -673.710920 288.950511

90 rows × 5 columns


In [ ]: