In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
# from small_script.myFunctions import *
sys.path.insert(0, "/Users/weilu/openmmawsem")
from helperFunctions.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2

In [2]:
plt.rcParams['figure.figsize'] = np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})

In [71]:
# pre = "/Users/weilu/Research/server_backup/feb_2019/jan_optimization/gammas/"
# pre = "/Users/weilu/Research/server/april_2019/optimization_test/gammas/"
pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/gammas/"
# pp = "cath-dataset-nonredundant-S20Clean_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0"
# pp = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
pp = f"protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"

A_name = pp + "_A"
B_name = pp + "_B"
B_filtered_name = pp + "_B_filtered"
P_name = pp + "_P"
Gamma_name = pp + "_gamma"
Gamma_filtered_name = pp + "_gamma_filtered"
Lamb_name = pp + "_lamb"
Lamb_filtered_name = pp + "_lamb_filtered"

A = np.loadtxt(pre+A_name)
B = np.loadtxt(pre+B_name)
B_filtered = np.loadtxt(pre+B_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Gamma = np.loadtxt(pre+Gamma_name)
Gamma_filtered = np.loadtxt(pre+Gamma_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb = np.loadtxt(pre+Lamb_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb_filtered = np.loadtxt(pre+Lamb_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})

half_B_name = pp + "_half_B"
half_B = np.loadtxt(pre+half_B_name)
other_half_B_name = pp + "_other_half_B"
other_half_B = np.loadtxt(pre+other_half_B_name)
std_half_B_name = pp + "_std_half_B"
std_half_B = np.loadtxt(pre+std_half_B_name)


# pre = "/Users/weilu/Research/server/april_2019/"
location = pre + "../../phis/protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0_phi_decoy_summary.txt"
A_prime = np.loadtxt(location)

In [63]:
pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/gammas/"
name_pre = f"protein_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
A_name = f"{name_pre}_A"
B_name = f"{name_pre}_B"
B_filtered_name = f"{name_pre}_B_filtered"
P_name = f"{name_pre}_P"
Gamma_name = f"{name_pre}_gamma"
Gamma_filtered_name = f"{name_pre}_gamma_filtered"
Lamb_name = f"{name_pre}_lamb"
Lamb_filtered_name = f"{name_pre}_lamb_filtered"

A = np.loadtxt(pre+A_name)
B = np.loadtxt(pre+B_name)
B_filtered = np.loadtxt(pre+B_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Gamma = np.loadtxt(pre+Gamma_name)
Gamma_filtered = np.loadtxt(pre+Gamma_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb = np.loadtxt(pre+Lamb_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})
Lamb_filtered = np.loadtxt(pre+Lamb_filtered_name, dtype=complex, converters={
                           0: lambda s: complex(s.decode().replace('+-', '-'))})

In [100]:
# maximum difference between loaded and computed is 1e-5.
max(lamb-Lamb)


Out[100]:
(1.5022809272030481e-05+0j)

In [99]:
plt.plot(Lamb)
plt.yscale("log")



In [105]:
lamb, P = np.linalg.eig(B)
lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)
filtered_lamb = np.copy(lamb)
cutoff_mode = 100
filtered_B_inv, filtered_lamb, P = get_filtered_B_inv_lambda_and_P(filtered_lamb, 
                                                                   cutoff_mode, P)
filtered_gamma = np.dot(filtered_B_inv, A)
filtered_B = np.linalg.inv(filtered_B_inv)
plot_contact_well(filtered_gamma[:210], inferBound=True)
plot_contact_well(filtered_gamma[210:420], inferBound=True)
plot_contact_well(filtered_gamma[420:], inferBound=True)



In [106]:
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/"
np.savetxt(f"{save_gamma_pre}/cutoff100", filtered_gamma)

In [107]:
os.chdir('/Users/weilu/opt/notebook/Optimization')


Out[107]:
'/Users/weilu/opt/notebook/Optimization'

In [109]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
data = validate_hamiltonian_wei("phi_list.txt", "protein_list_tiny", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 5.651048869882155
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
210
420
60
Out[109]:
Protein Z_scores E_native E_mgs Std_mg
0 1 5.651049 -3008.457623 -1266.073497 308.329333
1 2 5.615784 -2742.978825 -1071.181503 297.696134
2 3 5.751925 -2625.236556 -903.822166 299.276227
3 4 5.757630 -2736.832105 -1026.833311 296.997018
4 5 5.541917 -2652.731026 -1061.958377 287.043734
5 6 5.640894 -2646.236207 -977.280624 295.867223
6 7 5.643035 -2595.491454 -877.928416 304.368652
7 8 5.632280 -2636.063746 -922.999355 304.151143
8 9 5.713291 -2651.458445 -931.172575 301.102450
9 10 5.940028 -2726.245430 -934.732806 301.600012
10 11 5.186864 -2474.552891 -940.955401 295.669514
11 12 5.618313 -2438.236761 -834.689790 285.414311
12 13 5.545475 -2686.797129 -1045.745276 295.926273
13 14 5.807589 -2765.680560 -1005.084412 303.154383
14 15 5.567411 -2557.287188 -920.860669 293.929524
15 16 5.664767 -2679.415330 -958.113061 303.861087
16 17 5.482699 -2641.182109 -984.409576 302.181899
17 18 5.400914 -2516.119603 -864.716713 305.763597
18 19 5.273468 -2750.482608 -1125.884209 308.070205
19 20 5.457617 -2499.879560 -829.967892 305.978153
20 21 5.610868 -2392.805221 -732.599054 295.891162
21 22 5.556166 -2490.013054 -815.342712 301.407546
22 23 5.741183 -2881.309551 -1112.011923 308.176503
23 24 5.585379 -2843.972253 -1186.329512 296.782492
24 25 5.698058 -2612.012132 -984.198019 285.678767
25 26 5.543354 -2624.741693 -1025.815922 288.440148
26 27 5.612749 -2716.021004 -1073.312809 292.674447
27 28 5.728280 -2809.690563 -1082.632250 301.496852
28 29 5.615072 -2512.088602 -849.941550 296.015267
29 30 5.638543 -2519.054470 -894.606338 288.097127
... ... ... ... ... ...
60 61 5.623568 -2533.761646 -872.077912 295.485662
61 62 5.941724 -2590.890107 -911.704707 282.609115
62 63 5.777780 -2251.842494 -662.284140 275.115772
63 64 5.532760 -2587.606428 -981.009190 290.379002
64 65 5.296592 -2448.387540 -945.918421 283.667134
65 66 5.870407 -2550.911900 -848.036674 290.077886
66 67 5.641691 -2323.204171 -710.204636 285.907128
67 68 5.788094 -2571.548595 -901.093700 288.601884
68 69 5.721171 -2721.315860 -1052.910870 291.619471
69 70 5.655420 -2401.354575 -786.197606 285.594506
70 71 5.966686 -2543.944001 -851.778594 283.602242
71 72 6.094359 -2370.194470 -716.363328 271.370815
72 73 5.937591 -2258.559473 -633.811879 273.637488
73 74 5.732575 -2353.060601 -756.225264 278.554638
74 75 6.010518 -2277.832037 -593.495727 280.231451
75 76 5.859694 -2328.963756 -650.522601 286.438357
76 77 5.847136 -2315.332343 -628.166268 288.545715
77 78 5.561025 -2432.344838 -835.116055 287.218434
78 79 5.566768 -2163.885322 -577.958701 284.891827
79 80 5.671892 -2476.919001 -874.997801 282.431525
80 81 5.710937 -2441.512943 -766.554734 293.289569
81 82 5.768186 -2425.085329 -695.007288 299.934513
82 83 5.531390 -2415.371149 -710.795836 308.164009
83 84 5.692533 -2235.606668 -610.990893 285.394187
84 85 5.784387 -2465.342676 -748.534857 296.800293
85 86 5.838220 -2186.903698 -532.103421 283.442607
86 87 5.832363 -2376.181608 -725.208268 283.071081
87 88 5.815084 -2654.196399 -1042.439921 277.168213
88 89 6.061436 -2523.192030 -669.898491 305.751548
89 90 5.606781 -2293.793233 -673.710920 288.950511

90 rows × 5 columns


In [113]:
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/converted_original_gamma.dat"
original = np.loadtxt(gamma_file_name)
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/converted_original_gamma_2.dat"
original_2 = np.loadtxt(gamma_file_name)

In [117]:
data.plot("Protein", "E_native")


Out[117]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2710aeb8>

In [120]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/original_gamma"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_tiny", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 0.28501951024346306
210
420
60
210
420
60
210
420
60
210
420
60
Out[120]:
Protein Z_scores E_native E_mgs Std_mg
0 1 0.285020 -563.839068 -561.332622 8.793945
1 2 0.107713 -567.748351 -566.811899 8.693935
2 3 -0.027309 -558.486254 -558.725908 8.775800
3 4 0.157535 -565.228535 -563.794115 9.105390
4 5 0.081485 -561.292942 -560.599687 8.507752

In [121]:
data


Out[121]:
Protein Z_scores E_native E_mgs Std_mg
0 1 0.285020 -563.839068 -561.332622 8.793945
1 2 0.107713 -567.748351 -566.811899 8.693935
2 3 -0.027309 -558.486254 -558.725908 8.775800
3 4 0.157535 -565.228535 -563.794115 9.105390
4 5 0.081485 -561.292942 -560.599687 8.507752

In [123]:
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/original_gamma"
original_gamma = np.loadtxt(gamma_file_name)

In [124]:
np.dot(A_prime, original_gamma)


Out[124]:
-562.2346343586077

In [126]:
# we want to impose additional contraint so that A' * gamma = constnat.(-562.23)
c = -562.23
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)

In [127]:
np.dot(A_prime, gamma_new)


Out[127]:
-562.2299999999993

In [128]:
plot_contact_well(filtered_gamma[:210], inferBound=True)
plot_contact_well(filtered_gamma[210:420], inferBound=True)
plot_contact_well(filtered_gamma[420:], inferBound=True)



In [129]:
# impose A'gamma
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/"
np.savetxt(f"{save_gamma_pre}/cutoff100_impose_Aprime_constraint", gamma_new)

In [130]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_tiny", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 5.650668065384711
210
420
60
210
420
60
210
420
60
210
420
60
Out[130]:
Protein Z_scores E_native E_mgs Std_mg
0 1 5.650668 -2978.012973 -1235.843095 308.312196
1 2 5.615262 -2711.807657 -1040.237232 297.683400
2 3 5.751143 -2594.283031 -873.201081 299.259126
3 4 5.757088 -2705.576760 -995.833711 296.980539
4 5 5.541697 -2622.239457 -1031.635421 287.024707

In [131]:
data


Out[131]:
Protein Z_scores E_native E_mgs Std_mg
0 1 5.650668 -2978.012973 -1235.843095 308.312196
1 2 5.615262 -2711.807657 -1040.237232 297.683400
2 3 5.751143 -2594.283031 -873.201081 299.259126
3 4 5.757088 -2705.576760 -995.833711 296.980539
4 5 5.541697 -2622.239457 -1031.635421 287.024707

In [133]:
# mix gammas so that we don't overfitting too much.
alpha = 0.95
mixed_gamma = alpha*original_gamma + (1-alpha)*gamma_new
save_gamma_pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/"
np.savetxt(f"{save_gamma_pre}/mixed_original_and_cutoff100_impose_Aprime_constraint", mixed_gamma)

In [134]:
os.chdir("/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/")
# gamma_file_name = "gamma_iter1_combined_mar06.dat"
# gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/cutoff100"
gamma_file_name = "/Users/weilu/Research/server/sep_2019/peptide_optimization/saved_gammas/mixed_original_and_cutoff100_impose_Aprime_constraint"

data = validate_hamiltonian_wei("phi_list.txt", "protein_list_tiny", gamma_file_name, "shuffle", 1000, mode=0)
data


210
420
60
0 5.049209188880802
210
420
60
210
420
60
210
420
60
210
420
60
Out[134]:
Protein Z_scores E_native E_mgs Std_mg
0 1 5.049209 -684.547763 -595.058146 17.723492
1 2 5.009914 -674.951316 -590.483165 16.860201
2 3 5.052534 -660.276093 -574.449667 16.986807
3 4 5.001211 -672.245947 -585.396095 17.365764
4 5 4.809522 -664.340268 -584.151473 16.672924

In [135]:
data


Out[135]:
Protein Z_scores E_native E_mgs Std_mg
0 1 5.049209 -684.547763 -595.058146 17.723492
1 2 5.009914 -674.951316 -590.483165 16.860201
2 3 5.052534 -660.276093 -574.449667 16.986807
3 4 5.001211 -672.245947 -585.396095 17.365764
4 5 4.809522 -664.340268 -584.151473 16.672924

In [ ]:


In [110]:
# with additional constraint
data


Out[110]:
Protein Z_scores E_native E_mgs Std_mg
0 1 5.651049 -3008.457623 -1266.073497 308.329333
1 2 5.615784 -2742.978825 -1071.181503 297.696134
2 3 5.751925 -2625.236556 -903.822166 299.276227
3 4 5.757630 -2736.832105 -1026.833311 296.997018
4 5 5.541917 -2652.731026 -1061.958377 287.043734
5 6 5.640894 -2646.236207 -977.280624 295.867223
6 7 5.643035 -2595.491454 -877.928416 304.368652
7 8 5.632280 -2636.063746 -922.999355 304.151143
8 9 5.713291 -2651.458445 -931.172575 301.102450
9 10 5.940028 -2726.245430 -934.732806 301.600012
10 11 5.186864 -2474.552891 -940.955401 295.669514
11 12 5.618313 -2438.236761 -834.689790 285.414311
12 13 5.545475 -2686.797129 -1045.745276 295.926273
13 14 5.807589 -2765.680560 -1005.084412 303.154383
14 15 5.567411 -2557.287188 -920.860669 293.929524
15 16 5.664767 -2679.415330 -958.113061 303.861087
16 17 5.482699 -2641.182109 -984.409576 302.181899
17 18 5.400914 -2516.119603 -864.716713 305.763597
18 19 5.273468 -2750.482608 -1125.884209 308.070205
19 20 5.457617 -2499.879560 -829.967892 305.978153
20 21 5.610868 -2392.805221 -732.599054 295.891162
21 22 5.556166 -2490.013054 -815.342712 301.407546
22 23 5.741183 -2881.309551 -1112.011923 308.176503
23 24 5.585379 -2843.972253 -1186.329512 296.782492
24 25 5.698058 -2612.012132 -984.198019 285.678767
25 26 5.543354 -2624.741693 -1025.815922 288.440148
26 27 5.612749 -2716.021004 -1073.312809 292.674447
27 28 5.728280 -2809.690563 -1082.632250 301.496852
28 29 5.615072 -2512.088602 -849.941550 296.015267
29 30 5.638543 -2519.054470 -894.606338 288.097127
... ... ... ... ... ...
60 61 5.623568 -2533.761646 -872.077912 295.485662
61 62 5.941724 -2590.890107 -911.704707 282.609115
62 63 5.777780 -2251.842494 -662.284140 275.115772
63 64 5.532760 -2587.606428 -981.009190 290.379002
64 65 5.296592 -2448.387540 -945.918421 283.667134
65 66 5.870407 -2550.911900 -848.036674 290.077886
66 67 5.641691 -2323.204171 -710.204636 285.907128
67 68 5.788094 -2571.548595 -901.093700 288.601884
68 69 5.721171 -2721.315860 -1052.910870 291.619471
69 70 5.655420 -2401.354575 -786.197606 285.594506
70 71 5.966686 -2543.944001 -851.778594 283.602242
71 72 6.094359 -2370.194470 -716.363328 271.370815
72 73 5.937591 -2258.559473 -633.811879 273.637488
73 74 5.732575 -2353.060601 -756.225264 278.554638
74 75 6.010518 -2277.832037 -593.495727 280.231451
75 76 5.859694 -2328.963756 -650.522601 286.438357
76 77 5.847136 -2315.332343 -628.166268 288.545715
77 78 5.561025 -2432.344838 -835.116055 287.218434
78 79 5.566768 -2163.885322 -577.958701 284.891827
79 80 5.671892 -2476.919001 -874.997801 282.431525
80 81 5.710937 -2441.512943 -766.554734 293.289569
81 82 5.768186 -2425.085329 -695.007288 299.934513
82 83 5.531390 -2415.371149 -710.795836 308.164009
83 84 5.692533 -2235.606668 -610.990893 285.394187
84 85 5.784387 -2465.342676 -748.534857 296.800293
85 86 5.838220 -2186.903698 -532.103421 283.442607
86 87 5.832363 -2376.181608 -725.208268 283.071081
87 88 5.815084 -2654.196399 -1042.439921 277.168213
88 89 6.061436 -2523.192030 -669.898491 305.751548
89 90 5.606781 -2293.793233 -673.710920 288.950511

90 rows × 5 columns


In [ ]:


In [ ]:
# contraints T_f = T_c
B_inverse = np.linalg.pinv(B)
up = A.dot(B_inverse).dot(A) - A_prime.dot(B_inverse).dot(A)
down = A.dot(B_inverse).dot(A_prime) - A_prime.dot(B_inverse).dot(A_prime)
lambda_1 = up / down
print(lambda_1)
g = B_inverse.dot(A - A_prime*lambda_1)

In [102]:
# lamb, P = np.linalg.eig(B)
# lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)
# filtered_lamb = np.copy(lamb)
# cutoff_mode = 200
# filtered_B_inv, filtered_lamb, P = get_filtered_B_inv_lambda_and_P(filtered_lamb, 
#                                                                    cutoff_mode, P)
# filtered_gamma = np.dot(filtered_B_inv, A)
# filtered_B = np.linalg.inv(filtered_B_inv)
# plot_contact_well(filtered_gamma[:210], inferBound=True)



In [ ]:


In [ ]:


In [ ]:
# "mkdir -p database/dompdb"
# "mkdir -p database/S20_seq"
# "mkdir optimization"
# cp ~/opt/optimization/phi_list_contact.txt phi_list.txt

In [9]:
def mycp(source, target):
    os.system(f"cp {source} {target}")
    
do = os.system

def getSeq(fileLocation):
    p = PDBParser()
    s = p.get_structure("test", fileLocation)
    seq = ""
    residues = list(s.get_residues())
    for residue in residues:
        res_id = residue.get_id()[0]
        if res_id==' ':
            residue_name = residue.get_resname()
            seq += three_to_one(residue_name)
    return seq

# get chains nad seq
def getChainsAndSeq(fileLocation):
    # fileLocation = "/Users/weilu/Research/examples/optimization/optimization/Structure_Ensemble/1.pdb"
    p = PDBParser()
    pdb = p.get_structure("test", fileLocation)
    residues = list(pdb.get_residues())
    seq = ""
    chains = ""
    for residue in residues:
        res_id = residue.get_id()[0]
        chain = residue.get_full_id()[2]
        if res_id==' ':
            residue_name = residue.get_resname()
            seq += three_to_one(residue_name)
            chains += chain
    return chains, seq

In [40]:
pdbFolderList = glob.glob("/Users/weilu/Downloads/Optimization_Xfunnel/Structure_Ensemble_*")

In [34]:
pdbFolderList = [ '/Users/weilu/Downloads/Optimization_Xfunnel/Structure_Ensemble_2BNQ',
 '/Users/weilu/Downloads/Optimization_Xfunnel/Structure_Ensemble_4FTV']

In [4]:
pdbFolder = pdbFolderList[0]

In [5]:
pdbName = pdbFolder.split("_")[-1]

In [6]:
pdbName


Out[6]:
'1BD2'

In [41]:
len(pdbFolderList)


Out[41]:
33

In [43]:
# pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization_specific_test"
pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_4_duplicate"

do(f"mkdir -p {pre}/database/dompdb")
do(f"mkdir -p {pre}/database/S20_seq")

for pdbFolder in pdbFolderList:
    pdbName = pdbFolder.split("_")[-1]
    source = pdbFolder + f"/*ab.pdb"
    p_list = glob.glob(source)
    for p in p_list:
        target = f"{pre}/database/dompdb/{pdbName}.pdb"
        ## move native pdbs to dompdb
        mycp(p, target)
        ## move native seq to S20_seq
        seq = getSeq(target)
        fileLocation = f"{pre}/database/S20_seq/{pdbName}.seq"
        with open(fileLocation, "w") as out:
            out.write(seq+"\n")

In [44]:
do(f"mkdir -p {pre}/optimization")
## write protein_list
fileLocation = f"{pre}/optimization/protein_list"
with open(fileLocation, "w") as out:
    for pdbFolder in pdbFolderList:
        pdbName = pdbFolder.split("_")[-1]
        out.write(f"{pdbName}\n")

In [45]:
do(f"mkdir -p {pre}/optimization/decoys/shuffle")
# generate decoys
for pdbFolder in pdbFolderList:
    pdbName = pdbFolder.split("_")[-1]
    source = pdbFolder + f"/*ab.pdb"
    p_list = glob.glob(source)
    # print(p_list, source)
    assert len(p_list) == 1
    chain_seq, seq = getChainsAndSeq(p_list[0])
    print(pdbName, i, len(seq), len(chain_seq))
    decoy_list = []
    with open(f"{pdbFolder}/peptide.txt") as f:
        for line in f:
            pep = line.strip()
            assert len(pep) == 9
            for c in list(set(chain_seq)):
                if chain_seq.count(c) == 9:
                    first_c = chain_seq.find(c)
                    a = list(seq)
                    a[first_c:first_c+9] = pep
                    decoy = "".join(a)
                    decoy_list.append(decoy)
    fileLocation = f"{pre}/optimization/decoys/shuffle/{pdbName}.decoys"
    with open(fileLocation, "w") as out:
        for decoy in decoy_list:
            out.write(decoy+"\n")


1BD2 1 416 416
2JCC 1 421 421
1AO7 1 418 418
1LP9 1 421 421
3QDJ 1 424 424
3GSN 1 425 425
1QRN 1 418 418
3PWP 1 420 420
5W1W 1 395 395
1QSE 1 418 418
4EUP 1 425 425
5TEZ 1 422 422
3D39 1 419 419
6EQA 1 426 426
2BNR 1 425 425
5NME 1 426 426
5MEN 1 425 425
2VLJ 1 421 421
2VLK 1 421 421
2J8U 1 421 421
2GJ6 1 418 418
1OGA 1 421 421
2F54 1 427 427
5D2L 1 420 420
2F53 1 431 431
3QEQ 1 426 426
1QSF 1 418 418
2UWE 1 421 421
5EUO 1 424 424
3H9S 1 420 420
3D3V 1 419 419
2BNQ 1 425 425
4FTV 1 420 420

In [46]:
do(f"mkdir -p {pre}/phis")
do(f"cp ~/opt/optimization/phi_list_contact.txt {pre}/optimization/phi_list.txt")


Out[46]:
0

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [29]:
do(f"mkdir -p {pre}/optimization/decoys/shuffle")
# generate decoys
for pdbFolder in pdbFolderList:
    pdbName = pdbFolder.split("_")[-1]
    source = pdbFolder + f"/*ab.pdb"
    p_list = glob.glob(source)
    if pdbName != "3D39":
        continue
    # print(p_list, source)
    assert len(p_list) == 1
    chain_seq, seq = getChainsAndSeq(p_list[0])
    print(pdbName, i, len(seq), len(chain_seq))
    decoy_list = []
    decoy = ""
    with open(f"{pdbFolder}/peptide.txt") as f:
        for line in f:
            pep = line.strip()
            assert len(pep) == 9
            for c in list(set(chain_seq)):
                if chain_seq.count(c) == 9:
                    first_c = chain_seq.find(c)
                    a = list(seq)
                    a[first_c:first_c+9] = pep
                    decoy = "".join(a)
                    decoy_list.append(decoy)
    fileLocation = f"{pre}/optimization/decoys/shuffle/{pdbName}.decoys"
    with open(fileLocation, "w") as out:
        for decoy in decoy_list:
            out.write(decoy+"\n")


3D39 1 419 419

In [31]:
list(set(chain_seq))


Out[31]:
['A', 'X', 'B', 'Y']

In [32]:
chain_seq.count("Y")


Out[32]:
8

In [ ]:


In [205]:
## write protein_list
fileLocation = f"{pre}/optimization/protein_list"
with open(fileLocation, "w") as out:
    for pdbFolder in pdbFolderList:
        pdbName = pdbFolder.split("_")[-1]
        for i in range(1, 6):
            out.write(f"{pdbName}_{i}\n")
# generate decoys
for pdbFolder in pdbFolderList:
    pdbName = pdbFolder.split("_")[-1]
    for i in range(1, 6):
        source = pdbFolder + f"/{i}.pdb"
        source = pdbFolder + f"/*ab.pdb"
        p_list = glob.glob(source)
        assert len(p_list) == 1
        chain_seq, seq = getChainsAndSeq(source)
        print(pdbName, i, len(seq), len(chain_seq))
        decoy_list = []
        with open(f"{pdbFolder}/peptide.txt") as f:
            for line in f:
                pep = line.strip()
                assert len(pep) == 9
                for c in list(set(chain_seq)):
                    if chain_seq.count(c) == 9:
                        first_c = chain_seq.find(c)
                        a = list(seq)
                        a[first_c:first_c+9] = pep
                        decoy = "".join(a)
                        decoy_list.append(decoy)
        fileLocation = f"{pre}/optimization/decoys/shuffle/{pdbName}_{i}.decoys"
        with open(fileLocation, "w") as out:
            for decoy in decoy_list:
                out.write(decoy+"\n")

In [19]:


In [168]:
# ensure they all 9 residue peptide.
for pdbFolder in pdbFolderList:
    pdbName = pdbFolder.split("_")[-1]
    # print(pdbName)
    with open(f"{pdbFolder}/peptide.txt") as f:
        for line in f:
            pep = line.strip()
            assert len(pep) == 9

In [160]:
fileLocation


Out[160]:
'/Users/weilu/Research/server/sep_2019/peptide_optimization_trial_2/database/S20_seq/4FTV_5.seq'

In [164]:
for c in "ABCDEFG":
    if chain_seq.count(c) == 9:
        first_c = chain_seq.find(c)
        a = list(seq)
        a[first_c:first_c+9] = pep
        decoy = "".join(a)
        decoy_list.append(decoy)


Out[164]:
107

In [ ]:


In [3]:
# get seq
fileLocation = "/Users/weilu/Research/examples/optimization/optimization/Structure_Ensemble/1.pdb"
p = PDBParser()
pdb = p.get_structure("test", fileLocation)
residues = list(pdb.get_residues())
seq = ""
chains = ""
for residue in residues:
    res_id = residue.get_id()[0]
    chain = residue.get_full_id()[2]
    if res_id==' ':
        residue_name = residue.get_resname()
        seq += three_to_one(residue_name)
        chains += chain

In [5]:
# get decoy
decoy_list = []
with open("/Users/weilu/Research/examples/optimization/optimization/Structure_Ensemble/peptide.txt") as f:
    for line in f:
        pep = line.strip()
        assert len(pep) == 9
        a = list(seq)
        a[180:189] = pep
        decoy = "".join(a)
        decoy_list.append(decoy)

In [38]:
for i in range(1, 91):
    pre = "/Users/weilu/Research/server/sep_2019/peptide_optimization"
    fileLocation = f"{pre}/database/S20_seq/{i}.seq"
    with open(fileLocation, "w") as out:
        out.write(seq+"\n")

In [40]:
for i in range(1, 91):
    fileLocation = f"{pre}/optimization/decoys/shuffle/{i}.decoys"
    with open(fileLocation, "w") as out:
        for decoy in decoy_list:
            out.write(decoy+"\n")

In [42]:
with open("/Users/weilu/Research/server/sep_2019/peptide_optimization/optimization/protein_list", "w") as out:
    for i in range(1, 91):
        out.write(f"{i}\n")

In [41]:
for i in range(1, 91):
    os.system(f"cp /Users/weilu/Research/examples/optimization/optimization/Structure_Ensemble/{i}.pdb /Users/weilu/Research/server/sep_2019/peptide_optimization/database/dompdb/")

In [39]:
len(decoy_list)


Out[39]:
1000

In [6]:
seq[180:189]


Out[6]:
'SLLMWITQV'

In [49]:
a = list(seq)
a[180:189] = list('FIFLLFLTL')

In [151]:


In [26]:
all_seq = []
for i in range(1, 91):
    fileLocation = f"/Users/weilu/Research/examples/optimization/optimization/Structure_Ensemble/{i}.pdb"
    seq = getSeq(fileLocation)
    assert len(seq) == 414
    all_seq.append(seq)
#     assert preSeq == seq
#     preSeq = seq

In [27]:
all_seq[0]


Out[27]:
'GSHSMRYFFTSVSRPGRGEPRFIAVGYVDDTQFVRFDSDAASQRMEPRAPWIEQEGPEYWDGETRKVKAHSQTHRVDLGTLRGYYNQSEAGSHTVQRMYGCDVGSDWRFLRGYHQYAYDGKDYIALKEDLRSWTAADMAAQTTKHKWEAAHVAEQLRAYLEGTCVEWLRRYLENGKETLQSLLMWITQVQEVTQIPAALSVPEGENLVLNCSFTDSAIYNLQWFRQDPGKGLTSLLLIQSSQREQTSGRLNASLDKSSGRSTLYIAASQPGDSATYLCAVRPTSGGSYIPTFGRGTSLIVHPGVTQTPKFQVLKTGQSMTLQCAQDMNHEYMSWYRQDPGMGLRLIHYSVGAGITDQGEVPNGYNVSRSTTEDFPLRLLSAAPSQTSVYFCASSYVGNTGELFFGEGSRLTVLE'

In [28]:
all_seq[0] == all_seq[1]


Out[28]:
True

In [29]:
for i in range(90):
    if all_seq[i] != all_seq[0]:
        print(i)

In [9]:
def getAllFrames(movieLocation):
    # movieLocation = "/Users/weilu/Research/examples/openMM_simulation/test_2/movie.pdb"
    location = movieLocation
    with open(location) as f:
        a = f.readlines()
    n = len(a)
    # get the position of every model title
    model_title_index_list = []
    for i in range(n):
        if len(a[i]) >= 5 and a[i][:5] == "MODEL":
            model_title_index = i
            model_title_index_list.append(model_title_index)
    model_title_index_list.append(n)
    check_array = np.diff(model_title_index_list)
    if np.allclose(check_array, check_array[0]):
        size = check_array[0]
    elif np.allclose(check_array[:-1], check_array[0]) and check_array[-1] == check_array[0] + 1:
        # this is ok. with extra "END"
        size = check_array[0]
    else:  
        print("!!!! Someting is wrong  !!!!")
        print(check_array)
    return a

In [10]:
num_of_frames = int(n/size)

In [11]:
frame = 5
oneFrame = a[size*frame:size*(frame+1)]

In [60]:
frame = num_of_frames
oneFrame = a[size*frame:size*(frame+1)]

In [61]:
oneFrame


Out[61]:
[]

In [45]:
# s = p.get_structure("test", f)
# residues = list(s.get_residues())

In [24]:
import io

f = io.StringIO("".join(oneFrame))
MAX_OFFSET=4
DISTANCE_CUTOFF=9.5
s = p.get_structure("test", f)
chains = s[0].get_list()

# import pdb file
native_coords = []
for chain in chains:
    dis = []
    all_res = []
    for res in chain:
        is_regular_res = res.has_id('CA') and res.has_id('O')
        res_id = res.get_id()[0]
        if (res.get_resname()=='GLY'):
            native_coords.append(res['CA'].get_coord())
        elif (res_id==' ' or res_id=='H_MSE' or res_id=='H_M3L' or res_id=='H_CAS') and is_regular_res:
            native_coords.append(res['CB'].get_coord())
        else:
            print('ERROR: irregular residue at %s!' % res)
            exit()
native_contacts_table = compute_native_contacts(native_coords, MAX_OFFSET, DISTANCE_CUTOFF)

In [25]:
native_contacts_table


Out[25]:
array([[0, 0, 0, ..., 1, 1, 0],
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 1, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [62]:
plt.imshow(native_contacts_table, origin=[0,0])


Out[62]:
<matplotlib.image.AxesImage at 0x1a23839898>

In [ ]: