In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
from small_script.myFunctions import *
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2

In [2]:
plt.rcParams['figure.figsize'] = [16.18033, 10]    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100

In [11]:
def get_z(A, B, gamma):
    return A.dot(gamma) / np.sqrt( gamma.dot(B).dot(gamma) )

In [28]:
pre = "/Users/weilu/Research/server/jun_2019/membrane_only_contact_optimization/optimization/gammas/"
gamma_file = pre + "protein_list_phi_contact_membrane_well6.5_9.5_5.0_10_2.6_7.0_gamma"

In [29]:
gamma = np.loadtxt(gamma_file)

In [30]:
gamma.shape


Out[30]:
(690,)

In [31]:
# name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
name = "protein_list_phi_contact_membrane_well6.5_9.5_5.0_10_2.6_7.0"
A,B,B_filtered,Gamma,Gamma_filtered,Lamb,Lamb_filtered,half_B,other_half_B,std_half_B,A_prime = get_raw_optimization_data(pre, name)

In [32]:
A_prime.dot(Gamma)


Out[32]:
-596.5427442493

In [33]:
A_prime.dot(Gamma_filtered)


Out[33]:
(-8168.904056027597+0j)

In [34]:
lamb, P = np.linalg.eig(B)
lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)

In [35]:
B.shape


Out[35]:
(690, 690)

In [36]:
plt.plot(lamb)
plt.yscale("log")
# plt.xlim([650, 700])



In [37]:
plt.plot(lamb)
plt.yscale("log")
plt.xlim([650, 700])


Out[37]:
(650, 700)

In [68]:
total_phis = len(A)
num_decoys = 28000
cutoff_mode = None
cutoff_mode = 650
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
            A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=cutoff_mode)
filtered_B_inv =np.linalg.inv(filtered_B)
c = -400
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)


650

In [67]:
gamma_new_c200 = gamma_new

In [69]:
gamma_new_c400 = gamma_new

In [72]:
plt.plot(gamma_new_c400)


Out[72]:
[<matplotlib.lines.Line2D at 0x1a16e5acf8>]

In [70]:
plt.plot(gamma_new_c200-gamma_new_c400)


Out[70]:
[<matplotlib.lines.Line2D at 0x1a16e927b8>]

In [60]:


In [61]:
get_z(A, B, gamma)


Out[61]:
7.425683052372977

In [62]:
get_z(A, B, gamma_new)


Out[62]:
7.366889975987781

In [63]:
A_prime.dot(gamma_new)


Out[63]:
-200.0000000000001

In [64]:
pre = ""

In [66]:
np.savetxt("/Users/weilu/Research/server/jun_2019/membrane_only_contact_optimization/optimization/gammas/gamma_650_c200", 
           gamma_new)

In [ ]:


In [60]:
np.savetxt("/Users/weilu/Research/server/jun_2019/amphibian/setup/2xov/gamma_1200", gamma_1200)

In [61]:
np.savetxt("/Users/weilu/Research/server/jun_2019/amphibian/setup/2xov/gamma_1100", gamma_1100)
np.savetxt("/Users/weilu/Research/server/jun_2019/amphibian/setup/2xov/gamma_1300", gamma_1300)

In [56]:
gamma.shape


Out[56]:
(1380,)

In [71]:



Out[71]:
1

In [75]:
# convert to standard gamma
gamma = np.zeros(690)
c = 0
ii = 1
for jj in range(3):
    for i in range(20):
        for j in range(i, 20):
            if jj == 0:
                gamma[c] = gamma_ijm[ii][i][j]
            if jj == 1:
                gamma[c] = protein_gamma_ijm[ii][i][j]
            if jj == 2:
                gamma[c] = water_gamma_ijm[ii][i][j]
            c += 1
print(c)
for i in range(3):
    for j in range(20):
        if ii == 0:
            gamma[c] = burial_gamma_ij[j][i]
        if ii == 1:
            gamma[c] = membrane_burial_gamma_ij[j][i]
        c += 1


630

In [76]:
np.savetxt("/Users/weilu/Research/server/jun_2019/amphibian/converted_original_membrane_gamma.dat", gamma)

In [74]:
np.savetxt("/Users/weilu/Research/server/jun_2019/amphibian/converted_original_gamma.dat", gamma)

In [53]:
plt.plot(gamma_1200-gamma_1100)


Out[53]:
[<matplotlib.lines.Line2D at 0x1a1c911518>]

In [59]:
plt.plot(gamma_1200)
plt.plot(gamma_1100)
plt.plot(gamma_1300)


Out[59]:
[<matplotlib.lines.Line2D at 0x1a19e27f28>]

In [36]:



Out[36]:
[<matplotlib.lines.Line2D at 0x1a18d10fd0>]

Multi Seq version


In [73]:
folder = "/Users/weilu/Research/server/jun_2019/multiSeq_membrane_only_contact_optimization/optimization/"

In [74]:
i = 0
location = f"{folder}/proteins_name_list/proteins_name_list_{i}.txt"
with open(location, "r") as f:
    a = f.readlines()
size = len(a)
# print(size)
pre = f"{folder}/sub_gamma"
base = pre + f"/proteins_name_list_{i}_phi_contact_membrane_well6.5_9.5_5.0_10_2.6_7.0"
# base = pre + f"/proteins_name_list_{i}_phi_pairwise_contact_multiLetter_well4.5_6.5_5.0_10phi_density_mediated_contact_multiLetter_well6.5_9.5_5.0_10_2.6_7.0phi_burial_multiLetter_well4.0"
a_name = base + "_A"
a_prime_name = base + "_A_prime"
std_half_b_name = base + "_std_half_B"
half_b = base + "_half_B"
other_half_b = base + "_other_half_B"

A = size * np.loadtxt(a_name)
A_prime = size * np.loadtxt(a_prime_name)
std_half_B = np.loadtxt(std_half_b_name)
half_B = np.loadtxt(half_b)
other_half_B = np.loadtxt(other_half_b)

In [75]:
count = 1
for i in range(1, 522):
    location = f"{folder}/proteins_name_list/proteins_name_list_{i}.txt"
    with open(location, "r") as f:
        a = f.readlines()
    size = len(a)
    # print(size)
    pre = f"{folder}/sub_gamma"
#     base = pre + f"/proteins_name_list_{i}_phi_pairwise_contact_multiLetter_well4.5_6.5_5.0_10phi_density_mediated_contact_multiLetter_well6.5_9.5_5.0_10_2.6_7.0phi_burial_multiLetter_well4.0"
    base = pre + f"/proteins_name_list_{i}_phi_contact_membrane_well6.5_9.5_5.0_10_2.6_7.0"
    a_name = base + "_A"
    a_prime_name = base + "_A_prime"
    std_half_b_name = base + "_std_half_B"
    half_b = base + "_half_B"
    other_half_b = base + "_other_half_B"
    try:
        std_half_B = np.loadtxt(std_half_b_name)
        A += size * np.loadtxt(a_name)
        A_prime += size * np.loadtxt(a_prime_name)
        half_B += np.loadtxt(half_b)
        other_half_B += np.loadtxt(other_half_b)
        count += 1
        print(i, "done")
    except:
        print(i, "not found")


1 done
2 done
3 done
4 done
5 done
6 done
7 done
8 done
9 done
10 done
11 done
12 done
13 done
14 done
15 done
16 done
17 done
18 done
19 done
20 done
21 done
22 done
23 done
24 done
25 done
26 done
27 done
28 done
29 done
30 done
31 done
32 done
33 done
34 done
35 done
36 done
37 done
38 done
39 done
40 done
41 done
42 done
43 done
44 done
45 done
46 done
47 done
48 done
49 done
50 done
51 done
52 done
53 done
54 done
55 done
56 done
57 done
58 done
59 done
60 done
61 done
62 done
63 done
64 done
65 done
66 done
67 done
68 done
69 done
70 done
71 done
72 done
73 done
74 done
75 done
76 done
77 done
78 done
79 done
80 done
81 done
82 done
83 done
84 done
85 done
86 done
87 done
88 done
89 done
90 done
91 done
92 done
93 done
94 done
95 done
96 done
97 done
98 done
99 done
100 done
101 done
102 done
103 done
104 done
105 done
106 done
107 done
108 done
109 done
110 done
111 done
112 done
113 done
114 done
115 done
116 done
117 done
118 done
119 done
120 done
121 done
122 done
123 done
124 done
125 done
126 done
127 done
128 done
129 done
130 done
131 done
132 done
133 done
134 done
135 done
136 done
137 done
138 done
139 done
140 done
141 done
142 done
143 done
144 done
145 done
146 done
147 done
148 done
149 done
150 done
151 done
152 done
153 done
154 done
155 done
156 done
157 done
158 done
159 done
160 done
161 done
162 done
163 done
164 done
165 done
166 done
167 done
168 done
169 done
170 done
171 done
172 done
173 done
174 done
175 done
176 done
177 done
178 done
179 done
180 done
181 done
182 done
183 done
184 done
185 done
186 done
187 done
188 done
189 done
190 done
191 done
192 done
193 done
194 done
195 done
196 done
197 done
198 done
199 done
200 done
201 done
202 done
203 done
204 done
205 done
206 done
207 done
208 done
209 done
210 done
211 done
212 done
213 done
214 done
215 done
216 done
217 done
218 done
219 done
220 done
221 done
222 done
223 done
224 done
225 done
226 done
227 done
228 done
229 done
230 done
231 done
232 done
233 done
234 done
235 done
236 done
237 done
238 done
239 done
240 done
241 done
242 done
243 done
244 done
245 done
246 done
247 done
248 done
249 done
250 done
251 done
252 done
253 done
254 done
255 done
256 done
257 done
258 done
259 done
260 done
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-75-f681dd2b160e> in <module>
      2 for i in range(1, 522):
      3     location = f"{folder}/proteins_name_list/proteins_name_list_{i}.txt"
----> 4     with open(location, "r") as f:
      5         a = f.readlines()
      6     size = len(a)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/weilu/Research/server/jun_2019/multiSeq_membrane_only_contact_optimization/optimization//proteins_name_list/proteins_name_list_261.txt'

In [77]:
count


Out[77]:
261

In [78]:
# folder = "/Users/weilu/Research/server/may_2019/multi_iter0/multiLetter_symmetric"
pre = f"{folder}/gammas/"
n = count*2
A /= n
A_prime /= n
std_half_B /= n
half_B /= n
other_half_B /= n

np.save(pre+ "A", A)
np.save(pre+ "A_prime", A_prime)
np.save(pre+ "std_half_B", std_half_B)
np.save(pre+ "half_B", half_B)
np.save(pre+ "other_half_B", other_half_B)

In [79]:
B = half_B - other_half_B
gamma = np.dot(np.linalg.pinv(B), A)

In [81]:
np.savetxt(pre+"gamma", gamma)

In [82]:
lamb, P = np.linalg.eig(B)
lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)

In [83]:
plt.plot(lamb)
plt.yscale("log")



In [84]:
cutoff_mode = 650
filtered_lamb = np.copy(lamb)
filtered_B_inv, filtered_lamb, P = get_filtered_B_inv_lambda_and_P(
    filtered_lamb, cutoff_mode, P)

filtered_gamma = np.dot(filtered_B_inv, A)

In [85]:
A_prime.dot(gamma)


Out[85]:
-304.23840796653025

In [86]:
A_prime.dot(filtered_gamma)


Out[86]:
-177.35544791319015

In [87]:
total_phis = len(A)
num_decoys = 28000
cutoff_mode = None
cutoff_mode = 650
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
            A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=cutoff_mode)
filtered_B_inv =np.linalg.inv(filtered_B)
c = -300
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)


650

In [88]:
np.savetxt(pre+"gamma_650_c300", gamma_new)

no burial part, contact only membrane.


In [3]:
pre = "/Users/weilu/Research/server/jun_2019/membrane_only_contact_optimization/contact_only/gammas/"
# gamma_file = pre + "protein_list_phi_contact_membrane_well6.5_9.5_5.0_10_2.6_7.0_gamma"
# name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
name = "protein_list_phi_contact_only_membrane_well6.5_9.5_5.0_10_2.6_7.0"
A,B,B_filtered,Gamma,Gamma_filtered,Lamb,Lamb_filtered,half_B,other_half_B,std_half_B,A_prime = get_raw_optimization_data(pre, name)

In [4]:
lamb, P = np.linalg.eig(B)
lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)

In [5]:
A_prime.dot(Gamma)


Out[5]:
-119.887916786

In [6]:
plt.plot(lamb)
plt.yscale("log")



In [7]:
total_phis = len(A)
num_decoys = 28000
cutoff_mode = None
cutoff_mode = 600
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
            A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=cutoff_mode)
filtered_B_inv =np.linalg.inv(filtered_B)
c = -200
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)


600

In [8]:
A_prime.dot(gamma_new)


Out[8]:
-200.0

In [12]:
get_z(A, B, Gamma)


Out[12]:
6.738473583560997

In [10]:
np.savetxt("/Users/weilu/Research/server/jun_2019/membrane_only_contact_optimization/contact_only/gammas/gamma_600_c200", 
           gamma_new)

In [ ]: