In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
from small_script.myFunctions import *
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
plt.rcParams['figure.figsize'] = [16.18033, 10] #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
In [11]:
def get_z(A, B, gamma):
return A.dot(gamma) / np.sqrt( gamma.dot(B).dot(gamma) )
In [28]:
pre = "/Users/weilu/Research/server/jun_2019/membrane_only_contact_optimization/optimization/gammas/"
gamma_file = pre + "protein_list_phi_contact_membrane_well6.5_9.5_5.0_10_2.6_7.0_gamma"
In [29]:
gamma = np.loadtxt(gamma_file)
In [30]:
gamma.shape
Out[30]:
In [31]:
# name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
name = "protein_list_phi_contact_membrane_well6.5_9.5_5.0_10_2.6_7.0"
A,B,B_filtered,Gamma,Gamma_filtered,Lamb,Lamb_filtered,half_B,other_half_B,std_half_B,A_prime = get_raw_optimization_data(pre, name)
In [32]:
A_prime.dot(Gamma)
Out[32]:
In [33]:
A_prime.dot(Gamma_filtered)
Out[33]:
In [34]:
lamb, P = np.linalg.eig(B)
lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)
In [35]:
B.shape
Out[35]:
In [36]:
plt.plot(lamb)
plt.yscale("log")
# plt.xlim([650, 700])
In [37]:
plt.plot(lamb)
plt.yscale("log")
plt.xlim([650, 700])
Out[37]:
In [68]:
total_phis = len(A)
num_decoys = 28000
cutoff_mode = None
cutoff_mode = 650
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=cutoff_mode)
filtered_B_inv =np.linalg.inv(filtered_B)
c = -400
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)
In [67]:
gamma_new_c200 = gamma_new
In [69]:
gamma_new_c400 = gamma_new
In [72]:
plt.plot(gamma_new_c400)
Out[72]:
In [70]:
plt.plot(gamma_new_c200-gamma_new_c400)
Out[70]:
In [60]:
In [61]:
get_z(A, B, gamma)
Out[61]:
In [62]:
get_z(A, B, gamma_new)
Out[62]:
In [63]:
A_prime.dot(gamma_new)
Out[63]:
In [64]:
pre = ""
In [66]:
np.savetxt("/Users/weilu/Research/server/jun_2019/membrane_only_contact_optimization/optimization/gammas/gamma_650_c200",
gamma_new)
In [ ]:
In [60]:
np.savetxt("/Users/weilu/Research/server/jun_2019/amphibian/setup/2xov/gamma_1200", gamma_1200)
In [61]:
np.savetxt("/Users/weilu/Research/server/jun_2019/amphibian/setup/2xov/gamma_1100", gamma_1100)
np.savetxt("/Users/weilu/Research/server/jun_2019/amphibian/setup/2xov/gamma_1300", gamma_1300)
In [56]:
gamma.shape
Out[56]:
In [71]:
Out[71]:
In [75]:
# convert to standard gamma
gamma = np.zeros(690)
c = 0
ii = 1
for jj in range(3):
for i in range(20):
for j in range(i, 20):
if jj == 0:
gamma[c] = gamma_ijm[ii][i][j]
if jj == 1:
gamma[c] = protein_gamma_ijm[ii][i][j]
if jj == 2:
gamma[c] = water_gamma_ijm[ii][i][j]
c += 1
print(c)
for i in range(3):
for j in range(20):
if ii == 0:
gamma[c] = burial_gamma_ij[j][i]
if ii == 1:
gamma[c] = membrane_burial_gamma_ij[j][i]
c += 1
In [76]:
np.savetxt("/Users/weilu/Research/server/jun_2019/amphibian/converted_original_membrane_gamma.dat", gamma)
In [74]:
np.savetxt("/Users/weilu/Research/server/jun_2019/amphibian/converted_original_gamma.dat", gamma)
In [53]:
plt.plot(gamma_1200-gamma_1100)
Out[53]:
In [59]:
plt.plot(gamma_1200)
plt.plot(gamma_1100)
plt.plot(gamma_1300)
Out[59]:
In [36]:
Out[36]:
In [73]:
folder = "/Users/weilu/Research/server/jun_2019/multiSeq_membrane_only_contact_optimization/optimization/"
In [74]:
i = 0
location = f"{folder}/proteins_name_list/proteins_name_list_{i}.txt"
with open(location, "r") as f:
a = f.readlines()
size = len(a)
# print(size)
pre = f"{folder}/sub_gamma"
base = pre + f"/proteins_name_list_{i}_phi_contact_membrane_well6.5_9.5_5.0_10_2.6_7.0"
# base = pre + f"/proteins_name_list_{i}_phi_pairwise_contact_multiLetter_well4.5_6.5_5.0_10phi_density_mediated_contact_multiLetter_well6.5_9.5_5.0_10_2.6_7.0phi_burial_multiLetter_well4.0"
a_name = base + "_A"
a_prime_name = base + "_A_prime"
std_half_b_name = base + "_std_half_B"
half_b = base + "_half_B"
other_half_b = base + "_other_half_B"
A = size * np.loadtxt(a_name)
A_prime = size * np.loadtxt(a_prime_name)
std_half_B = np.loadtxt(std_half_b_name)
half_B = np.loadtxt(half_b)
other_half_B = np.loadtxt(other_half_b)
In [75]:
count = 1
for i in range(1, 522):
location = f"{folder}/proteins_name_list/proteins_name_list_{i}.txt"
with open(location, "r") as f:
a = f.readlines()
size = len(a)
# print(size)
pre = f"{folder}/sub_gamma"
# base = pre + f"/proteins_name_list_{i}_phi_pairwise_contact_multiLetter_well4.5_6.5_5.0_10phi_density_mediated_contact_multiLetter_well6.5_9.5_5.0_10_2.6_7.0phi_burial_multiLetter_well4.0"
base = pre + f"/proteins_name_list_{i}_phi_contact_membrane_well6.5_9.5_5.0_10_2.6_7.0"
a_name = base + "_A"
a_prime_name = base + "_A_prime"
std_half_b_name = base + "_std_half_B"
half_b = base + "_half_B"
other_half_b = base + "_other_half_B"
try:
std_half_B = np.loadtxt(std_half_b_name)
A += size * np.loadtxt(a_name)
A_prime += size * np.loadtxt(a_prime_name)
half_B += np.loadtxt(half_b)
other_half_B += np.loadtxt(other_half_b)
count += 1
print(i, "done")
except:
print(i, "not found")
In [77]:
count
Out[77]:
In [78]:
# folder = "/Users/weilu/Research/server/may_2019/multi_iter0/multiLetter_symmetric"
pre = f"{folder}/gammas/"
n = count*2
A /= n
A_prime /= n
std_half_B /= n
half_B /= n
other_half_B /= n
np.save(pre+ "A", A)
np.save(pre+ "A_prime", A_prime)
np.save(pre+ "std_half_B", std_half_B)
np.save(pre+ "half_B", half_B)
np.save(pre+ "other_half_B", other_half_B)
In [79]:
B = half_B - other_half_B
gamma = np.dot(np.linalg.pinv(B), A)
In [81]:
np.savetxt(pre+"gamma", gamma)
In [82]:
lamb, P = np.linalg.eig(B)
lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)
In [83]:
plt.plot(lamb)
plt.yscale("log")
In [84]:
cutoff_mode = 650
filtered_lamb = np.copy(lamb)
filtered_B_inv, filtered_lamb, P = get_filtered_B_inv_lambda_and_P(
filtered_lamb, cutoff_mode, P)
filtered_gamma = np.dot(filtered_B_inv, A)
In [85]:
A_prime.dot(gamma)
Out[85]:
In [86]:
A_prime.dot(filtered_gamma)
Out[86]:
In [87]:
total_phis = len(A)
num_decoys = 28000
cutoff_mode = None
cutoff_mode = 650
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=cutoff_mode)
filtered_B_inv =np.linalg.inv(filtered_B)
c = -300
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)
In [88]:
np.savetxt(pre+"gamma_650_c300", gamma_new)
In [3]:
pre = "/Users/weilu/Research/server/jun_2019/membrane_only_contact_optimization/contact_only/gammas/"
# gamma_file = pre + "protein_list_phi_contact_membrane_well6.5_9.5_5.0_10_2.6_7.0_gamma"
# name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
name = "protein_list_phi_contact_only_membrane_well6.5_9.5_5.0_10_2.6_7.0"
A,B,B_filtered,Gamma,Gamma_filtered,Lamb,Lamb_filtered,half_B,other_half_B,std_half_B,A_prime = get_raw_optimization_data(pre, name)
In [4]:
lamb, P = np.linalg.eig(B)
lamb, P = sort_eigenvalues_and_eigenvectors(lamb, P)
In [5]:
A_prime.dot(Gamma)
Out[5]:
In [6]:
plt.plot(lamb)
plt.yscale("log")
In [7]:
total_phis = len(A)
num_decoys = 28000
cutoff_mode = None
cutoff_mode = 600
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=cutoff_mode)
filtered_B_inv =np.linalg.inv(filtered_B)
c = -200
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)
In [8]:
A_prime.dot(gamma_new)
Out[8]:
In [12]:
get_z(A, B, Gamma)
Out[12]:
In [10]:
np.savetxt("/Users/weilu/Research/server/jun_2019/membrane_only_contact_optimization/contact_only/gammas/gamma_600_c200",
gamma_new)
In [ ]: