In [2]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
from small_script.myFunctions import *
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [3]:
plt.rcParams['figure.figsize'] = [16.18033, 10] #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
In [3]:
pre = "/Users/weilu/Research/server/may_2019/multi_iter0/optimization/gammas/"
name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
A,B,B_filtered,Gamma,Gamma_filtered,Lamb,Lamb_filtered,half_B,other_half_B,std_half_B,A_prime = get_raw_optimization_data(pre, name)
In [8]:
multi_iter0 = np.loadtxt("/Users/weilu/Research/server/may_2019/gammas/multi_seq_iter0_cutoff600")
original = np.loadtxt("/Users/weilu/Research/server/may_2019/gammas/original_gamma")
iter0 = np.loadtxt("/Users/weilu/Research/server/may_2019/gammas/iter0_cutoff600")
In [ ]:
total_phis = len(A)
num_decoys = 1000
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=600)
In [9]:
B_filtered_inv = np.linalg.inv(B_filtered)
In [15]:
filtered_B_inv = np.dot(
P, np.dot(np.diag(1 / filtered_lamb), np.linalg.inv(P)))
In [ ]:
filtered_B_inv
In [46]:
c = A_prime.dot(original)
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)
In [49]:
np.savetxt("/Users/weilu/Research/server/may_2019/gammas/multi_constraint_tc_constant", gamma_new)
In [5]:
pre = "/Users/weilu/Research/server/may_2019/iteration_optimization/optimization/gammas/"
name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
A,B,B_filtered,Gamma,Gamma_filtered,Lamb,Lamb_filtered,half_B,other_half_B,std_half_B,A_prime = get_raw_optimization_data(pre, name)
In [6]:
total_phis = len(A)
num_decoys = 6000
cutoff_mode = 600
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=None)
filtered_B_inv =np.linalg.inv(filtered_B)
In [12]:
c = A_prime.dot(original)
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)
In [13]:
c
Out[13]:
In [14]:
A_prime.dot(gamma_new)
Out[14]:
In [252]:
np.savetxt(f"{pre}/constant_tc_filtered", gamma_new)
In [293]:
c
Out[293]:
In [297]:
pre = "/Users/weilu/Research/server/may_2019/iter1_optimization_decoys_2000/optimization_2/gammas/"
name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
A,B,B_filtered,Gamma,Gamma_filtered,Lamb,Lamb_filtered,half_B,other_half_B,std_half_B,A_prime = get_raw_optimization_data(pre, name)
total_phis = len(A)
num_decoys = 26000
cutoff_mode = None
cutoff_mode = 600
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=cutoff_mode)
filtered_B_inv =np.linalg.inv(filtered_B)
In [1]:
c = A_prime.dot(original)
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)
In [ ]:
In [301]:
np.savetxt(f"{pre}/cutoff_600", gamma_new)
In [298]:
np.savetxt(f"{pre}/cutoff_646", gamma_new)
In [334]:
pre = "/Users/weilu/Research/server/may_2019/rewrite_phis_computation_reading/optimization/gammas/"
name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
A,B,B_filtered,Gamma,Gamma_filtered,Lamb,Lamb_filtered,half_B,other_half_B,std_half_B,A_prime = get_raw_optimization_data(pre, name)
total_phis = len(A)
num_decoys = 28000
cutoff_mode = None
cutoff_mode = 600
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=cutoff_mode)
filtered_B_inv =np.linalg.inv(filtered_B)
c = A_prime.dot(original)
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)
In [342]:
other_half_B
Out[342]:
In [340]:
lambda_2
Out[340]:
In [303]:
np.savetxt(f"{pre}/cutoff_600", gamma_new)
In [ ]:
In [336]:
def getGamma(pre, name, original, cutoff_mode=600):
A,B,B_filtered,Gamma,Gamma_filtered,Lamb,Lamb_filtered,half_B,other_half_B,std_half_B,A_prime = get_raw_optimization_data(pre, name)
total_phis = len(A)
num_decoys = -1
# cutoff_mode = None
# cutoff_mode = 600
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=cutoff_mode)
filtered_B_inv =np.linalg.inv(filtered_B)
c = A_prime.dot(original)
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)
return gamma_new
In [309]:
pre = "/Users/weilu/Research/server/may_2019/iter1_optimization_decoys_2000/optimization/gammas/"
name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
gamma_new = getGamma(pre, name, original)
np.savetxt(f"{pre}/cutoff_600", gamma_new)
In [310]:
pre = "/Users/weilu/Research/server/may_2019/iter1_optimization_decoys_2000/optimization_2_correct/gammas/"
name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
gamma_new = getGamma(pre, name, original)
np.savetxt(f"{pre}/cutoff_600", gamma_new)
np.savetxt(f"/Users/weilu/Research/server/may_2019/correct_gammas/iter2_cutoff_600", gamma_new)
In [332]:
pre = "/Users/weilu/Research/server/may_2019/iter1_optimization_decoys_2000/optimization_3_correct/gammas/"
name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
gamma_new = getGamma(pre, name, original)
np.savetxt(f"{pre}/cutoff_600", gamma_new)
np.savetxt(f"/Users/weilu/Research/server/may_2019/correct_gammas/iter3_cutoff_600", gamma_new)
In [333]:
pre = "/Users/weilu/Research/server/may_2019/iter1_optimization_decoys_2000/optimization_4_correct/gammas/"
name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
gamma_new = getGamma(pre, name, original)
np.savetxt(f"{pre}/cutoff_600", gamma_new)
np.savetxt(f"/Users/weilu/Research/server/may_2019/correct_gammas/iter4_cutoff_600", gamma_new)
In [339]:
pre = "/Users/weilu/Research/server/may_2019/iter1_optimization_decoys_2000/optimization_4_correct/gammas/"
name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
gamma_new = getGamma(pre, name, original, cutoff_mode=644)
outName = "iter4_cutoff_644_constant_tc"
np.savetxt(f"{pre}/{outName}", gamma_new)
np.savetxt(f"/Users/weilu/Research/server/may_2019/correct_gammas/{outName}", gamma_new)
In [311]:
pre = "/Users/weilu/Research/server/may_2019/iter1_optimization_decoys_2000/optimization_2_correct/gammas/"
name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
A,B,B_filtered,Gamma,Gamma_filtered,Lamb,Lamb_filtered,half_B,other_half_B,std_half_B,A_prime = get_raw_optimization_data(pre, name)
total_phis = len(A)
num_decoys = 28000
cutoff_mode = None
cutoff_mode = 600
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=cutoff_mode)
filtered_B_inv =np.linalg.inv(filtered_B)
c = A_prime.dot(original)
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)
In [315]:
np.dot(A_prime, gamma_new)
Out[315]:
In [316]:
np.dot(A_prime, original)
Out[316]:
In [330]:
os.path.join("", "123/sdfdfs")
Out[330]:
In [331]:
os.path.abspath("1235")
Out[331]:
In [328]:
os.path.basename("123/sdfsf/sdfsf")
Out[328]:
In [321]:
os.path.dirname("./sdfdsf/sdfdf/ss/a")
Out[321]:
In [258]:
cutoff600 = np.loadtxt(f"{pre}/cutoff600")
In [259]:
cutoff600_constant_tc = np.loadtxt(f"{pre}/constant_tc")
In [265]:
constant_tc_filtered_back = gamma_new
In [253]:
np.std(gamma_new)
Out[253]:
In [237]:
np.savetxt(f"{pre}/cutoff600", filtered_gamma)
In [264]:
np.std(gamma_new)
Out[264]:
In [263]:
A_prime.dot(gamma_new)
Out[263]:
In [267]:
pre = "/Users/weilu/Research/server/may_2019/iter1_optimization/optimization/gammas/"
name = "proteins_name_list_phi_pairwise_contact_well4.5_6.5_5.0_10phi_density_mediated_contact_well6.5_9.5_5.0_10_2.6_7.0phi_burial_well4.0"
A,B,B_filtered,Gamma,Gamma_filtered,Lamb,Lamb_filtered,half_B,other_half_B,std_half_B,A_prime = get_raw_optimization_data(pre, name)
total_phis = len(A)
num_decoys = 6000
cutoff_mode = 600
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=None)
filtered_B_inv =np.linalg.inv(filtered_B)
In [268]:
c = A_prime.dot(original)
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A) - c) / (A_prime.dot(B_inv).dot(A_prime) )
gamma_new = B_inv.dot(A-A_prime*lambda_2)
In [273]:
np.savetxt(f"{pre}/constant_tc_filtered", gamma_new)
In [272]:
plt.plot(gamma_new)
Out[272]:
In [270]:
plt.plot(gamma_new-constant_tc_filtered_back)
Out[270]:
In [32]:
def get_z(A, B, gamma):
return A.dot(gamma) / np.sqrt( gamma.dot(B).dot(gamma) )
In [33]:
get_z(A,B, original)
Out[33]:
In [34]:
get_z(A,B, multi_iter0)
Out[34]:
In [35]:
get_z(A,B, gamma_new)
Out[35]:
In [36]:
np.mean(gamma_new)
Out[36]:
In [37]:
np.mean(original)
Out[37]:
In [38]:
np.mean(multi_iter0)
Out[38]:
In [8]:
B_inv = np.linalg.inv(B)
In [5]:
np.std(original)
Out[5]:
In [6]:
np.std(multi_iter0)
Out[6]:
In [7]:
multi_iter0_scaled = multi_iter0 * np.std(original)/np.std(multi_iter0)
In [ ]:
A.dot(B_inv)
In [86]:
constraint = np.loadtxt("/Users/weilu/Research/server/may_2019/gammas/contraint_filtered")
In [87]:
A_prime.dot(constraint)
Out[87]:
In [88]:
A.dot(constraint)
Out[88]:
In [102]:
A_prime.dot(iter0)
Out[102]:
In [97]:
A_prime.dot(original)
Out[97]:
In [91]:
A_prime.dot(multi_iter0)
Out[91]:
In [98]:
multi_iter0_A_norm = multi_iter0 * A_prime.dot(original) / A_prime.dot(multi_iter0)
In [99]:
A_prime.dot(multi_iter0_A_norm)
Out[99]:
In [100]:
np.savetxt("/Users/weilu/Research/server/may_2019/gammas/multi_iter0_A_norm", multi_iter0_A_norm)
In [83]:
A.dot(original)
Out[83]:
In [115]:
A.dot(original) / np.sqrt( original.dot(B).dot(original) )
Out[115]:
In [114]:
A.dot(multi_iter0) / np.sqrt( multi_iter0.dot(B).dot(multi_iter0) )
Out[114]:
In [108]:
original.dot(B).dot(original)
Out[108]:
In [110]:
multi_iter0.dot(B).dot(multi_iter0)
Out[110]:
In [84]:
A.dot(multi_iter0)
Out[84]:
In [20]:
B_inv = filtered_B_inv
lambda_2 = (A_prime.dot(B_inv).dot(A)-A.dot(B_inv).dot(A))/(A_prime.dot(B_inv).dot(A_prime)-A.dot(B_inv).dot(A_prime))
gamma_new = B_inv.dot(A-A_prime*lambda_2)
In [21]:
lambda_2
Out[21]:
In [22]:
A_prime.dot(gamma_new)
Out[22]:
In [18]:
B_inv = np.linalg.inv(B)
lambda_2 = (A_prime.dot(B_inv).dot(A)-A.dot(B_inv).dot(A))/(A_prime.dot(B_inv).dot(A_prime)-A.dot(B_inv).dot(A_prime))
gamma_new = B_inv.dot(A-A_prime*lambda_2)
In [19]:
lambda_2
Out[19]:
In [63]:
np.savetxt(f"{pre}contraint", gamma_new)
In [14]:
In [65]:
B = filtered_B
B_inv = np.linalg.inv(B)
lambda_2 = (A_prime.dot(B_inv).dot(A)-A.dot(B_inv).dot(A))/(A_prime.dot(B_inv).dot(A_prime)-A.dot(B_inv).dot(A_prime))
gamma_new_filtered = B_inv.dot(A-A_prime*lambda_2)
In [66]:
np.savetxt(f"{pre}contraint_filtered", gamma_new_filtered)
In [74]:
In [75]:
In [76]:
original_normalized = original * np.std(multi_iter0) / np.std(original)
In [79]:
np.savetxt("/Users/weilu/Research/server/may_2019/gammas/original_gamma_normalized", original_normalized)
In [77]:
original_normalized[:10]
Out[77]:
In [70]:
(multi_iter0-gamma_new_filtered).shape
Out[70]:
In [71]:
np.sum(multi_iter0-gamma_new_filtered)/690
Out[71]:
In [72]:
np.mean(gamma_new_filtered)
Out[72]:
In [73]:
np.std(gamma_new_filtered)
Out[73]:
In [62]:
np.sum(Gamma-gamma_new)
Out[62]:
In [15]:
plt.plot(Lamb)
plt.yscale("log")
In [13]:
In [6]:
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys)
In [8]:
filtered_gamma, filtered_B, filtered_lamb, P, lamb = get_filtered_gamma_B_lamb_P_and_lamb(
A, B, half_B, other_half_B, std_half_B, total_phis, num_decoys, setCutoff=600)
In [7]:
np.sum(filtered_gamma-Gamma_filtered)
Out[7]:
In [9]:
np.sum(filtered_gamma-Gamma_filtered)
Out[9]:
In [17]:
original = np.loadtxt("/Users/weilu/Research/server/may_2019/gammas/original_gamma")
In [24]:
multi_iter0 = np.loadtxt("/Users/weilu/Research/server/may_2019/gammas/cutoff600")
In [27]:
np.std(original[:630])
Out[27]:
In [28]:
np.mean(original[:630])
Out[28]:
In [18]:
np.std(original)
Out[18]:
In [19]:
np.mean(original)
Out[19]:
In [25]:
np.std(multi_iter0)
Out[25]:
In [26]:
np.mean(multi_iter0)
Out[26]:
In [29]:
np.std(multi_iter0[:630])
Out[29]:
In [30]:
np.mean(multi_iter0[:630])
Out[30]:
In [10]:
np.savetxt(pre+"cutoff600", filtered_gamma)
In [47]:
b = formatBurialGamma(original[630:])
b.plot(x="Residue", y=["rho1", "rho2", "rho3"])
_ = plt.xticks(np.arange(20), b.Residue)
In [46]:
b = formatBurialGamma(multi_iter0[630:])
b.plot(x="Residue", y=["rho1", "rho2", "rho3"])
_ = plt.xticks(np.arange(20), b.Residue)
In [42]:
locs
Out[42]:
In [43]:
labels
Out[43]:
In [33]:
def formatBurialGamma(burialGamma):
# now, positive means favored.
rhoGamma = pd.DataFrame(-burialGamma.reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/burial_only_gamma.dat", g, fmt='%7.4f')
# rhoGamma
rhoGamma["hydrophobicityOrder"] = rhoGamma["oneLetter"].apply(lambda x: hydrophobicity_map[x])
return rhoGamma.sort_values("hydrophobicityOrder")
In [34]:
formatBurialGamma(multi_iter0[630:])
Out[34]:
In [31]:
# now, positive means favored.
ga = multi_iter0[630:]
rhoGamma = pd.DataFrame(-ga.reshape(3,20).T, columns=["rho1", "rho2", "rho3"]).reset_index()
rhoGamma["oneLetter"] = rhoGamma["index"].apply(lambda x: inverse_res_type_map[x])
rhoGamma["Residue"] = rhoGamma["index"].apply(lambda x: one_to_three(inverse_res_type_map[x]))
rhoGamma = rhoGamma[["Residue", "rho1", "rho2", "rho3", "index", "oneLetter"]]
g = rhoGamma[["rho1", "rho2", "rho3"]].values
# np.savetxt("/Users/weilu/Research/server/feb_2019/burial_only_gamma.dat", g, fmt='%7.4f')
# rhoGamma
rhoGamma["hydrophobicityOrder"] = rhoGamma["oneLetter"].apply(lambda x: hydrophobicity_map[x])
rhoGamma.sort_values("hydrophobicityOrder")
Out[31]:
In [ ]:
data = pd.read_csv("/Users/weilu/Research/server/april_2019/optimization_mult_seq/data_info_3.csv", index_col=0)
In [6]:
data.query("Problematic != 4").reset_index(drop=True)
Out[6]:
In [8]:
complete_data_filtered = data.query("Problematic != 4").reset_index(drop=True)
to_location = "/Users/weilu/Research/server/april_2019/optimization_mult_seq_2/"
n = len(complete_data_filtered)
number_of_runs = int(np.ceil(n/5))
perRun = 5
count = 0
for i in range(number_of_runs):
with open(to_location+f"proteins_name_list/proteins_name_list_{i}.txt", "w") as out:
cc = 0
while count < n and cc < perRun:
fullName = complete_data_filtered.iloc[count]["FullName"]
out.write(fullName+"\n")
cc += 1
count += 1
print(number_of_runs)
In [1]:
362*5
Out[1]:
In [9]:
number_of_runs
Out[9]:
In [10]:
-1 == [-1,2]
Out[10]:
In [48]:
multi = np.loadtxt("/Users/weilu/Research/server/may_2019/gammas/multi_seq_iter0_cutoff600")
In [49]:
original = np.loadtxt("/Users/weilu/Research/server/may_2019/gammas/original_gamma")
In [50]:
np.std(original)
Out[50]:
In [53]:
normalized_multi = multi * np.std(original) /np.std(multi)
In [54]:
np.std(normalized_multi)
Out[54]:
In [56]:
np.savetxt("/Users/weilu/Research/server/may_2019/gammas/multi_seq_iter0_cutoff600_normalized", normalized_multi)
In [ ]:
In [90]:
In [91]:
In [79]:
len(data)
Out[79]:
In [85]:
In [215]:
In [229]:
In [216]:
len(data)
Out[216]:
In [201]:
In [227]:
def get_MSA_data(a3mFile):
data = []
# "/Users/weilu/Research/server/may_2019/family_fold/aligned/1r69.a3m"
with open(a3mFile, "r") as f:
for line in f:
if line[0] == ">":
continue
s_new = ""
seq = line.strip()
for s in seq:
if s.islower():
continue
s_new += s
data.append(s_new)
return data
gamma_ijm, water_gamma_ijm, protein_gamma_ijm = get_gammas("/Users/weilu/opt/parameters/globular_parameters/gamma.dat", memGammaFile=None)
burial_gamma = np.loadtxt("/Users/weilu/opt/parameters/globular_parameters/burial_gamma.dat")
def get_ff_dat(data, location=None):
n = len(data[0])
f_direct = np.zeros((n,n))
f_water = np.zeros((n,n))
f_protein = np.zeros((n,n))
f_burial = np.zeros((n,3))
for i in range(n):
for j in range(i+1, n):
direct = []
water = []
protein = []
for seq in data:
# seq = data[0]
if seq[i] == "-" or seq[j] == "-":
continue
if seq[i] == "X" or seq[j] == "X":
continue
res1type = res_type_map[seq[i]]
res2type = res_type_map[seq[j]]
direct.append(gamma_ijm[0][res1type][res2type])
water.append(water_gamma_ijm[0][res1type][res2type])
protein.append(protein_gamma_ijm[0][res1type][res2type])
f_direct[i][j] += np.average(direct)
f_water[i][j] += np.average(water)
f_protein[i][j] += np.average(protein)
f_direct[j][i] += np.average(direct)
f_water[j][i] += np.average(water)
f_protein[j][i] += np.average(protein)
for i in range(n):
for j in range(3):
burial = []
for seq in data:
if seq[i] == "-" or seq[i] == "X":
continue
res1type = res_type_map[seq[i]]
burial.append(burial_gamma[res1type][j])
f_burial[i][j] += np.average(burial)
if location:
np.savetxt(location+"/direct.dat", f_direct)
np.savetxt(location+"/water.dat", f_water)
np.savetxt(location+"/protein.dat", f_protein)
np.savetxt(location+"/burial.dat", f_burial)
return f_direct, f_water, f_protein, f_burial
In [228]:
In [155]:
name = "1r69"
a3mFile = f"/Users/weilu/Research/server/may_2019/family_fold/aligned/{name}.a3m"
pre = f"/Users/weilu/Research/server/may_2019/family_fold/ff_contact/{name}/"
os.system(f"mkdir -p {pre}")
data = get_MSA_data(a3mFile)
print(name, len(data))
f_direct_2, f_water_2, f_protein_2, f_burial_2 = get_ff_dat(data, location=pre)
In [ ]:
In [123]:
In [134]:
In [157]:
a = np.loadtxt("/Users/weilu/Research/server/may_2019/family_fold/data_9.info")
In [160]:
In [165]:
wham = pd.read_csv("/Users/weilu/Research/server/may_2019/family_fold/original_1r69_9/wham.dat")
energy = pd.read_csv("/Users/weilu/Research/server/may_2019/family_fold/original_1r69_9/energy.dat")
wham.columns = wham.columns.str.strip()
remove_columns = ['Tc', 'Energy']
wham = wham.drop(remove_columns, axis=1)
energy.columns = energy.columns.str.strip()
remove_columns = ['Steps', 'Shake', 'Excluded', 'Helix', 'AMH-Go', 'Vec_FM', 'SSB']
energy = energy.drop(remove_columns, axis=1)
data = pd.concat([wham, energy], axis=1)
In [168]:
data["newContact"] = a[:,2]
In [177]:
data.Qw.dtype
Out[177]:
In [171]:
data.columns
Out[171]:
In [ ]:
g = sns.FacetGrid(a, col="Name",col_wrap=4, hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Qw", y_show, alpha=0.5).add_legend())
In [187]:
# cmap = sns.cubehelix_palette(as_cmap=True)
# fg = sns.FacetGrid(data=data, hue='Qw', palette=cmap)
# g = (fg.map(plt.scatter, 'newContact', 'Water').add_legend())
In [ ]:
In [197]:
cmap = sns.cubehelix_palette(as_cmap=True)
f, ax = plt.subplots()
# ax.set_aspect(1)
points = ax.scatter(data["Steps"], data["Water"], c=data["Qw"], s=50, cmap=cmap)
f.colorbar(points)
Out[197]:
In [198]:
cmap = sns.cubehelix_palette(as_cmap=True)
f, ax = plt.subplots()
# ax.set_aspect(1)
points = ax.scatter(data["Steps"], data["newContact"], c=data["Qw"], s=50, cmap=cmap)
f.colorbar(points)
Out[198]:
In [195]:
cmap = sns.cubehelix_palette(as_cmap=True)
f, ax = plt.subplots()
ax.set_aspect(1)
points = ax.scatter(data["Water"], data["newContact"], c=data["Qw"], s=50, cmap=cmap)
f.colorbar(points)
Out[195]:
In [191]:
cmap = sns.cubehelix_palette(as_cmap=True)
f, ax = plt.subplots()
points = ax.scatter(data["Water"], data["newContact"], c=data["Qw"], s=50, cmap=cmap)
f.colorbar(points)
ax.set_aspect('equal')
In [174]:
data.plot("newContact", "Water", )
Out[174]:
In [172]:
data.plot("Steps", ["newContact", "Water"])
Out[172]:
In [158]:
a.shape
Out[158]:
In [ ]:
In [ ]:
In [ ]:
In [125]:
a.columns
Out[125]:
In [114]:
np.average([1,2])
Out[114]:
In [100]:
res_type_map["S"]
Out[100]:
In [ ]:
# s.islower
# seq = data[0]
# s_new = ""
# for s in seq:
# if s.islower():
# continue
# s_new += s
# print(s_new, len(s_new))
# data = []
# with open("/Users/weilu/Research/server/may_2019/family_fold/aligned/1r69.a3m", "r") as f:
# for line in f:
# if line[0] == ">":
# continue
# s_new = ""
# seq = line.strip()
# for s in seq:
# if s.islower():
# continue
# s_new += s
# data.append(s_new)
In [ ]:
data = []
with open("/Users/weilu/Research/server/may_2019/family_fold/query2.a3m", "r") as f:
for line in f:
if line[0] == ">":
continue
data.append(line.strip())
with open("/Users/weilu/Research/server/may_2019/family_fold/query2.aligned", "w") as out:
for seq in data:
s_new = ""
for s in seq:
if s.islower():
continue
s_new += s
out.write(s_new+"\n")
# print(s_new, len(s_new))
a = pd.read_csv("/Users/weilu/Research/server/may_2019/family_fold/contact.csv")
b = a.pivot(index="i", columns=" j", values=" Distance").values
plt.imshow(b < 8)