In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
# from small_script.myFunctions import *
sys.path.insert(0, "/Users/weilu/openmmawsem")
from helperFunctions.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
plt.rcParams['figure.figsize'] = np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
In [3]:
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.Polypeptide import PPBuilder
from Bio.PDB.Polypeptide import three_to_one
def get_inside_or_not_table(pdb_file):
parser = PDBParser(PERMISSIVE=1,QUIET=True)
try:
structure = parser.get_structure('X', pdb_file)
except:
return [0]
inside_or_not_table = []
for res in structure.get_residues():
if res.get_id()[0] != " ":
continue# skip
try:
res["CA"].get_vector()
except:
print(pdb_file, res.get_id())
return [0]
inside_or_not_table.append(int(abs(res["CA"].get_vector()[-1]) < 15))
return inside_or_not_table
def extractTransmembrane(toLocation, location):
x = PDBParser().get_structure("x", location)
class Transmembrane(Select):
def accept_residue(self, residue):
if abs(residue["CA"].get_vector()[-1]) < 15:
return 1
else:
return 0
io = PDBIO()
io.set_structure(x)
io.save(toLocation, Transmembrane())
def getSeqFromPDB(location, considerGap=True):
x = PDBParser().get_structure("x", location)
seq = ""
resseqs = []
preResId = 0
for res in x.get_residues():
resId = res.get_id()[1]
if considerGap and resId != preResId + 1:
seq += " "
resseqs.append(-1)
seq += three_to_one(res.get_resname())
resseqs.append(res.get_id()[1])
preResId = resId
return seq,resseqs
In [4]:
info = pd.read_csv("/Users/weilu/Research/database/membrane_contact_dtabase/for_iter0_training_complete_jun06.csv", index_col=0)
In [5]:
info.shape
Out[5]:
In [6]:
a = info.query("InMembraneRatio > 0.2 and InMembraneRatio < 0.8 and Length < 2000").reset_index()
a.to_csv("/Users/weilu/Research/database/relative_k/chosen_jun19.csv")
In [7]:
a.hist("Length", bins=100)
Out[7]:
In [8]:
a.shape
Out[8]:
In [9]:
a.columns
Out[9]:
In [4]:
chosen = pd.read_csv("/Users/weilu/Research/database/relative_k/chosen_jun19.csv", index_col=0)
parser = PDBParser(PERMISSIVE=1,QUIET=True)
In [5]:
def do(cmd, get=False, show=True):
if get:
out = subprocess.Popen(cmd,stdout=subprocess.PIPE,shell=True).communicate()[0].decode()
if show:
print(out, end="")
return out
else:
return subprocess.Popen(cmd, shell=True).wait()
In [7]:
fromPre = "/Users/weilu/Research/database/hybrid_contact_database/"
toPre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/"
pdb_list = chosen.Protein.tolist()
pre = toPre
do(f"mkdir -p {pre}")
do(f"mkdir -p {pre}/optimization")
do(f"mkdir -p {pre}/phis")
do(f"mkdir -p {pre}/database/dompdb")
do(f"mkdir -p {pre}/database/S20_seq")
for pdb in pdb_list:
fromLocation = f'{fromPre}/cleaned/{pdb}.pdb'
toLocation = f"{toPre}/database/dompdb/"
cmd = f"cp {fromLocation} {toLocation}"
do(cmd)
# print(cmd)
In [9]:
for pdb in pdb_list:
fromLocation = f'{fromPre}/cleaned/{pdb}.pdb'
# toLocation = f"{toPre}/dompdb/"
# os.system(f"cp {fromLocation} {toLocation}")
try:
structure = parser.get_structure(pdb, fromLocation)
except:
print(fromLocation)
print("cannot get ", pdb)
continue
seq = ""
for res in structure.get_residues():
resName = three_to_one(res.resname)
seq += resName
with open(f"{toPre}/database/S20_seq/{pdb}.seq", "w") as out:
out.write(seq+"\n")
In [10]:
np.random.shuffle(pdb_list)
with open(f"{toPre}/optimization/protein_list", "w") as out:
for pdb in pdb_list:
out.write(pdb+"\n")
In [88]:
with open("/Users/weilu/Research/server/jun_2019/relative_k/optimization/small_protein_list") as f:
names = f.readlines()
In [12]:
In [14]:
In [199]:
l = "/Users/weilu/Research/server/nov_2019/relative_k_optimization_with_membrane/database/dompdb/3b60"
input_pdb_filename = l
structure = parse_pdb(input_pdb_filename)
res_list = get_res_list(structure)
In [137]:
def get_relative_k_data_rotation(gamma, decoy, decoyQ, native, include_mem_burial=0):
# gamma = 1
rotation_all = np.arange(0, 180, 30)
z_shift_all = np.arange(-20, 20, 4)
rotation_axis=Vector(1,0,0)
z_m = []
rad_list = []
for z_shift in z_shift_all:
for degree in rotation_all:
z_m.append(z_shift)
rad_list.append(degree)
if include_mem_burial == 1:
energy = [a[0]+(a[1]+a[2])*gamma for a in decoy]
native_energy =native[0] + (native[2]+native[1])*gamma
elif include_mem_burial == 0:
energy = [a[0]+(a[1])*gamma for a in decoy]
native_energy =native[0] + (native[1])*gamma
elif include_mem_burial == 2:
# only membrane
energy = [(a[2])*gamma for a in decoy]
native_energy = (native[1])*gamma
elif include_mem_burial == 3:
energy = [a[0]+(a[1]+a[2]*0.5)*gamma for a in decoy]
native_energy =native[0] + (native[2]*0.5+native[1])*gamma
a0 = [a[0] for a in decoy]
a1 = [a[1] for a in decoy]
a2 = [a[2] for a in decoy]
# z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
# z_m = np.arange(-limit, limit, 1)
d = pd.DataFrame([z_m, rad_list, energy, a0, a1, a2, list(decoyQ)]).T
d.columns = ["z_m", "rad", "energy", "contact_wat", "contact_mem", "membrane", "Q"]
# d = d.sort_values("z_m").reset_index()
d["native"] = native_energy
d["z"] = native_energy/d["energy"]
d["z"] = (d["energy"] - native_energy)/d["energy"].std()
# d = d.query("z_m != 0").reset_index(drop=True)
# d = d.query("abs(z_m) >= 5").reset_index(drop=True)
# dz = d["z"].tolist()
return d
def get_value_array_5(gamma_list, decoy, decoyQ, native, cutoff=0, **kwargs):
# gamma_list = np.linspace(1,15, num=100)
value_list = []
value_sum_list = []
for gamma in gamma_list:
d = get_relative_k_data_rotation(gamma, decoy, decoyQ, native, **kwargs)
corr, p = scipy.stats.pearsonr(d["Q"], d["z"])
# value, value_sum = get_value(gamma, **kwargs)
# print(gamma, value)
value_list.append(corr)
value_sum_list.append(p)
return np.array(value_list), np.array(value_sum_list)
In [7]:
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_with_burial_and_membrane"
pre = "/Users/weilu/Research/server/nov_2019/relative_k_optimization_with_membrane/"
phi_pre = "phi_relative_k_with_membrane_well"
with open(f"{pre}/optimization/protein_list") as f:
names = f.readlines()
names = [i.strip() for i in names]
In [20]:
name
Out[20]:
In [32]:
np.arange(0, 360, 60)
Out[32]:
In [33]:
np.arange(-20, 20, 4)
Out[33]:
In [30]:
np.identity(3)
Out[30]:
In [143]:
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
name = names[1]
cutoff = 1.0
gamma_list = np.linspace(1,15, num=100)
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_rotation")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_rotation")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_rotation")
gamma = 2
d = get_relative_k_data_rotation(gamma, decoy, decoyQ, native, include_mem_burial=1)
# value_list, value_sum_list = get_value_array_3(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
In [144]:
np.corrcoef(d[["Q", "z"]].values.T)
Out[144]:
In [145]:
import scipy
corr, p = scipy.stats.pearsonr(d["Q"], d["z"])
In [146]:
corr
Out[146]:
In [147]:
d.plot.scatter("Q", "z", c="red")
Out[147]:
In [142]:
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_with_burial_and_membrane"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/phis"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/back_phis/"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
gamma_list = np.linspace(0,10, num=20)
cutoff = 0
all_value_list = []
all_value_sum_list = []
cc = 0
n = 80
for i, name in enumerate(names):
cc += 1
if i % 100 == n:
print(i)
break
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_rotation")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_rotation")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_rotation")
one_value_list, one_value_sum_list = get_value_array_5(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff, include_mem_burial=1)
all_value_list.append(one_value_list)
# all_value_sum_list.append(one_value_sum_list)
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
a_3 = pd.DataFrame(all_value_list).sum(axis=0)/n
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average pearson correlation")
Out[142]:
In [ ]:
# 4p6v is big.
In [139]:
# 1 to 0.5, mode 3
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that E_decoy > E_native")
# plt.ylim((-1, 1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
Out[139]:
In [131]:
# membrane only
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that E_decoy > E_native")
# plt.ylim((-1, 1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
Out[131]:
In [129]:
# together mode 1
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that E_decoy > E_native")
# plt.ylim((-1, 1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
Out[129]:
In [119]:
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_with_burial_and_membrane"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/phis"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/back_phis/"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
gamma_list = np.linspace(0,10, num=20)
cutoff = 0
all_value_list = []
all_value_sum_list = []
cc = 0
for i, name in enumerate(names):
cc += 1
if i % 100 == 40:
print(i)
break
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_rotation")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_rotation")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_rotation")
one_value_list, one_value_sum_list = get_value_array_5(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
all_value_list.append(one_value_list)
# all_value_sum_list.append(one_value_sum_list)
In [120]:
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that E_decoy > E_native")
# plt.ylim((-1, 1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
Out[120]:
In [116]:
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that E_decoy > E_native")
# plt.ylim((-1, 1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
Out[116]:
In [110]:
def get_value_4(gamma, decoy, decoyQ, native, cutoff=0, **kwargs):
d = get_relative_k_data_rotation(gamma, decoy, decoyQ, native, **kwargs)
# dz = d["z"].tolist()
value = ( ((d["z"] >= cutoff)) ).sum()/len(d)
value_sum = ((d["z"]) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
return value, value_sum
def get_value_array_4(gamma_list, **kwargs):
# gamma_list = np.linspace(1,15, num=100)
value_list = []
value_sum_list = []
for gamma in gamma_list:
value, value_sum = get_value_4(gamma, **kwargs)
# value, value_sum = get_value(gamma, **kwargs)
# print(gamma, value)
value_list.append(value)
value_sum_list.append(value_sum)
return np.array(value_list), np.array(value_sum_list)
# def get_relative_k_data_rotation(gamma, decoy, decoyQ, native):
# # gamma = 1
# rotation_all = [60] * 6
# z_shift_all = [-20] + [4]*10
# rotation_axis=Vector(1,0,0)
# z = 0
# rad = 0
# z_m = []
# rad_list = []
# for z_shift in z_shift_all:
# z += z_shift
# for degree in rotation_all:
# rad += degree
# z_m.append(z)
# rad_list.append(rad)
# energy = [a[0]+(a[1]+a[2])*gamma for a in decoy]
# a0 = [a[0] for a in decoy]
# a1 = [a[1] for a in decoy]
# a2 = [a[2] for a in decoy]
# # z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
# # z_m = np.arange(-limit, limit, 1)
# d = pd.DataFrame([z_m, rad_list, energy, a0, a1, a2, list(decoyQ)]).T
# d.columns = ["z_m", "rad", "energy", "contact_wat", "contact_mem", "membrane", "Q"]
# # d = d.sort_values("z_m").reset_index()
# native_energy =native[0] + (native[2]+native[1])*gamma
# d["native"] = native_energy
# d["z"] = native_energy/d["energy"]
# d["z"] = (d["energy"] - native_energy)/d["energy"].std()
# # d = d.query("z_m != 0").reset_index(drop=True)
# # d = d.query("abs(z_m) >= 5").reset_index(drop=True)
# # dz = d["z"].tolist()
# return d
In [ ]:
def get_relative_k_data_rotation(gamma, decoy, decoyQ, native):
# gamma = 1
rotation_all = [60] * 6
z_shift_all = [-20] + [4]*10
rotation_axis=Vector(1,0,0)
z = 0
rad = 0
z_m = []
for z_shift in z_shift_all:
z += z_shift
for degree in rotation_all:
rad += degree
z_m.append(f"{z}_{rad}")
energy = [a[0]+(a[1]+a[2])*gamma for a in decoy]
# z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
# z_m = np.arange(-limit, limit, 1)
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
# d = d.sort_values("z_m").reset_index()
native_energy =native[0] + (native[2]+native[1])*gamma
d["native"] = native_energy
d["z"] = native_energy/d["energy"]
d["z"] = (d["energy"] - native_energy)/d["energy"].std()
# d = d.query("z_m != 0").reset_index(drop=True)
# d = d.query("abs(z_m) >= 5").reset_index(drop=True)
# dz = d["z"].tolist()
return d
In [10]:
def get_relative_k_data(gamma, decoy, decoyQ, native):
# gamma = 1
limit =30
energy = [a[0]+(a[1]+a[2])*gamma for a in decoy]
# z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
z_m = np.arange(-limit, limit, 1)
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
# d = d.sort_values("z_m").reset_index()
native_energy =native[0] + (native[2]+native[1])*gamma
d["native"] = native_energy
d["z"] = native_energy/d["energy"]
d["z"] = (d["energy"] - native_energy)/d["energy"].std()
# d = d.query("z_m != 0").reset_index(drop=True)
# d = d.query("abs(z_m) >= 5").reset_index(drop=True)
# dz = d["z"].tolist()
return d
def get_value_3(gamma, decoy, decoyQ, native, cutoff=0):
d = get_relative_k_data(gamma, decoy, decoyQ, native)
# dz = d["z"].tolist()
value = ( ((d["z"] >= cutoff)) ).sum()/len(d)
value_sum = ((d["z"]) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
return value, value_sum
def get_value_array_3(gamma_list, **kwargs):
# gamma_list = np.linspace(1,15, num=100)
value_list = []
value_sum_list = []
for gamma in gamma_list:
value, value_sum = get_value_3(gamma, **kwargs)
# value, value_sum = get_value(gamma, **kwargs)
# print(gamma, value)
value_list.append(value)
value_sum_list.append(value_sum)
return np.array(value_list), np.array(value_sum_list)
In [104]:
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_with_burial_and_membrane"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/phis"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/back_phis/"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
gamma_list = np.linspace(0,3, num=50)
cutoff = 0
all_value_list = []
all_value_sum_list = []
cc = 0
for i, name in enumerate(names):
cc += 1
if i % 100 == 20:
print(i)
break
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
one_value_list, one_value_sum_list = get_value_array_3(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
all_value_list.append(one_value_list)
# all_value_sum_list.append(one_value_sum_list)
In [109]:
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that E_decoy > E_native")
plt.ylim((0,1))
plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
In [126]:
def get_relative_k_data_alpha(gamma, decoy, decoyQ, native):
# gamma = 1
limit =30
energy = [a[0]*(1-gamma)+(a[1]+a[2])*gamma for a in decoy]
# z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
z_m = np.arange(-limit, limit, 1)
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
# d = d.sort_values("z_m").reset_index()
native_energy =native[0]*(1-gamma) + (native[2]+native[1])*gamma
d["native"] = native_energy
d["z"] = native_energy/d["energy"]
d["z"] = (d["energy"] - native_energy)/d["energy"].std()
# d = d.query("z_m != 0").reset_index(drop=True)
# d = d.query("abs(z_m) >= 5").reset_index(drop=True)
# dz = d["z"].tolist()
return d
# relative k =1
name = names[1]
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
d = get_relative_k_data_alpha(1, decoy=decoy, decoyQ=decoyQ, native=native)
d["abs_z_m"] = abs(d["z_m"])
d.plot.scatter("abs_z_m", "energy")
Out[126]:
In [128]:
def get_relative_k_data_alpha(gamma, decoy, decoyQ, native):
# gamma = 1
limit =30
energy = [a[0]*(1-gamma)+(a[1]+a[2])*gamma for a in decoy]
# z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
z_m = np.arange(-limit, limit, 1)
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
# d = d.sort_values("z_m").reset_index()
native_energy =native[0]*(1-gamma) + (native[2]+native[1])*gamma
d["native"] = native_energy
d["z"] = native_energy/d["energy"]
d["z"] = (d["energy"] - native_energy)/d["energy"].std()
# d = d.query("z_m != 0").reset_index(drop=True)
# d = d.query("abs(z_m) >= 5").reset_index(drop=True)
# dz = d["z"].tolist()
return d
# relative k =1
name = names[1]
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
d = get_relative_k_data_alpha(1, decoy=decoy, decoyQ=decoyQ, native=native)
d["abs_z_m"] = abs(d["z_m"])
d.plot.scatter("abs_z_m", "energy")
Out[128]:
In [122]:
# relative k = 2
name = names[1]
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
d = get_relative_k_data(2, decoy=decoy, decoyQ=decoyQ, native=native)
d["abs_z_m"] = abs(d["z_m"])
d.plot.scatter("abs_z_m", "energy")
Out[122]:
In [124]:
# relative k = 2
relative_k = 100
name = names[1]
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
d = get_relative_k_data(relative_k, decoy=decoy, decoyQ=decoyQ, native=native)
d["abs_z_m"] = abs(d["z_m"])
d.plot.scatter("abs_z_m", "energy")
Out[124]:
In [117]:
d["abs_z_m"] = abs(d["z_m"])
d.plot.scatter("abs_z_m", "energy")
Out[117]:
In [115]:
d.plot.scatter("abs_z_m", "energy")
Out[115]:
In [ ]:
z_m_low = -15
z_m_high = 15
inside_or_not_table = []
for res in res_list:
z_loc = res["CA"].get_coord()[-1]
if z_loc > z_m_low and z_loc < z_m_high:
inside_or_not_table.append(1)
else:
inside_or_not_table.append(0)
rotation_axis=Vector(1,0,0)
degree = 0
radian=math.radians(degree)
translation=(0,0,10)
# Iterate through all atoms and rotate by 90 degress
rotation_matrix = rotaxis2m(radian, rotation_axis)
translation_matrix = np.array(translation, 'f')
for atom in structure.get_atoms():
print(atom.get_coord())
atom.transform(rotation_matrix, translation_matrix)
print(atom.get_coord())
count = 0
# res_list = get_res_list(structure)
for i, res in enumerate(res_list):
z_loc = res["CA"].get_coord()[-1]
if z_loc > z_m_low and z_loc < z_m_high:
is_inside = 1
else:
is_inside = 0
if is_inside == inside_or_not_table[i]:
count += 1
print(count, count/len(inside_or_not_table))
In [111]:
d.plot("z_m", "energy")
Out[111]:
In [118]:
name = names[1]
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
d = get_relative_k_data(1, decoy=decoy, decoyQ=decoyQ, native=native)
d
Out[118]:
In [98]:
Out[98]:
In [58]:
name
Out[58]:
In [ ]:
In [ ]:
def get_relative_k_data(gamma, decoy, decoyQ, native, cutoff=0.95):
# gamma = 1
limit =30
energy = [a[0]+a[1]*gamma for a in decoy]
# z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
z_m = list(np.arange(5, limit, 1)) + list(np.arange(-limit, -5, 1))
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
d = d.sort_values("z_m").reset_index()
native_energy =native[0] + native[1]*gamma
d["native"] = native_energy
d["z"] = native_energy/d["energy"]
# dz = d["z"].tolist()
return d
def get_value(gamma, decoy, decoyQ, native, cutoff=0.95):
# gamma = 1
limit =30
energy = [a[0]+a[1]*gamma for a in decoy]
# z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
z_m = list(np.arange(5, limit, 1)) + list(np.arange(-limit, -5, 1))
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
d = d.sort_values("z_m").reset_index()
native_energy =native[0] + native[1]*gamma
d["z"] = native_energy/d["energy"]
d["z_real"] = (native_energy-d["energy"])/d["energy"].std()
# dz = d["z"].tolist()
value = ( ((d["z"] > cutoff)) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
value_sum = ((d["z"]) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
return value, value_sum
def get_value_2(gamma, decoy, decoyQ, native, cutoff=0):
# gamma = 1
limit =30
energy = [a[0]+a[1]*gamma for a in decoy]
# z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
z_m = list(np.arange(5, limit, 1)) + list(np.arange(-limit, -5, 1))
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
d = d.sort_values("z_m").reset_index()
native_energy =native[0] + native[1]*gamma
# d["z"] = native_energy/d["energy"]
d["z"] = (native_energy-d["energy"])/d["energy"].std()
# dz = d["z"].tolist()
value = ( ((d["z"] > cutoff)) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
value_sum = ((d["z"]) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
return value, value_sum
def get_value_3(gamma, decoy, decoyQ, native, cutoff=0):
# gamma = 1
limit =30
energy = [a[0]+a[1]*gamma for a in decoy]
# z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
z_m = list(np.arange(5, limit, 1)) + list(np.arange(-limit, -5, 1))
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
d = d.sort_values("z_m").reset_index()
native_energy =native[0] + native[1]*gamma
# d["z"] = native_energy/d["energy"]
d["z"] = (native_energy-d["energy"])/d["energy"].std()
# dz = d["z"].tolist()
value = ( ((d["z"] > cutoff)) ).sum()/len(d)
value_sum = ((d["z"]) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
return value, value_sum
def get_value_array(gamma_list, **kwargs):
# gamma_list = np.linspace(1,15, num=100)
value_list = []
value_sum_list = []
for gamma in gamma_list:
value, value_sum = get_value_2(gamma, **kwargs)
# value, value_sum = get_value(gamma, **kwargs)
# print(gamma, value)
value_list.append(value)
value_sum_list.append(value_sum)
return np.array(value_list), np.array(value_sum_list)
def get_value_array_3(gamma_list, **kwargs):
# gamma_list = np.linspace(1,15, num=100)
value_list = []
value_sum_list = []
for gamma in gamma_list:
value, value_sum = get_value_3(gamma, **kwargs)
# value, value_sum = get_value(gamma, **kwargs)
# print(gamma, value)
value_list.append(value)
value_sum_list.append(value_sum)
return np.array(value_list), np.array(value_sum_list)
In [194]:
pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/phis"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/back_phis/"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
gamma_list = np.linspace(1,10, num=50)
cutoff = 1.0
all_value_list = []
all_value_sum_list = []
for i, name in enumerate(names):
cc += 1
if i % 100 == 20:
print(i)
break
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
one_value_list, one_value_sum_list = get_value_array_3(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
all_value_list.append(one_value_list)
# all_value_sum_list.append(one_value_sum_list)
In [196]:
a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
Out[196]:
In [200]:
# a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3, color="red")
plt.plot(gamma_list, a_2)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
Out[200]:
In [197]:
pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/phis"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/back_phis/"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
gamma_list = np.linspace(1,10, num=50)
cutoff = 1.0
all_value_list = []
all_value_sum_list = []
for i, name in enumerate(names):
cc += 1
if i % 100 == 20:
print(i)
break
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
one_value_list, one_value_sum_list = get_value_array(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
all_value_list.append(one_value_list)
# all_value_sum_list.append(one_value_sum_list)
In [198]:
a_2 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
Out[198]:
In [ ]:
In [168]:
plt.plot(gamma_list, pd.DataFrame(all_value_list).sum(axis=0)/20)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
Out[168]:
In [166]:
plt.plot(gamma_list, pd.DataFrame(all_value_list).sum(axis=0)/20)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
Out[166]:
In [126]:
a.shape
Out[126]:
In [ ]:
np.savetxt()
In [186]:
a = np.loadtxt("/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/optimization/membrane_gamma_original.dat")
b = np.loadtxt("/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/optimization/membrane_gamma_rescaled.dat")
In [171]:
a = np.loadtxt("/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/optimization/membrane_gamma_original.dat")
fileName = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/optimization/membrane_gamma_original_x200.dat"
# a = -a
a *= 200
with open(fileName, "w") as f:
for i in range(210):
# f.write(f"{a[i][0]} {a[i][1]}\n")
f.write(f"{a[i][0]:<.5f} {a[i][1]:10.5f}\n")
f.write("\n")
for i in range(210):
# f.write(f"{a[i][0]} {a[i][1]}\n")
f.write(f"{a[i+210][0]:<.5f} {a[i+210][1]:10.5f}\n")
In [122]:
plt.plot(gamma_list, pd.DataFrame(all_value_list).sum(axis=0)/99)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
Out[122]:
In [107]:
pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
gamma_list = np.linspace(1,20, num=100)
cutoff = 1.0
all_value_list = []
all_value_sum_list = []
for i, name in enumerate(names):
cc += 1
if i % 100 == 0:
print(i)
# break
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
one_value_list, one_value_sum_list = get_value_array(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
all_value_list.append(one_value_list)
# all_value_sum_list.append(one_value_sum_list)
In [112]:
a = pd.DataFrame(all_value_list)
In [115]:
a.to_csv("/Users/weilu/Research/data/relative_k_oct21.csv")
In [110]:
plt.plot(gamma_list, pd.DataFrame(all_value_list).sum(axis=0)/len(names))
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
Out[110]:
In [106]:
plt.plot(gamma_list, pd.DataFrame(all_value_list).sum(axis=0)/99)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)
In [71]:
pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
name = names[0]
cutoff = 1.0
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
value_list, value_sum_list = get_value_array(decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
cc = 0
for i, name in enumerate(names[1:]):
cc += 1
# if i % 10 == 0:
# print(i)
# break
if i == 10:
print(i)
break
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
one_value_list, one_value_sum_list = get_value_array(decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
value_list += one_value_list
value_sum_list += one_value_sum_list
# value_list /= len(names)
# one_value_sum_list /= len(names)
In [73]:
gamma_list = np.linspace(1,15, num=100)
plt.plot(gamma_list, value_list/cc)
Out[73]:
In [180]:
plt.plot(gamma_list, value_list)
# plt.plot(gamma_list, value_sum_list)
Out[180]:
In [175]:
pre = "/Users/weilu/Research/server/jun_2019/relative_k/phis"
name = names[0]
cutoff = 0.9
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
value_list, value_sum_list = get_value_array(decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
for i, name in enumerate(names[1:]):
if i % 100 == 0:
print(i)
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
one_value_list, one_value_sum_list = get_value_array(decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
value_list += one_value_list
value_sum_list += one_value_sum_list
value_list /= len(names)
one_value_sum_list /= len(names)
In [178]:
plt.plot(gamma_list, value_list)
# plt.plot(gamma_list, value_sum_list)
Out[178]:
In [170]:
plt.plot(gamma_list, value_list)
# plt.plot(gamma_list, value_sum_list)
Out[170]:
In [ ]:
In [ ]:
pre = "/Users/weilu/Research/server/jun_2019/relative_k/phis"
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
In [163]:
pre = "/Users/weilu/Research/server/jun_2019/relative_k/phis"
name = names[0]
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
value_list, value_sum_list = get_value_array(decoy=decoy, decoyQ=decoyQ, native=native, cutoff=0.9)
for name in names[1:]:
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
one_value_list, one_value_sum_list = get_value_array(decoy=decoy, decoyQ=decoyQ, native=native, cutoff=0.9)
value_list += one_value_list
value_sum_list += one_value_sum_list
value_list /= len(names)
one_value_sum_list /= len(names)
In [166]:
plt.plot(gamma_list, value_sum_list)
Out[166]:
In [165]:
plt.plot(gamma_list, value_list)
# plt.plot(gamma_list, value_sum_list)
Out[165]:
In [17]:
In [18]:
In [150]:
value_list, value_sum_list = get_value_array(gamma=gamma, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=0.9)
In [132]:
gamma_list = np.linspace(1,4, num=200)
value_list = []
value_sum_list = []
for gamma in gamma_list:
value, value_sum = get_value(gamma, decoy, decoyQ, native, cutoff=0.9)
# print(gamma, value)
value_list.append(value)
value_sum_list.append(value_sum)
In [133]:
plt.plot(gamma_list, value_list)
plt.plot(gamma_list, value_sum_list)
Out[133]:
In [128]:
plt.plot(gamma_list, value_list)
plt.plot(gamma_list, value_sum_list)
Out[128]:
In [82]:
plt.plot(gamma_list, value_list)
plt.plot(gamma_list, value_sum_list)
Out[82]:
In [83]:
get_value(2, decoy, decoyQ, native)
Out[83]:
In [117]:
native_energy
Out[117]:
In [126]:
native_energy/ (2.5 + 128*2 )
Out[126]:
In [144]:
gamma = 1.8
limit =60
cutoff = 1.0
energy = [a[0]+a[1]*gamma for a in decoy]
z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
d = d.sort_values("z_m")
d = d.reset_index()
native_energy =native[0] + native[1]*gamma
d["z"] = native_energy/d["energy"]
value = ( ((d["z"] > cutoff)) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
print(value)
d.plot("z_m", "z")
Out[144]:
In [111]:
gamma = 2
limit =60
cutoff = 1.0
energy = [a[0]+a[1]*gamma for a in decoy]
z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
d = d.sort_values("z_m")
d = d.reset_index()
native_energy =native[0] + native[1]*gamma
d["z"] = native_energy/d["energy"]
value = ( ((d["z"] > cutoff)) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
print(value)
d.plot("z_m", "z")
Out[111]:
In [105]:
d
Out[105]:
In [84]:
gamma = 2
limit =60
cutoff = 1.0
energy = [a[0]+a[1]*gamma for a in decoy]
z_m = list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
d = d.sort_values("z_m")
native_energy =native[0] + native[1]*gamma
d["z"] = native_energy/d["energy"]
value = ( ((d["z"] > cutoff)) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
print(value)
d.plot("z_m", "z")
Out[84]:
In [47]:
plt.plot( ((d["z"].reset_index()["z"] > 0.95) ) )
Out[47]:
In [227]:
d
Out[227]:
In [192]:
decoys_all = np.loadtxt("/Users/weilu/Research/server/jun_2019/relative_k/phis/protein_list_phi_relative_k_well4.5_6.5_5.0_10_phi_decoy_all_summary.txt")
decoy_Q = np.loadtxt("/Users/weilu/Research/server/jun_2019/relative_k/phis/phi_relative_k_well_5j4i_decoysQ_shifted_4.5_6.5_5.0_10")
In [197]:
normalized = decoys_all.sum(axis=0) / (1-decoy_Q).sum()
In [198]:
normalized
Out[198]:
In [183]:
(1-decoy_Q).sum()
Out[183]:
In [ ]:
In [174]:
decoys_all.mean(axis=0)
Out[174]:
In [ ]:
pre = "/Users/weilu/Research/server/jun_2019/relative_k/phis"
decoy_high = np.loadtxt(f"{pre}/{name}_phis_high")
decoy_low = np.loadtxt(f"{pre}/{name}_phis_low")
In [220]:
gamma = 2
energy = [a[0]+a[1]*gamma for a in decoy_low]
z_m = list(z_m_high) + list(z_m_low)
d = pd.DataFrame([z_m, energy]).T
d.columns = ["z_m", "energy"]
d = d.sort_values("z_m")
native_energy = d.query("abs(z_m) == 15").iloc[0][1]
d["z"] = native_energy/d["energy"]
d.plot("z_m", "z")
Out[220]:
In [216]:
gamma = 10
energy = [a[0]+a[1]*gamma for a in decoy_low]
z_m = list(z_m_high) + list(z_m_low)
d = pd.DataFrame([z_m, energy]).T
d.columns = ["z_m", "energy"]
d = d.sort_values("z_m")
native_energy = d.query("abs(z_m) == 15").iloc[0][1]
d["z"] = native_energy/d["energy"]
d.plot("z_m", "z")
Out[216]:
In [212]:
gamma = 0.1
energy = [a[0]+a[1]*gamma for a in decoy_low]
z_m = list(z_m_high) + list(z_m_low)
d = pd.DataFrame([z_m, energy]).T
d.columns = ["z_m", "energy"]
d = d.sort_values("z_m")
native_energy = d.query("abs(z_m) == 15").iloc[0][1]
d["z"] = native_energy/d["energy"]
d.plot("z_m", "z")
Out[212]:
In [151]:
In [157]:
In [169]:
def interaction_well(r, r_min, r_max, kappa):
return 0.5 * (np.tanh(kappa * (r - r_min)) * np.tanh(kappa * (r_max - r))) + 0.5
# r = np.linspace(10, 20, num=200)
r = np.linspace(-60,60, num=200)
y = interaction_well(r, 12, 18, 1) + interaction_well(r, -18, -12, 1)
plt.plot(r, y)
Out[169]:
In [167]:
r = np.linspace(10, 20, num=200)
y = interaction_well(r, 12, 18, 1)
plt.plot(r, y)
Out[167]:
In [134]:
d.plot("z_m", "energy")
Out[134]:
In [122]:
len(energy)
Out[122]:
In [46]:
pre = "/Users/weilu/Research/server/jun_2019/relative_k/phis"
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_5j4i_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_5j4i_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_5j4i_decoysQ_shifted_4.5_6.5_5.0_10")
In [54]:
def compute_energy(decoy, native, decoyQ, gamma):
energy = [a[0]+a[1]*gamma for a in decoy]
return np.array(energy)
In [70]:
gamma = 10
native_energy = native[0] + native[1]*gamma
In [71]:
decoy_energy = compute_energy(decoy, native, decoyQ, gamma)
In [72]:
offset_all = np.arange(-50, 50, 1)
z = native_energy/decoy_energy
plt.plot(offset_all, z)
Out[72]:
In [45]:
decoyQ
Out[45]:
In [ ]:
In [3]:
data = pd.read_csv("/Users/weilu/Research/database/membrane_training_set/proteins-2019-05-01.csv")
data.pdbid = data.pdbid.apply(lambda x: x[2:-1])
In [4]:
data.head()
Out[4]:
In [4]:
dd = pd.read_csv("/Users/weilu/Research/database/membrane_training_set/chosen.csv", index_col=0)
pdb_list = dd.pdbid.tolist()
In [ ]:
In [5]:
filtered_list = []
for pdb in pdb_list:
location = f"/Users/weilu/Research/database/relative_k/cleaned_pdbs/{pdb}.pdb"
a = get_inside_or_not_table(location)
ratio = sum(a)/len(a)
if ratio < 0.2 or ratio > 0.8:
print("not good", pdb, ratio)
else:
filtered_list.append(pdb)
# print("good", pdb, ratio)
# print(pdb, ratio)
In [6]:
len(filtered_list)
Out[6]:
In [7]:
pdb_list = filtered_list
In [8]:
pdb_list
Out[8]:
In [23]:
with open("/Users/weilu/Research/server/may_2019/relative_k/optimization/protein_list", "w") as out:
for pdb in pdb_list:
# print(pdb)
out.write(pdb+"\n")
In [28]:
for pdb in pdb_list:
location = f"/Users/weilu/Research/server/may_2019/relative_k/database/dompdb/{pdb}.pdb"
toLocation = f"/Users/weilu/Research/server/may_2019/relative_k/database/S20_seq/{pdb}.seq"
seq,resseqs = getSeqFromPDB(location, considerGap=False)
with open(toLocation, "w") as out:
out.write(seq+'\n')
In [ ]:
0.01442
-0.31612
In [29]:
complete_proteins = "../database/cath-dataset-nonredundant-S20Clean.list"
In [33]:
a = "proteins_name_list/proteins_name_list_0.txt"
In [34]:
a.split('/')[-1].split('.')[0]
Out[34]:
In [ ]: