In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
# from small_script.myFunctions import *
sys.path.insert(0, "/Users/weilu/openmmawsem")
from helperFunctions.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2

In [2]:
plt.rcParams['figure.figsize'] = np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})

In [3]:
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.Polypeptide import PPBuilder
from Bio.PDB.Polypeptide import three_to_one
def get_inside_or_not_table(pdb_file):
    parser = PDBParser(PERMISSIVE=1,QUIET=True)
    try:
        structure = parser.get_structure('X', pdb_file)
    except:
        return [0]
    inside_or_not_table = []
    for res in structure.get_residues():
        if res.get_id()[0] != " ":
            continue# skip
        try:
            res["CA"].get_vector()
        except:
            print(pdb_file, res.get_id())
            return [0]
        inside_or_not_table.append(int(abs(res["CA"].get_vector()[-1]) < 15))
    return inside_or_not_table
def extractTransmembrane(toLocation, location):
    x = PDBParser().get_structure("x", location)
    class Transmembrane(Select):
        def accept_residue(self, residue):
            if abs(residue["CA"].get_vector()[-1]) < 15:
                return 1
            else:
                return 0

    io = PDBIO()
    io.set_structure(x)
    io.save(toLocation, Transmembrane())

def getSeqFromPDB(location, considerGap=True):
    x = PDBParser().get_structure("x", location)
    seq = ""
    resseqs = []
    preResId = 0
    for res in x.get_residues():
        resId = res.get_id()[1]
        if considerGap and resId != preResId + 1:
            seq += " "
            resseqs.append(-1)
        seq += three_to_one(res.get_resname())
        resseqs.append(res.get_id()[1])
        preResId = resId
    return seq,resseqs

In [4]:
info = pd.read_csv("/Users/weilu/Research/database/membrane_contact_dtabase/for_iter0_training_complete_jun06.csv", index_col=0)

In [5]:
info.shape


Out[5]:
(1561, 3)

In [6]:
a = info.query("InMembraneRatio > 0.2 and InMembraneRatio < 0.8 and Length < 2000").reset_index()
a.to_csv("/Users/weilu/Research/database/relative_k/chosen_jun19.csv")

In [7]:
a.hist("Length", bins=100)


Out[7]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a255e42e8>]],
      dtype=object)

In [8]:
a.shape


Out[8]:
(1116, 4)

In [9]:
a.columns


Out[9]:
Index(['index', 'Protein', 'Length', 'InMembraneRatio'], dtype='object')

I will compute the z score for those 1116 structures.


In [4]:
chosen = pd.read_csv("/Users/weilu/Research/database/relative_k/chosen_jun19.csv", index_col=0)
parser = PDBParser(PERMISSIVE=1,QUIET=True)

In [5]:
def do(cmd, get=False, show=True):
    if get:
        out = subprocess.Popen(cmd,stdout=subprocess.PIPE,shell=True).communicate()[0].decode()
        if show:
            print(out, end="")
        return out
    else:
        return subprocess.Popen(cmd, shell=True).wait()

In [7]:
fromPre = "/Users/weilu/Research/database/hybrid_contact_database/"
toPre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/"
pdb_list = chosen.Protein.tolist()

pre = toPre

do(f"mkdir -p {pre}")
do(f"mkdir -p {pre}/optimization")
do(f"mkdir -p {pre}/phis")
do(f"mkdir -p {pre}/database/dompdb")
do(f"mkdir -p {pre}/database/S20_seq")

for pdb in pdb_list:
    fromLocation = f'{fromPre}/cleaned/{pdb}.pdb'
    toLocation = f"{toPre}/database/dompdb/"
    cmd = f"cp {fromLocation} {toLocation}"
    do(cmd)
    # print(cmd)

In [9]:
for pdb in pdb_list:
    fromLocation = f'{fromPre}/cleaned/{pdb}.pdb'
#     toLocation = f"{toPre}/dompdb/"
#     os.system(f"cp {fromLocation} {toLocation}")
    try:
        structure = parser.get_structure(pdb, fromLocation)
    except:
        print(fromLocation)
        print("cannot get ", pdb)
        continue
    seq = ""
    for res in structure.get_residues():
        resName = three_to_one(res.resname)
        seq += resName
    with open(f"{toPre}/database/S20_seq/{pdb}.seq", "w") as out:
        out.write(seq+"\n")

In [10]:
np.random.shuffle(pdb_list)
with open(f"{toPre}/optimization/protein_list", "w") as out:
    for pdb in pdb_list:
        out.write(pdb+"\n")

In [88]:
with open("/Users/weilu/Research/server/jun_2019/relative_k/optimization/small_protein_list") as f:
    names = f.readlines()

In [12]:


In [14]:


In [199]:
l = "/Users/weilu/Research/server/nov_2019/relative_k_optimization_with_membrane/database/dompdb/3b60"
input_pdb_filename = l
structure = parse_pdb(input_pdb_filename)
res_list = get_res_list(structure)

In [137]:
def get_relative_k_data_rotation(gamma, decoy, decoyQ, native, include_mem_burial=0):
    # gamma = 1
    rotation_all = np.arange(0, 180, 30)
    z_shift_all = np.arange(-20, 20, 4)
    rotation_axis=Vector(1,0,0)
    z_m = []
    rad_list = []
    for z_shift in z_shift_all:
        for degree in rotation_all:
            z_m.append(z_shift)
            rad_list.append(degree)
    if include_mem_burial == 1:
        energy = [a[0]+(a[1]+a[2])*gamma for a in decoy]
        native_energy =native[0] + (native[2]+native[1])*gamma
    elif include_mem_burial == 0:
        energy = [a[0]+(a[1])*gamma for a in decoy]
        native_energy =native[0] + (native[1])*gamma
    elif include_mem_burial == 2:
        # only membrane 
        energy = [(a[2])*gamma for a in decoy]
        native_energy = (native[1])*gamma
    elif include_mem_burial == 3:
        energy = [a[0]+(a[1]+a[2]*0.5)*gamma for a in decoy]
        native_energy =native[0] + (native[2]*0.5+native[1])*gamma
    a0 = [a[0] for a in decoy]
    a1 = [a[1] for a in decoy]
    a2 = [a[2] for a in decoy]
    # z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
#     z_m = np.arange(-limit, limit, 1)
    d = pd.DataFrame([z_m, rad_list, energy, a0, a1, a2, list(decoyQ)]).T
    d.columns = ["z_m",  "rad", "energy", "contact_wat", "contact_mem", "membrane", "Q"]
    # d = d.sort_values("z_m").reset_index()

    d["native"] = native_energy
    d["z"] = native_energy/d["energy"]
    d["z"] = (d["energy"] - native_energy)/d["energy"].std()
    # d = d.query("z_m != 0").reset_index(drop=True)
    # d = d.query("abs(z_m) >= 5").reset_index(drop=True)
    # dz = d["z"].tolist()
    return d

def get_value_array_5(gamma_list, decoy, decoyQ, native, cutoff=0, **kwargs):
    # gamma_list = np.linspace(1,15, num=100)
    value_list = []
    value_sum_list = []
    for gamma in gamma_list:
        d = get_relative_k_data_rotation(gamma, decoy, decoyQ, native, **kwargs)
        corr, p = scipy.stats.pearsonr(d["Q"], d["z"])
        # value, value_sum = get_value(gamma, **kwargs)
        # print(gamma, value)
        value_list.append(corr)
        value_sum_list.append(p)
    return np.array(value_list), np.array(value_sum_list)

In [7]:
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_with_burial_and_membrane"
pre = "/Users/weilu/Research/server/nov_2019/relative_k_optimization_with_membrane/"

phi_pre = "phi_relative_k_with_membrane_well"
with open(f"{pre}/optimization/protein_list") as f:
    names = f.readlines()
names = [i.strip() for i in names]

In [20]:
name


Out[20]:
'4pgr'

In [32]:
np.arange(0, 360, 60)


Out[32]:
array([  0,  60, 120, 180, 240, 300])

In [33]:
np.arange(-20, 20, 4)


Out[33]:
array([-20, -16, -12,  -8,  -4,   0,   4,   8,  12,  16])

In [30]:
np.identity(3)


Out[30]:
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [143]:
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
name = names[1]
cutoff = 1.0
gamma_list = np.linspace(1,15, num=100)
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_rotation")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_rotation")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_rotation")
gamma = 2
d = get_relative_k_data_rotation(gamma, decoy, decoyQ, native, include_mem_burial=1)
# value_list, value_sum_list = get_value_array_3(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)

In [144]:
np.corrcoef(d[["Q", "z"]].values.T)


Out[144]:
array([[ 1.        , -0.98442896],
       [-0.98442896,  1.        ]])

In [145]:
import scipy
corr, p = scipy.stats.pearsonr(d["Q"], d["z"])

In [146]:
corr


Out[146]:
-0.9844289648659434

In [147]:
d.plot.scatter("Q", "z", c="red")


Out[147]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a27894278>

In [142]:
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_with_burial_and_membrane"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/phis"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/back_phis/"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
gamma_list = np.linspace(0,10, num=20)
cutoff = 0
all_value_list = []
all_value_sum_list = []
cc = 0
n = 80
for i, name in enumerate(names):
    cc += 1
    if i % 100 == n:
        print(i)
        break
    decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_rotation")
    native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_rotation")
    decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_rotation")
    one_value_list, one_value_sum_list = get_value_array_5(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff, include_mem_burial=1)
    all_value_list.append(one_value_list)
    # all_value_sum_list.append(one_value_sum_list)

plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})


a_3 = pd.DataFrame(all_value_list).sum(axis=0)/n
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average pearson correlation")


80
Out[142]:
Text(0, 0.5, 'average pearson correlation')

In [ ]:
# 4p6v is big.

In [139]:
# 1 to 0.5, mode 3
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})


a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that E_decoy > E_native")
# plt.ylim((-1, 1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)


Out[139]:
Text(0, 0.5, 'average fraction of decoys that E_decoy > E_native')

In [131]:
# membrane only
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})


a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that E_decoy > E_native")
# plt.ylim((-1, 1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)


Out[131]:
Text(0, 0.5, 'average fraction of decoys that E_decoy > E_native')

In [129]:
# together mode 1 
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})


a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that E_decoy > E_native")
# plt.ylim((-1, 1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)


Out[129]:
Text(0, 0.5, 'average fraction of decoys that E_decoy > E_native')

In [119]:
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_with_burial_and_membrane"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/phis"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/back_phis/"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
gamma_list = np.linspace(0,10, num=20)
cutoff = 0
all_value_list = []
all_value_sum_list = []
cc = 0
for i, name in enumerate(names):
    cc += 1
    if i % 100 == 40:
        print(i)
        break
    decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_rotation")
    native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_rotation")
    decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_rotation")
    one_value_list, one_value_sum_list = get_value_array_5(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
    all_value_list.append(one_value_list)
    # all_value_sum_list.append(one_value_sum_list)


40

In [120]:
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})


a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that E_decoy > E_native")
# plt.ylim((-1, 1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)


Out[120]:
Text(0, 0.5, 'average fraction of decoys that E_decoy > E_native')

In [116]:
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})


a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that E_decoy > E_native")
# plt.ylim((-1, 1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)


Out[116]:
Text(0, 0.5, 'average fraction of decoys that E_decoy > E_native')

In [110]:
def get_value_4(gamma, decoy, decoyQ, native, cutoff=0, **kwargs):
    d = get_relative_k_data_rotation(gamma, decoy, decoyQ, native, **kwargs)
    
    # dz = d["z"].tolist()
    value = ( ((d["z"] >= cutoff)) ).sum()/len(d)
    value_sum =  ((d["z"]) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
    return value, value_sum

def get_value_array_4(gamma_list, **kwargs):
    # gamma_list = np.linspace(1,15, num=100)
    value_list = []
    value_sum_list = []
    for gamma in gamma_list:
        value, value_sum = get_value_4(gamma, **kwargs)
        # value, value_sum = get_value(gamma, **kwargs)
        # print(gamma, value)
        value_list.append(value)
        value_sum_list.append(value_sum)
    return np.array(value_list), np.array(value_sum_list)



# def get_relative_k_data_rotation(gamma, decoy, decoyQ, native):
#     # gamma = 1
#     rotation_all = [60] * 6
#     z_shift_all = [-20] + [4]*10
#     rotation_axis=Vector(1,0,0)
#     z = 0
#     rad = 0
#     z_m = []
#     rad_list = []
#     for z_shift in z_shift_all:
#         z += z_shift
#         for degree in rotation_all:
#             rad += degree
#             z_m.append(z)
#             rad_list.append(rad)
#     energy = [a[0]+(a[1]+a[2])*gamma for a in decoy]
#     a0 = [a[0] for a in decoy]
#     a1 = [a[1] for a in decoy]
#     a2 = [a[2] for a in decoy]
#     # z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
# #     z_m = np.arange(-limit, limit, 1)
#     d = pd.DataFrame([z_m, rad_list, energy, a0, a1, a2, list(decoyQ)]).T
#     d.columns = ["z_m",  "rad", "energy", "contact_wat", "contact_mem", "membrane", "Q"]
#     # d = d.sort_values("z_m").reset_index()
#     native_energy =native[0] + (native[2]+native[1])*gamma
#     d["native"] = native_energy
#     d["z"] = native_energy/d["energy"]
#     d["z"] = (d["energy"] - native_energy)/d["energy"].std()
#     # d = d.query("z_m != 0").reset_index(drop=True)
#     # d = d.query("abs(z_m) >= 5").reset_index(drop=True)
#     # dz = d["z"].tolist()
#     return d

In [ ]:
def get_relative_k_data_rotation(gamma, decoy, decoyQ, native):
    # gamma = 1
    rotation_all = [60] * 6
    z_shift_all = [-20] + [4]*10
    rotation_axis=Vector(1,0,0)
    z = 0
    rad = 0
    z_m = []
    for z_shift in z_shift_all:
        z += z_shift
        for degree in rotation_all:
            rad += degree
            z_m.append(f"{z}_{rad}")
    energy = [a[0]+(a[1]+a[2])*gamma for a in decoy]
    # z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
#     z_m = np.arange(-limit, limit, 1)
    d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
    d.columns = ["z_m", "energy", "Q"]
    # d = d.sort_values("z_m").reset_index()
    native_energy =native[0] + (native[2]+native[1])*gamma
    d["native"] = native_energy
    d["z"] = native_energy/d["energy"]
    d["z"] = (d["energy"] - native_energy)/d["energy"].std()
    # d = d.query("z_m != 0").reset_index(drop=True)
    # d = d.query("abs(z_m) >= 5").reset_index(drop=True)
    # dz = d["z"].tolist()
    return d

In [10]:
def get_relative_k_data(gamma, decoy, decoyQ, native):
    # gamma = 1
    limit =30
    energy = [a[0]+(a[1]+a[2])*gamma for a in decoy]
    # z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
    z_m = np.arange(-limit, limit, 1)
    d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
    d.columns = ["z_m", "energy", "Q"]
    # d = d.sort_values("z_m").reset_index()
    native_energy =native[0] + (native[2]+native[1])*gamma
    d["native"] = native_energy
    d["z"] = native_energy/d["energy"]
    d["z"] = (d["energy"] - native_energy)/d["energy"].std()
    # d = d.query("z_m != 0").reset_index(drop=True)
    # d = d.query("abs(z_m) >= 5").reset_index(drop=True)
    # dz = d["z"].tolist()
    return d
def get_value_3(gamma, decoy, decoyQ, native, cutoff=0):
    d = get_relative_k_data(gamma, decoy, decoyQ, native)
    
    # dz = d["z"].tolist()
    value = ( ((d["z"] >= cutoff)) ).sum()/len(d)
    value_sum =  ((d["z"]) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
    return value, value_sum

def get_value_array_3(gamma_list, **kwargs):
    # gamma_list = np.linspace(1,15, num=100)
    value_list = []
    value_sum_list = []
    for gamma in gamma_list:
        value, value_sum = get_value_3(gamma, **kwargs)
        # value, value_sum = get_value(gamma, **kwargs)
        # print(gamma, value)
        value_list.append(value)
        value_sum_list.append(value_sum)
    return np.array(value_list), np.array(value_sum_list)

In [104]:
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_with_burial_and_membrane"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/phis"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/back_phis/"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
gamma_list = np.linspace(0,3, num=50)
cutoff = 0
all_value_list = []
all_value_sum_list = []
cc = 0
for i, name in enumerate(names):
    cc += 1
    if i % 100 == 20:
        print(i)
        break
    decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_shifted_4.5_6.5_5.0_10")
    native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_4.5_6.5_5.0_10")
    decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
    one_value_list, one_value_sum_list = get_value_array_3(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
    all_value_list.append(one_value_list)
    # all_value_sum_list.append(one_value_sum_list)


20

In [109]:
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10])    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})


a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that E_decoy > E_native")
plt.ylim((0,1))
plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)



In [126]:
def get_relative_k_data_alpha(gamma, decoy, decoyQ, native):
    # gamma = 1
    limit =30
    energy = [a[0]*(1-gamma)+(a[1]+a[2])*gamma for a in decoy]
    # z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
    z_m = np.arange(-limit, limit, 1)
    d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
    d.columns = ["z_m", "energy", "Q"]
    # d = d.sort_values("z_m").reset_index()
    native_energy =native[0]*(1-gamma) + (native[2]+native[1])*gamma
    d["native"] = native_energy
    d["z"] = native_energy/d["energy"]
    d["z"] = (d["energy"] - native_energy)/d["energy"].std()
    # d = d.query("z_m != 0").reset_index(drop=True)
    # d = d.query("abs(z_m) >= 5").reset_index(drop=True)
    # dz = d["z"].tolist()
    return d

# relative k =1 
name = names[1]
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
d = get_relative_k_data_alpha(1, decoy=decoy, decoyQ=decoyQ, native=native)
d["abs_z_m"] = abs(d["z_m"])
d.plot.scatter("abs_z_m", "energy")


Out[126]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2a0db390>

In [128]:
def get_relative_k_data_alpha(gamma, decoy, decoyQ, native):
    # gamma = 1
    limit =30
    energy = [a[0]*(1-gamma)+(a[1]+a[2])*gamma for a in decoy]
    # z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
    z_m = np.arange(-limit, limit, 1)
    d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
    d.columns = ["z_m", "energy", "Q"]
    # d = d.sort_values("z_m").reset_index()
    native_energy =native[0]*(1-gamma) + (native[2]+native[1])*gamma
    d["native"] = native_energy
    d["z"] = native_energy/d["energy"]
    d["z"] = (d["energy"] - native_energy)/d["energy"].std()
    # d = d.query("z_m != 0").reset_index(drop=True)
    # d = d.query("abs(z_m) >= 5").reset_index(drop=True)
    # dz = d["z"].tolist()
    return d

# relative k =1 
name = names[1]
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
d = get_relative_k_data_alpha(1, decoy=decoy, decoyQ=decoyQ, native=native)
d["abs_z_m"] = abs(d["z_m"])
d.plot.scatter("abs_z_m", "energy")


Out[128]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a29d24748>

In [122]:
# relative k = 2
name = names[1]
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
d = get_relative_k_data(2, decoy=decoy, decoyQ=decoyQ, native=native)
d["abs_z_m"] = abs(d["z_m"])
d.plot.scatter("abs_z_m", "energy")


Out[122]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a25e6c4e0>

In [124]:
# relative k = 2
relative_k = 100
name = names[1]
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
d = get_relative_k_data(relative_k, decoy=decoy, decoyQ=decoyQ, native=native)
d["abs_z_m"] = abs(d["z_m"])
d.plot.scatter("abs_z_m", "energy")


Out[124]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a25e4f4e0>

In [117]:
d["abs_z_m"] = abs(d["z_m"])
d.plot.scatter("abs_z_m", "energy")


Out[117]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2611f4a8>

In [115]:
d.plot.scatter("abs_z_m", "energy")


Out[115]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a290dcd30>

In [ ]:
z_m_low = -15
z_m_high = 15
inside_or_not_table = []
for res in res_list:
    z_loc = res["CA"].get_coord()[-1]
    if z_loc > z_m_low and z_loc < z_m_high:
        inside_or_not_table.append(1)
    else:
        inside_or_not_table.append(0)
        
rotation_axis=Vector(1,0,0)
degree = 0
radian=math.radians(degree)
translation=(0,0,10)
# Iterate through all atoms and rotate by 90 degress
rotation_matrix = rotaxis2m(radian, rotation_axis)

translation_matrix = np.array(translation, 'f')
for atom in structure.get_atoms():
    print(atom.get_coord())
    atom.transform(rotation_matrix, translation_matrix)
    print(atom.get_coord())

count = 0
# res_list = get_res_list(structure)
for i, res in enumerate(res_list):
    z_loc = res["CA"].get_coord()[-1]
    if z_loc > z_m_low and z_loc < z_m_high:
        is_inside = 1
    else:
        is_inside = 0
    if is_inside == inside_or_not_table[i]:
        count += 1
print(count, count/len(inside_or_not_table))

In [111]:
d.plot("z_m", "energy")


Out[111]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a27d64b38>

In [118]:
name = names[1]
decoy = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phis/{phi_pre}_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
d = get_relative_k_data(1, decoy=decoy, decoyQ=decoyQ, native=native)
d


Out[118]:
z_m energy Q native z
0 -30.0 -943.8827 0.0000 -1075.4507 3.150069
1 -29.0 -947.1971 0.0001 -1075.4507 3.070714
2 -28.0 -952.8465 0.0001 -1075.4507 2.935453
3 -27.0 -964.8886 0.0002 -1075.4507 2.647135
4 -26.0 -962.4638 0.0002 -1075.4507 2.705191
5 -25.0 -966.6158 0.0003 -1075.4507 2.605781
6 -24.0 -990.3614 0.0005 -1075.4507 2.037252
7 -23.0 -996.6935 0.0008 -1075.4507 1.885645
8 -22.0 -998.7021 0.0011 -1075.4507 1.837555
9 -21.0 -1020.5514 0.0017 -1075.4507 1.314427
10 -20.0 -1011.8476 0.0025 -1075.4507 1.522818
11 -19.0 -1011.4102 0.0038 -1075.4507 1.533291
12 -18.0 -1033.9151 0.0056 -1075.4507 0.994467
13 -17.0 -1034.8382 0.0083 -1075.4507 0.972365
14 -16.0 -1030.4669 0.0123 -1075.4507 1.077025
15 -15.0 -1032.0314 0.0183 -1075.4507 1.039567
16 -14.0 -1030.3096 0.0271 -1075.4507 1.080791
17 -13.0 -1035.3844 0.0399 -1075.4507 0.959288
18 -12.0 -1040.4375 0.0583 -1075.4507 0.838304
19 -11.0 -1051.0993 0.0846 -1075.4507 0.583034
20 -10.0 -1047.1895 0.1211 -1075.4507 0.676644
21 -9.0 -1050.4118 0.1704 -1075.4507 0.599494
22 -8.0 -1047.5364 0.2344 -1075.4507 0.668339
23 -7.0 -1034.2493 0.3131 -1075.4507 0.986465
24 -6.0 -1042.6363 0.4037 -1075.4507 0.785659
25 -5.0 -1070.8008 0.5000 -1075.4507 0.111330
26 5.0 -1111.6340 0.5000 -1075.4507 -0.866319
27 6.0 -1114.4054 0.4037 -1075.4507 -0.932674
28 7.0 -1103.8513 0.3131 -1075.4507 -0.679982
29 8.0 -1104.6606 0.2344 -1075.4507 -0.699358
30 9.0 -1095.6808 0.1704 -1075.4507 -0.484359
31 10.0 -1094.3646 0.1211 -1075.4507 -0.452846
32 11.0 -1090.3287 0.0846 -1075.4507 -0.356217
33 12.0 -1069.7162 0.0583 -1075.4507 0.137298
34 13.0 -1062.4386 0.0399 -1075.4507 0.311542
35 14.0 -1062.2628 0.0271 -1075.4507 0.315752
36 15.0 -1058.2882 0.0183 -1075.4507 0.410913
37 16.0 -1057.5280 0.0123 -1075.4507 0.429115
38 17.0 -1056.7419 0.0083 -1075.4507 0.447936
39 18.0 -1057.4922 0.0056 -1075.4507 0.429972
40 19.0 -1041.3495 0.0038 -1075.4507 0.816469
41 20.0 -1041.8434 0.0025 -1075.4507 0.804643
42 21.0 -1031.3609 0.0017 -1075.4507 1.055621
43 22.0 -1031.6371 0.0011 -1075.4507 1.049008
44 23.0 -1036.6175 0.0008 -1075.4507 0.929764
45 24.0 -1034.2907 0.0005 -1075.4507 0.985474
46 25.0 -1023.4981 0.0003 -1075.4507 1.243876
47 26.0 -1023.0087 0.0002 -1075.4507 1.255593
48 27.0 -1030.6065 0.0002 -1075.4507 1.073683
49 28.0 -1020.0493 0.0001 -1075.4507 1.326449
50 29.0 -1032.8618 0.0001 -1075.4507 1.019685

In [98]:



Out[98]:
z_m energy Q native z
0 -30.0 -768.7444 0.0000 -1525.6811 2.911356
1 -29.0 -768.9475 0.0001 -1525.6811 2.910575
2 -28.0 -783.6661 0.0001 -1525.6811 2.853964
3 -27.0 -804.8482 0.0002 -1525.6811 2.772493
4 -26.0 -808.5686 0.0002 -1525.6811 2.758183
5 -25.0 -822.9620 0.0003 -1525.6811 2.702823
6 -24.0 -869.3310 0.0005 -1525.6811 2.524477
7 -23.0 -905.7683 0.0008 -1525.6811 2.384330
8 -22.0 -935.1202 0.0011 -1525.6811 2.271436
9 -21.0 -995.0494 0.0017 -1525.6811 2.040934
10 -20.0 -1014.2606 0.0025 -1525.6811 1.967043
11 -19.0 -1032.2256 0.0038 -1525.6811 1.897946
12 -18.0 -1095.6541 0.0056 -1525.6811 1.653985
13 -17.0 -1124.9821 0.0083 -1525.6811 1.541182
14 -16.0 -1150.9572 0.0123 -1525.6811 1.441276
15 -15.0 -1201.0993 0.0183 -1525.6811 1.248418
16 -14.0 -1223.7679 0.0271 -1525.6811 1.161229
17 -13.0 -1265.9859 0.0399 -1525.6811 0.998849
18 -12.0 -1303.9916 0.0583 -1525.6811 0.852670
19 -11.0 -1347.2346 0.0846 -1525.6811 0.686347
20 -10.0 -1345.6804 0.1211 -1525.6811 0.692325
21 -9.0 -1377.4529 0.1704 -1525.6811 0.570120
22 -8.0 -1389.8580 0.2344 -1525.6811 0.522408
23 -7.0 -1397.5923 0.3131 -1525.6811 0.492660
24 -6.0 -1432.1223 0.4037 -1525.6811 0.359849
25 -5.0 -1487.9474 0.5000 -1525.6811 0.145133
26 5.0 -1557.9714 0.5000 -1525.6811 -0.124196
27 6.0 -1549.3347 0.4037 -1525.6811 -0.090977
28 7.0 -1512.1793 0.3131 -1525.6811 0.051931
29 8.0 -1487.1396 0.2344 -1525.6811 0.148240
30 9.0 -1444.7057 0.1704 -1525.6811 0.311450
31 10.0 -1401.5765 0.1211 -1525.6811 0.477335
32 11.0 -1376.9074 0.0846 -1525.6811 0.572219
33 12.0 -1309.1171 0.0583 -1525.6811 0.832956
34 13.0 -1268.7547 0.0399 -1525.6811 0.988199
35 14.0 -1234.8577 0.0271 -1525.6811 1.118575
36 15.0 -1178.1680 0.0183 -1525.6811 1.336617
37 16.0 -1154.6852 0.0123 -1525.6811 1.426937
38 17.0 -1113.8930 0.0083 -1525.6811 1.583834
39 18.0 -1091.3410 0.0056 -1525.6811 1.670574
40 19.0 -1044.6398 0.0038 -1525.6811 1.850198
41 20.0 -1019.1849 0.0025 -1525.6811 1.948103
42 21.0 -983.8153 0.0017 -1525.6811 2.084143
43 22.0 -966.7270 0.0011 -1525.6811 2.149869
44 23.0 -954.4758 0.0008 -1525.6811 2.196990
45 24.0 -942.0562 0.0005 -1525.6811 2.244758
46 25.0 -920.9998 0.0003 -1525.6811 2.325746
47 26.0 -912.2406 0.0002 -1525.6811 2.359436
48 27.0 -922.9212 0.0002 -1525.6811 2.318356
49 28.0 -911.7409 0.0001 -1525.6811 2.361358
50 29.0 -923.5301 0.0001 -1525.6811 2.316014

In [58]:
name


Out[58]:
'4pgr'

In [ ]:


In [ ]:
def get_relative_k_data(gamma, decoy, decoyQ, native, cutoff=0.95):
    # gamma = 1
    limit =30
    energy = [a[0]+a[1]*gamma for a in decoy]
    # z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
    z_m = list(np.arange(5, limit, 1)) + list(np.arange(-limit, -5, 1))
    d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
    d.columns = ["z_m", "energy", "Q"]
    d = d.sort_values("z_m").reset_index()
    native_energy =native[0] + native[1]*gamma
    d["native"] = native_energy
    d["z"] = native_energy/d["energy"]
    # dz = d["z"].tolist()
    return d
def get_value(gamma, decoy, decoyQ, native, cutoff=0.95):
    # gamma = 1
    limit =30
    energy = [a[0]+a[1]*gamma for a in decoy]
    # z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
    z_m = list(np.arange(5, limit, 1)) + list(np.arange(-limit, -5, 1))
    d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
    d.columns = ["z_m", "energy", "Q"]
    d = d.sort_values("z_m").reset_index()
    native_energy =native[0] + native[1]*gamma
    d["z"] = native_energy/d["energy"]
    d["z_real"] = (native_energy-d["energy"])/d["energy"].std()
    # dz = d["z"].tolist()
    value = ( ((d["z"] > cutoff)) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
    value_sum =  ((d["z"]) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
    return value, value_sum

def get_value_2(gamma, decoy, decoyQ, native, cutoff=0):
    # gamma = 1
    limit =30
    energy = [a[0]+a[1]*gamma for a in decoy]
    # z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
    z_m = list(np.arange(5, limit, 1)) + list(np.arange(-limit, -5, 1))
    d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
    d.columns = ["z_m", "energy", "Q"]
    d = d.sort_values("z_m").reset_index()
    native_energy =native[0] + native[1]*gamma
    # d["z"] = native_energy/d["energy"]
    d["z"] = (native_energy-d["energy"])/d["energy"].std()
    # dz = d["z"].tolist()
    value = ( ((d["z"] > cutoff)) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
    value_sum =  ((d["z"]) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
    return value, value_sum

def get_value_3(gamma, decoy, decoyQ, native, cutoff=0):
    # gamma = 1
    limit =30
    energy = [a[0]+a[1]*gamma for a in decoy]
    # z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
    z_m = list(np.arange(5, limit, 1)) + list(np.arange(-limit, -5, 1))
    d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
    d.columns = ["z_m", "energy", "Q"]
    d = d.sort_values("z_m").reset_index()
    native_energy =native[0] + native[1]*gamma
    # d["z"] = native_energy/d["energy"]
    d["z"] = (native_energy-d["energy"])/d["energy"].std()
    # dz = d["z"].tolist()
    value = ( ((d["z"] > cutoff)) ).sum()/len(d)
    value_sum =  ((d["z"]) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
    return value, value_sum

def get_value_array(gamma_list, **kwargs):
    # gamma_list = np.linspace(1,15, num=100)
    value_list = []
    value_sum_list = []
    for gamma in gamma_list:
        value, value_sum = get_value_2(gamma, **kwargs)
        # value, value_sum = get_value(gamma, **kwargs)
        # print(gamma, value)
        value_list.append(value)
        value_sum_list.append(value_sum)
    return np.array(value_list), np.array(value_sum_list)

def get_value_array_3(gamma_list, **kwargs):
    # gamma_list = np.linspace(1,15, num=100)
    value_list = []
    value_sum_list = []
    for gamma in gamma_list:
        value, value_sum = get_value_3(gamma, **kwargs)
        # value, value_sum = get_value(gamma, **kwargs)
        # print(gamma, value)
        value_list.append(value)
        value_sum_list.append(value_sum)
    return np.array(value_list), np.array(value_sum_list)

In [194]:
pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/phis"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/back_phis/"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
gamma_list = np.linspace(1,10, num=50)
cutoff = 1.0
all_value_list = []
all_value_sum_list = []
for i, name in enumerate(names):
    cc += 1
    if i % 100 == 20:
        print(i)
        break
    decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
    native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
    decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
    one_value_list, one_value_sum_list = get_value_array_3(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
    all_value_list.append(one_value_list)
    # all_value_sum_list.append(one_value_sum_list)


20

In [196]:
a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)


Out[196]:
(0, 1)

In [200]:
# a_3 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3, color="red")
plt.plot(gamma_list, a_2)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)


Out[200]:
(0, 1)

In [197]:
pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/phis"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/back_phis/"
# pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
gamma_list = np.linspace(1,10, num=50)
cutoff = 1.0
all_value_list = []
all_value_sum_list = []
for i, name in enumerate(names):
    cc += 1
    if i % 100 == 20:
        print(i)
        break
    decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
    native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
    decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
    one_value_list, one_value_sum_list = get_value_array(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
    all_value_list.append(one_value_list)
    # all_value_sum_list.append(one_value_sum_list)


20

In [198]:
a_2 = pd.DataFrame(all_value_list).sum(axis=0)/20
plt.plot(gamma_list, a_3)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)


Out[198]:
(0, 1)

In [ ]:


In [168]:
plt.plot(gamma_list, pd.DataFrame(all_value_list).sum(axis=0)/20)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)


Out[168]:
(0, 1)

In [166]:
plt.plot(gamma_list, pd.DataFrame(all_value_list).sum(axis=0)/20)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)


Out[166]:
(0, 1)

In [126]:
a.shape


Out[126]:
(420, 2)

In [ ]:
np.savetxt()

In [186]:
a = np.loadtxt("/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/optimization/membrane_gamma_original.dat")
b = np.loadtxt("/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/optimization/membrane_gamma_rescaled.dat")

In [171]:
a = np.loadtxt("/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/optimization/membrane_gamma_original.dat")
fileName = "/Users/weilu/Research/server/oct_2019/relative_k_optimization_new_gamma/optimization/membrane_gamma_original_x200.dat"
# a = -a
a *= 200
with open(fileName, "w") as f:
    for i in range(210):
        # f.write(f"{a[i][0]} {a[i][1]}\n")
        f.write(f"{a[i][0]:<.5f} {a[i][1]:10.5f}\n")
    f.write("\n")
    for i in range(210):
        # f.write(f"{a[i][0]} {a[i][1]}\n")
        f.write(f"{a[i+210][0]:<.5f} {a[i+210][1]:10.5f}\n")

In [122]:
plt.plot(gamma_list, pd.DataFrame(all_value_list).sum(axis=0)/99)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)


Out[122]:
(0, 1)

In [107]:
pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
gamma_list = np.linspace(1,20, num=100)
cutoff = 1.0
all_value_list = []
all_value_sum_list = []
for i, name in enumerate(names):
    cc += 1
    if i % 100 == 0:
        print(i)
        # break
    decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
    native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
    decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
    one_value_list, one_value_sum_list = get_value_array(gamma_list, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
    all_value_list.append(one_value_list)
    # all_value_sum_list.append(one_value_sum_list)


0
100
200
300
400
500
600
700
800
900
1000
1100

In [112]:
a = pd.DataFrame(all_value_list)

In [115]:
a.to_csv("/Users/weilu/Research/data/relative_k_oct21.csv")

In [110]:
plt.plot(gamma_list, pd.DataFrame(all_value_list).sum(axis=0)/len(names))
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
# plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)


Out[110]:
(0, 1)

In [106]:
plt.plot(gamma_list, pd.DataFrame(all_value_list).sum(axis=0)/99)
plt.xlabel("relative_k")
plt.ylabel("average fraction of decoys that has energy E_decoy > E_native")
plt.ylim((0,1))
plt.savefig("/Users/weilu/Dropbox/openAWSEM/figures/relative_k.png", kpi=300)



In [71]:
pre = "/Users/weilu/Research/server/oct_2019/relative_k_optimization/phis"
name = names[0]
cutoff = 1.0
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
value_list, value_sum_list = get_value_array(decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
cc = 0
for i, name in enumerate(names[1:]):
    cc += 1
#     if i % 10 == 0:
#         print(i)
#         break
    if i == 10:
        print(i)
        break
    decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
    native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
    decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
    one_value_list, one_value_sum_list = get_value_array(decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
    value_list += one_value_list
    value_sum_list += one_value_sum_list

# value_list /= len(names)
# one_value_sum_list /= len(names)


10

In [73]:
gamma_list = np.linspace(1,15, num=100)
plt.plot(gamma_list, value_list/cc)


Out[73]:
[<matplotlib.lines.Line2D at 0x1a1fa51080>]

In [180]:
plt.plot(gamma_list, value_list)
# plt.plot(gamma_list, value_sum_list)


Out[180]:
[<matplotlib.lines.Line2D at 0x1a348b3550>]

In [175]:
pre = "/Users/weilu/Research/server/jun_2019/relative_k/phis"
name = names[0]
cutoff = 0.9
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
value_list, value_sum_list = get_value_array(decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
for i, name in enumerate(names[1:]):
    if i % 100 == 0:
        print(i)
    decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
    native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
    decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
    one_value_list, one_value_sum_list = get_value_array(decoy=decoy, decoyQ=decoyQ, native=native, cutoff=cutoff)
    value_list += one_value_list
    value_sum_list += one_value_sum_list

value_list /= len(names)
one_value_sum_list /= len(names)


0
100
200
300
400
500
600
700
800
900
1000
1100

In [178]:
plt.plot(gamma_list, value_list)
# plt.plot(gamma_list, value_sum_list)


Out[178]:
[<matplotlib.lines.Line2D at 0x1a33feab38>]

In [170]:
plt.plot(gamma_list, value_list)
# plt.plot(gamma_list, value_sum_list)


Out[170]:
[<matplotlib.lines.Line2D at 0x1a33225da0>]

In [ ]:


In [ ]:
pre = "/Users/weilu/Research/server/jun_2019/relative_k/phis"
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")

In [163]:
pre = "/Users/weilu/Research/server/jun_2019/relative_k/phis"
name = names[0]
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
value_list, value_sum_list = get_value_array(decoy=decoy, decoyQ=decoyQ, native=native, cutoff=0.9)
for name in names[1:]:
    decoy = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoys_shifted_4.5_6.5_5.0_10")
    native = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_native_4.5_6.5_5.0_10")
    decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_{name}_decoysQ_shifted_4.5_6.5_5.0_10")
    one_value_list, one_value_sum_list = get_value_array(decoy=decoy, decoyQ=decoyQ, native=native, cutoff=0.9)
    value_list += one_value_list
    value_sum_list += one_value_sum_list

value_list /= len(names)
one_value_sum_list /= len(names)

In [166]:
plt.plot(gamma_list, value_sum_list)


Out[166]:
[<matplotlib.lines.Line2D at 0x1a32902b00>]

In [165]:
plt.plot(gamma_list, value_list)
# plt.plot(gamma_list, value_sum_list)


Out[165]:
[<matplotlib.lines.Line2D at 0x1a328942b0>]

In [17]:


In [18]:


In [150]:
value_list, value_sum_list = get_value_array(gamma=gamma, decoy=decoy, decoyQ=decoyQ, native=native, cutoff=0.9)

In [132]:
gamma_list = np.linspace(1,4, num=200)
value_list = []
value_sum_list = []
for gamma in gamma_list:
    value, value_sum = get_value(gamma, decoy, decoyQ, native, cutoff=0.9)
    # print(gamma, value)
    value_list.append(value)
    value_sum_list.append(value_sum)

In [133]:
plt.plot(gamma_list, value_list)
plt.plot(gamma_list, value_sum_list)


Out[133]:
[<matplotlib.lines.Line2D at 0x1a2dfc3320>]

In [128]:
plt.plot(gamma_list, value_list)
plt.plot(gamma_list, value_sum_list)


Out[128]:
[<matplotlib.lines.Line2D at 0x1a2db69898>]

In [82]:
plt.plot(gamma_list, value_list)
plt.plot(gamma_list, value_sum_list)


Out[82]:
[<matplotlib.lines.Line2D at 0x1a27d66860>]

In [83]:
get_value(2, decoy, decoyQ, native)


Out[83]:
(1.0, 0.9755688843602008)

In [117]:
native_energy


Out[117]:
221.33339999999998

In [126]:
native_energy/ (2.5 + 128*2 )


Out[126]:
0.8562220502901353

In [144]:
gamma = 1.8
limit =60
cutoff = 1.0
energy = [a[0]+a[1]*gamma for a in decoy]
z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
d = d.sort_values("z_m")
d = d.reset_index()
native_energy =native[0] + native[1]*gamma
d["z"] = native_energy/d["energy"]

value = ( ((d["z"] > cutoff)) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
print(value)
d.plot("z_m", "z")


0.48108864356347847
Out[144]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a3122e0f0>

In [111]:
gamma = 2
limit =60
cutoff = 1.0
energy = [a[0]+a[1]*gamma for a in decoy]
z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
d = d.sort_values("z_m")
d = d.reset_index()
native_energy =native[0] + native[1]*gamma
d["z"] = native_energy/d["energy"]

value = ( ((d["z"] > cutoff)) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
print(value)
d.plot("z_m", "z")


0.41069983150911604
Out[111]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2d205588>

In [105]:
d


Out[105]:
index z_m energy Q z
0 59 -60.0 162.9544 0.0 1.014328
1 60 -59.0 162.9544 0.0 1.014328
2 61 -58.0 162.9544 0.0 1.014328
3 62 -57.0 162.9544 0.0 1.014328
4 63 -56.0 162.9544 0.0 1.014328
5 64 -55.0 162.9544 0.0 1.014328
6 65 -54.0 162.9544 0.0 1.014328
7 66 -53.0 162.9544 0.0 1.014328
8 67 -52.0 162.9544 0.0 1.014328
9 68 -51.0 162.9544 0.0 1.014328
10 69 -50.0 162.9544 0.0 1.014328
11 70 -49.0 162.9544 0.0 1.014328
12 71 -48.0 162.9544 0.0 1.014328
13 72 -47.0 162.9544 0.0 1.014328
14 73 -46.0 162.9544 0.0 1.014328
15 74 -45.0 162.9544 0.0 1.014328
16 75 -44.0 162.9544 0.0 1.014328
17 76 -43.0 162.9544 0.0 1.014328
18 77 -42.0 162.9544 0.0 1.014328
19 78 -41.0 162.9544 0.0 1.014328
20 79 -40.0 162.9544 0.0 1.014328
21 80 -39.0 162.9544 0.0 1.014328
22 81 -38.0 162.9544 0.0 1.014328
23 82 -37.0 162.9544 0.0 1.014328
24 83 -36.0 162.9544 0.0 1.014328
25 84 -35.0 162.9544 0.0 1.014328
26 85 -34.0 162.9544 0.0 1.014328
27 86 -33.0 162.9544 0.0 1.014328
28 87 -32.0 162.9544 0.0 1.014328
29 88 -31.0 162.9544 0.0 1.014328
... ... ... ... ... ...
88 29 30.0 158.7718 0.0 1.041049
89 30 31.0 161.3037 0.0 1.024708
90 31 32.0 166.8417 0.0 0.990695
91 32 33.0 170.1233 0.0 0.971585
92 33 34.0 170.3250 0.0 0.970434
93 34 35.0 169.6295 0.0 0.974413
94 35 36.0 167.1150 0.0 0.989075
95 36 37.0 161.5368 0.0 1.023229
96 37 38.0 158.1782 0.0 1.044956
97 38 39.0 143.5881 0.0 1.151134
98 39 40.0 140.9730 0.0 1.172488
99 40 41.0 140.2142 0.0 1.178834
100 41 42.0 141.7289 0.0 1.166235
101 42 43.0 131.9997 0.0 1.252194
102 43 44.0 129.6248 0.0 1.275136
103 44 45.0 130.4661 0.0 1.266913
104 45 46.0 128.8012 0.0 1.283289
105 46 47.0 128.8794 0.0 1.282511
106 47 48.0 127.7465 0.0 1.293884
107 48 49.0 129.3980 0.0 1.277371
108 49 50.0 129.1467 0.0 1.279856
109 50 51.0 128.3426 0.0 1.287875
110 51 52.0 129.5045 0.0 1.276320
111 52 53.0 131.4498 0.0 1.257432
112 53 54.0 131.2152 0.0 1.259680
113 54 55.0 131.2152 0.0 1.259680
114 55 56.0 131.3004 0.0 1.258863
115 56 57.0 131.6649 0.0 1.255378
116 57 58.0 131.6649 0.0 1.255378
117 58 59.0 131.6649 0.0 1.255378

118 rows × 5 columns


In [84]:
gamma = 2
limit =60
cutoff = 1.0
energy = [a[0]+a[1]*gamma for a in decoy]
z_m =  list(np.arange(1, limit, 1)) + list(np.arange(-limit, -1, 1))
d = pd.DataFrame([z_m, energy, list(decoyQ)]).T
d.columns = ["z_m", "energy", "Q"]
d = d.sort_values("z_m")
native_energy =native[0] + native[1]*gamma
d["z"] = native_energy/d["energy"]

value = ( ((d["z"] > cutoff)) * (1-d["Q"]) ).sum()/(1-d["Q"]).sum()
print(value)
d.plot("z_m", "z")


0.1512625495519803
Out[84]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a27bd3ba8>

In [47]:
plt.plot( ((d["z"].reset_index()["z"] > 0.95) ) )


Out[47]:
[<matplotlib.lines.Line2D at 0x1a1e235a58>]

In [227]:
d


Out[227]:
z_m energy z
59 -60.0 687.9749 0.964768
60 -59.0 687.9749 0.964768
61 -58.0 687.9749 0.964768
62 -57.0 687.9749 0.964768
63 -56.0 687.9749 0.964768
64 -55.0 687.9749 0.964768
65 -54.0 687.9749 0.964768
66 -53.0 687.9749 0.964768
67 -52.0 687.9749 0.964768
68 -51.0 687.9749 0.964768
69 -50.0 687.9749 0.964768
70 -49.0 687.9749 0.964768
71 -48.0 687.9749 0.964768
72 -47.0 687.9749 0.964768
73 -46.0 687.9749 0.964768
74 -45.0 687.9749 0.964768
75 -44.0 687.9749 0.964768
76 -43.0 687.9749 0.964768
77 -42.0 687.9749 0.964768
78 -41.0 687.9749 0.964768
79 -40.0 687.9749 0.964768
80 -39.0 687.9749 0.964768
81 -38.0 687.9749 0.964768
82 -37.0 687.9749 0.964768
83 -36.0 687.9749 0.964768
84 -35.0 687.9749 0.964768
85 -34.0 687.9749 0.964768
86 -33.0 687.3165 0.965692
87 -32.0 687.3163 0.965693
88 -31.0 687.4239 0.965541
... ... ... ...
29 30.0 693.2924 0.957368
30 31.0 693.2924 0.957368
31 32.0 693.2924 0.957368
32 33.0 693.2924 0.957368
33 34.0 693.2924 0.957368
34 35.0 693.2924 0.957368
35 36.0 693.2924 0.957368
36 37.0 693.2924 0.957368
37 38.0 693.2924 0.957368
38 39.0 693.2924 0.957368
39 40.0 693.2924 0.957368
40 41.0 693.2924 0.957368
41 42.0 693.2924 0.957368
42 43.0 693.2924 0.957368
43 44.0 693.2924 0.957368
44 45.0 693.2924 0.957368
45 46.0 693.2924 0.957368
46 47.0 693.2924 0.957368
47 48.0 693.2924 0.957368
48 49.0 693.2924 0.957368
49 50.0 693.2924 0.957368
50 51.0 693.2924 0.957368
51 52.0 693.2924 0.957368
52 53.0 693.2924 0.957368
53 54.0 693.2924 0.957368
54 55.0 693.2924 0.957368
55 56.0 693.2924 0.957368
56 57.0 693.2924 0.957368
57 58.0 693.2924 0.957368
58 59.0 693.2924 0.957368

118 rows × 3 columns


In [192]:
decoys_all = np.loadtxt("/Users/weilu/Research/server/jun_2019/relative_k/phis/protein_list_phi_relative_k_well4.5_6.5_5.0_10_phi_decoy_all_summary.txt")
decoy_Q = np.loadtxt("/Users/weilu/Research/server/jun_2019/relative_k/phis/phi_relative_k_well_5j4i_decoysQ_shifted_4.5_6.5_5.0_10")

In [197]:
normalized = decoys_all.sum(axis=0) / (1-decoy_Q).sum()

In [198]:
normalized


Out[198]:
array([ 89.09506801, 295.90188123])

In [183]:
(1-decoy_Q).sum()


Out[183]:
105.9998

In [ ]:


In [174]:
decoys_all.mean(axis=0)


Out[174]:
array([ 80.03440161, 265.80966297])

In [ ]:
pre = "/Users/weilu/Research/server/jun_2019/relative_k/phis"
decoy_high = np.loadtxt(f"{pre}/{name}_phis_high")
decoy_low = np.loadtxt(f"{pre}/{name}_phis_low")

In [220]:
gamma = 2
energy = [a[0]+a[1]*gamma for a in decoy_low]
z_m =  list(z_m_high) + list(z_m_low)
d = pd.DataFrame([z_m, energy]).T
d.columns = ["z_m", "energy"]
d = d.sort_values("z_m")
native_energy = d.query("abs(z_m) == 15").iloc[0][1]
d["z"] = native_energy/d["energy"]
d.plot("z_m", "z")


Out[220]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1e189b70>

In [216]:
gamma = 10
energy = [a[0]+a[1]*gamma for a in decoy_low]
z_m =  list(z_m_high) + list(z_m_low)
d = pd.DataFrame([z_m, energy]).T
d.columns = ["z_m", "energy"]
d = d.sort_values("z_m")
native_energy = d.query("abs(z_m) == 15").iloc[0][1]
d["z"] = native_energy/d["energy"]
d.plot("z_m", "z")


Out[216]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1d0fb4e0>

In [212]:
gamma = 0.1
energy = [a[0]+a[1]*gamma for a in decoy_low]
z_m =  list(z_m_high) + list(z_m_low)
d = pd.DataFrame([z_m, energy]).T
d.columns = ["z_m", "energy"]
d = d.sort_values("z_m")
native_energy = d.query("abs(z_m) == 15").iloc[0][1]
d["z"] = native_energy/d["energy"]
d.plot("z_m", "z")


Out[212]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1c9d3c18>

In [151]:


In [157]:


In [169]:
def interaction_well(r, r_min, r_max, kappa):
    return 0.5 * (np.tanh(kappa * (r - r_min)) * np.tanh(kappa * (r_max - r))) + 0.5
# r = np.linspace(10, 20, num=200)
r = np.linspace(-60,60, num=200)
y = interaction_well(r, 12, 18, 1) + interaction_well(r, -18, -12, 1)
plt.plot(r, y)


Out[169]:
[<matplotlib.lines.Line2D at 0x1a1c4575c0>]

In [167]:
r = np.linspace(10, 20, num=200)
y = interaction_well(r, 12, 18, 1)
plt.plot(r, y)


Out[167]:
[<matplotlib.lines.Line2D at 0x1a1cb1b390>]

In [134]:
d.plot("z_m", "energy")


Out[134]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a1d4d72b0>

In [122]:
len(energy)


Out[122]:
118

In [46]:
pre = "/Users/weilu/Research/server/jun_2019/relative_k/phis"
decoy = np.loadtxt(f"{pre}/phi_relative_k_well_5j4i_decoys_shifted_4.5_6.5_5.0_10")
native = np.loadtxt(f"{pre}/phi_relative_k_well_5j4i_native_4.5_6.5_5.0_10")
decoyQ = np.loadtxt(f"{pre}/phi_relative_k_well_5j4i_decoysQ_shifted_4.5_6.5_5.0_10")

In [54]:
def compute_energy(decoy, native, decoyQ, gamma):
    energy = [a[0]+a[1]*gamma for a in decoy]
    return np.array(energy)

In [70]:
gamma = 10
native_energy = native[0] + native[1]*gamma

In [71]:
decoy_energy = compute_energy(decoy, native, decoyQ, gamma)

In [72]:
offset_all = np.arange(-50, 50, 1)
z = native_energy/decoy_energy
plt.plot(offset_all, z)


Out[72]:
[<matplotlib.lines.Line2D at 0x1a1d9b6c50>]

In [45]:
decoyQ


Out[45]:
array([0.37528604, 0.37528604, 0.37528604, 0.37528604, 0.37528604,
       0.37528604, 0.37528604, 0.37528604, 0.37528604, 0.37299771,
       0.37185355, 0.36498856, 0.35697941, 0.34668192, 0.33524027,
       0.31922197, 0.29977117, 0.27803204, 0.25858124, 0.23913043,
       0.21395881, 0.23226545, 0.24828375, 0.27116705, 0.28832952,
       0.30778032, 0.33409611, 0.35469108, 0.37871854, 0.39702517,
       0.41533181, 0.43592677, 0.46224256, 0.48283753, 0.49885584,
       0.51716247, 0.5389016 , 0.55377574, 0.58009153, 0.60183066,
       0.62356979, 0.64988558, 0.67963387, 0.70823799, 0.74485126,
       0.78489703, 0.81922197, 0.86613272, 0.8993135 , 0.95308924,
       1.        , 0.95652174, 0.92334096, 0.87757437, 0.83524027,
       0.79290618, 0.74485126, 0.70709382, 0.66132723, 0.63501144,
       0.60526316, 0.5778032 , 0.54576659, 0.52402746, 0.50800915,
       0.48855835, 0.46338673, 0.44622426, 0.41990847, 0.40045767,
       0.3798627 , 0.3604119 , 0.33867277, 0.32036613, 0.29519451,
       0.27002288, 0.25629291, 0.23112128, 0.2173913 , 0.18306636,
       0.16132723, 0.18649886, 0.20366133, 0.22654462, 0.25171625,
       0.27459954, 0.29633867, 0.31235698, 0.33524027, 0.34324943,
       0.35354691, 0.36155606, 0.36727689, 0.36842105, 0.36842105,
       0.36956522, 0.37299771, 0.37528604, 0.37528604, 0.37528604])

In [ ]:


In [3]:
data = pd.read_csv("/Users/weilu/Research/database/membrane_training_set/proteins-2019-05-01.csv")
data.pdbid = data.pdbid.apply(lambda x: x[2:-1])

In [4]:
data.head()


Out[4]:
id ordering family_name_cache species_name_cache membrane_name_cache name description comments pdbid resolution ... species_id family_id superfamily_id classtype_id type_id secondary_representations_count structure_subunits_count citations_count created_at updated_at
0 1 2024.0 OmpA family Escherichia coli Gram-neg. outer Outer membrane protein A (OMPA), disordered loops NaN OmpA is required for the action of colicins K ... 1qjp 1.65 ... 9 34 26 2 1 3 1 2 2018-08-13 03:49:46 UTC 2018-09-21 18:14:03 UTC
1 2 2028.0 Enterobacterial Ail/Lom protein Escherichia coli Gram-neg. outer Outer membrane protein X (OMPX) NaN OmpX from Escherichia coli promotes adhesion t... 1qj8 1.90 ... 9 355 26 2 1 7 1 1 2018-08-13 03:49:46 UTC 2018-09-21 18:14:03 UTC
2 3 2033.0 Opacity porins Neisseria meningitidis Gram-neg. outer Outer membrane protein NspA NaN Pathogenic Neisseria spp. possess a repertoire... 1p4t 2.55 ... 24 337 235 2 1 0 1 0 2018-08-13 03:49:46 UTC 2018-09-21 18:14:03 UTC
3 4 1740.0 Influenza virus matrix protein 2 Influenza virus Viral M2 proton channel of Influenza A, closed state... NaN NaN 3lbw 1.65 ... 51 263 185 11 1 3 4 0 2018-08-13 03:49:46 UTC 2018-10-02 17:42:36 UTC
4 5 2045.0 OM protease omptin, OMPT Yersinia pestis Gram-neg. outer Plasminogen activator PLA (coagulase/fibrinoly... NaN NaN 2x55 1.85 ... 299 36 27 2 1 2 1 0 2018-08-13 03:49:46 UTC 2018-09-21 18:14:03 UTC

5 rows × 31 columns


In [4]:
dd = pd.read_csv("/Users/weilu/Research/database/membrane_training_set/chosen.csv", index_col=0)
pdb_list = dd.pdbid.tolist()

In [ ]:


In [5]:
filtered_list = []
for pdb in pdb_list:
    location = f"/Users/weilu/Research/database/relative_k/cleaned_pdbs/{pdb}.pdb"
    a = get_inside_or_not_table(location)
    ratio = sum(a)/len(a)
    if ratio < 0.2 or ratio > 0.8:
        print("not good", pdb, ratio)
    else:
        filtered_list.append(pdb)
        # print("good", pdb, ratio)
    # print(pdb, ratio)


not good 5lcb 0.0847457627118644
not good 6f0k 0.09178743961352658
not good 6adq 0.1346153846153846
not good 2oau 0.15748031496062992
not good 6cxh 0.1099476439790576
not good 1p49 0.09124087591240876
not good 4i0u 0.11527377521613832
not good 3bpp 0.0
not good 1kn9 0.004273504273504274
not good 4qo2 0.8186813186813187
not good 2hi7 0.0
not good 5svl 0.16516516516516516
not good 6el1 0.06267806267806268
not good 2ksf 0.8691588785046729
not good 2bhv 0.010582010582010581
not good 6idf 0.037481259370314844
not good 4jza 0.0
not good 5iji 0.18061674008810572
not good 1lv7 0.00398406374501992
not good 3b8n 0.0
not good 3pjv 0.0078125
not good 2lop 0.88
not good 2lom 0.8387096774193549
not good 2mmu 0.82
not good 5eke 0.1736111111111111
not good 5sy1 0.0
not good 5wud 0.8469387755102041
not good 5uph 0.0
not good 5v7v 0.021207177814029365
not good 6f2d 0.03940886699507389
not good 6c5w 0.14736842105263157
not good 6bug 0.0
not good 6mlu 0.0784313725490196
not good 6mct 0.8076923076923077

In [6]:
len(filtered_list)


Out[6]:
86

In [7]:
pdb_list = filtered_list

In [8]:
pdb_list


Out[8]:
['6c6l',
 '6aky',
 '1j4n',
 '5gjw',
 '1kpl',
 '4y7k',
 '6eu6',
 '5kxi',
 '4j05',
 '6ajg',
 '6fn4',
 '4nv6',
 '2zjs',
 '6gct',
 '2c3e',
 '5ksd',
 '4b4a',
 '4llh',
 '5y79',
 '5oc0',
 '3fi1',
 '4m58',
 '4a2n',
 '4uc1',
 '2q7r',
 '2yvx',
 '3h90',
 '3b4r',
 '6akg',
 '2kdc',
 '6al2',
 '3m73',
 '2m67',
 '5t77',
 '6bvg',
 '6bbg',
 '3rce',
 '6g72',
 '5vkv',
 '6d9z',
 '4r1i',
 '6nsj',
 '4p02',
 '5tcx',
 '5oyb',
 '6n28',
 '4quv',
 '4qtn',
 '5hwy',
 '3tij',
 '4av3',
 '2lor',
 '4il3',
 '3zd0',
 '5o5e',
 '4od5',
 '4o6m',
 '4pgr',
 '5jwy',
 '4o9u',
 '2mpn',
 '4q2e',
 '4x5m',
 '4rp9',
 '4zr1',
 '5a40',
 '4xu4',
 '5azb',
 '5dir',
 '5zfp',
 '5n6h',
 '5vre',
 '5mlz',
 '5tsa',
 '5xj5',
 '6bml',
 '6b87',
 '6cb2',
 '5zug',
 '6c70',
 '6bar',
 '6bhp',
 '4cad',
 '6iu3',
 '6nt6',
 '6m97']

In [23]:
with open("/Users/weilu/Research/server/may_2019/relative_k/optimization/protein_list", "w") as out:
    for pdb in pdb_list:
        # print(pdb)
        out.write(pdb+"\n")

In [28]:
for pdb in pdb_list:
    location = f"/Users/weilu/Research/server/may_2019/relative_k/database/dompdb/{pdb}.pdb"
    toLocation = f"/Users/weilu/Research/server/may_2019/relative_k/database/S20_seq/{pdb}.seq"
    seq,resseqs = getSeqFromPDB(location, considerGap=False)
    with open(toLocation, "w") as out:
        out.write(seq+'\n')

In [ ]:
0.01442
-0.31612

In [29]:
complete_proteins = "../database/cath-dataset-nonredundant-S20Clean.list"

In [33]:
a = "proteins_name_list/proteins_name_list_0.txt"

In [34]:
a.split('/')[-1].split('.')[0]


Out[34]:
'proteins_name_list_0'

In [ ]: