In [1]:
# Standard-library, scientific-stack, and project-specific imports for this
# membrane-protein simulation analysis notebook.
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp  # NOTE(review): `imp` is deprecated since Python 3.4 — prefer importlib
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d  # NOTE(review): interp2d is deprecated in SciPy >= 1.10
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import seaborn as sns  # NOTE(review): duplicate import (seaborn imported above)
from scipy.interpolate import griddata  # NOTE(review): duplicate import (griddata imported above)
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
# NOTE(review): star imports pollute the namespace; several names used later
# (get_two_part_from_prediction, do, ...) come from these two modules.
from pyCodeLib import *
from small_script.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
# Global matplotlib defaults: golden-ratio figure on a white background,
# 100 dpi, and a large base font for presentation-ready plots.
plt.rcParams.update({
    'figure.figsize': [16.18033, 10],  # golden ratio
    'figure.facecolor': 'w',
    'figure.dpi': 100,
    'font.size': 22,
})
In [ ]:
from Bio.PDB import *
class ExtractResidues(Select):
    """Bio.PDB ``Select`` that keeps only the residues whose positional
    index within ``resList`` appears in ``ResidueIndexGroup``."""

    def __init__(self, ResidueIndexGroup, resList):
        super().__init__()
        # 0-based indices (into resList) of the residues to keep.
        self.ResidueIndexGroup = ResidueIndexGroup
        # Full residue list of the structure; maps residue object -> index.
        self.resList = resList

    def accept_residue(self, residue):
        # Accept iff this residue's position is in the requested index group.
        return self.resList.index(residue) in self.ResidueIndexGroup
def extractResidues(structure, toName, ResidueIndexGroup):
    """Write only the residues of ``structure`` whose 0-based positional
    index is in ``ResidueIndexGroup`` out to the PDB file ``toName``.

    structure: a parsed Bio.PDB structure.
    toName: output PDB path.
    ResidueIndexGroup: iterable of residue indices to keep.
    """
    resList = list(structure.get_residues())
    io = PDBIO()
    io.set_structure(structure)
    # ExtractResidues filters residues during save via accept_residue().
    io.save(toName, ExtractResidues(ResidueIndexGroup, resList))
In [312]:
def getFrame(frame, outLocation, movieLocation="movie.pdb"):
    """Extract one model (frame) from a multi-model PDB trajectory file.

    frame: 0-based frame index within the movie.
    outLocation: path the extracted frame is written to.
    movieLocation: multi-model PDB file; every model must occupy the same
        number of lines (verified below) or nothing is written.

    Fixes vs original: no IndexError when the file has no MODEL records,
    typo "Someting" corrected, dead `location` alias removed.
    """
    with open(movieLocation) as f:
        lines = f.readlines()
    n = len(lines)
    # Line index of every "MODEL" record; the end-of-file sentinel makes
    # np.diff yield the line count of each model.
    model_title_index_list = [i for i, line in enumerate(lines)
                              if line.startswith("MODEL")]
    if not model_title_index_list:
        print(f"!!!! No MODEL records found in {movieLocation} !!!!")
        return
    model_title_index_list.append(n)
    check_array = np.diff(model_title_index_list)
    if not np.allclose(check_array, check_array[0]):
        # Models of unequal size: fixed-stride slicing would be wrong.
        print("!!!! Something is wrong !!!!")
        print(check_array)
    else:
        size = check_array[0]
        with open(outLocation, "w") as out:
            out.write("".join(lines[size * frame:size * (frame + 1)]))
def get_best_frame_and_extract(pdb, run, step, Q="Q_wat"):
    """Pull the best-Q frame of a run out of its movie.pdb and split the
    extracted structure into globular and membrane PDB files.

    pdb: protein name (directory name on disk).
    run: run index within the simulation folder.
    step: reported step of the best frame; the movie frame used is step - 2
        (presumably the info.dat step counter is offset by 2 — TODO confirm).
    Q: Q metric the frame was selected by; only used in the output path.
    """
    outLocation = f"/Users/weilu/Research/server/jun_2019/simluation_hybrid/sixth_with_er/{Q}_max/{pdb}_best.pdb"
    frame = step - 2
    movieLocation = f'/Users/weilu/Research/server/jun_2019/simluation_hybrid/sixth_with_er/{pdb}/{run}/movie.pdb'
    getFrame(frame, outLocation, movieLocation)
    probFile= f"/Users/weilu/Research/server/jun_2019/simluation_hybrid/TM_pred/{pdb}_PureTM/{pdb}.prob"
    # get_two_part_from_prediction comes from a star import; it splits residue
    # indices into globular and membrane groups from the TM prediction file.
    GlobularPart, MembranePart = get_two_part_from_prediction(probFile)
    if pdb == "2xov_complete":
        # Manual override: the first 63 residues are treated as globular.
        GlobularPart = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62]
    # NOTE(review): split(".")[0] assumes no "." elsewhere in the path.
    fileLocation = outLocation.split(".")[0]
    parser = PDBParser()
    structure = parser.get_structure('X', outLocation)
    extractResidues(structure, f"{fileLocation}_globular.pdb", GlobularPart)
    extractResidues(structure, f"{fileLocation}_membrane.pdb", MembranePart)
In [3]:
# Proteins analysed in this notebook (redefined with 2jo1 further down).
pdb_list =['4a2n', '3kp9', '5xpd', '2xov_complete', '5d91', '6e67A']
# pdb_list = ["2xov_complete", "6e67A", "5xpd", "3kp9", "4a2n", "5d91", "2jo1"]
In [238]:
length_info.drop("index", axis=1)
Out[238]:
In [4]:
# Load per-protein sequence lengths and build tick labels "name\nlength",
# sorted by length.
length_info = pd.read_csv("/Users/weilu/Research/server/jun_2019/simluation_hybrid/length_info.csv", index_col=0)
length_info = length_info.sort_values("Length").reset_index()
pdb_list_sorted_by_length = list(length_info.Protein.unique())
length_info_sorted_by_length = list(length_info.Length.unique())
# NOTE(review): zipping the two .unique() lists assumes each protein has a
# distinct length and that the orders align — TODO confirm.
label_list = []
for p, n in zip(pdb_list_sorted_by_length, length_info_sorted_by_length):
    label_list.append(p+f"\n{n}")
In [266]:
simulationType = "simluation_hybrid"
# folder = "original"
folder = "sixth_with_er"
# Gather info.dat from every run of every protein into one DataFrame and
# dump it (timestamped) for the analysis cells.
all_data = []
for pdb in pdb_list:
    for i in range(2):
        for restart in range(1):
            location = f"/Users/weilu/Research/server/jun_2019/{simulationType}/{folder}/{pdb}/{i}/info.dat"
            try:
                # raw string fixes the invalid "\s" escape sequence
                tmp = pd.read_csv(location, sep=r"\s+")
                tmp = tmp.assign(Run=i, Protein=pdb, Restart=restart)
                all_data.append(tmp)
            except Exception as exc:
                # Best effort: report missing/broken runs and keep going.
                print(pdb, i, restart, exc)
data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/openMM/{simulationType}_{folder}_{today}_er.csv")
In [268]:
# Reload the sixth_with_er results written by the collection cell above.
fileLocation = "/Users/weilu/Research/data/openMM/simluation_hybrid_sixth_with_er_07-02_er.csv"
er_2 = pd.read_csv(fileLocation, index_col=0).reset_index(drop=True)
In [242]:
# Reload the fifth_with_er results (older ER-fragment run).
fileLocation = "/Users/weilu/Research/data/openMM/simluation_hybrid_fifth_with_er_07-01_er.csv"
er = pd.read_csv(fileLocation, index_col=0).reset_index(drop=True)
In [281]:
# Merge the three schemes into one frame tagged by a "Scheme" column.
# NOTE(review): `single` and `ha` are defined in later cells — this notebook
# only runs top-to-bottom after those cells have executed once.
combined = pd.concat([single.assign(Scheme="single"), ha.assign(Scheme="frag(HA)")
# , er.assign(Scheme="ER-frag")
, er_2.assign(Scheme="ER-Frag")], sort=False)
In [311]:
# Best (maximum) Q_mem per protein and scheme, after dropping the first 200
# steps as equilibration. NOTE(review): this cell is duplicated below for
# Q_wat — a plotting function with a `y` parameter would remove the copy-paste.
y = "Q_mem"
d = combined.query("Steps > 200").reset_index(drop=True)
# max_Q_data = d.groupby(["Protein", "Frag"])["Q_wat"].max().reset_index()
d = d.query("Protein != '2jo1'").reset_index(drop=True)
sub_pdb_list =['4a2n', '3kp9', '5xpd', '2xov_complete', '5d91']
# pdb_list =
sub_label_list = []
for p, n in zip(pdb_list_sorted_by_length, length_info_sorted_by_length):
    if p in sub_pdb_list:
        sub_label_list.append(p+f"\n{n}")
# Categorical fixes the x-axis ordering of the plot.
d.Protein = pd.Categorical(d.Protein,
categories=sub_pdb_list)
# idxmax keeps the full row (Run, Steps, ...) of each best frame, not just Q.
t = d.groupby(["Protein", "Scheme"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, hue="Scheme", style="Scheme", markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')
In [284]:
# Same as the previous cell but for Q_wat (copy-paste duplication — see note
# there).
y = "Q_wat"
d = combined.query("Steps > 200").reset_index(drop=True)
# max_Q_data = d.groupby(["Protein", "Frag"])["Q_wat"].max().reset_index()
d = d.query("Protein != '2jo1'").reset_index(drop=True)
sub_pdb_list =['4a2n', '3kp9', '5xpd', '2xov_complete', '5d91']
# pdb_list =
sub_label_list = []
for p, n in zip(pdb_list_sorted_by_length, length_info_sorted_by_length):
    if p in sub_pdb_list:
        sub_label_list.append(p+f"\n{n}")
d.Protein = pd.Categorical(d.Protein,
categories=sub_pdb_list)
t = d.groupby(["Protein", "Scheme"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, hue="Scheme", style="Scheme", markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')
In [258]:
In [286]:
max_Q_data.query("Scheme == 'ER-Frag'")
Out[286]:
In [294]:
In [302]:
# For each protein's best ER-Frag frame (per the Q metric of the last-run
# plotting cell), extract the frame and split it into globular/membrane PDBs.
d = max_Q_data.query("Scheme == 'ER-Frag'")
for i, line in d.iterrows():
    run = line["Run"]
    pdb = line["Protein"]
    step = line["Steps"]
    print(pdb, run, step)
    get_best_frame_and_extract(pdb, run, step)
In [316]:
# Same extraction as above, but files are written under the Q_mem_max folder.
d = max_Q_data.query("Scheme == 'ER-Frag'")
for i, line in d.iterrows():
    run = line["Run"]
    pdb = line["Protein"]
    step = line["Steps"]
    print(pdb, run, step)
    get_best_frame_and_extract(pdb, run, step, Q="Q_mem")
In [314]:
max_Q_data.query("Scheme == 'ER-Frag'")
Out[314]:
In [265]:
max_Q_data
Out[265]:
In [264]:
# Q_wat best values over a reduced 4-protein subset.
# NOTE(review): sub_label_list is reused from an earlier cell, where it has 5
# entries, but sub_pdb_list here has only 4 — the x tick labels are likely
# misaligned. TODO confirm.
y = "Q_wat"
d = combined.query("Steps > 200").reset_index(drop=True)
# max_Q_data = d.groupby(["Protein", "Frag"])["Q_wat"].max().reset_index()
d = d.query("Protein != '2jo1'").query("Protein != '5xpd'").query("Protein != '6e67A'").reset_index(drop=True)
sub_pdb_list =['4a2n', '3kp9', '2xov_complete', '5d91']
d.Protein = pd.Categorical(d.Protein,
categories=sub_pdb_list)
t = d.groupby(["Protein", "Scheme"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, hue="Scheme", style="Scheme", markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')
In [262]:
# Q_mem best values over the same reduced 4-protein subset (duplicate of the
# previous cell; the 5-label vs 4-tick mismatch noted there applies here too).
y = "Q_mem"
d = combined.query("Steps > 200").reset_index(drop=True)
# max_Q_data = d.groupby(["Protein", "Frag"])["Q_wat"].max().reset_index()
d = d.query("Protein != '2jo1'").query("Protein != '5xpd'").query("Protein != '6e67A'").reset_index(drop=True)
sub_pdb_list =['4a2n', '3kp9', '2xov_complete', '5d91']
d.Protein = pd.Categorical(d.Protein,
categories=sub_pdb_list)
t = d.groupby(["Protein", "Scheme"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, hue="Scheme", style="Scheme", markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')
In [7]:
simulationType = "simluation_hybrid"
# folder = "original"
folder = "fourth"
# Gather info.dat for the fragment-memory (HA) runs — 5 runs per protein.
all_data = []
for pdb in pdb_list:
    for i in range(5):
        for restart in range(1):
            location = f"/Users/weilu/Research/server/jun_2019/{simulationType}/{folder}/{pdb}/{i}/info.dat"
            try:
                # raw string fixes the invalid "\s" escape sequence
                tmp = pd.read_csv(location, sep=r"\s+")
                tmp = tmp.assign(Run=i, Protein=pdb, Restart=restart)
                all_data.append(tmp)
            except Exception as exc:
                # Best effort: report missing/broken runs and keep going.
                print(pdb, i, restart, exc)
data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/openMM/{simulationType}_{folder}_{today}_ha.csv")
In [8]:
# Reload the fragment-memory (HA) results written by the cell above.
fileLocation = "/Users/weilu/Research/data/openMM/simluation_hybrid_fourth_07-01_ha.csv"
ha = pd.read_csv(fileLocation, index_col=0)
In [9]:
# Reload the single-memory baseline results.
fileLocation = "/Users/weilu/Research/data/openMM/simluation_hybrid_second_small_batch_06-29.csv"
single = pd.read_csv(fileLocation, index_col=0)
In [17]:
combined = pd.concat([single.assign(Frag="single"), ha.assign(Frag="frag(HA)")])
In [23]:
# Best Q_wat per protein for the single vs frag(HA) comparison.
# NOTE(review): label_list[1:] presumably skips the shortest protein (2jo1)
# absent from this pdb_list ordering — TODO confirm alignment.
d = combined.query("Steps > 200").reset_index(drop=True)
d.Protein = pd.Categorical(d.Protein,
categories=pdb_list)
# max_Q_data = d.groupby(["Protein", "Frag"])["Q_wat"].max().reset_index()
t = d.groupby(["Protein", "Frag"])["Q_wat"].idxmax().reset_index()
max_Q_data = d.iloc[t["Q_wat"].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y="Q_wat", hue="Frag", style="Frag", markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=label_list[1:], rotation=0, ha='center')
In [24]:
# Same comparison as above for Q_mem (copy-paste duplication).
y = "Q_mem"
d = combined.query("Steps > 200").reset_index(drop=True)
d.Protein = pd.Categorical(d.Protein,
categories=pdb_list)
# max_Q_data = d.groupby(["Protein", "Frag"])["Q_wat"].max().reset_index()
t = d.groupby(["Protein", "Frag"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, hue="Frag", style="Frag", markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=label_list[1:], rotation=0, ha='center')
In [ ]:
simulationType = "simluation_hybrid"
# folder = "original"
folder = "fifth_with_er"
# Gather info.dat for the fifth_with_er runs — 2 runs per protein.
all_data = []
for pdb in pdb_list:
    for i in range(2):
        for restart in range(1):
            location = f"/Users/weilu/Research/server/jun_2019/{simulationType}/{folder}/{pdb}/{i}/info.dat"
            try:
                # raw string fixes the invalid "\s" escape sequence
                tmp = pd.read_csv(location, sep=r"\s+")
                tmp = tmp.assign(Run=i, Protein=pdb, Restart=restart)
                all_data.append(tmp)
            except Exception as exc:
                # Best effort: report missing/broken runs and keep going.
                print(pdb, i, restart, exc)
data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/openMM/{simulationType}_{folder}_{today}_er.csv")
In [ ]:
# Scatter each protein's sampled Fragment energy vs Q_wat, with horizontal
# lines marking the native-structure energy (Steps < 1) per scheme.
plt.rcParams.update({'font.size': 12})
native_energy = combined.query("Steps < 1 and Run == 0").reset_index(drop=True)
y_show = "Fragment"
g = sns.FacetGrid(combined.query("Steps > 100"), col="Protein",col_wrap=2, hue="Frag", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Q_wat", y_show, alpha=0.5).add_legend())
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'multi_iter0_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="blue", linewidth=4)
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'original_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="orange", linewidth=4)
for ax in g.axes:
    # The facet title ends with the protein name.
    name= ax.title.get_text().split(" ")[-1]
    # print(name)
    energy = native_energy.query(f"Protein == '{name}'")[y_show].iloc[0]
    ax.axhline(energy, ls="--", color="blue", linewidth=4)
    try:
        # Second native row exists only when both schemes have a native frame.
        energy = native_energy.query(f"Protein == '{name}'")[y_show].iloc[1]
        ax.axhline(energy, ls="--", color="orange", linewidth=4)
    except:
        # NOTE(review): bare except — an IndexError is expected here, but any
        # other error is silently swallowed too.
        pass
In [25]:
pdb_list = ["2xov_complete", "6e67A", "5xpd", "3kp9", "4a2n", "5d91", "2jo1"]
In [223]:
# Per-protein confusion matrix of true membrane placement (CA |z| < 15 from
# the PDB) vs the TM prediction, printed as a small aligned table.
pre = "/Users/weilu/Research/server/jun_2019/simluation_hybrid"
for pdb in pdb_list:
    location = f"{pre}/setup/{pdb}/{pdb}.pdb"
    table = get_inside_or_not_table(location)
    probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
    predict_table = get_inside_or_not_table_from_TM_pred(probFile)
    cm = confusion_matrix(table, predict_table)
    print(f"{pdb:^20s}", "{:^10s}".format("pred_0"), "{:^10s}".format("pred_1"))
    print("{:^20s}".format("true_0"), f"{cm[0][0]:^10d}", f"{cm[0][1]:^10d}")
    print("{:^20s}".format("true_1"), f"{cm[1][0]:^10d}", f"{cm[1][1]:^10d}")
    print("")
In [ ]:
In [31]:
def get_inside_or_not_table_from_TM_pred(probFile):
    """Read a TM-prediction .prob file and return one 0/1 flag per residue:
    1 when the predicted probability (4th whitespace column) is >= 0.5.

    The first three lines of the file are header and are skipped.
    """
    with open(probFile) as handle:
        rows = handle.readlines()
    # Column index 3 holds the membrane probability; threshold it at 0.5.
    return [0 if float(row.split()[3]) < 0.5 else 1 for row in rows[3:]]
In [224]:
def magnify():
    """CSS rules for pandas ``Styler.set_table_styles`` that shrink the
    table to a heat-map and enlarge cells under the mouse cursor."""
    tiny_headers = dict(selector="th", props=[("font-size", "4pt")])
    tight_cells = dict(selector="td", props=[("padding", "0em 0em")])
    hover_headers = dict(selector="th:hover", props=[("font-size", "12pt")])
    hover_cells = dict(selector="tr:hover td:hover",
                       props=[("max-width", "200px"), ("font-size", "12pt")])
    return [tiny_headers, tight_cells, hover_headers, hover_cells]
In [170]:
from sklearn.metrics import confusion_matrix
In [172]:
cm = confusion_matrix(table, predict_table)
In [183]:
t = pd.DataFrame(cm, columns=["pred_0", "pred_1"], index=["true_0", "true_1"])
In [221]:
print(f"{pdb:^20s}", "{:^10s}".format("pred_0"), "{:^10s}".format("pred_1"))
print("{:^20s}".format("true_0"), f"{cm[0][0]:^10d}", f"{cm[0][1]:^10d}")
print("{:^20s}".format("true_1"), f"{cm[1][0]:^10d}", f"{cm[1][1]:^10d}")
In [ ]:
In [227]:
# Compare true membrane placement (from CA z-coordinates) with the TM
# prediction for pdb_list[0], rendered as a two-row colour-coded table.
# NOTE(review): this cell is copy-pasted six times with only the index
# changing — a function with a `pdb` argument would remove the duplication.
pdb = pdb_list[0]
print(pdb)
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
d = pd.DataFrame([table, predict_table])
bigdf = d
# NOTE(review): `cmap = cmap=...` is a doubled assignment (harmless typo).
cmap = cmap=sns.diverging_palette(5, 250, as_cmap=True)
a = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
a
Out[227]:
In [232]:
# Duplicate of the visualization cell above, for pdb_list[1].
pdb = pdb_list[1]
print(pdb)
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
d = pd.DataFrame([table, predict_table])
bigdf = d
cmap = cmap=sns.diverging_palette(5, 250, as_cmap=True)
a = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
a
Out[232]:
In [233]:
# Duplicate of the visualization cell above, for pdb_list[2].
pdb = pdb_list[2]
print(pdb)
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
d = pd.DataFrame([table, predict_table])
bigdf = d
cmap = cmap=sns.diverging_palette(5, 250, as_cmap=True)
a = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
a
Out[233]:
In [234]:
# Duplicate of the visualization cell above, for pdb_list[3].
pdb = pdb_list[3]
print(pdb)
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
d = pd.DataFrame([table, predict_table])
bigdf = d
cmap = cmap=sns.diverging_palette(5, 250, as_cmap=True)
a = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
a
Out[234]:
In [235]:
# Duplicate of the visualization cell above, for pdb_list[4].
pdb = pdb_list[4]
print(pdb)
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
d = pd.DataFrame([table, predict_table])
bigdf = d
cmap = cmap=sns.diverging_palette(5, 250, as_cmap=True)
a = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
a
Out[235]:
In [236]:
# Duplicate of the visualization cell above, for pdb_list[5].
pdb = pdb_list[5]
print(pdb)
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
d = pd.DataFrame([table, predict_table])
bigdf = d
cmap = cmap=sns.diverging_palette(5, 250, as_cmap=True)
a = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
a
Out[236]:
In [146]:
# One-off styled rendering of the current bigdf (scratch cell).
# NOTE(review): Styler.set_precision was removed in pandas 2.0 (use
# .format(precision=...)); `cmap = cmap=` is a doubled assignment.
cmap = cmap=sns.diverging_palette(5, 250, as_cmap=True)
bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
Out[146]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [145]:
# import imgkit
cmap = cmap=sns.diverging_palette(5, 250, as_cmap=True)
bigdf = d
styled_table = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '80px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
with open ('/Users/weilu/Desktop/out.html','w') as out:
html = styled_table.render()
out.write(html)
In [165]:
# Print the true and predicted membrane flags as two compact digit strings
# for side-by-side eyeballing.
for i in table:
    print(i, end="")
print("")
for i in predict_table:
    print(i, end="")
In [162]:
d
Out[162]:
In [104]:
d.columns = [""] * 72
In [102]:
s.hide_columns([0,1])
Out[102]:
In [85]:
d
Out[85]:
In [76]:
pd.get_option("display.max_rows")
Out[76]:
In [77]:
pd.get_option("display.max_columns")
Out[77]:
In [49]:
def color_negative_red(val):
    """Pandas Styler helper: return a CSS rule colouring membrane-flagged
    cells (``val == 1``) red and everything else black.

    Note: the original docstring (copied from the pandas styling demo)
    claimed "red for negative" values, but the condition here is ``val == 1``.
    """
    color = 'red' if val == 1 else 'black'
    return f'color: {color}'
In [37]:
print().values)
In [39]:
print(table)
print(predict_table)
In [318]:
In [320]:
In [323]:
In [335]:
pdb = "4rws"
# Convert a TM-topology prediction (three lines per chain: header, sequence,
# 0/1 topology string) into a "zim" file: "1" per water-exposed residue,
# "2" per membrane residue.
pre = "/Users/weilu/Research/server/jul_2019/hybrid_simulation"  # fixed: closing quote was missing
loc = f"{pre}/TM_pred/{pdb}_topo"
with open(loc) as f:
    a = f.readlines()
assert len(a) % 3 == 0
chain_count = len(a) // 3
seq = ""
for i in range(chain_count):
    # Third line of each chain block is the 0/1 topology string.
    seq_i = (a[i*3+2]).strip()
    seq += seq_i
# Every character must be a 0/1 flag (np.all replaces removed np.alltrue).
assert np.all([i in ["0", "1"] for i in seq])
with open(f"{pre}/TM_pred/{pdb}_predicted_zim", "w") as out:
    for i in seq:
        if i == "0":
            out.write("1\n")
        elif i == "1":
            out.write("2\n")
        else:
            # fixed: bare `raise` outside an except block raises RuntimeError
            raise ValueError(f"unexpected topology flag {i!r}")
In [339]:
# Build the forces-setup replacements for this pdb: helix residue groups
# (runs of "1" in the topology string `seq`), the globular region (long "0"
# stretches trimmed by 10 residues each side), the membrane region, and a
# predicted zim file; then substitute the placeholders in the template.
force_setup_file = f"{pre}/energy_forces/forces_setup_{pdb}.py"
res_list = []
first = None
count = 1
previousEnd = 0
globular = []  # fixed: was unbound when no globular stretch (>30 res) exists
# print("g_all = [")
zimOut = open(f"{pre}/{pdb}_predicted_zim", "w")
out = "[\n"
for i, res in enumerate(seq):
    # zim code: "2" for membrane ("1" in topology), "1" for water.
    o = "2" if res == "1" else "1"
    zimOut.write(o+"\n")
    if res == "0":
        if len(res_list) > 0:
            # A membrane helix just ended; record its residue indices.
            # print(f"g{count} =", res_list)
            print(res_list, ", ")
            out += f" {res_list},\n"
            count += 1
            last = res_list[-1]
            first = res_list[0] if first is None else first
            span = res_list[0] - previousEnd
            if span > 30:
                print(f"{pdb} Globular", previousEnd, res_list[0])
                globular = list(range(previousEnd+10, res_list[0]-10))
            previousEnd = last
            res_list = []
    if res == "1":
        res_list.append(i)
n = len(seq)
print(f"{pdb}: size {n}")
span = n - previousEnd
if span > 30:
    print(f"{pdb} Globular", previousEnd, n)
    globular = list(range(previousEnd+10, n-10))
out += "]\n"
zimOut.close()
# fixed: "{pred}" was a typo for "{pre}" (NameError at runtime before).
do(f"cp {pre}/TM_pred/{pdb}_predicted_zim {pre}/setup/{pdb}/PredictedZim")
membranePart = []
for i in range(first-5, last+5):
    if i not in globular:
        membranePart.append(i)
# print("]")
# replace(, "GALL", out)
# , backup='.bak'
# print(out, first, last, membranePart, globular)
with fileinput.FileInput(force_setup_file, inplace=True) as file:
    for line in file:
        tmp = line.replace("GALL", out).replace("FIRST", str(first)).replace("LAST", str(last))
        tmp = tmp.replace("RESMEMB", f"{membranePart}")
        tmp = tmp.replace("RESGLOBULAR", f"{globular}")
        print(tmp, end='')
In [ ]:
In [ ]:
def get_inside_or_not_table(pdb_file):
    """Return, for each standard residue in the PDB file, 1 if its CA atom
    lies within |z| < 15 (inside the implicit membrane slab), else 0.

    Returns [0] as an error sentinel when the file cannot be parsed or a
    residue lacks a CA atom.
    """
    parser = PDBParser(PERMISSIVE=1,QUIET=True)
    try:
        structure = parser.get_structure('X', pdb_file)
    except:
        # NOTE(review): bare except hides the parse error, and the [0]
        # sentinel is indistinguishable from a genuine one-residue result.
        return [0]
    inside_or_not_table = []
    for res in structure.get_residues():
        if res.get_id()[0] != " ":
            continue # skip hetero/water residues (non-blank hetfield)
        try:
            res["CA"].get_vector()
        except:
            print(pdb_file, res.get_id())
            return [0]
        # The z-coordinate is the last component of the CA vector.
        inside_or_not_table.append(int(abs(res["CA"].get_vector()[-1]) < 15))
    return inside_or_not_table
In [ ]:
# NOTE(review): scratch cell — `pdb` here is the protein name ("4rws"), not
# a file path; get_structure will fail unless a file of that name exists in
# the current working directory.
parser = PDBParser(QUIET=1)
structure = parser.get_structure('X', pdb)