In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import matplotlib.pyplot as plt
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
from small_script.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180) #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
plt.rcParams['figure.figsize'] = [16.18033, 10] #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
In [3]:
from Bio.PDB import *
class ExtractResidues(Select):
def __init__(self, ResidueIndexGroup, resList):
super(ExtractResidues, self).__init__()
self.ResidueIndexGroup = ResidueIndexGroup
self.resList = resList
    def accept_residue(self, residue):
        return self.resList.index(residue) in self.ResidueIndexGroup
def extractResidues(structure, toName, ResidueIndexGroup):
resList = list(structure.get_residues())
io = PDBIO()
io.set_structure(structure)
io.save(toName, ExtractResidues(ResidueIndexGroup, resList))
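# A minimal usage sketch for extractResidues; the file names and residue
# indices below are illustrative, not from an actual run.
# parser = PDBParser()
# structure = parser.get_structure('X', "input.pdb")
# extractResidues(structure, "first_ten.pdb", list(range(10)))  # keep residues 0-9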
In [4]:
def getFrame(frame, outLocation, movieLocation="movie.pdb"):
location = movieLocation
with open(location) as f:
a = f.readlines()
n = len(a)
# get the position of every model title
model_title_index_list = []
for i in range(n):
if len(a[i]) >= 5 and a[i][:5] == "MODEL":
model_title_index = i
model_title_index_list.append(model_title_index)
model_title_index_list.append(n)
check_array = np.diff(model_title_index_list)
if not np.allclose(check_array, check_array[0]):
print("!!!! Someting is wrong !!!!")
print(check_array)
else:
size = check_array[0]
with open(outLocation, "w") as out:
out.write("".join(a[size*frame:size*(frame+1)]))
def get_best_frame_and_extract(pdb, run, step, Q="Q_wat"):
outLocation = f"/Users/weilu/Research/server/jun_2019/simluation_hybrid/sixth_with_er/{Q}_max/{pdb}_best.pdb"
frame = step - 2
movieLocation = f'/Users/weilu/Research/server/jun_2019/simluation_hybrid/sixth_with_er/{pdb}/{run}/movie.pdb'
getFrame(frame, outLocation, movieLocation)
probFile= f"/Users/weilu/Research/server/jun_2019/simluation_hybrid/TM_pred/{pdb}_PureTM/{pdb}.prob"
GlobularPart, MembranePart = get_two_part_from_prediction(probFile)
if pdb == "2xov_complete":
        GlobularPart = list(range(63))  # residues 0-62, as in the original explicit list
fileLocation = outLocation.split(".")[0]
parser = PDBParser()
structure = parser.get_structure('X', outLocation)
extractResidues(structure, f"{fileLocation}_globular.pdb", GlobularPart)
extractResidues(structure, f"{fileLocation}_membrane.pdb", MembranePart)
In [5]:
pdb_list =['4a2n', '3kp9', '5xpd', '2xov_complete', '5d91', '6e67A']
# pdb_list = ["2xov_complete", "6e67A", "5xpd", "3kp9", "4a2n", "5d91", "2jo1"]
pdb_list = ["2xov_complete", "6e67A", "5xpd", "3kp9", "4a2n", "5d91", "4nv6", "4p79", "5dsg", "6g7o", "6a93", "2jo1", "1py6", "1pv6", "1u19"]
pdb_list = ["1fs3"]
In [22]:
simulationType = "cys_proteins"
# folder = "original"
# folder = "first"
# folder = "second_withoutExclusion"
# folder_list = ["first", "second_withoutExclusion"]
folder_list = ["second_single_mem_cys_again", "second_cys", "second_no_cys"]
all_data = []
for folder in folder_list:
for pdb in pdb_list:
for i in range(20):
pre = f"/Users/weilu/Research/server/aug_2019/{simulationType}/{folder}/{i}"
location = f"{pre}/info_recompute.dat"
try:
tmp = pd.read_csv(location, sep="\s+")
tmp = tmp.assign(Run=i, Protein=pdb, Folder=folder)
all_data.append(tmp)
except:
print(pdb, i, folder)
pass
data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)
In [24]:
data = pd.read_csv("/Users/weilu/Research/data/openMM/cys_proteins_08-13.csv",index_col=0)
In [25]:
data.shape
Out[25]:
In [12]:
data.tail()
Out[12]:
In [98]:
last = data.query("Steps > 350").query("Folder != 'second_single_mem_cys_again'")
In [53]:
# reorder run
Q_max.shape
Out[53]:
In [105]:
plt.rcParams['figure.figsize'] = 0.5*np.array([16.18033, 10]) #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100
plt.rcParams.update({'font.size': 22})
In [106]:
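# per (Folder, Run): keep the frame with the highest Q, then order runs by that best Q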
Q_max = last.sort_values('Q').groupby(["Folder", "Run"]).tail(1).sort_values(['Folder', "Q"])
In [109]:
Q_max["Annealing Index"] = list(range(20, 0, -1))*2
In [189]:
convert_dic = {"second_cys":"Single_memory_cys", "second_no_cys":"Single_memory_no_cys"}
Q_max["Scheme"] = Q_max["Folder"].apply(lambda x: convert_dic[x])
In [195]:
sns.lineplot("Annealing Index", "Q", hue="Scheme", markers=True, data=Q_max, style="Scheme", dashes=False)
from matplotlib.ticker import FuncFormatter
# plt.gca().xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
_ = plt.xticks(np.arange(1, 20+1, 1))
plt.ylabel("Best Q")
Out[195]:
In [122]:
sns.lineplot("Annealing Index", "Q", hue="Folder", markers=True, data=Q_max, style="Folder")
from matplotlib.ticker import FuncFormatter
# plt.gca().xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
_ = plt.xticks(np.arange(1, 20+1, 1))
plt.ylabel("Best Q")
Out[122]:
In [167]:
def get_contactFromDMP(fileLocation, n, threshold=0.2):
a = np.zeros((n,n))
c_list = []
with open(fileLocation, "r") as f:
# for i in range(9):
# next(f)
for line in f:
# print(line)
try:
i,j,_,_,_,p = line.split(" ")
# print(i,j,p)
a[int(i)-1,int(j)-1] = float(p)
a[int(j)-1,int(i)-1] = float(p)
if float(p) > threshold:
c_list.append([int(i),int(j),float(p)])
except Exception as e:
print(e)
pass
return a, np.array(c_list)
from Bio.PDB.PDBParser import PDBParser
def getContactMapFromPDB(pdbFile):
cutoff = 9.5
MAX_OFFSET = 6
parser = PDBParser()
structure = parser.get_structure('target', pdbFile)
all_residues = list(structure.get_residues())
tmp = []
for res in all_residues:
# print(res.id)
if res.id[0] == ' ':
tmp.append(res)
all_residues = tmp
n = len(all_residues)
contact_table = np.zeros((n,n))
# print(all_residues, n)
for i, res1 in enumerate(all_residues):
for j, res2 in enumerate(all_residues):
contact_table[i][j] = res1["CA"]-res2["CA"]
data = (contact_table < cutoff)
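    # mask out trivial short-range contacts: zero the band |i - j| < MAX_OFFSET around the diagonal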
remove_band = np.eye(n)
for i in range(1, MAX_OFFSET):
remove_band += np.eye(n, k=i)
remove_band += np.eye(n, k=-i)
data[remove_band==1] = 0
return data
def convertDMPToInput(pdbID, dmp_file, fasta_file):
# pdbID = "2xov_complete_2"
# read in median distances for pairwise interactions (obtained from analysis of the pdb)
directory='/Users/weilu/opt/gremlin/'
distancesCACB=pd.read_csv(directory+'CACBmediandist.dat', delim_whitespace=True, header=None)
distancesCACA=pd.read_csv(directory+'CACAmediandist.dat', delim_whitespace=True, header=None)
distancesCBCB=pd.read_csv(directory+'CBCBmediandist.dat', delim_whitespace=True, header=None)
distancesCACB.columns = ['i', 'j', 'dist']
distancesCACA.columns = ['i', 'j', 'dist']
distancesCBCB.columns = ['i', 'j', 'dist']
# if you want to filter the gremlin data, adjust the parameters below
filter_threshold=0.5
column=2
seq = ""
with open(fasta_file) as f:
for line in f:
if line[0] == ">":
continue
seq += line.strip()
# seq
n=len(seq)
_, dmp_pairs = get_contactFromDMP(dmp_file, n=n)
# print(n)
rnative_matrixCACB=np.ones([n,n])*99
rnative_matrixCACA=np.ones([n,n])*99
rnative_matrixCBCB=np.ones([n,n])*99
for pair in dmp_pairs:
i=int(pair[0])
j=int(pair[1])
irestype=seq[i-1]
jrestype=seq[j-1]
if float(pair[column]) > filter_threshold:
if sum((distancesCACB['i']==irestype)&(distancesCACB['j']==jrestype))>0: #check if pair is in correct order
well_centerCACB = distancesCACB[(distancesCACB['i']==irestype)&(distancesCACB['j']==jrestype)]['dist'].values[0]
well_centerCACA = distancesCACA[(distancesCACA['i']==irestype)&(distancesCACA['j']==jrestype)]['dist'].values[0]
well_centerCBCB = distancesCBCB[(distancesCBCB['i']==irestype)&(distancesCBCB['j']==jrestype)]['dist'].values[0]
else:
well_centerCACB = distancesCACB[(distancesCACB['i']==jrestype)&(distancesCACB['j']==irestype)]['dist'].values[0]
well_centerCACA = distancesCACA[(distancesCACA['i']==jrestype)&(distancesCACA['j']==irestype)]['dist'].values[0]
well_centerCBCB = distancesCBCB[(distancesCBCB['i']==jrestype)&(distancesCBCB['j']==irestype)]['dist'].values[0]
rnative_matrixCACB[i-1, j-1] = well_centerCACB
rnative_matrixCACB[j-1, i-1] = well_centerCACB
rnative_matrixCACA[i-1, j-1] = well_centerCACA
rnative_matrixCACA[j-1, i-1] = well_centerCACA
rnative_matrixCBCB[i-1, j-1] = well_centerCBCB
rnative_matrixCBCB[j-1, i-1] = well_centerCBCB
import matplotlib.pyplot as plt
    plt.imshow(rnative_matrixCACB, origin="lower")
# plt.show()
fig = plt.gcf()
directory = "/Users/weilu/opt/gremlin/protein/" + pdbID + "/DMP/"
os.system("mkdir -p " + directory)
figureDirectory = f"{directory}/contact.png"
fig.savefig(figureDirectory)
os.system(f"cp {dmp_file} {directory}")
os.system(f"cp {fasta_file} {directory}")
np.savetxt(directory + 'go_rnativeCACB.dat', rnative_matrixCACB, fmt='%10.5f')
np.savetxt(directory + 'go_rnativeCACA.dat', rnative_matrixCACA, fmt='%10.5f')
np.savetxt(directory + 'go_rnativeCBCB.dat', rnative_matrixCBCB, fmt='%10.5f')
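# For reference, a toy line in the format get_contactFromDMP expects: six
# space-separated fields, residue indices first and the contact probability
# last (layout inferred from the unpacking in that function, not from the
# DeepMetaPSICOV docs).
# line = "12 48 0 8 - 0.91"
# i, j, _, _, _, p = line.split(" ")   # -> i=12, j=48, p=0.91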
In [168]:
pdbID = "1fs3"
fasta_file = f"/Users/weilu/Research/server/aug_2019/predict_contact_map/{pdbID}.fasta"
DMP_file = f"/Users/weilu/Research/server/aug_2019/predict_contact_map/{pdbID}.deepmetapsicov.con"
convertDMPToInput(pdbID, DMP_file, fasta_file)
In [150]:
seq = ""
with open("/Users/weilu/Research/server/aug_2019/predict_contact_map/1fs3.fasta") as f:
for line in f:
if line[0] == ">":
continue
seq += line.strip()
seq
n = len(seq)
dmp, dmp_pairs = get_contactFromDMP("/Users/weilu/Research/server/aug_2019/predict_contact_map/1fs3.deepmetapsicov.con", n=n)
Out[150]:
In [174]:
plt.imshow(crystal, origin="lower")
Out[174]:
In [180]:
fileName = f"/Users/weilu/Research/server/aug_2019/cys_proteins/ninth_cys/0/lastFrame.pdb"
last_frame_debug = getContactMapFromPDB(fileName)
plt.imshow(last_frame_debug, origin="lower")
Out[180]:
In [175]:
fileName = f"/Users/weilu/Research/server/aug_2019/cys_proteins/fifth_HA_frag_cys/0/lastFrame.pdb"
last_frame_debug = getContactMapFromPDB(fileName)
In [176]:
plt.imshow(last_frame_debug, origin="lower")
Out[176]:
In [127]:
fileName = f"/Users/weilu/Research/server/aug_2019/cys_proteins/setup/1fs3/1fs3.pdb"
crystal = getContactMapFromPDB(fileName)
n = crystal.shape[0]
dmp, _ = get_contactFromDMP(f"/Users/weilu/Research/server/aug_2019/predict_contact_map/1fs3.deepmetapsicov.con", n=n)
In [146]:
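# overlay encoding: 0 = neither (white), 1 = crystal only (red), 2 = DMP only (blue), 3 = both (lime)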
combined = (crystal>0.5).astype(int) + 2*(dmp>0.3).astype(int)
from matplotlib import colors
cmap = colors.ListedColormap(['white', 'red', 'blue', 'lime'])
bounds=[-1,0.01, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="lower", cmap=cmap, norm=norm)
Out[146]:
In [173]:
combined = (crystal>0.5).astype(int) + 2*(dmp>0.8).astype(int)
from matplotlib import colors
cmap = colors.ListedColormap(['white', 'red', 'blue', 'lime'])
bounds=[-1,0.01, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="lower", cmap=cmap, norm=norm)
Out[173]:
In [183]:
combined = (crystal>0.5).astype(int) + 2*(last_frame_debug>0.5).astype(int)
from matplotlib import colors
cmap = colors.ListedColormap(['white', 'red', 'blue', 'lime'])
bounds=[-1,0.01, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="lower", cmap=cmap, norm=norm)
Out[183]:
In [182]:
combined = (dmp>0.5).astype(int) + 2*(last_frame_debug>0.5).astype(int)
from matplotlib import colors
cmap = colors.ListedColormap(['white', 'red', 'blue', 'lime'])
bounds=[-1,0.01, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="lower", cmap=cmap, norm=norm)
Out[182]:
In [137]:
combined = (crystal>0.5).astype(int) + 2*(dmp>0.5).astype(int)
from matplotlib import colors
cmap = colors.ListedColormap(['white', 'red', 'blue', 'lime'])
bounds=[-1,0.01, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="lower", cmap=cmap, norm=norm)
Out[137]:
In [179]:
combined = (crystal>0.5).astype(int) + 2*(dmp>0.8).astype(int)
from matplotlib import colors
cmap = colors.ListedColormap(['white', 'red', 'blue', 'lime'])
bounds=[-1,0.01, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="lower", cmap=cmap, norm=norm)
Out[179]:
In [136]:
combined = (crystal>0.5).astype(int) + 2*(dmp>0.2).astype(int)
from matplotlib import colors
cmap = colors.ListedColormap(['white', 'red', 'blue', 'lime'])
bounds=[-1,0.01, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="lower", cmap=cmap, norm=norm)
Out[136]:
In [28]:
sns.boxplot("Folder", "Q",data=last)
Out[28]:
In [27]:
sns.scatterplot("Q", "Disulfide", data=last, hue="Run")
Out[27]:
In [20]:
sns.scatterplot("Q", "Disulfide", data=data, hue="Run")
Out[20]:
In [19]:
sns.scatterplot("Q", "Contact", data=data, hue="Run")
Out[19]:
In [16]:
sns.scatterplot("Q", "Total", data=data, hue="Run")
Out[16]:
In [10]:
from bokeh.io import output_notebook, show
output_notebook()
In [23]:
from bokeh.plotting import figure, show, output_file
# from bokeh.sampledata.iris import flowers
from bokeh.palettes import Inferno256
from bokeh.transform import factor_cmap
x = "Q"
y = "Total"
p = figure(title = "Iris Morphology")
p.xaxis.axis_label = x
p.yaxis.axis_label = y
p.circle("Q", "Total", fill_alpha=0.2, size=10, fill_color=factor_cmap('Run', palette=Inferno256, factors=["1", "2"]),
source=data)
show(p)
In [17]:
small = data.query("Steps % 100 == 0")
In [25]:
# create label
pdb_list_sorted_by_length = ['2jo1', '4p79', '4a2n', '1py6', '3kp9', '4nv6', '5xpd', '2xov', '5d91', '1u19', '6g7o', '6a93', '5dsg', '1pv6', '6e67A']
length_info_sorted_by_length = length_info["Length"].tolist()
sub_label_list = []
for p, n in zip(pdb_list_sorted_by_length, length_info_sorted_by_length):
sub_label_list.append(p+f"\n{n}")
In [22]:
sub_pdb_list = length_info["Protein"].tolist()
data.Protein = pd.Categorical(data.Protein,
categories=sub_pdb_list)
In [26]:
y = "Q_mem"
d = data.query("Steps > 200").reset_index(drop=True)
t = d.groupby(["Protein"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')
In [27]:
y = "Q_wat"
d = data.query("Steps > 200").reset_index(drop=True)
t = d.groupby(["Protein"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')
In [50]:
y = "Q_wat"
d = data.query("Steps > 200").reset_index(drop=True)
t = d.groupby(["Protein"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')
In [5]:
# check time spent
simulationType = "hybrid_protein_simulation"
# folder = "original"
# folder = "first"
# folder = "second_withoutExclusion"
# folder_list = ["first", "second_withoutExclusion"]
folder_list = ["third"]
all_data = []
for folder in folder_list:
for pdb in pdb_list:
for i in range(20):
try:
location = f"/Users/weilu/Research/server/aug_2019/{simulationType}/{folder}/{pdb}/{i}/time.dat"
tmp = np.loadtxt(location)
except Exception as e:
print(e)
tmp = -1
all_data.append([pdb, i, float(tmp), folder])
timeData = pd.DataFrame(all_data, columns=["Protein", "i", "time", "Folder"])
timeData = timeData.merge(length_info, on="Protein")
In [68]:
timeData["Platform"] = timeData["i"].apply(lambda x: "CUDA" if x > 15 else "OpenCL")
In [12]:
timeData["Time (h)"] = timeData.time / 3600
timeData = timeData.query("time != -1").reset_index(drop=True)
from sklearn.linear_model import LinearRegression
timeData["Time (min)"] = timeData.time / 60
X = timeData.Length.values.reshape(-1, 1)
y = timeData["Time (min)"].values
reg = LinearRegression().fit(X, y)
sns.regplot("Length", "Time (min)", data=timeData, ci=0)
# sns.lmplot("Length", "Time (min)", data=timeData, ci=0, hue="CUDA")
sns.scatterplot("Length", "Time (min)", data=timeData, ci=0, hue="Platform")
plt.title(f"y = {reg.coef_[0]:.1}*x + {reg.intercept_:.2f}")
# x = np.linspace(0, 500)
# plt.plot(x, x*reg.coef_[0] + reg.intercept_)
Out[12]:
In [7]:
from bokeh.io import output_notebook, show
output_notebook()
In [77]:
# 400 residues, 8 million steps, 3 hours.
Out[77]:
In [76]:
(0.34*400+48.37) / 8
Out[76]:
In [78]:
(1432*0.34+50)/2
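# Reading this and the previous cell (my interpretation; the notebook only
# notes "400 residues, 8 million steps, 3 hours"): with the fitted line
# Time(min) = 0.34*Length + intercept, 0.34*400 + 48.37 ~ 184 min ~ 3 h for a
# 400-residue run, and /8 converts the 8-million-step run to about 23 min per
# million steps; the /2 here would be the same conversion for a 2-million-step run.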
Out[78]:
In [74]:
from bokeh.palettes import Spectral5
from bokeh.models import ColumnDataSource
from bokeh.transform import factor_cmap
import bokeh
from bokeh.plotting import figure, show, output_file
p = figure(title = f"y = {reg.coef_[0]:.2}*x + {reg.intercept_:.2f}", plot_height=600, plot_width=970)
p.circle(x='Length', y='Time (min)',
source=timeData, fill_color=factor_cmap('Platform', palette=['Red', 'Blue'], factors=["OpenCL", "CUDA"]),
size=10, legend='Platform')
# output_file("iris.html", title="iris.py example")
x = np.linspace(0, 500)
p.line(x, x*reg.coef_[0] + reg.intercept_)
show(p)
In [6]:
# time check
simulationType = "hybrid_protein_simulation"
# folder = "original"
# folder = "first"
# folder = "second_withoutExclusion"
# folder_list = ["first", "second_withoutExclusion"]
folder_list = ["third"]
all_data = []
for folder in folder_list:
for pdb in pdb_list:
for i in range(4):
try:
location = f"/Users/weilu/Research/server/aug_2019/{simulationType}/{folder}/{pdb}/{i}/time.dat"
tmp = np.loadtxt(location)
except Exception as e:
print(e)
tmp = -1
all_data.append([pdb, i, float(tmp), folder])
timeData = pd.DataFrame(all_data, columns=["Protein", "i", "time", "Folder"])
In [15]:
timeData.dtypes
Out[15]:
In [19]:
timeData["Time (h)"] = timeData.time / 3600
timeData = timeData.query("time != -1").reset_index(drop=True)
timeData = timeData.merge(length_info, on="Protein")
from sklearn.linear_model import LinearRegression
timeData["Time (min)"] = timeData.time / 60
X = timeData.Length.values.reshape(-1, 1)
y = timeData["Time (min)"].values
reg = LinearRegression().fit(X, y)
sns.regplot("Length", "Time (min)", data=timeData, ci=0)
plt.title(f"y = {reg.coef_[0]:.1}*x + {reg.intercept_:.2f}")
# x = np.linspace(0, 500)
# plt.plot(x, x*reg.coef_[0] + reg.intercept_)
In [69]:
sns.regplot("Length", "Time (min)", data=timeData, ci=0)
plt.title(f"y = {reg.coef_[0]:.1}*x + {reg.intercept_:.2f}")
# x = np.linspace(0, 500)
# plt.plot(x, x*reg.coef_[0] + reg.intercept_)
Out[69]:
In [31]:
sns.scatterplot("Length", "Time (h)", data=timeData)
Out[31]:
In [20]:
sns.scatterplot("Protein", "Time (h)", data=)
Out[20]:
In [7]:
length_info = pd.read_csv("/Users/weilu/Research/server/aug_2019/hybrid_protein_simulation/length_info.csv", index_col=0)
length_info = length_info.sort_values("Length").reset_index()
pdb_list_sorted_by_length = list(length_info.Protein.unique())
length_info_sorted_by_length = list(length_info.Length.unique())
label_list = []
for p, n in zip(pdb_list_sorted_by_length, length_info_sorted_by_length):
label_list.append(p+f"\n{n}")
In [22]:
length_info
Out[22]:
In [19]:
# time check
simulationType = "group1_hybrid_simulation"
# folder = "original"
# folder = "first"
# folder = "second_withoutExclusion"
folder_list = ["first", "second_withoutExclusion"]
all_data = []
for folder in folder_list:
for pdb in pdb_list:
for i in range(2):
location = f"/Users/weilu/Research/server/jul_2019/{simulationType}/{folder}/{pdb}/{i}/time.dat"
tmp = np.loadtxt(location)
all_data.append([pdb, i, float(tmp), folder])
In [25]:
timeData
Out[25]:
In [33]:
ax = sns.scatterplot("Protein", "time", hue="Folder", s=100, data=timeData)
In [68]:
simulationType = "group1_hybrid_simulation"
# folder = "original"
# folder = "first"
folder = "third_without_contact"
all_data = []
for pdb in pdb_list:
for i in range(3):
for restart in range(1):
pre = f"/Users/weilu/Research/server/jul_2019/{simulationType}/{folder}/{pdb}/{i}"
location = f"{pre}/info.dat"
try:
tmp = pd.read_csv(location, sep="\s+")
tmp = tmp.assign(Run=i, Protein=pdb, Restart=restart)
all_data.append(tmp)
except:
print(pdb, i, restart)
pass
data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/openMM/{simulationType}_{folder}_{today}_no_contact.csv")
In [13]:
simulationType = "group1_hybrid_simulation"
# folder = "original"
# folder = "first"
folder = "second_withoutExclusion"
all_data = []
for pdb in pdb_list:
for i in range(2):
for restart in range(1):
pre = f"/Users/weilu/Research/server/jul_2019/{simulationType}/{folder}/{pdb}/{i}"
location = f"{pre}/info.dat"
try:
tmp = pd.read_csv(location, sep="\s+")
tmp = tmp.assign(Run=i, Protein=pdb, Restart=restart)
all_data.append(tmp)
except:
print(pdb, i, restart)
pass
data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/openMM/{simulationType}_{folder}_{today}_er.csv")
In [67]:
datetime.today().strftime('%m-%d_%H_%M_%S')
Out[67]:
In [50]:
from simtk.unit import angstrom
from simtk.unit import kilocalorie_per_mole
In [51]:
type(angstrom)
Out[51]:
In [65]:
isinstance(1*angstrom, Quantity)
Out[65]:
In [61]:
from simtk.unit.quantity import Quantity
In [64]:
Quantity()
Out[64]:
In [62]:
type(10*kilocalorie_per_mole) == Quantity
Out[62]:
In [69]:
fileLocation = "/Users/weilu/Research/data/openMM/group1_hybrid_simulation_third_without_contact_07-15_no_contact.csv"
third = pd.read_csv(fileLocation, index_col=0).reset_index(drop=True)
In [70]:
fileLocation = "/Users/weilu/Research/data/openMM/group1_hybrid_simulation_first_07-12_er.csv"
first = pd.read_csv(fileLocation, index_col=0).reset_index(drop=True)
In [71]:
fileLocation = "/Users/weilu/Research/data/openMM/group1_hybrid_simulation_second_withoutExclusion_07-12_er.csv"
second = pd.read_csv(fileLocation, index_col=0).reset_index(drop=True)
In [72]:
combined = pd.concat([first.assign(Folder="first"), second.assign(Folder="second"),
third.assign(Folder="third")], sort=False).reset_index(drop=True)
In [74]:
sns.relplot("Steps", "Q_mem", hue="Folder", col="Protein", data=combined, col_wrap=1)
Out[74]:
In [73]:
sns.relplot("Steps", "Q_wat", hue="Folder", col="Protein", data=combined, col_wrap=1)
Out[73]:
In [40]:
combined = pd.concat([first.assign(Folder="first"), second.assign(Folder="second")], sort=False).reset_index(drop=True)
In [48]:
sns.relplot("Steps", "Q_wat", hue="Folder", col="Protein", data=combined, col_wrap=1)
Out[48]:
In [49]:
sns.relplot("Steps", "Q_mem", hue="Folder", col="Protein", data=combined, col_wrap=1)
Out[49]:
In [43]:
combined.columns
Out[43]:
In [42]:
combined.shape
Out[42]:
In [311]:
y = "Q_mem"
d = combined.query("Steps > 200").reset_index(drop=True)
# max_Q_data = d.groupby(["Protein", "Frag"])["Q_wat"].max().reset_index()
d = d.query("Protein != '2jo1'").reset_index(drop=True)
sub_pdb_list =['4a2n', '3kp9', '5xpd', '2xov_complete', '5d91']
# pdb_list =
sub_label_list = []
for p, n in zip(pdb_list_sorted_by_length, length_info_sorted_by_length):
if p in sub_pdb_list:
sub_label_list.append(p+f"\n{n}")
d.Protein = pd.Categorical(d.Protein,
categories=sub_pdb_list)
t = d.groupby(["Protein", "Scheme"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, hue="Scheme", style="Scheme", markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')
In [284]:
y = "Q_wat"
d = combined.query("Steps > 200").reset_index(drop=True)
# max_Q_data = d.groupby(["Protein", "Frag"])["Q_wat"].max().reset_index()
d = d.query("Protein != '2jo1'").reset_index(drop=True)
sub_pdb_list =['4a2n', '3kp9', '5xpd', '2xov_complete', '5d91']
# pdb_list =
sub_label_list = []
for p, n in zip(pdb_list_sorted_by_length, length_info_sorted_by_length):
if p in sub_pdb_list:
sub_label_list.append(p+f"\n{n}")
d.Protein = pd.Categorical(d.Protein,
categories=sub_pdb_list)
t = d.groupby(["Protein", "Scheme"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, hue="Scheme", style="Scheme", markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')
In [286]:
max_Q_data.query("Scheme == 'ER-Frag'")
Out[286]:
In [302]:
d = max_Q_data.query("Scheme == 'ER-Frag'")
for i, line in d.iterrows():
run = line["Run"]
pdb = line["Protein"]
step = line["Steps"]
print(pdb, run, step)
get_best_frame_and_extract(pdb, run, step)
In [316]:
d = max_Q_data.query("Scheme == 'ER-Frag'")
for i, line in d.iterrows():
run = line["Run"]
pdb = line["Protein"]
step = line["Steps"]
print(pdb, run, step)
get_best_frame_and_extract(pdb, run, step, Q="Q_mem")
In [314]:
max_Q_data.query("Scheme == 'ER-Frag'")
Out[314]:
In [265]:
max_Q_data
Out[265]:
In [264]:
y = "Q_wat"
d = combined.query("Steps > 200").reset_index(drop=True)
# max_Q_data = d.groupby(["Protein", "Frag"])["Q_wat"].max().reset_index()
d = d.query("Protein != '2jo1'").query("Protein != '5xpd'").query("Protein != '6e67A'").reset_index(drop=True)
sub_pdb_list =['4a2n', '3kp9', '2xov_complete', '5d91']
d.Protein = pd.Categorical(d.Protein,
categories=sub_pdb_list)
t = d.groupby(["Protein", "Scheme"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, hue="Scheme", style="Scheme", markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')
In [262]:
y = "Q_mem"
d = combined.query("Steps > 200").reset_index(drop=True)
# max_Q_data = d.groupby(["Protein", "Frag"])["Q_wat"].max().reset_index()
d = d.query("Protein != '2jo1'").query("Protein != '5xpd'").query("Protein != '6e67A'").reset_index(drop=True)
sub_pdb_list =['4a2n', '3kp9', '2xov_complete', '5d91']
d.Protein = pd.Categorical(d.Protein,
categories=sub_pdb_list)
t = d.groupby(["Protein", "Scheme"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, hue="Scheme", style="Scheme", markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')
In [7]:
simulationType = "simluation_hybrid"
# folder = "original"
folder = "fourth"
all_data = []
for pdb in pdb_list:
for i in range(5):
for restart in range(1):
location = f"/Users/weilu/Research/server/jun_2019/{simulationType}/{folder}/{pdb}/{i}/info.dat"
try:
tmp = pd.read_csv(location, sep="\s+")
tmp = tmp.assign(Run=i, Protein=pdb, Restart=restart)
all_data.append(tmp)
except:
print(pdb, i, restart)
pass
data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/openMM/{simulationType}_{folder}_{today}_ha.csv")
In [8]:
fileLocation = "/Users/weilu/Research/data/openMM/simluation_hybrid_fourth_07-01_ha.csv"
ha = pd.read_csv(fileLocation, index_col=0)
In [9]:
fileLocation = "/Users/weilu/Research/data/openMM/simluation_hybrid_second_small_batch_06-29.csv"
single = pd.read_csv(fileLocation, index_col=0)
In [17]:
combined = pd.concat([single.assign(Frag="single"), ha.assign(Frag="frag(HA)")])
In [23]:
d = combined.query("Steps > 200").reset_index(drop=True)
d.Protein = pd.Categorical(d.Protein,
categories=pdb_list)
# max_Q_data = d.groupby(["Protein", "Frag"])["Q_wat"].max().reset_index()
t = d.groupby(["Protein", "Frag"])["Q_wat"].idxmax().reset_index()
max_Q_data = d.iloc[t["Q_wat"].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y="Q_wat", hue="Frag", style="Frag", markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=label_list[1:], rotation=0, ha='center')
In [24]:
y = "Q_mem"
d = combined.query("Steps > 200").reset_index(drop=True)
d.Protein = pd.Categorical(d.Protein,
categories=pdb_list)
# max_Q_data = d.groupby(["Protein", "Frag"])["Q_wat"].max().reset_index()
t = d.groupby(["Protein", "Frag"])[y].idxmax().reset_index()
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
ax = sns.lineplot(x="Protein", y=y, hue="Frag", style="Frag", markers=True, ms=10, data=max_Q_data)
_ = ax.set_xticklabels(labels=label_list[1:], rotation=0, ha='center')
In [ ]:
simulationType = "simluation_hybrid"
# folder = "original"
folder = "fifth_with_er"
all_data = []
for pdb in pdb_list:
for i in range(2):
for restart in range(1):
location = f"/Users/weilu/Research/server/jun_2019/{simulationType}/{folder}/{pdb}/{i}/info.dat"
try:
tmp = pd.read_csv(location, sep="\s+")
tmp = tmp.assign(Run=i, Protein=pdb, Restart=restart)
all_data.append(tmp)
except:
print(pdb, i, restart)
pass
data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/openMM/{simulationType}_{folder}_{today}_er.csv")
In [ ]:
plt.rcParams.update({'font.size': 12})
native_energy = combined.query("Steps < 1 and Run == 0").reset_index(drop=True)
y_show = "Fragment"
g = sns.FacetGrid(combined.query("Steps > 100"), col="Protein",col_wrap=2, hue="Frag", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Q_wat", y_show, alpha=0.5).add_legend())
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'multi_iter0_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="blue", linewidth=4)
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'original_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="orange", linewidth=4)
for ax in g.axes:
name= ax.title.get_text().split(" ")[-1]
# print(name)
energy = native_energy.query(f"Protein == '{name}'")[y_show].iloc[0]
ax.axhline(energy, ls="--", color="blue", linewidth=4)
try:
energy = native_energy.query(f"Protein == '{name}'")[y_show].iloc[1]
ax.axhline(energy, ls="--", color="orange", linewidth=4)
except:
pass
In [25]:
pdb_list = ["2xov_complete", "6e67A", "5xpd", "3kp9", "4a2n", "5d91", "2jo1"]
In [223]:
pre = "/Users/weilu/Research/server/jun_2019/simluation_hybrid"
for pdb in pdb_list:
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
cm = confusion_matrix(table, predict_table)
print(f"{pdb:^20s}", "{:^10s}".format("pred_0"), "{:^10s}".format("pred_1"))
print("{:^20s}".format("true_0"), f"{cm[0][0]:^10d}", f"{cm[0][1]:^10d}")
print("{:^20s}".format("true_1"), f"{cm[1][0]:^10d}", f"{cm[1][1]:^10d}")
print("")
In [31]:
def get_inside_or_not_table_from_TM_pred(probFile):
with open(f"{probFile}") as f:
a = f.readlines()
res_list = []
for i, line in enumerate(a[3:]):
prob = float(line.strip().split()[3])
res = 0 if prob < 0.5 else 1
res_list.append(res)
return res_list
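# The parser above assumes a .prob layout of three header lines followed by
# one residue per line, with the membrane probability in the fourth
# whitespace-separated column, e.g. (illustrative):
#   <index>  <residue>  <label>  <prob>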
In [224]:
def magnify():
return [dict(selector="th",
props=[("font-size", "4pt")]),
dict(selector="td",
props=[('padding', "0em 0em")]),
dict(selector="th:hover",
props=[("font-size", "12pt")]),
dict(selector="tr:hover td:hover",
props=[('max-width', '200px'),
('font-size', '12pt')])
]
In [170]:
from sklearn.metrics import confusion_matrix
In [172]:
cm = confusion_matrix(table, predict_table)
In [183]:
t = pd.DataFrame(cm, columns=["pred_0", "pred_1"], index=["true_0", "true_1"])
In [221]:
print(f"{pdb:^20s}", "{:^10s}".format("pred_0"), "{:^10s}".format("pred_1"))
print("{:^20s}".format("true_0"), f"{cm[0][0]:^10d}", f"{cm[0][1]:^10d}")
print("{:^20s}".format("true_1"), f"{cm[1][0]:^10d}", f"{cm[1][1]:^10d}")
In [227]:
pdb = pdb_list[0]
print(pdb)
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
d = pd.DataFrame([table, predict_table])
bigdf = d
cmap = sns.diverging_palette(5, 250, as_cmap=True)
a = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
a
Out[227]:
In [232]:
pdb = pdb_list[1]
print(pdb)
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
d = pd.DataFrame([table, predict_table])
bigdf = d
cmap = sns.diverging_palette(5, 250, as_cmap=True)
a = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
a
Out[232]:
In [233]:
pdb = pdb_list[2]
print(pdb)
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
d = pd.DataFrame([table, predict_table])
bigdf = d
cmap = sns.diverging_palette(5, 250, as_cmap=True)
a = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
a
Out[233]:
In [234]:
pdb = pdb_list[3]
print(pdb)
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
d = pd.DataFrame([table, predict_table])
bigdf = d
cmap = sns.diverging_palette(5, 250, as_cmap=True)
a = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
a
Out[234]:
In [235]:
pdb = pdb_list[4]
print(pdb)
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
d = pd.DataFrame([table, predict_table])
bigdf = d
cmap = sns.diverging_palette(5, 250, as_cmap=True)
a = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
a
Out[235]:
In [236]:
pdb = pdb_list[5]
print(pdb)
location = f"{pre}/setup/{pdb}/{pdb}.pdb"
table = get_inside_or_not_table(location)
probFile = f"{pre}/TM_pred/{pdb}_PureTM/{pdb}.prob"
predict_table = get_inside_or_not_table_from_TM_pred(probFile)
d = pd.DataFrame([table, predict_table])
bigdf = d
cmap = sns.diverging_palette(5, 250, as_cmap=True)
a = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
a
Out[236]:
In [146]:
cmap = sns.diverging_palette(5, 250, as_cmap=True)
bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '10px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
Out[146]:
In [145]:
# import imgkit
cmap = sns.diverging_palette(5, 250, as_cmap=True)
bigdf = d
styled_table = bigdf.style.background_gradient(cmap, axis=1)\
.set_properties(**{'max-width': '80px', 'font-size': '0pt'})\
.set_precision(2)\
.set_table_styles(magnify())
with open ('/Users/weilu/Desktop/out.html','w') as out:
html = styled_table.render()
out.write(html)
In [165]:
for i in table:
print(i, end="")
print("")
for i in predict_table:
print(i, end="")
In [162]:
d
Out[162]:
In [104]:
d.columns = [""] * 72
In [102]:
s.hide_columns([0,1])
Out[102]:
In [85]:
d
Out[85]:
In [76]:
pd.get_option("display.max_rows")
Out[76]:
In [77]:
pd.get_option("display.max_columns")
Out[77]:
In [49]:
def color_negative_red(val):
"""
Takes a scalar and returns a string with
the css property `'color: red'` for negative
strings, black otherwise.
"""
color = 'red' if val == 1 else 'black'
return 'color: %s' % color
In [37]:
print(d.values)
In [39]:
print(table)
print(predict_table)
In [335]:
pdb = "4rws"
pre = "/Users/weilu/Research/server/jul_2019/hybrid_simulation
loc = f"{pre}/TM_pred/{pdb}_topo"
with open(loc) as f:
a = f.readlines()
assert len(a) % 3 == 0
chain_count = len(a) // 3
seq = ""
for i in range(chain_count):
seq_i = (a[i*3+2]).strip()
seq += seq_i
assert np.alltrue([i in ["0", "1"] for i in seq])
with open(f"{pre}/TM_pred/{pdb}_predicted_zim", "w") as out:
for i in seq:
if i == "0":
out.write("1\n")
elif i == "1":
out.write("2\n")
else:
            raise ValueError(f"unexpected topology code: {i}")
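# Sanity check of the mapping above ("0" -> zim code 1, "1" -> zim code 2;
# reading these as water vs. membrane residues is my assumption):
# "".join("1" if c == "0" else "2" for c in "0011")  # -> "1122"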
In [339]:
force_setup_file = f"{pre}/energy_forces/forces_setup_{pdb}.py"
res_list = []
first = None
count = 1
previousEnd = 0
# print("g_all = [")
zimOut = open(f"{pre}/{pdb}_predicted_zim", "w")
out = "[\n"
for i, res in enumerate(seq):
o = "2" if res == "1" else "1"
zimOut.write(o+"\n")
if res == "0":
if len(res_list) > 0:
# print(f"g{count} =", res_list)
print(res_list, ", ")
out += f" {res_list},\n"
count += 1
last = res_list[-1]
first = res_list[0] if first is None else first
span = res_list[0] - previousEnd
if span > 30:
print(f"{pdb} Globular", previousEnd, res_list[0])
globular = list(range(previousEnd+10, res_list[0]-10))
previousEnd = last
res_list = []
if res == "1":
res_list.append(i)
n = len(seq)
print(f"{pdb}: size {n}")
span = n - previousEnd
if span > 30:
print(f"{pdb} Globular", previousEnd, n)
globular = list(range(previousEnd+10, n-10))
out += "]\n"
zimOut.close()
do(f"cp {pre}/TM_pred/{pdb}_predicted_zim {pred}/setup/{pdb}/PredictedZim")
membranePart = []
for i in range(first-5, last+5):
if i not in globular:
membranePart.append(i)
# print("]")
# replace(, "GALL", out)
# , backup='.bak'
# print(out, first, last, membranePart, globular)
with fileinput.FileInput(force_setup_file, inplace=True) as file:
for line in file:
tmp = line.replace("GALL", out).replace("FIRST", str(first)).replace("LAST", str(last))
tmp = tmp.replace("RESMEMB", f"{membranePart}")
tmp = tmp.replace("RESGLOBULAR", f"{globular}")
print(tmp, end='')
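# The in-place rewrite above assumes forces_setup_{pdb}.py contains the literal
# placeholders GALL, FIRST, LAST, RESMEMB and RESGLOBULAR; e.g. a hypothetical
# template line "g_all = GALL" would become "g_all = [[...], [...]]" with one
# sublist of residue indices per predicted TM helix.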
In [ ]:
def get_inside_or_not_table(pdb_file):
parser = PDBParser(PERMISSIVE=1,QUIET=True)
try:
structure = parser.get_structure('X', pdb_file)
except:
return [0]
inside_or_not_table = []
for res in structure.get_residues():
if res.get_id()[0] != " ":
continue # skip
try:
res["CA"].get_vector()
except:
print(pdb_file, res.get_id())
return [0]
inside_or_not_table.append(int(abs(res["CA"].get_vector()[-1]) < 15))
return inside_or_not_table
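# Hypothetical usage (path illustrative): flags each standard residue whose CA
# z-coordinate is within 15 A of z = 0, i.e. inside the implicit membrane slab.
# get_inside_or_not_table("setup/2xov/2xov.pdb")  # -> [0, 0, 1, 1, ...]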
In [ ]:
parser = PDBParser(QUIET=1)
structure = parser.get_structure('X', pdb)