In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import os
# from small_script.myFunctions import *
# Default figure size in inches; width / height = 16.18033 / 10, i.e. the golden ratio.
plt.rcParams['figure.figsize'] = [16.18033, 10] #golden ratio
# White figure background.
plt.rcParams['figure.facecolor'] = 'w'
# Figure resolution in dots per inch.
plt.rcParams['figure.dpi'] = 100
%matplotlib inline
%load_ext autoreload
%autoreload 2
In [2]:
# Re-apply the figure defaults in a single call:
# golden-ratio figure size (inches), white background, 100 dpi.
plt.rcParams.update({
    'figure.figsize': [16.18033, 10],
    'figure.facecolor': 'w',
    'figure.dpi': 100,
})
In [3]:
# Named groups of protein identifiers used throughout the notebook.
dataset = {
    "old": ["1R69", "1UTG", "3ICB", "256BA", "4CPV", "1CCR", "2MHR", "1MBA", "2FHA"],
    "new": ["1FC2C", "1ENH", "2GB1", "2CRO", "1CTF", "4ICB"],
    "test": [
        "t089", "t120", "t251", "top7", "1ubq",
        "t0766", "t0778", "t0782", "t0792", "t0803",
        "t0815", "t0833", "t0842", "t0844",
    ],
}
# "combined" is a fresh list: the "old" entries followed by the "new" ones.
dataset["combined"] = [*dataset["old"], *dataset["new"]]
# pdb_list, steps = dataset["old"]
def get_data(pre, pdb_list, simType="all_simulations", n_rum=30, rerun=1, formatName=True, n_frames=50):
    """Collect the trailing rows of every run's ``wham.dat`` into one DataFrame.

    For each protein in ``pdb_list``, each run index ``i`` in ``range(n_rum)`` and
    each rerun index ``ii`` in ``range(rerun)``, the file
    ``{pre}{simType}/{name}/simulation/{i}/{ii}/wham.dat`` is read with
    ``pd.read_csv`` and only its last ``n_frames`` rows are kept.

    Parameters
    ----------
    pre : str
        Path prefix. It is concatenated as-is, so it must end with a path separator.
    pdb_list : iterable of str
        Protein identifiers.
    simType : str
        Sub-folder of ``pre`` that holds the simulations.
    n_rum : int
        Number of runs per protein (run indices ``0 .. n_rum - 1``). The spelling
        is kept so existing keyword callers continue to work.
    rerun : int
        Number of reruns per run (rerun indices ``0 .. rerun - 1``).
    formatName : bool
        When true, each identifier is lower-cased and cut to its first four
        characters before being used as the folder name.
    n_frames : int
        How many trailing rows of each file to keep (default 50, the value that
        used to be hard-coded).

    Returns
    -------
    pandas.DataFrame
        The concatenated rows with whitespace-stripped column names, an ``index``
        column holding each row's position in its source file, and the added
        columns ``Run`` (``"Run<i>"``), ``Name`` and ``Rerun``.

    Raises
    ------
    ValueError
        If not a single ``wham.dat`` could be read.
    """
    frames = []
    for p in pdb_list:
        name = p.lower()[:4] if formatName else p
        for i in range(n_rum):
            for ii in range(rerun):
                location = pre + f"{simType}/{name}/simulation/{i}/{ii}/wham.dat"
                try:
                    tmp = pd.read_csv(location).tail(n_frames).reset_index()
                    tmp.columns = tmp.columns.str.strip()
                    frames.append(tmp.assign(Run=i, Name=name, Rerun=ii))
                except Exception as e:
                    # Best effort: report the unreadable run and carry on with the rest.
                    print(e)
    if not frames:
        # pd.concat([]) would raise an opaque "No objects to concatenate"; say what was searched.
        raise ValueError(
            f"get_data: no wham.dat could be read under {pre}{simType}/ "
            f"(n_rum={n_rum}, rerun={rerun})"
        )
    data = pd.concat(frames)
    data["Run"] = "Run" + data["Run"].astype(str)
    return data
# pre = "/Users/weilu/Research/server/feb_2019/optimization_iter1/database/2gb1/"
# fileName = "movie.pdb"
def splitPDB(pre, fileName):
    """Split a multi-frame PDB file into one file per frame.

    Reads ``fileName`` from the directory ``pre`` and writes every block of lines
    that ends with an ``END`` record to ``frame0.pdb``, ``frame1.pdb``, ... in the
    same directory. Lines after the last ``END`` record are not written anywhere.

    Parameters
    ----------
    pre : str
        Directory holding the input file; the frame files are written there too.
        A trailing path separator is optional.
    fileName : str
        Name of the multi-frame PDB file inside ``pre``.
    """
    # Build input and output paths the same way so frames land next to the
    # input whether or not `pre` ends with a separator.
    location = os.path.join(pre, fileName)
    frame_index = 0
    frame_lines = []
    with open(location, "r") as movie:
        for line in movie:
            frame_lines.append(line)
            # Compare the stripped record so space-padded or CRLF-terminated END lines also match.
            if line.strip() == "END":
                with open(os.path.join(pre, f"frame{frame_index}.pdb"), "w") as out:
                    out.writelines(frame_lines)
                frame_index += 1
                frame_lines = []
import subprocess
def getFromTerminal(CMD):
    """Run ``CMD`` through the shell and return what it wrote to stdout, decoded to ``str``.

    The exit status is not checked and stderr is not captured.
    """
    completed = subprocess.run(CMD, shell=True, stdout=subprocess.PIPE)
    return completed.stdout.decode()
def getSize(p, base_dir="/Users/weilu/Research/server/feb_2019/iterative_optimization_test_set/all_simulations"):
    """Return the number of newline-terminated lines in a protein's ``ssweight`` file.

    The file is looked up at ``{base_dir}/{protein}/{protein}/ssweight`` where
    ``protein`` is ``p`` lower-cased and cut to its first four characters.
    NOTE(review): presumably ``ssweight`` holds one line per residue, making
    this the sequence length -- confirm.

    Parameters
    ----------
    p : str
        Protein identifier.
    base_dir : str
        Folder containing the per-protein simulation folders. Defaults to the
        location that used to be hard-coded.

    Returns
    -------
    int
        Count of newline characters in the file (what ``wc`` reports as lines).

    Raises
    ------
    FileNotFoundError
        If the ``ssweight`` file does not exist.
    """
    protein = p.lower()[:4]
    ssweight_path = f"{base_dir}/{protein}/{protein}/ssweight"
    # Count in Python instead of shelling out to `wc`: no quoting problems with
    # unusual paths, and a missing file gives a clear error.
    with open(ssweight_path, "rb") as handle:
        return sum(1 for line in handle if line.endswith(b"\n"))
In [4]:
# Load the per-protein sequence table; the first CSV column becomes the index.
d = pd.read_csv("/Users/weilu/Research/server/april_second_2019/test_set/seq_info.csv", index_col=0)
# Keep the proteins whose `length` is below 150 and whose index value is even,
# and take their `protein` entries as a plain list.
pdb_list = d.query("length < 150 and index % 2 == 0")["protein"].tolist()
In [5]:
# Show the selected protein identifiers as plain text.
print(pdb_list)
In [ ]:
# Scratch expression: relative decoy path for a given name/offset.
# NOTE(review): `name` and `offset` are only assigned in the following cell, so on a
# fresh top-to-bottom run this line raises NameError.
f"decoyData/{name}/{name}_{offset}.pdb"
In [33]:
# Pick one decoy structure file: protein name plus an offset used in the file name.
name = "5guw"
offset = -50
# Absolute path of that decoy PDB file.
pdb_file = f"/Users/weilu/Research/server/april_second_2019/weighted/clean/decoyData/{name}/{name}_{offset}.pdb"
In [35]:
# NOTE(review): this import sits mid-notebook; later cells that use PDBParser depend
# on this cell having been run first.
from Bio.PDB.PDBParser import PDBParser
parser = PDBParser()
# Parse the chosen PDB file; 'X' is the id given to the resulting structure.
structure = parser.get_structure('X', pdb_file)
In [49]:
# One 0/1 flag per residue of `structure`: 1 when the absolute value of the last
# component of the residue's "CA" atom vector is below 15, otherwise 0.
# NOTE(review): presumably the last component is z and 15 a membrane half-thickness -- confirm.
inside_or_not_table = []
for res in structure.get_residues():
inside_or_not_table.append(int(abs(res["CA"].get_vector()[-1]) < 15))
# print(int(abs(ca.get_vector()[-1]) < 15))
In [52]:
def get_inside_or_not_table(pdb_file):
    """Return one 0/1 flag per residue of the structure stored in ``pdb_file``.

    The file is parsed with ``PDBParser``. A residue's flag is 1 when the
    absolute value of the last component of its ``"CA"`` atom vector is below
    15, and 0 otherwise. Every residue is looked up by ``"CA"``, so the
    structure is expected to have that atom in each residue.
    NOTE(review): presumably the last component is z and 15 a membrane
    half-thickness -- confirm.
    """
    model = PDBParser().get_structure('X', pdb_file)
    return [
        int(abs(residue["CA"].get_vector()[-1]) < 15)
        for residue in model.get_residues()
    ]
In [54]:
# Scratch check: `==` on two equal-length arrays compares element by element
# and yields a boolean array.
np.array([1, 2, 3]) == np.array([2,4,2])
Out[54]:
In [10]:
# Compare per-protein Qw distributions between simulation schemes.
# Each CSV is read with its first column as the index. All four are loaded; the
# commented-out entries in the concat below refer to the two that are not plotted.
data1 = pd.read_csv("/Users/weilu/Research/data/optimization/multi_iter0_test_set_04-18.csv", index_col=0)
data2 = pd.read_csv("/Users/weilu/Research/data/optimization/original_test_set_04-18.csv", index_col=0)
data3 = pd.read_csv("/Users/weilu/Research/data/optimization/multi_iter2_test_set_04-18.csv", index_col=0)
data4 = pd.read_csv("/Users/weilu/Research/data/optimization/multi_iter1_test_set_04-30.csv", index_col=0)
# Tag every frame with the scheme label used as the boxplot hue, then stack them.
# NOTE(review): this rebinds `d`, which the seq_info cell near the top also assigns.
d = pd.concat([
    data2.assign(Scheme="original"),
    # data1.assign(Scheme="mutliSeq iter0"),
    # data3.assign(Scheme="mutliSeq iter2"),
    data4.assign(Scheme="mutliSeq group iter1"),
])
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.boxplot(x="Name", y="Qw", hue="Scheme", data=d)
# sns.boxplot(x="Qw", y="Name", hue="Scheme", data=d)
Out[10]:
In [9]:
# Compare per-protein Qw distributions between simulation schemes.
# Each CSV is read with its first column as the index. All four are loaded; the
# commented-out entries in the concat below refer to the two that are not plotted.
data1 = pd.read_csv("/Users/weilu/Research/data/optimization/multi_iter0_test_set_04-18.csv", index_col=0)
data2 = pd.read_csv("/Users/weilu/Research/data/optimization/original_test_set_04-18.csv", index_col=0)
data3 = pd.read_csv("/Users/weilu/Research/data/optimization/multi_iter2_test_set_04-18.csv", index_col=0)
data4 = pd.read_csv("/Users/weilu/Research/data/optimization/multi_iter1_test_set_04-30.csv", index_col=0)
# Tag every frame with the scheme label used as the boxplot hue, then stack them.
# NOTE(review): this rebinds `d`, which the seq_info cell near the top also assigns.
d = pd.concat([
    # data2.assign(Scheme="original"),
    # data1.assign(Scheme="mutliSeq iter0"),
    data3.assign(Scheme="mutliSeq iter2"),
    data4.assign(Scheme="mutliSeq group iter1"),
])
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.boxplot(x="Name", y="Qw", hue="Scheme", data=d)
# sns.boxplot(x="Qw", y="Name", hue="Scheme", data=d)
Out[9]:
In [29]:
# Peek at the first rows of `data`.
# NOTE(review): in document order `data` is first assigned by a later get_data cell,
# so on a fresh top-to-bottom run this line raises NameError.
data.head()
Out[29]:
In [30]:
# Peek at the first rows of `data3` (loaded from a CSV in the comparison cells above).
data3.head()
Out[30]:
In [16]:
# Compare per-protein Qw distributions for the three "single_memory" result sets.
# Each CSV is read with its first column as the index.
data1 = pd.read_csv("/Users/weilu/Research/data/optimization/multi_iter0_single_memory_test_set_04-30.csv", index_col=0)
data2 = pd.read_csv("/Users/weilu/Research/data/optimization/original_single_memory_test_set_04-30.csv", index_col=0)
data3 = pd.read_csv("/Users/weilu/Research/data/optimization/multi_iter1_single_memory_test_set_04-30.csv", index_col=0)
# Tag every frame with the scheme label used as the boxplot hue, then stack them.
# NOTE(review): this rebinds `d`, which the seq_info cell near the top also assigns.
d = pd.concat([
    data2.assign(Scheme="original"),
    data1.assign(Scheme="mutliSeq iter0"),
    data3.assign(Scheme="mutliSeq group iter1"),
])
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.boxplot(x="Name", y="Qw", hue="Scheme", data=d)
# sns.boxplot(x="Qw", y="Name", hue="Scheme", data=d)
Out[16]:
In [14]:
# Gather the tail of every run's wham.dat for one simulation type, save the table
# as a dated CSV, and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_second_2019/"
folder = "test_set"
pre = pre + folder + "/"  # get_data concatenates `pre` as-is, so keep the trailing slash
simulationType = "multi_iter1_single_memory"
today = datetime.datetime.today().strftime('%m-%d')  # month-day only; no year in the file name
# pdb_list, steps = dataset["test"]
# pdb_list = "1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", ")
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=1, formatName=False)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/withoutContact_{today}.csv")
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[14]:
In [13]:
# Gather the tail of every run's wham.dat for one simulation type, save the table
# as a dated CSV, and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_second_2019/"
folder = "test_set"
pre = pre + folder + "/"  # get_data concatenates `pre` as-is, so keep the trailing slash
simulationType = "original_single_memory"
today = datetime.datetime.today().strftime('%m-%d')  # month-day only; no year in the file name
# pdb_list, steps = dataset["test"]
# pdb_list = "1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", ")
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=1, formatName=False)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/withoutContact_{today}.csv")
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[13]:
In [11]:
# Gather the tail of every run's wham.dat for one simulation type, save the table
# as a dated CSV, and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_second_2019/"
folder = "test_set"
pre = pre + folder + "/"  # get_data concatenates `pre` as-is, so keep the trailing slash
simulationType = "multi_iter0_single_memory"
today = datetime.datetime.today().strftime('%m-%d')  # month-day only; no year in the file name
# pdb_list, steps = dataset["test"]
# pdb_list = "1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", ")
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=1, formatName=False)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/withoutContact_{today}.csv")
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[11]:
In [6]:
# Gather the tail of every run's wham.dat for one simulation type, save the table
# as a dated CSV, and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_second_2019/"
folder = "test_set"
pre = pre + folder + "/"  # get_data concatenates `pre` as-is, so keep the trailing slash
simulationType = "multi_iter1"
today = datetime.datetime.today().strftime('%m-%d')  # month-day only; no year in the file name
# pdb_list, steps = dataset["test"]
# pdb_list = "1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", ")
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=1, formatName=False)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/withoutContact_{today}.csv")
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[6]:
In [31]:
# Gather the tail of every run's wham.dat for one simulation type, save the table
# as a dated CSV, and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_second_2019/"
folder = "test_set"
pre = pre + folder + "/"  # get_data concatenates `pre` as-is, so keep the trailing slash
simulationType = "multi_iter2"
today = datetime.datetime.today().strftime('%m-%d')  # month-day only; no year in the file name
# pdb_list, steps = dataset["test"]
# pdb_list = "1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", ")
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=1, formatName=False)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/withoutContact_{today}.csv")
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[31]:
In [7]:
# Gather the tail of every run's wham.dat for one simulation type, save the table
# as a dated CSV, and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_second_2019/"
folder = "test_set"
pre = pre + folder + "/"  # get_data concatenates `pre` as-is, so keep the trailing slash
simulationType = "original"
today = datetime.datetime.today().strftime('%m-%d')  # month-day only; no year in the file name
# pdb_list, steps = dataset["test"]
# pdb_list = "1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", ")
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=1, formatName=False)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/withoutContact_{today}.csv")
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[7]:
In [10]:
# Keep a handle on the table produced by the preceding "original" get_data cell before
# later cells rebind `data`. This assignment had been commented out, which left
# `data_original` undefined on a fresh top-to-bottom run (NameError).
data_original = data
# Horizontal boxplot of Qw per protein; x/y are passed by keyword because
# seaborn >= 0.12 accepts only `data` positionally.
sns.boxplot(x="Qw", y="Name", data=data_original)
Out[10]:
In [13]:
# Horizontal boxplot of Qw per protein for the current `data` table; x/y are passed
# by keyword because seaborn >= 0.12 accepts only `data` positionally.
sns.boxplot(x="Qw", y="Name", data=data)
Out[13]:
In [11]:
# Gather the tail of every run's wham.dat for one simulation type, save the table
# as a dated CSV, and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_second_2019/"
folder = "test_set"
pre = pre + folder + "/"  # get_data concatenates `pre` as-is, so keep the trailing slash
simulationType = "multi_iter0"
today = datetime.datetime.today().strftime('%m-%d')  # month-day only; no year in the file name
# pdb_list, steps = dataset["test"]
# pdb_list = "1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", ")
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=1, formatName=False)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/withoutContact_{today}.csv")
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[11]:
In [20]:
# Show the rows with `length` below 150 and an even index value (the same filter that
# builds `pdb_list`).
# NOTE(review): the comparison cells rebind `d` to a pd.concat result, so this only
# works while `d` still holds the seq_info table -- confirm the intended run order.
d.query("length < 150 and index % 2 == 0")
Out[20]:
In [23]:
# Display `d` in full.
# NOTE(review): `d` is assigned both by the seq_info cell and by the comparison
# cells, so what is shown depends on which of them ran last.
d
Out[23]:
In [ ]: