In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import os
# from small_script.myFunctions import *
%matplotlib inline
%load_ext autoreload
%autoreload 2
In [2]:
# Notebook-wide figure defaults: white background, 16.18033 x 10 inch canvas, 200 dpi.
plt.rcParams.update({
    'figure.facecolor': 'w',
    'figure.figsize': [16.18033, 10],
    'figure.dpi': 200,
})
In [3]:
# plt.rcParams['figure.figsize'] = [16.18033, 10]
# Protein identifier lists keyed by set name; "combined" is "old" followed by "new".
dataset = {
    "old": "1R69, 1UTG, 3ICB, 256BA, 4CPV, 1CCR, 2MHR, 1MBA, 2FHA".split(", "),
    "new": "1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", "),
    "test": [
        "t089", "t120", "t251", "top7", "1ubq", "t0766", "t0778",
        "t0782", "t0792", "t0803", "t0815", "t0833", "t0842", "t0844",
    ],
}
dataset["combined"] = [*dataset["old"], *dataset["new"]]
# pdb_list, steps = dataset["old"]
def get_data(pre, pdb_list, simType="all_simulations", n_rum=30, rerun=1, formatName=True, n_frames=50):
    """Gather the last rows of every run's ``wham.dat`` into one DataFrame.

    Files are read from
    ``{pre}{simType}/{name}/simulation/{run}/{rerun}/wham.dat``.

    Parameters
    ----------
    pre : str
        Base directory. It is concatenated as-is, so it must already end
        with a path separator.
    pdb_list : iterable of str
        Protein identifiers.
    simType : str
        Sub-folder of ``pre`` holding the simulations.
    n_rum : int
        Number of runs per protein (indices ``0 .. n_rum - 1``). The
        spelling is kept for existing callers.
    rerun : int
        Number of reruns per run (indices ``0 .. rerun - 1``).
    formatName : bool
        When true, each identifier is lower-cased and cut to its first four
        characters before being used as the folder name.
    n_frames : int
        Number of trailing rows kept from each file (default 50).

    Returns
    -------
    pandas.DataFrame
        Concatenated rows with stripped column names plus ``Run``
        (``"Run<i>"``), ``Name`` and ``Rerun`` columns. The index is not
        reset across files.

    Raises
    ------
    ValueError
        If not a single ``wham.dat`` could be read.
    """
    frames = []
    for p in pdb_list:
        name = p.lower()[:4] if formatName else p
        for i in range(n_rum):
            for ii in range(rerun):
                location = pre + f"{simType}/{name}/simulation/{i}/{ii}/wham.dat"
                try:
                    tmp = pd.read_csv(location).tail(n_frames).reset_index()
                    tmp.columns = tmp.columns.str.strip()
                    frames.append(tmp.assign(Run=i, Name=name, Rerun=ii))
                except Exception as e:
                    # Best effort: report the unreadable file and keep going.
                    print(e)
    if not frames:
        # pd.concat([]) would fail with an opaque "No objects to concatenate".
        raise ValueError(f"get_data: no wham.dat file could be read under {pre}{simType}/")
    data = pd.concat(frames)
    data["Run"] = "Run" + data["Run"].astype(str)
    return data
# pre = "/Users/weilu/Research/server/feb_2019/optimization_iter1/database/2gb1/"
# fileName = "movie.pdb"
def splitPDB(pre, fileName):
    """Split a multi-frame PDB file into one file per frame.

    Reads ``{pre}/{fileName}`` and writes every run of lines that ends with
    an ``END`` record to ``frame0.pdb``, ``frame1.pdb``, ... inside ``pre``.
    Lines after the last ``END`` record are not written anywhere.

    Parameters
    ----------
    pre : str
        Directory holding the input file; the frame files are written there
        too, whether or not ``pre`` ends with a path separator.
    fileName : str
        Name of the multi-frame PDB file inside ``pre``.
    """
    location = f"{pre}/{fileName}"
    frame_index = 0
    frame_lines = []
    with open(location, "r") as f:
        for line in f:
            frame_lines.append(line)
            # rstrip() so padded ("END   ") and CRLF-terminated records match;
            # "ENDMDL" still does not.
            if line.rstrip() == "END":
                # os.path.join keeps the output in `pre` even without a trailing slash.
                with open(os.path.join(pre, f"frame{frame_index}.pdb"), "w") as out:
                    out.writelines(frame_lines)
                frame_index += 1
                frame_lines = []
import subprocess
def getFromTerminal(CMD):
    """Run ``CMD`` through the shell and return its standard output decoded to text.

    The exit status is not checked and standard error is not captured.
    """
    completed = subprocess.run(CMD, shell=True, stdout=subprocess.PIPE)
    return completed.stdout.decode()
def getSize(p):
    """Return the first number ``wc`` reports for a protein's ``ssweight`` file.

    ``p`` is lower-cased and cut to four characters to locate the file under
    the feb_2019 iterative_optimization_test_set folder.
    """
    pdb_id = p.lower()[:4]
    ssweight_path = f"/Users/weilu/Research/server/feb_2019/iterative_optimization_test_set/all_simulations/{pdb_id}/{pdb_id}/ssweight"
    wc_output = getFromTerminal(f"wc {ssweight_path}")
    # The first field of `wc` output is the line count.
    return int(wc_output.split()[0])
In [6]:
# Collect the tail of every "original" run (combined set, single-memory folder),
# save it to a dated CSV and plot Qw per protein.
pre = "/Users/weilu/Research/server/single_memory_optimization/"
folder = "iterative_optimization_combined_train_set_singleMemory"
pre = pre + folder + "/"
simulationType = "original"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=30, rerun=1, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[6]:
In [170]:
# Compare Qw per protein across four schemes saved earlier as CSV.
data_origin = pd.read_csv("/Users/weilu/Research/data/optimization/original_iterative_optimization_combined_train_set_with_frag_04-06.csv", index_col=0)
data = pd.read_csv("/Users/weilu/Research/data/optimization/iter0_iterative_optimization_combined_train_set_with_frag_04-08.csv", index_col=0)
data2 = pd.read_csv("/Users/weilu/Research/data/optimization/without_contact_iterative_optimization_combined_train_set_with_frag_04-09.csv", index_col=0)
data3 = pd.read_csv("/Users/weilu/Research/data/optimization/original_with_rg_iterative_optimization_combined_train_set_with_frag_04-09.csv", index_col=0)
# Only the "original" frame is filtered to Rerun == 1; the others are used whole.
# ignore_index gives the combined frame a unique index (each CSV's index restarts at 0).
d = pd.concat([
    data_origin.query("Rerun == 1").assign(Scheme="original"),
    data3.assign(Scheme="original with rg"),
    data.assign(Scheme="iter0"),
    data2.assign(Scheme="without contact"),
], ignore_index=True)
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", hue="Scheme", data=d)
Out[170]:
In [ ]:
In [3]:
# Collect the tail of every "iter6_with_rg" run (combined set, with fragments),
# save it to a dated CSV and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set_with_frag"
pre = pre + folder + "/"
simulationType = "iter6_with_rg"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=30, rerun=1, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[3]:
In [165]:
# Collect the tail of every "without_contact" run (combined set, with fragments),
# save it to a dated CSV and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set_with_frag"
pre = pre + folder + "/"
simulationType = "without_contact"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=30, rerun=1, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[165]:
In [164]:
# Collect the tail of every "original_with_rg" run (combined set, with fragments),
# save it to a dated CSV and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set_with_frag"
pre = pre + folder + "/"
simulationType = "original_with_rg"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=30, rerun=1, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[164]:
In [163]:
# Collect the tail of every "iter0" run (combined set, with fragments),
# save it to a dated CSV and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set_with_frag"
pre = pre + folder + "/"
simulationType = "iter0"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=30, rerun=1, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[163]:
In [159]:
# Collect the tail of every "iter7_normalized_90" run (10 runs x 2 reruns),
# save it to a dated CSV and plot Qw per protein for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set_with_frag"
pre = pre + folder + "/"
simulationType = "iter7_normalized_90"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[159]:
In [168]:
# Compare Qw per protein (Rerun == 1 only) between the original run and iterations 5-7.
data_origin = pd.read_csv("/Users/weilu/Research/data/optimization/original_iterative_optimization_combined_train_set_with_frag_04-06.csv", index_col=0)
data = pd.read_csv("/Users/weilu/Research/data/optimization/iter5_normalized_noFrag_iterative_optimization_combined_train_set_with_frag_04-06.csv", index_col=0)
data2 = pd.read_csv("/Users/weilu/Research/data/optimization/iter6_normalized_noFrag_iterative_optimization_combined_train_set_with_frag_04-07.csv", index_col=0)
data3 = pd.read_csv("/Users/weilu/Research/data/optimization/iter7_normalized_iterative_optimization_combined_train_set_with_frag_04-08.csv", index_col=0)
# Loaded but not included in the plot below.
data4 = pd.read_csv("/Users/weilu/Research/data/optimization/iter7_normalized_90_iterative_optimization_combined_train_set_with_frag_04-08.csv", index_col=0)
# ignore_index gives the combined frame a unique index (each CSV's index restarts at 0).
d = pd.concat([
    data_origin.query("Rerun == 1").assign(Scheme="original"),
    data.query("Rerun == 1").assign(Scheme="iter5"),
    data2.query("Rerun == 1").assign(Scheme="iter6"),
    data3.query("Rerun == 1").assign(Scheme="iter7"),
], ignore_index=True)
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", hue="Scheme", data=d)
Out[168]:
In [169]:
# Collect the tail of every "iter7_normalized" run (10 runs x 2 reruns),
# save it to a dated CSV and plot Qw per protein for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set_with_frag"
pre = pre + folder + "/"
simulationType = "iter7_normalized"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[169]:
In [155]:
# Collect the tail of every "iter6_normalized_noFrag" run (10 runs x 2 reruns),
# save it to a dated CSV and plot Qw per protein for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set_with_frag"
pre = pre + folder + "/"
simulationType = "iter6_normalized_noFrag"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[155]:
In [153]:
# Collect the tail of every "original" run (with fragments, 10 runs x 2 reruns),
# save it to a dated CSV and plot Qw per protein for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set_with_frag"
pre = pre + folder + "/"
simulationType = "original"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[153]:
In [152]:
# Collect the tail of every "iter5_normalized_noFrag" run (with-frag folder,
# 10 runs x 2 reruns), save it to a dated CSV and plot Qw for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set_with_frag"
pre = pre + folder + "/"
simulationType = "iter5_normalized_noFrag"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[152]:
In [14]:
%matplotlib inline
# Re-apply the figure defaults: white background, 16.18033 x 10 inch canvas, 200 dpi.
plt.rcParams.update({
    'figure.facecolor': 'w',
    'figure.figsize': [16.18033, 10],
    'figure.dpi': 200,
})
In [19]:
# Compare Qw per protein (Rerun == 1 only) between the original run and
# iterations 0-5 of the combined training set.
data_origin = pd.read_csv("/Users/weilu/Research/data/optimization/original_iterative_optimization_combined_train_set_04-01.csv", index_col=0)
data = pd.read_csv("/Users/weilu/Research/data/optimization/iter0_normalized_noFrag_iterative_optimization_combined_train_set_03-31.csv", index_col=0)
data2 = pd.read_csv("/Users/weilu/Research/data/optimization/iter1_normalized_noFrag_iterative_optimization_combined_train_set_03-31.csv", index_col=0)
data3 = pd.read_csv("/Users/weilu/Research/data/optimization/iter2_normalized_noFrag_iterative_optimization_combined_train_set_04-01.csv", index_col=0)
data4 = pd.read_csv("/Users/weilu/Research/data/optimization/iter3_normalized_noFrag_iterative_optimization_combined_train_set_04-03.csv", index_col=0)
# Loaded but not included in the plot below.
data5 = pd.read_csv("/Users/weilu/Research/data/optimization/iter3_normalized_noFrag_90_iterative_optimization_combined_train_set_04-03.csv", index_col=0)
data6 = pd.read_csv("/Users/weilu/Research/data/optimization/iter4_normalized_noFrag_iterative_optimization_combined_train_set_04-03.csv", index_col=0)
data7 = pd.read_csv("/Users/weilu/Research/data/optimization/iter5_normalized_noFrag_iterative_optimization_combined_train_set_04-04.csv", index_col=0)
# ignore_index gives the combined frame a unique index (each CSV's index restarts at 0).
d = pd.concat([
    data_origin.query("Rerun == 1").assign(Scheme="original"),
    data.query("Rerun == 1").assign(Scheme="iter0"),
    data2.query("Rerun == 1").assign(Scheme="iter1"),
    data3.query("Rerun == 1").assign(Scheme="iter2"),
    data4.query("Rerun == 1").assign(Scheme="iter3"),
    data6.query("Rerun == 1").assign(Scheme="iter4"),
    data7.query("Rerun == 1").assign(Scheme="iter5"),
], ignore_index=True)
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", hue="Scheme", data=d)
Out[19]:
In [77]:
# Collect the tail of every "iter5_normalized_noFrag" run (10 runs x 2 reruns),
# save it to a dated CSV and plot Qw per protein for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set"
pre = pre + folder + "/"
simulationType = "iter5_normalized_noFrag"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[77]:
In [75]:
# Collect the tail of every "iter4_normalized_noFrag" run (10 runs x 2 reruns),
# save it to a dated CSV and plot Qw per protein for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set"
pre = pre + folder + "/"
simulationType = "iter4_normalized_noFrag"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[75]:
In [70]:
# Collect the tail of every "iter3_normalized_noFrag_90" run (10 runs x 2 reruns),
# save it to a dated CSV and plot Qw per protein for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set"
pre = pre + folder + "/"
simulationType = "iter3_normalized_noFrag_90"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[70]:
In [16]:
# Collect the tail of every "original" run (10 runs x 2 reruns),
# save it to a dated CSV and plot Qw per protein for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set"
pre = pre + folder + "/"
simulationType = "original"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[16]:
In [71]:
# Collect the tail of every "iter3_normalized_noFrag" run (10 runs x 2 reruns),
# save it to a dated CSV and plot Qw per protein for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set"
pre = pre + folder + "/"
simulationType = "iter3_normalized_noFrag"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[71]:
In [57]:
# Collect the tail of every "iter2_normalized_noFrag" run (10 runs x 2 reruns),
# save it to a dated CSV and plot Qw per protein for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set"
pre = pre + folder + "/"
simulationType = "iter2_normalized_noFrag"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[57]:
In [13]:
# Collect the tail of every "iter0_normalized_noFrag" run (10 runs x 2 reruns),
# save it to a dated CSV and plot Qw per protein for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set"
pre = pre + folder + "/"
simulationType = "iter0_normalized_noFrag"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[13]:
In [14]:
# Collect the tail of every "iter1_normalized_noFrag" run (10 runs x 2 reruns),
# save it to a dated CSV and plot Qw per protein for Rerun == 1 only.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_combined_train_set"
pre = pre + folder + "/"
simulationType = "iter1_normalized_noFrag"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["combined"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10, rerun=2, formatName=True)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data.query("Rerun == 1"))
Out[14]:
In [10]:
# Preview the first rows of `data`.
data.head()
Out[10]:
In [11]:
# Preview the last rows of `data`.
data.tail()
Out[11]:
In [18]:
# Compare Qw per protein between the "single" run (labelled "original") and
# "iter7_2" (labelled "iter7") on the old set.
data = pd.read_csv("/Users/weilu/Research/data/optimization/single_iterative_optimization_old_set_with_frag_03-28.csv", index_col=0)
data2 = pd.read_csv("/Users/weilu/Research/data/optimization/iter7_2_iterative_optimization_old_set_with_frag_03-28.csv", index_col=0)
# ignore_index gives the combined frame a unique index (both CSVs are read with their own saved index).
d = pd.concat([
    data.assign(Scheme="original"),
    data2.assign(Scheme="iter7"),
], ignore_index=True)
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", hue="Scheme", data=d)
Out[18]:
In [66]:
# Compare Qw per protein between the "single" run (labelled "original") and
# "iter7_2" (labelled "iter7") on the new set.
data = pd.read_csv("/Users/weilu/Research/data/optimization/single_iterative_optimization_new_set_with_frag_04-02.csv", index_col=0)
data2 = pd.read_csv("/Users/weilu/Research/data/optimization/iter7_2_iterative_optimization_new_set_with_frag_04-01.csv", index_col=0)
# ignore_index gives the combined frame a unique index (both CSVs are read with their own saved index).
d = pd.concat([
    data.assign(Scheme="original"),
    data2.assign(Scheme="iter7"),
], ignore_index=True)
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", hue="Scheme", data=d)
Out[66]:
In [61]:
# Collect the tail of every "iter7_2" run for the new set (10 runs, get_data's
# default rerun count), save it to a dated CSV and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_new_set_with_frag"
pre = pre + folder + "/"
simulationType = "iter7_2"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["new"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[61]:
In [65]:
# Collect the tail of every "single" run for the new set (10 runs, get_data's
# default rerun count), save it to a dated CSV and plot Qw per protein.
pre = "/Users/weilu/Research/server/april_2019/"
folder = "iterative_optimization_new_set_with_frag"
pre = pre + folder + "/"
simulationType = "single"
today = datetime.datetime.today().strftime('%m-%d')
pdb_list = dataset["new"]
data = get_data(pre, pdb_list, simType=simulationType, n_rum=10)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/{simulationType}_{folder}_{today}.csv")
# x/y passed by keyword: seaborn >= 0.12 no longer accepts them positionally.
sns.boxplot(x="Name", y="Qw", data=data)
Out[65]:
In [53]:
# Simulation folder whose frags.mem is passed to relocate() at the end of this cell.
location = "/Users/weilu/Research/server/april_2019/iterative_optimization_new_set_with_frag/all_simulations/1fc2/1fc2"
# Earlier line-by-line version of relocate(), kept commented out; the active
# definition follows.
# def relocate(location):
# fileLocation = location + "/frags.mem"
# pre = location + "/../"
# os.system(f"mkdir -p {pre}/fraglib")
# with open(fileLocation) as f:
# next(f)
# next(f)
# next(f)
# next(f)
# for line in f:
# out = os.system(f"cp {line.split()[0]} {pre}/fraglib/")
# if out != 0:
# print(f"!!Problem!!, {line.split()[0]}")
def relocate(location):
    """Copy every fragment file listed in ``{location}/frags.mem`` into ``{location}/../fraglib``.

    The first four lines of ``frags.mem`` are skipped. The remaining rows are
    parsed as space-separated records whose first field is the path of a
    fragment file. Each distinct path is copied once. A file that cannot be
    copied is reported on stdout and the remaining files are still processed.

    Parameters
    ----------
    location : str
        Folder containing ``frags.mem``.
    """
    import shutil  # local import keeps this definition self-contained

    fileLocation = location + "/frags.mem"
    fraglib = os.path.join(location, "..", "fraglib")
    os.makedirs(fraglib, exist_ok=True)
    table = pd.read_csv(fileLocation, skiprows=4, sep=" ", names=["location", "i", "j", "sep", "w"])
    for path in table["location"].unique():
        try:
            # Copy without a shell so paths with spaces or metacharacters are safe.
            shutil.copy(path, fraglib)
        except OSError:
            # The old message referenced an undefined `line`; report the path itself.
            print(f"!!Problem!!, {path}")
# Run the fragment copy for the folder stored in `location`.
relocate(location)
In [64]:
# Scratch check of how a list renders inside an f-string.
a = ["a", "b"]
print(f"{a}")
In [80]:
# Read the FASTA file: echo header lines, skip blank lines, and join the
# remaining lines into a single sequence string in `data`.
fastaFile = "/Users/weilu/Research/server/april_2019/complete_2xov/P09391.fasta"
with open(fastaFile) as input_data:
    data = ""
    for line in input_data:
        if line[0] == ">":
            print(line)
        elif line != "\n":
            data += line.strip("\n")
print(len(data))
print(data)
In [81]:
# Display the current value of `data`.
data
Out[81]:
In [83]:
from Bio.PDB.Polypeptide import one_to_three
In [84]:
# Quick check of one_to_three on the one-letter code "A".
one_to_three("A")
Out[84]:
In [88]:
# Scratch value. NOTE(review): `i` is reassigned by later cells — confirm this cell is still needed.
i = 27
In [98]:
# Check the " >4" format spec: right-aligns in a 4-character, space-padded field.
t = 123
print(f"!{t: >4}!")
In [108]:
from small_script.myFunctions import duplicate_pdb
In [112]:
# Call duplicate_pdb on 2lep.pdb, writing shifted.pdb with offset_z=-50 and new_chain="A".
# NOTE(review): presumably shifts the copy along z and relabels its chain — confirm
# against small_script.myFunctions.duplicate_pdb.
duplicate_pdb("/Users/weilu/Research/server/april_2019/complete_2xov/2lep.pdb",
              "/Users/weilu/Research/server/april_2019/complete_2xov/shifted.pdb", offset_z=-50, new_chain="A")
In [107]:
def generate_SEQRES(fastaFile, chain="A"):
    """Build PDB ``SEQRES`` records for the sequence stored in a FASTA file.

    Header lines (starting with ``>``) are printed, blank lines are ignored,
    and all other lines are joined into one sequence. The sequence is emitted
    13 residues per record with an increasing serial number, laid out on the
    PDB SEQRES columns (serial 8-10, chain 12, residue count 14-17, residue
    names from column 20) and padded to 80 characters.

    Relies on ``one_to_three`` being in scope to turn one-letter codes into
    three-letter residue names.

    Parameters
    ----------
    fastaFile : str
        Path of the FASTA file.
    chain : str
        Chain identifier written into every record (default ``"A"``).

    Returns
    -------
    str
        The newline-terminated SEQRES records; empty if there is no sequence.
    """
    data = ""
    with open(fastaFile) as input_data:
        for line in input_data:
            if line[0] == ">":
                print(line)
                continue
            # strip() also removes "\r" from CRLF files and stray spaces.
            data += line.strip()
    length = len(data)
    records = []
    for serial, start in enumerate(range(0, length, 13), start=1):
        residues = " ".join(one_to_three(r) for r in data[start:start + 13])
        record = f"SEQRES {serial:>3} {chain} {length:>4}  {residues:<51}"
        records.append(record.ljust(80) + "\n")
    return "".join(records)
In [121]:
# Build SEQRES records for the sequence in `data`, 13 residues per record.
i = 0
template = ""
c = 1
length = len(data)
while i < len(data):
    seq = data[i:i+13]
    a = [one_to_three(r) for r in seq]
    ss = " ".join(a)
    # PDB SEQRES columns: serial 8-10, chain 12, residue count 14-17, residue
    # names from column 20 (two blanks after the count); pad to 80 characters.
    template += f"SEQRES {c: >3} A {length: >4}  {ss: <51}".ljust(80) + "\n"
    i += 13
    c += 1
In [122]:
# Show the SEQRES text accumulated in `template`.
print(template)
In [123]:
from Bio.PDB.PDBParser import PDBParser
In [124]:
from pdbfixer import PDBFixer
In [125]:
# PDB file handed to PDBFixer in the next cell.
fromFile = "/Users/weilu/Research/server/april_2019/complete_2xov/original_pdbs/complete.pdb"
In [126]:
# Load the structure into a PDBFixer instance.
fixer = PDBFixer(filename=fromFile)
In [132]:
# Run PDBFixer's missing-residue search; fixer.missingResidues is inspected in a later cell.
fixer.findMissingResidues()
In [134]:
# Materialize all chains of the fixer topology.
chains = list(fixer.topology.chains())
In [135]:
# Display the chain list.
chains
Out[135]:
In [133]:
# Display the keys of fixer.missingResidues.
fixer.missingResidues.keys()
Out[133]:
In [129]:
# Display the sequence records held by the fixer.
list(fixer.sequences)
Out[129]:
In [139]:
# Keep only chains that contain at least one residue.
chains = [c for c in fixer.topology.chains() if len(list(c.residues())) > 0]
# For each chain, place residue names by residue id between the smallest and
# largest id seen, leaving None where an id is absent.
chainWithGaps = {}
for chain in chains:
    residueIds = [int(r.id) for r in chain.residues()]
    minResidue = min(residueIds)
    maxResidue = max(residueIds)
    residues = [None] * (maxResidue - minResidue + 1)
    for r in chain.residues():
        residues[int(r.id) - minResidue] = r.name
    chainWithGaps[chain] = residues
In [145]:
# Match each chain to the entry of fixer.sequences with the same chain id and
# record the offset at which the chain's residues line up inside that sequence.
chainSequence = {}
chainOffset = {}
for sequence in fixer.sequences:
    for chain in chains:
        # Debug output: every (chain, sequence) pair considered.
        print(chain.id, sequence.chainId)
        if chain.id != sequence.chainId:
            continue
        if chain in chainSequence:
            continue
        # Debug output: only pairs with matching ids whose chain is still unassigned.
        print(chain.id, sequence.chainId)
        # Try every offset at which the gapped chain fits inside the sequence;
        # None entries in chainWithGaps match any residue.
        for offset in range(len(sequence.residues)-len(chainWithGaps[chain])+1):
            if all(a == b or b == None for a,b in zip(sequence.residues[offset:], chainWithGaps[chain])):
                chainSequence[chain] = sequence
                chainOffset[chain] = offset
                break
        # Once this chain has been assigned, move on to the next sequence.
        if chain in chainSequence:
            break
In [146]:
# Length of the last `sequence` left over from the matching loop.
len(sequence.residues)
Out[146]:
In [147]:
# Length of the gapped residue list for the last `chain` left over from the loop.
len(chainWithGaps[chain])
Out[147]:
In [142]:
# Display the offsets found for each matched chain.
chainOffset
Out[142]:
In [143]:
# Display the sequence assigned to each matched chain.
chainSequence
Out[143]:
In [138]:
# Print the residues, residue count and id of every sequence record.
for a in fixer.sequences:
    print(a.residues, len(a.residues), a.id)
In [116]:
# Show the current contents of `template`.
print(template)
In [ ]:
# Take the next window of up to 13 characters starting at position i.
seq = data[i:i+13] if i+13 < len(data) else data[i:]
In [106]:
"SEQRES 1 A 276 GLY LEU ALA MET ALA PHE VAL ASP SER LEU ASN ALA ARG"[20]
Out[106]:
In [ ]:
# Scratch: append one SEQRES line for the residues in `seq`.
a = [one_to_three(r) for r in seq]
# Join the first 13 residue names. The previous a[13] picked a single element,
# which raises IndexError for 13 or fewer residues and otherwise joins the
# letters of one residue name.
template += "SEQRES 1 A 69 " + " ".join(a[:13]) + "\n"
# Sample SEQRES lines kept for reference while working out the layout.
"SEQRES 1 A 69 MET LEU MET ILE THR SER PHE ALA ASN PRO ARG VAL ALA"
"SEQRES 1 A 69 GLY LEU ALA MET ALA PHE VAL ASP SER LEU ASN ALA ARG"
"SEQRES 1 A 276 GLY LEU ALA MET ALA PHE VAL ASP SER LEU ASN ALA ARG"[20]
"SEQRES 21 A 276 GLY LEU ALA MET ALA PHE VAL ASP SER LEU ASN ALA ARG"
"SEQRES 21 A 276 GLY LEU ALA MET ALA PHE VAL ASP SER LEU ASN ALA ARG "
"SEQRES 21 A 276 GLY LEU ALA MET ALA PHE VAL ASP SER LEU ASN ALA ARG "