In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
# Default figure size is 10 x 6.18 inches (width / height is approx. 1.618),
# drawn with seaborn's "whitegrid" style.
sns.set(rc={'figure.figsize':(10,6.180)})
sns.set_style("whitegrid")
%matplotlib inline
In [106]:
def my_reorder(a, first):
    """Return the items of ``a`` as a list with ``first`` moved to the front.

    Parameters
    ----------
    a : iterable
        Items to reorder (e.g. a list of column names or ``DataFrame.columns``).
    first : iterable
        Items that must lead the result, in the order given. Any iterable is
        accepted (list, tuple, pandas Index, ...); it is never mutated.

    Returns
    -------
    list
        Every item of ``first`` (even those absent from ``a``), followed by
        the items of ``a`` that are not in ``first``, in their original order.
    """
    # list(...) rather than first.copy(): works for tuples and other
    # iterables that have no .copy() method, and still leaves the caller's
    # object untouched.
    leading = list(first)
    return leading + [col for col in a if col not in leading]
def read_pdb(pre, name, run=30, rerun=2):
    """Load and merge ``wham.dat`` / ``energy.dat`` for every run of one PDB.

    For each ``i in range(run)`` and ``j in range(rerun)`` the files
    ``{pre}{name}/simulation/{i}/{j}/wham.dat`` and ``.../energy.dat`` are read
    as CSV. Paths are built by plain string concatenation, so ``pre`` must end
    with a path separator.

    Parameters
    ----------
    pre : str
        Directory prefix that contains the ``{name}`` folder.
    name : str
        PDB folder name.
    run : int, default 30
        Number of outer simulation indices to scan.
    rerun : int, default 2
        Number of inner indices to scan per outer index.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all readable runs with a fresh integer index.
        Columns "Steps", "Qw", "VTotal", "Run", "Repeat" are placed first.
        NOTE: the outer index ``i`` is stored in column "Repeat" and the inner
        index ``j`` in column "Run" (the skip message labels them "Run" and
        "Rerun" respectively).

    Raises
    ------
    ValueError
        If no ``wham.dat`` could be read for any (i, j) combination.
    """
    all_data = []
    for i in range(run):
        for j in range(rerun):
            location = pre + f"{name}/simulation/{i}/{j}/"
            try:
                wham = pd.read_csv(location+"wham.dat")
            except Exception:
                # Best-effort: a missing or unreadable run is reported and
                # skipped. Catching Exception (not a bare except) lets
                # KeyboardInterrupt / SystemExit stop a long load.
                print(f"PDB: {name}, Run: {i}, Rerun: {j} not exist")
                print(location+"wham.dat")
                continue
            # Header names may carry surrounding whitespace; normalise them.
            wham.columns = wham.columns.str.strip()
            wham = wham.drop(['Tc', 'Energy'], axis=1)

            energy = pd.read_csv(location+"energy.dat")
            energy.columns = energy.columns.str.strip()
            # "Steps" is dropped here so it does not appear twice after the
            # column-wise concat (presumably wham.dat already carries it --
            # TODO confirm).
            energy = energy.drop(
                ['Steps', 'Shake', 'Excluded', 'Helix', 'AMH-Go', 'Vec_FM', 'SSB'],
                axis=1,
            )

            # Side-by-side merge on the row index, tagged with the loop indices.
            data = pd.concat([wham, energy], axis=1).assign(Repeat=i, Run=j)
            all_data.append(data)

    if not all_data:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(
            f"No wham.dat could be read for PDB {name!r} under {pre!r} "
            f"(run={run}, rerun={rerun})"
        )

    data = pd.concat(all_data).reset_index(drop=True)
    data = data.reindex(columns=my_reorder(data.columns, ["Steps", "Qw", "VTotal", "Run", "Repeat"]))
    print(name, len(data))
    return data
In [49]:
# PDB identifier lists keyed by set name; "combined" is "old" followed by "new".
dataset = {
    "old": ["1R69", "1UTG", "3ICB", "256BA", "4CPV", "1CCR", "2MHR", "1MBA", "2FHA"],
    "new": ["1FC2C", "1ENH", "2GB1", "2CRO", "1CTF", "4ICB"],
    "test": [
        "t089", "t120", "t251", "top7", "1ubq",
        "t0766", "t0778", "t0782", "t0792", "t0803",
        "t0815", "t0833", "t0842", "t0844",
    ],
}
dataset["combined"] = dataset["old"] + dataset["new"]
def get_complete_data(pre, folder_list, pdb_list, **kwargs):
    """Collect ``read_pdb`` results for every (folder, PDB) pair into one frame.

    Parameters
    ----------
    pre : str
        Root prefix; each folder is read from ``f"{pre}{folder}/"``.
    folder_list : iterable of str
        Sub-folders of ``pre`` to scan.
    pdb_list : iterable of str
        PDB identifiers. Each one is lower-cased and cut to its first four
        characters before being used as the folder name.
    **kwargs
        Forwarded unchanged to ``read_pdb`` (e.g. ``run``, ``rerun``).

    Returns
    -------
    pandas.DataFrame
        All rows concatenated, with "Name" and "Folder" as leading columns.
        The index is not reset, so index values repeat across PDBs/folders.
    """
    per_folder = []
    for folder in folder_list:
        folder_root = f"{pre}{folder}/"
        frames = []
        for pdb_id in pdb_list:
            # NOTE(review): the [:4] cut maps ids that share their first four
            # characters (e.g. "t0842" and "t0844") to the same name.
            short_name = pdb_id.lower()[:4]
            loaded = read_pdb(folder_root, short_name, **kwargs)
            frames.append(loaded.assign(Name=short_name))
        per_folder.append(pd.concat(frames).assign(Folder=folder))
    combined = pd.concat(per_folder)
    column_order = my_reorder(combined.columns, ["Name", "Folder"])
    return combined.reindex(columns=column_order)
In [107]:
# Load the "strengthen_beta" folder for 2lep (run=20, rerun=1).
pre = "/Users/weilu/Research/server/april_2019/globular_2xov_named_2lep/"
folder_list = ["strengthen_beta"]
pdb_list = ["2lep"]
data = get_complete_data(pre, folder_list, pdb_list, run=20, rerun=1)
# Second name for the same frame object (no copy is made).
data_strength_beta = data
In [108]:
# Load the "longerRun" folder for 2lep (run=20, rerun=2).
pre = "/Users/weilu/Research/server/april_2019/globular_2xov_named_2lep/"
folder_list = ["longerRun"]
pdb_list = ["2lep"]
data = get_complete_data(pre, folder_list, pdb_list, run=20, rerun=2)
# Second name for the same frame object (no copy is made).
data_longerRun = data
In [97]:
# Load the "longerRun" folder for 2lep and dump it to a CSV named after today's date.
pre = "/Users/weilu/Research/server/april_2019/globular_2xov_named_2lep/"
folder_list = ["longerRun"]
pdb_list = ["2lep"]
data = get_complete_data(pre, folder_list, pdb_list, run=20, rerun=2)
# subset_data = data.query("Steps % 80000 == 0")
# Month-day only ("%m-%d"): the file name carries no year.
today = datetime.datetime.today().strftime('%m-%d')
print(today)
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")
# a = pd.read_csv("/Users/weilu/Research/data/optimization/energy_03-31.csv", index_col=0)
# b = pd.read_csv("/Users/weilu/Research/data/optimization/energy_04-01.csv", index_col=0)
# data = pd.concat([a,b])
# NOTE(review): other cells in this notebook write to the same energy_{today}.csv
# path, so running several of them on one day overwrites the file.
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")
In [101]:
# Bind whatever `data` currently holds; the result depends on which cell assigned `data` last.
data_strength_beta = data
In [99]:
# Bind whatever `data` currently holds; the result depends on which cell assigned `data` last.
data_longerRun = data
In [109]:
# Stack both frames row-wise (index values are kept as-is) and rebind `data` to the result.
data = pd.concat([data_strength_beta, data_longerRun])
In [110]:
# Show the first row to check the column layout.
data.head(1)
Out[110]:
In [ ]:
# DSSP vs Qw: one facet per Name (4 per row), coloured by Folder, independent axes.
g = sns.FacetGrid(
    data, col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False
)
g = g.map(plt.scatter, "Qw", "DSSP", alpha=0.1)
g = g.add_legend()
In [111]:
# DSSP vs Qw: one facet per Name (4 per row), coloured by Folder, independent axes.
g = sns.FacetGrid(
    data, col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False
)
g = g.map(plt.scatter, "Qw", "DSSP", alpha=0.1)
g = g.add_legend()
In [104]:
# Water vs Qw (alpha=0.1): one facet per Name, coloured by Folder, independent axes.
g = sns.FacetGrid(
    data, col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False
)
g = g.map(plt.scatter, "Qw", "Water", alpha=0.1)
g = g.add_legend()
In [98]:
# Water vs Qw (alpha=0.5): one facet per Name, coloured by Folder, independent axes.
g = sns.FacetGrid(
    data, col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False
)
g = g.map(plt.scatter, "Qw", "Water", alpha=0.5)
g = g.add_legend()
In [62]:
# Load "iter2_normalized_noFrag" for the combined set (run=10, rerun=2), keep a
# thinned copy, and dump the full frame to a CSV named after today's date.
pre = "/Users/weilu/Research/server/april_2019/iterative_optimization_combined_train_set/"
folder_list = ["iter2_normalized_noFrag"]
pdb_list = dataset["combined"]
data = get_complete_data(pre, folder_list, pdb_list, run=10, rerun=2)
# Keep only rows whose Steps value is a multiple of 80000.
subset_data = data.query("Steps % 80000 == 0")
# Month-day only ("%m-%d"): the file name carries no year.
today = datetime.datetime.today().strftime('%m-%d')
print(today)
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")
# a = pd.read_csv("/Users/weilu/Research/data/optimization/energy_03-31.csv", index_col=0)
# b = pd.read_csv("/Users/weilu/Research/data/optimization/energy_04-01.csv", index_col=0)
# data = pd.concat([a,b])
# NOTE(review): other cells in this notebook write to the same energy_{today}.csv
# path, so running several of them on one day overwrites the file.
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")
In [72]:
# Reload a previously saved dump; the first CSV column is used as the index.
data = pd.read_csv("/Users/weilu/Research/data/optimization/energy_04-01.csv", index_col=0)
In [76]:
# (rows, columns) of the frame currently bound to `data`.
data.shape
Out[76]:
In [84]:
# Load "iter6_normalized_noFrag" from the with_frag tree (run=10, rerun=2), keep a
# thinned copy, and dump the full frame to energy_{today}_1.csv.
pre = "/Users/weilu/Research/server/april_2019/iterative_optimization_combined_train_set_with_frag/"
folder_list = ["iter6_normalized_noFrag"]
pdb_list = dataset["combined"]
data = get_complete_data(pre, folder_list, pdb_list, run=10, rerun=2)
# Keep only rows whose Steps value is a multiple of 80000.
subset_data = data.query("Steps % 80000 == 0")
# Month-day only ("%m-%d"): the file name carries no year.
today = datetime.datetime.today().strftime('%m-%d')
print(today)
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")
# a = pd.read_csv("/Users/weilu/Research/data/optimization/energy_03-31.csv", index_col=0)
# b = pd.read_csv("/Users/weilu/Research/data/optimization/energy_04-01.csv", index_col=0)
# data = pd.concat([a,b])
# The "_1" suffix keeps this dump apart from the plain energy_{today}.csv written by other cells.
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}_1.csv")
In [83]:
# Load "iter0" from the with_frag tree (run=30, rerun=1) and dump it to a CSV
# named after today's date. `subset_data` is NOT refreshed by this cell.
pre = "/Users/weilu/Research/server/april_2019/iterative_optimization_combined_train_set_with_frag/"
folder_list = ["iter0"]
pdb_list = dataset["combined"]
data = get_complete_data(pre, folder_list, pdb_list, run=30, rerun=1)
# subset_data = data.query("Steps % 80000 == 0")
# Month-day only ("%m-%d"): the file name carries no year.
today = datetime.datetime.today().strftime('%m-%d')
print(today)
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")
# a = pd.read_csv("/Users/weilu/Research/data/optimization/energy_03-31.csv", index_col=0)
# b = pd.read_csv("/Users/weilu/Research/data/optimization/energy_04-01.csv", index_col=0)
# data = pd.concat([a,b])
# NOTE(review): other cells in this notebook write to the same energy_{today}.csv
# path, so running several of them on one day overwrites the file.
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")
In [112]:
# Reload a previously saved dump; the first CSV column is used as the index.
data = pd.read_csv("/Users/weilu/Research/data/optimization/energy_04-08.csv", index_col=0)
In [113]:
# Bind whatever `subset_data` currently holds; presumably the iter6 subset -- this
# depends on which cell assigned `subset_data` last (verify before re-running).
subset_iter6 = subset_data
In [115]:
# Show the first five rows of the frame currently bound to `data`.
data.head()
Out[115]:
In [116]:
# Water vs Frag_Mem: one facet per Name (4 per row), coloured by Folder, independent axes.
g = sns.FacetGrid(
    data, col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False
)
g = g.map(plt.scatter, "Frag_Mem", "Water", alpha=0.5)
g = g.add_legend()
In [87]:
# Keep only rows whose Steps value is a multiple of 80000, taken from whatever `data` holds now.
subset_iter0_with_rg = data.query("Steps % 80000 == 0")
In [92]:
# Stack the two thinned frames row-wise and rebind `data` to the result.
data = pd.concat([subset_iter0_with_rg, subset_iter6])
In [ ]:
In [95]:
# Water vs Qw: one facet per Name (4 per row), coloured by Folder, independent axes.
g = sns.FacetGrid(
    data, col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False
)
g = g.map(plt.scatter, "Qw", "Water", alpha=0.5)
g = g.add_legend()
In [93]:
# Rg vs Steps: one facet per Name (4 per row), coloured by Folder, independent axes.
g = sns.FacetGrid(
    data, col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False
)
g = g.map(plt.scatter, "Steps", "Rg", alpha=0.5)
g = g.add_legend()
In [89]:
# Rg vs Steps for subset_iter0_with_rg: one facet per Name, coloured by Folder.
g = sns.FacetGrid(
    subset_iter0_with_rg,
    col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False,
)
g = g.map(plt.scatter, "Steps", "Rg", alpha=0.5)
g = g.add_legend()
In [88]:
# Rg vs Steps for subset_iter6: one facet per Name, coloured by Folder.
g = sns.FacetGrid(
    subset_iter6,
    col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False,
)
g = g.map(plt.scatter, "Steps", "Rg", alpha=0.5)
g = g.add_legend()
In [82]:
# Rg vs Steps for subset_data: one facet per Name, coloured by Folder.
g = sns.FacetGrid(
    subset_data,
    col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False,
)
g = g.map(plt.scatter, "Steps", "Rg", alpha=0.5)
g = g.add_legend()
In [78]:
# Rg vs Steps for subset_data: one facet per Name, coloured by Folder.
g = sns.FacetGrid(
    subset_data,
    col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False,
)
g = g.map(plt.scatter, "Steps", "Rg", alpha=0.5)
g = g.add_legend()
In [75]:
# Water vs Qw for subset_data: one facet per Name, coloured by Folder.
g = sns.FacetGrid(
    subset_data,
    col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False,
)
g = g.map(plt.scatter, "Qw", "Water", alpha=0.5)
g = g.add_legend()
In [64]:
# Water vs Qw for subset_data: one facet per Name, coloured by Folder.
g = sns.FacetGrid(
    subset_data,
    col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False,
)
g = g.map(plt.scatter, "Qw", "Water", alpha=0.5)
g = g.add_legend()
In [50]:
# Load "iter0_normalized_noFrag" and "iter1_normalized_noFrag" for the combined set
# (run=10, rerun=2), keep a thinned copy, and dump the full frame to a dated CSV.
pre = "/Users/weilu/Research/server/april_2019/iterative_optimization_combined_train_set/"
folder_list = ["iter0_normalized_noFrag", "iter1_normalized_noFrag"]
pdb_list = dataset["combined"]
data = get_complete_data(pre, folder_list, pdb_list, run=10, rerun=2)
# Keep only rows whose Steps value is a multiple of 80000.
subset_data = data.query("Steps % 80000 == 0")
# Month-day only ("%m-%d"): the file name carries no year.
today = datetime.datetime.today().strftime('%m-%d')
print(today)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")
In [52]:
# Dump whatever `data` currently holds to energy_{today}.csv (month-day only, no year).
today = datetime.datetime.today().strftime('%m-%d')
print(today)
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")
In [57]:
# Water vs Steps for subset_data: one facet per Name, coloured by Folder.
g = sns.FacetGrid(
    subset_data,
    col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False,
)
g = g.map(plt.scatter, "Steps", "Water", alpha=0.5)
g = g.add_legend()
In [58]:
# Water vs Qw for subset_data: one facet per Name, coloured by Folder.
g = sns.FacetGrid(
    subset_data,
    col="Name", col_wrap=4, hue="Folder", sharey=False, sharex=False,
)
g = g.map(plt.scatter, "Qw", "Water", alpha=0.5)
g = g.add_legend()
In [ ]:
In [31]:
# NOTE: here every `dataset` value is a (pdb ids, number) pair, unlike the plain
# lists used by the other `dataset` definition in this notebook; running this
# cell rebinds the global name.
dataset = {
    "old": (
        ["1R69", "1UTG", "3ICB", "256BA", "4CPV", "1CCR", "2MHR", "1MBA", "2FHA"],
        40,
    ),
    "new": (
        ["1FC2C", "1ENH", "2GB1", "2CRO", "1CTF", "4ICB"],
        80,
    ),
    "test": (
        [
            "t089", "t120", "t251", "top7", "1ubq",
            "t0766", "t0778", "t0782", "t0792", "t0803",
            "t0815", "t0833", "t0842", "t0844",
        ],
        40,
    ),
}
# `steps` is unpacked but not used further in this cell.
pdb_list, steps = dataset["old"]

# Read every "old" PDB from the "single" and "noFrag" folders (run=30, rerun=1).
complete_all_data = []
for folder in ["single", "noFrag"]:
    pre = f"/Users/weilu/Research/server/april_2019/iterative_optimization_old_set/{folder}/"
    all_data = []
    for p in pdb_list:
        name = p.lower()[:4]
        tmp = read_pdb(pre, name, run=30, rerun=1)
        all_data.append(tmp.assign(Name=name))
    data = pd.concat(all_data)
    complete_all_data.append(data.assign(Folder=folder))

data = pd.concat(complete_all_data)
data = data.reindex(columns=my_reorder(data.columns, ["Name", "Folder"]))
# Keep only rows whose Steps value is a multiple of 80000.
data_subset = data.query("Steps % 80000 == 0")
In [32]:
# Show the first five rows of the frame currently bound to `data`.
data.head()
Out[32]:
In [33]:
# Keep only rows whose Steps value is a multiple of 80000.
data_subset = data.query("Steps % 80000 == 0")
In [40]:
# Water vs Qw for data_subset: one facet per Name (4 per row), coloured by Folder.
g = sns.FacetGrid(data_subset, col="Name", col_wrap=4, hue="Folder")
g = g.map(plt.scatter, "Qw", "Water", alpha=0.5)
g = g.add_legend()
In [39]:
# Water vs Steps for data_subset: one facet per Name (4 per row), coloured by Folder.
g = sns.FacetGrid(data_subset, col="Name", col_wrap=4, hue="Folder")
g = g.map(plt.scatter, "Steps", "Water", alpha=0.5)
g = g.add_legend()
In [27]:
# Frag_Mem vs Steps for data_subset: one facet per Name (4 per row), no hue, no legend.
g = sns.FacetGrid(data_subset, col="Name", col_wrap=4).map(
    plt.scatter, "Steps", "Frag_Mem"
)
In [29]:
# Qw vs Steps for data_subset: one facet per Name (4 per row), no hue, no legend.
g = sns.FacetGrid(data_subset, col="Name", col_wrap=4).map(
    plt.scatter, "Steps", "Qw"
)
In [16]:
# Qw vs Steps for the full `data` frame: one facet per Name (4 per row), no hue, no legend.
g = sns.FacetGrid(data, col="Name", col_wrap=4).map(
    plt.scatter, "Steps", "Qw"
)
In [ ]: