In [123]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide plot defaults: a 10 x 6.18 figure size (width/height is about
# 1.618) and seaborn's "whitegrid" style. Cells that call plt.figure(figsize=...)
# override the size for that figure only.
sns.set(rc={'figure.figsize':(10,6.180)})
sns.set_style("whitegrid")
In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [64]:
def my_reorder(a, first):
    """Return the items of ``a`` reordered so the items of ``first`` come first.

    The result starts with every item of ``first`` (in the order given, even
    if an item does not occur in ``a``), followed by the items of ``a`` that
    are not in ``first``, in their original order.

    Parameters
    ----------
    a : iterable
        Items to reorder (for example ``DataFrame.columns``).
    first : iterable
        Items to move to the front. Any iterable is accepted (list, tuple,
        generator, ...); it is never modified.

    Returns
    -------
    list
        A new list with the reordered items.
    """
    # Materialise once: this supports tuples and generators (which have no
    # .copy() and, for generators, would be consumed by the membership test).
    leading = list(first)
    return leading + [col for col in a if col not in leading]
In [69]:
# Default location of the simulation output; kept identical to the path the
# notebook originally hard-coded so existing calls behave the same.
_DEFAULT_SIMULATION_ROOT = "/Users/weilu/Research/server/nov_2018/iterative_optimization_3/all_simulations/"


def read_pdb(name, base_dir=_DEFAULT_SIMULATION_ROOT, n_repeats=20, n_runs=2):
    """Load and combine ``wham.dat`` and ``energy.dat`` for every run of one PDB.

    For each ``repeat`` in ``range(n_repeats)`` and ``run`` in ``range(n_runs)``
    the files are read from ``<base_dir>/<name>/simulation/<repeat>/<run>/``.
    Column names are stripped of surrounding whitespace, a fixed set of
    columns is dropped from each file, the two tables are joined side by side
    and tagged with ``Repeat`` and ``Run`` columns.

    Parameters
    ----------
    name : str
        Folder name of the PDB under ``base_dir``.
    base_dir : str, optional
        Root directory holding one sub-folder per PDB.
    n_repeats : int, optional
        Number of repeat folders to scan (default 20).
    n_runs : int, optional
        Number of run folders to scan inside each repeat (default 2).

    Returns
    -------
    pandas.DataFrame
        All runs stacked with a fresh 0..n-1 index and with the columns
        "Steps", "Qw", "VTotal", "Run", "Repeat" moved to the front.

    Raises
    ------
    ValueError
        If no ``wham.dat`` was found for any (repeat, run) combination.

    Notes
    -----
    A run whose ``wham.dat`` is missing is reported with ``print`` and
    skipped. Any other read or parse error is allowed to propagate instead of
    being silently reported as a missing file.
    """
    import os  # local import so this cell does not depend on the import cell

    wham_drop = ['Tc', 'Energy']
    energy_drop = ['Steps', 'Shake', 'Excluded', 'DSSP', 'Helix', 'AMH-Go', 'Vec_FM', 'Membrane', 'SSB']

    all_data = []
    for i in range(n_repeats):
        for j in range(n_runs):
            location = os.path.join(base_dir, name, "simulation", str(i), str(j))
            try:
                wham = pd.read_csv(os.path.join(location, "wham.dat"))
            except FileNotFoundError:
                # Only a genuinely missing file is treated as "run not there".
                print(f"PDB: {name}, repeat: {i}, run: {j} not exist")
                continue
            wham.columns = wham.columns.str.strip()
            wham = wham.drop(columns=wham_drop)

            energy = pd.read_csv(os.path.join(location, "energy.dat"))
            energy.columns = energy.columns.str.strip()
            energy = energy.drop(columns=energy_drop)

            # Side-by-side join on the row index, then tag the run.
            data = pd.concat([wham, energy], axis=1).assign(Repeat=i, Run=j)
            all_data.append(data)

    if not all_data:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(
            f"No simulation data found for PDB {name!r} under {base_dir!r}"
        )

    data = pd.concat(all_data).reset_index(drop=True)
    data = data.reindex(columns=my_reorder(data.columns, ["Steps", "Qw", "VTotal", "Run", "Repeat"]))
    print(name, len(data))
    return data
In [71]:
# PDB identifiers as listed; only the lower-cased first four characters of
# each entry (e.g. "256BA" -> "256b") are passed to read_pdb.
pdb_list = "1R69, 1UTG, 3ICB, 256BA, 4CPV, 1CCR, 2MHR, 1MBA, 2FHA".split(", ")
all_data = [read_pdb(entry.lower()[:4]) for entry in pdb_list]
data = pd.concat(all_data)
In [72]:
# Rebind `data` to the frame for "1r69" only; whatever `data` held before
# this cell runs is replaced, so the cells below analyse this single PDB.
data = read_pdb("1r69")
In [ ]:
In [121]:
def _mean_of_top20_steps(group):
    """Column-wise mean of the 20 rows of ``group`` with the largest Steps."""
    by_steps_desc = group.sort_values(["Steps"], ascending=False)
    return by_steps_desc.head(n=20).mean()


# One summary row per (Repeat, Run) pair, labelled Type="Last20".
last20 = (
    data.groupby(["Repeat", "Run"])
    .apply(_mean_of_top20_steps)
    .reset_index(drop=True)
    .assign(Type="Last20")
)
In [120]:
def _row_with_highest_qw(group):
    """Single-row frame holding the row of ``group`` with the largest Qw."""
    by_qw_desc = group.sort_values(["Qw"], ascending=False)
    return by_qw_desc.head(n=1)


# One row per (Repeat, Run) pair, labelled Type="maxQ".
maxQ = (
    data.groupby(["Repeat", "Run"])
    .apply(_row_with_highest_qw)
    .reset_index(drop=True)
    .assign(Type="maxQ")
)
In [122]:
# Stack the two per-run summaries; the Type column ("Last20" / "maxQ") tells
# the rows apart. Both inputs carry their own 0..k-1 index, so ignore_index
# gives the combined frame unique row labels instead of repeating each label.
plot_data = pd.concat([last20, maxQ], ignore_index=True)
In [136]:
In [150]:
# Qw against the repeat number for every summary row; colour follows Run and
# marker size follows Type.
ax = sns.scatterplot(data=plot_data, x="Qw", y="Repeat", hue="Run", size="Type")
In [151]:
# Larger canvas than the notebook default for this plot only.
plt.figure(figsize=(15, 8))
# Qw against the Water column, coloured by summary Type.
ax = sns.scatterplot(data=plot_data, x="Qw", y="Water", hue="Type")
# plt.savefig("/Users/weilu/Desktop/test.png", dpi=300)
In [100]:
# Per-column max, min and a custom aggregate over `data`.
# NOTE(review): average_of_last_ten_percent is not defined in any visible cell
# of this notebook - presumably it came from a cell that was later deleted.
# Define it above this cell, otherwise a fresh Restart & Run All raises NameError.
data.agg(["max", "min", average_of_last_ten_percent])
Out[100]:
In [ ]:
# NOTE(review): np.mean() is called without its required array argument and
# raises TypeError when run; this looks like an unfinished scratch cell -
# pass the data to average or remove the cell.
np.mean()
In [ ]: