In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions
from Bio.PDB.Polypeptide import one_to_three
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
from small_script.myFunctions import *
from collections import defaultdict
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2

In [2]:
# Notebook-wide matplotlib defaults, applied in a single update call.
plt.rcParams.update({
    'figure.figsize': np.array([16.18033, 10]),    # golden ratio
    'figure.facecolor': 'w',
    'figure.dpi': 100,
    'font.size': 22,
})

In [3]:
# PDB IDs iterated over by the data-loading cells below.
pdb_list = [
    "1akr", "1opd", "1ptf", "1tig",
    "1tmy", "2acy", "5nul",
]

In [100]:
# Read info.dat for every (folder, protein, run) under jun_week1_2020, tag each
# table with its Run / Protein / Folder, concatenate everything and cache the
# result as a CSV whose name carries the last folder and today's date.
simulationType = "evaluation_simulation"
run_n = 5
folder_list = ["iteration_16_frag_zBias_3", "iteration_15_frag_zBias_2", "iteration_14_frag_zBias_2", "iteration_14_contact_stronger_frag_zBias_2", "iteration_12_frag_zBias_2", "run2_frag_old", "run2_frag_cbd", "run2_frag_cbd_shift_center"]

all_data = []

for folder in folder_list:
    for pdb in pdb_list:
        for i in range(run_n):
            pre = f"/Users/weilu/Research/server/jun_week1_2020/{simulationType}/{folder}/{pdb}/{i}"
            info_file = "info.dat"
            location = f"{pre}/{info_file}"
            try:
                # Raw string: "\s" is an invalid escape sequence in a plain literal.
                tmp = pd.read_csv(location, sep=r"\s+")
                tmp = tmp.assign(Run=i, Protein=pdb, Folder=folder)
                all_data.append(tmp)
            except Exception:
                # Best effort: report the run that could not be loaded and keep
                # going. `Exception` (not a bare except) lets Ctrl-C through.
                print(pdb, i, folder, location)

data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
# The output is named after the last folder of folder_list; say so explicitly
# instead of relying on the loop variable leaking out of the for-loop.
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{folder_list[-1]}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)


/Users/weilu/Research/data/openMM/evaluation_simulation_run2_frag_cbd_shift_center_06-03.csv

In [101]:
# For every (Protein, Folder) pick the row with the highest Q among rows with
# Steps > 1950 and draw one line per folder across proteins.
# index_col=0 consumes the index column written by to_csv, matching the other
# cache readers in this notebook and avoiding a stray "Unnamed: 0" column.
data = pd.read_csv("/Users/weilu/Research/data/openMM/evaluation_simulation_run2_frag_cbd_shift_center_06-03.csv", index_col=0)
data = data.query("Folder !='iteration_12_frag_zBias_2'").reset_index(drop=True)
y = "Q"
d = data.query("Steps > 1950").reset_index(drop=True)
t = d.groupby(["Protein", "Folder"])[y].idxmax().reset_index()
# idxmax returns index labels, so select by label (.loc), not by position.
max_Q_data = d.loc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(x="Protein", y=y, markers=True, ms=10, style="Folder", hue="Folder", data=sub_data, dashes=False)



In [95]:
# Fragment vs. Q for rows with Steps > 1950 (folder 'run2_frag_old' excluded),
# one panel per protein, coloured by folder.
d50 = data.query("Steps > 1950 and Folder !='run2_frag_old'").reset_index(drop=True)
g = sns.FacetGrid(data=d50, col="Protein", col_wrap=3, hue="Folder", sharex=False)
g.map(sns.scatterplot, "Fragment", "Q")
g = g.add_legend()



In [96]:
# Contact vs. Q for rows with Steps > 1950, one panel per protein, coloured by folder.
d50 = data.query("Steps > 1950").reset_index(drop=True)
g = sns.FacetGrid(data=d50, col="Protein", col_wrap=3, hue="Folder", sharex=False)
g.map(sns.scatterplot, "Contact", "Q")
g = g.add_legend()



In [32]:
# Per-protein scatter of Contact against Q (rows with Steps > 1950), hue = Folder.
late_rows = data.query("Steps > 1950")
d50 = late_rows.reset_index(drop=True)
g = sns.FacetGrid(d50, hue="Folder", col="Protein", col_wrap=3, sharex=False)
g.map(sns.scatterplot, "Contact", "Q")
g = g.add_legend()



In [27]:
# Contact/Q scatter grid: keep rows with Steps > 1950, facet by Protein, colour by Folder.
d50 = data.query("Steps > 1950").reset_index(drop=True)
g = sns.FacetGrid(
    d50,
    col="Protein",
    hue="Folder",
    col_wrap=3,
    sharex=False,
)
g.map(sns.scatterplot, "Contact", "Q")
g = g.add_legend()



In [33]:
# Q distribution per protein, split by folder. x/y are passed by keyword:
# recent seaborn releases no longer accept them positionally.
sns.boxplot(x="Protein", y="Q", hue="Folder", data=d50)


Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a20621b38>

In [19]:
# Contact distribution per protein, split by folder. x/y are passed by
# keyword: recent seaborn releases no longer accept them positionally.
sns.boxplot(x="Protein", y="Contact", hue="Folder", data=d50)


Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x105c03c88>

In [71]:
# Highest-Q row per (Protein, Folder): sort ascending by Q, keep each group's
# last row, then plot one line per folder. `d` and `y` come from earlier cells.
max_Q_data = (
    d.sort_values("Q")
    .groupby(["Protein", "Folder"])
    .tail(1)
    .reset_index(drop=True)
)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [58]:
# Display the current sub_data table (assigned in an earlier cell).
sub_data


Out[58]:
Steps Q Rg Backbone Rama Contact Fragment Excl_CBD S_chain Excl Beta Pap Helical Total Run Protein Folder
0 1827 0.30 14.64 233.69 -650.88 -278.81 -636.67 16.95 -93.68 11.05 -53.86 -10.92 0.0 -1480.08 3 1akr run2_frag_cbd
1 1528 0.33 12.62 349.66 -631.22 -368.64 -674.57 0.00 0.00 0.00 -48.10 -13.38 0.0 -1386.26 3 1akr run2_frag_old
2 1953 0.43 12.49 111.72 -490.00 -151.05 -416.22 8.75 -72.87 3.92 -33.22 -5.76 0.0 -1053.49 1 1opd run2_frag_cbd
3 1614 0.44 11.07 176.24 -476.76 -177.40 -454.94 0.00 0.00 0.00 -31.86 -6.13 0.0 -970.86 1 1opd run2_frag_old
4 1428 0.36 12.79 181.70 -463.18 -151.86 -355.18 4.44 -8.69 8.40 -14.63 -1.19 0.0 -804.62 0 1ptf run2_frag_cbd
5 1104 0.42 11.20 256.92 -453.98 -189.70 -389.06 0.00 0.00 0.00 -26.73 -6.02 0.0 -808.57 1 1ptf run2_frag_old
6 1669 0.39 13.15 137.88 -418.86 -157.33 -376.44 12.15 -20.78 7.56 -34.13 -4.44 0.0 -866.54 2 1tig run2_frag_cbd
7 1518 0.42 11.91 219.50 -415.01 -186.56 -421.94 0.00 0.00 0.00 -26.81 -7.20 0.0 -838.02 1 1tig run2_frag_old
8 1867 0.34 15.14 163.82 -784.28 -208.02 -613.41 8.11 -79.73 8.26 -44.65 -3.92 0.0 -1561.92 3 1tmy run2_frag_cbd
9 1147 0.36 12.40 353.75 -745.20 -256.54 -660.83 0.00 0.00 0.00 -29.46 -8.20 0.0 -1346.49 1 1tmy run2_frag_old
10 1542 0.25 16.98 190.33 -573.74 -169.26 -312.80 9.74 -18.91 5.58 -40.77 -13.49 0.0 -933.06 1 2acy run2_frag_cbd
11 1289 0.30 12.11 291.53 -536.38 -177.19 -357.56 0.00 0.00 0.00 -24.18 -16.20 0.0 -819.97 4 2acy run2_frag_old
12 1340 0.34 14.32 280.32 -693.93 -276.59 -613.80 18.82 -6.88 11.82 -21.90 -2.86 0.0 -1323.83 0 5nul run2_frag_cbd
13 1248 0.39 13.18 407.67 -684.23 -330.23 -677.92 0.00 0.00 0.00 -36.14 -9.43 0.0 -1330.28 2 5nul run2_frag_old

In [28]:
# Read info.dat for every (folder, protein, run) under may_week1_2020, tag each
# table with its Run / Protein / Folder, concatenate everything and cache the
# result as a CSV whose name carries the last folder and today's date.
simulationType = "evaluation_simulation"
run_n = 3
folder_list = ["run1_cbd_withBeta_stronger_side_chain_new_exclude", "run1_cbd_withBeta_stronger_side_chain", "run1", "run1_cbd_withBeta"]
all_data = []

for folder in folder_list:
    for pdb in pdb_list:
        for i in range(run_n):
            pre = f"/Users/weilu/Research/server/may_week1_2020//{simulationType}/{folder}/{pdb}/{i}"
            info_file = "info.dat"
            location = f"{pre}/{info_file}"
            try:
                # Raw string: "\s" is an invalid escape sequence in a plain literal.
                tmp = pd.read_csv(location, sep=r"\s+")
                tmp = tmp.assign(Run=i, Protein=pdb, Folder=folder)
                all_data.append(tmp)
            except Exception:
                # Best effort: report the run that could not be loaded and keep
                # going. `Exception` (not a bare except) lets Ctrl-C through.
                print(pdb, i, folder, location)

data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
# The output is named after the last folder of folder_list; say so explicitly
# instead of relying on the loop variable leaking out of the for-loop.
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{folder_list[-1]}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)


/Users/weilu/Research/data/openMM/evaluation_simulation_run1_cbd_withBeta_05-04.csv

In [29]:


In [33]:
# Halve the default figure size (same aspect ratio).
plt.rcParams['figure.figsize'] = np.array([16.18033, 10]) / 2    # golden ratio

In [63]:
# For every (Protein, Folder) pick the row with the highest Q among rows with
# Steps > 1000, drop two of the folders and draw one line per remaining folder.
# index_col=0 consumes the index column written by to_csv, matching the other
# cache readers in this notebook and avoiding a stray "Unnamed: 0" column.
data = pd.read_csv("/Users/weilu/Research/data/openMM/evaluation_simulation_run1_cbd_withBeta_05-04.csv", index_col=0)
data = data.reset_index(drop=True)
y = "Q"
d = data.query("Steps > 1000").reset_index(drop=True)
t = d.groupby(["Protein", "Folder"])[y].idxmax().reset_index()
# idxmax returns index labels, so select by label (.loc), not by position.
max_Q_data = d.loc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data.query("Folder != 'run1_cbd_withBeta' and Folder !='run1_cbd_withBeta_stronger_side_chain'")
ax = sns.lineplot(x="Protein", y=y, markers=True, ms=10, style="Folder", hue="Folder", data=sub_data, dashes=False)
# NOTE(review): these labels are assigned purely by position in the legend;
# confirm the order really is original model first, CBD model second.
plt.legend(["original model", "CBD model"])


Out[63]:
<matplotlib.legend.Legend at 0x1a2c911240>

In [30]:
# Highest-Q row per (Protein, Folder) over all rows of `data`, one line per folder.
y = "Q"
d = data
t = d.groupby(["Protein", "Folder"])[y].idxmax().reset_index()
# idxmax returns index labels, so select by label (.loc); positional .iloc is
# only right when the index happens to be 0..n-1.
max_Q_data = d.loc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(x="Protein", y=y, markers=True, ms=10, style="Folder", hue="Folder", data=sub_data, dashes=False)



In [36]:
# Add a combined label column: folder name immediately followed by the run number.
run_label = data["Run"].astype(str)
data["Folder_Run"] = data["Folder"] + run_label

In [39]:
# Restore the full-size golden-ratio figure.
plt.rcParams.update({'figure.figsize': np.array([16.18033, 10])})    # golden ratio

In [50]:
# Horizontal box plot of Q per protein for rows with Steps > 100, two folders
# excluded, coloured by Folder_Run. x/y are passed by keyword: recent seaborn
# releases no longer accept them positionally.
chosen = data.query("Steps > 100 and Folder != 'run1_cbd_withBeta' and Folder !='run1_cbd_withBeta_stronger_side_chain'")
sns.boxplot(x="Q", y="Protein", hue="Folder_Run", data=chosen)


Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a313dcb70>

In [42]:
# Horizontal box plot of Q per protein for rows with Steps > 1500, two folders
# excluded, coloured by Folder_Run. x/y are passed by keyword: recent seaborn
# releases no longer accept them positionally.
chosen = data.query("Steps > 1500 and Folder != 'run1_cbd_withBeta' and Folder !='run1_cbd_withBeta_stronger_side_chain'")
sns.boxplot(x="Q", y="Protein", hue="Folder_Run", data=chosen)


Out[42]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2a7ebbe0>

In [53]:
# Half-size figure; highest-Q row per (Folder, Protein) among rows with
# Steps > 1500, all folders kept. `y` comes from an earlier cell.
plt.rcParams['figure.figsize'] = 0.5 * np.array([16.18033, 10])    #golden ratio
chosen = data.query("Steps > 1500")
max_Q_data = (
    chosen.sort_values("Q")
    .groupby(["Folder", "Protein"])
    .tail(1)
)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [49]:
# Half-size figure; highest-Q row per (Folder, Protein) from `chosen` (built in
# an earlier cell), two folders removed, legend relabelled by position.
plt.rcParams['figure.figsize'] = 0.5 * np.array([16.18033, 10])    #golden ratio
best_rows = chosen.sort_values("Q").groupby(["Folder", "Protein"]).tail(1)
max_Q_data = best_rows
sub_data = max_Q_data.query("Folder != 'run1_cbd_withBeta' and Folder !='run1_cbd_withBeta_stronger_side_chain'")
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)
plt.legend(["original model", "CBD model"])


Out[49]:
<matplotlib.legend.Legend at 0x1a2e742da0>

In [10]:
# Distribution of Q at the largest recorded Steps value of every
# (Protein, Folder, Run), overlaid per folder.
data = pd.read_csv("/Users/weilu/Research/data/openMM/evaluation_simulation_run1_cbd_withBeta_05-03.csv", index_col=0)

# Restrict/order Protein to pdb_list; values outside the list become NaN and
# are then ignored by the groupby below.
sub_pdb_list = pdb_list
data["Protein"] = pd.Categorical(data["Protein"], categories=sub_pdb_list)

# Despite the name, max_Q_data holds the row with the maximum *Steps* per group.
y = "Steps"
d = data
# observed=True: only group combinations that actually occur, so idxmax never
# has to deal with empty groups from unused categories.
t = d.groupby(["Protein", "Folder", "Run"], observed=True)[y].idxmax().reset_index()
# idxmax returns index labels, so select by label (.loc), not by position.
max_Q_data = d.loc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data

g = sns.FacetGrid(sub_data, hue="Folder", height=5, aspect=1.618)
g = g.map(sns.distplot, "Q")
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))


Out[10]:
<matplotlib.legend.Legend at 0x1a20bd84e0>

In [83]:
sub_data


Out[83]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
0 2001 0.41 11.13 117.34 -196.05 -136.96 -251.59 0.0 0.0 0.0 -44.55 -17.16 0.0 -528.97 0 1hoe iteration_0
1 2001 0.51 11.45 110.67 -194.74 -138.87 -264.83 0.0 0.0 0.0 -43.93 -21.11 0.0 -552.82 1 1hoe iteration_0
2 2001 0.53 10.86 99.05 -199.22 -175.27 -260.06 0.0 0.0 0.0 -55.93 -21.02 0.0 -612.47 0 1hoe iteration_1
3 2001 0.57 10.61 105.64 -196.98 -167.91 -266.47 0.0 0.0 0.0 -53.26 -18.12 0.0 -597.11 1 1hoe iteration_1
4 2001 0.54 12.86 111.86 -428.93 -135.75 -279.88 0.0 0.0 0.0 -31.11 -4.79 0.0 -768.60 0 1tif iteration_0
5 2001 0.57 12.62 107.60 -421.11 -127.06 -289.97 0.0 0.0 0.0 -49.94 -10.05 0.0 -790.52 1 1tif iteration_0
6 2001 0.39 13.21 125.35 -424.33 -136.42 -255.69 0.0 0.0 0.0 -45.63 -9.06 0.0 -745.79 0 1tif iteration_1
7 2001 0.50 12.63 116.50 -420.64 -146.02 -264.65 0.0 0.0 0.0 -53.17 -11.99 0.0 -779.97 1 1tif iteration_1
8 2001 0.41 10.26 104.34 -297.86 -190.62 -251.90 0.0 0.0 0.0 -45.95 -11.39 0.0 -693.38 0 1vcc iteration_0
9 2001 0.56 11.01 119.05 -301.40 -154.40 -289.91 0.0 0.0 0.0 -49.57 -7.43 0.0 -683.66 1 1vcc iteration_0
10 2001 0.50 10.73 107.80 -304.26 -172.30 -282.98 0.0 0.0 0.0 -32.95 -7.77 0.0 -692.46 0 1vcc iteration_1
11 2001 0.55 11.39 124.21 -310.87 -162.79 -294.75 0.0 0.0 0.0 -40.30 -8.30 0.0 -692.80 1 1vcc iteration_1
12 2001 0.48 12.51 114.00 -377.00 -126.26 -316.17 0.0 0.0 0.0 -31.35 -3.70 0.0 -740.47 0 1by9 iteration_0
13 2001 0.48 12.25 120.84 -379.21 -132.04 -297.12 0.0 0.0 0.0 -37.78 -3.72 0.0 -729.03 1 1by9 iteration_0
14 2001 0.43 12.69 124.48 -379.54 -156.44 -308.02 0.0 0.0 0.0 -39.12 -7.67 0.0 -766.32 0 1by9 iteration_1
15 2001 0.51 12.32 109.71 -379.06 -161.21 -308.82 0.0 0.0 0.0 -53.42 -9.38 0.0 -802.18 1 1by9 iteration_1
16 2001 0.80 11.30 124.68 -238.07 -176.29 -301.37 0.0 0.0 0.0 -48.71 -9.78 0.0 -649.54 0 1bdo iteration_0
17 2001 0.83 11.40 96.69 -224.01 -172.89 -304.67 0.0 0.0 0.0 -46.24 -8.93 0.0 -660.05 1 1bdo iteration_0
18 2001 0.78 11.21 116.86 -232.32 -195.47 -303.04 0.0 0.0 0.0 -58.69 -11.65 0.0 -684.30 0 1bdo iteration_1
19 2001 0.65 11.21 104.74 -236.68 -199.28 -287.72 0.0 0.0 0.0 -46.00 -10.00 0.0 -674.95 1 1bdo iteration_1
20 2001 0.47 11.00 127.43 -452.00 -136.17 -296.56 0.0 0.0 0.0 -27.98 -2.55 0.0 -787.84 0 451c iteration_0
21 2001 0.59 11.15 119.62 -454.66 -132.00 -296.29 0.0 0.0 0.0 -24.14 -2.60 0.0 -790.07 1 451c iteration_0
22 2001 0.60 11.28 115.34 -454.25 -153.42 -312.92 0.0 0.0 0.0 -37.95 -3.84 0.0 -847.04 0 451c iteration_1
23 2001 0.44 11.51 110.06 -450.64 -161.42 -304.44 0.0 0.0 0.0 -26.62 -1.00 0.0 -834.06 1 451c iteration_1
24 2001 0.54 11.31 108.38 -383.12 -123.73 -271.92 0.0 0.0 0.0 -24.14 -0.00 0.0 -694.54 0 1cc5 iteration_0
25 2001 0.51 11.47 109.14 -377.65 -125.09 -274.28 0.0 0.0 0.0 -19.59 -0.00 0.0 -687.47 1 1cc5 iteration_0
26 2001 0.41 10.62 113.54 -379.18 -174.44 -281.19 0.0 0.0 0.0 -27.28 -0.00 0.0 -748.56 0 1cc5 iteration_1
27 2001 0.35 10.83 108.24 -379.22 -176.24 -266.51 0.0 0.0 0.0 -23.59 -1.07 0.0 -738.39 1 1cc5 iteration_1
28 2001 0.41 12.28 117.67 -300.91 -167.69 -284.62 0.0 0.0 0.0 -29.99 -5.66 0.0 -671.21 0 1bb9 iteration_0
29 2001 0.48 12.31 108.74 -312.64 -148.34 -284.84 0.0 0.0 0.0 -32.62 -7.63 0.0 -677.33 1 1bb9 iteration_0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
278 2001 0.44 13.66 202.02 -838.28 -336.81 -542.23 0.0 0.0 0.0 -83.54 -23.49 0.0 -1622.32 0 1jon iteration_1
279 2001 0.39 13.77 211.38 -852.64 -346.25 -526.49 0.0 0.0 0.0 -69.27 -13.70 0.0 -1596.96 1 1jon iteration_1
280 2001 0.46 19.16 203.47 -1127.20 -200.49 -604.54 0.0 0.0 0.0 -57.36 -0.00 0.0 -1786.11 0 1vls iteration_0
281 2001 0.40 24.44 197.89 -1121.25 -185.75 -605.40 0.0 0.0 0.0 -57.40 -0.00 0.0 -1771.91 1 1vls iteration_0
282 2001 0.55 18.60 215.56 -1126.07 -236.74 -599.90 0.0 0.0 0.0 -58.10 -0.29 0.0 -1805.52 0 1vls iteration_1
283 2001 0.49 18.32 213.48 -1122.55 -238.37 -611.70 0.0 0.0 0.0 -56.68 -0.00 0.0 -1815.82 1 1vls iteration_1
284 2001 0.45 13.61 203.64 -646.16 -301.18 -542.99 0.0 0.0 0.0 -77.56 -15.74 0.0 -1379.98 0 1lba iteration_0
285 2001 0.28 13.55 202.89 -640.09 -350.51 -488.08 0.0 0.0 0.0 -59.74 -10.44 0.0 -1345.98 1 1lba iteration_0
286 2001 0.38 13.68 210.32 -652.15 -363.63 -505.33 0.0 0.0 0.0 -74.03 -17.40 0.0 -1402.21 0 1lba iteration_1
287 2001 0.30 12.98 206.71 -642.36 -380.76 -487.13 0.0 0.0 0.0 -85.72 -13.95 0.0 -1403.20 1 1lba iteration_1
288 2001 0.39 14.66 228.95 -638.70 -341.01 -521.11 0.0 0.0 0.0 -112.05 -37.06 0.0 -1420.97 0 1aly iteration_0
289 2001 0.37 15.74 209.95 -629.45 -345.16 -531.95 0.0 0.0 0.0 -81.29 -29.25 0.0 -1407.16 1 1aly iteration_0
290 2001 0.58 15.20 214.48 -642.11 -334.61 -549.43 0.0 0.0 0.0 -101.43 -34.04 0.0 -1447.14 0 1aly iteration_1
291 2001 0.40 15.50 216.03 -644.37 -361.47 -521.96 0.0 0.0 0.0 -115.29 -44.27 0.0 -1471.34 1 1aly iteration_1
292 2001 0.44 15.12 210.75 -1108.73 -254.80 -594.07 0.0 0.0 0.0 -51.42 -0.00 0.0 -1798.27 0 1mba iteration_0
293 2001 0.48 14.70 217.36 -1104.96 -264.56 -594.90 0.0 0.0 0.0 -48.83 -0.00 0.0 -1795.89 1 1mba iteration_0
294 2001 0.54 14.27 216.82 -1119.21 -305.32 -591.28 0.0 0.0 0.0 -53.19 -0.17 0.0 -1852.35 0 1mba iteration_1
295 2001 0.66 14.18 198.04 -1102.75 -305.66 -607.77 0.0 0.0 0.0 -52.89 -0.00 0.0 -1871.03 1 1mba iteration_1
296 2001 0.49 14.40 203.21 -988.93 -276.20 -555.38 0.0 0.0 0.0 -52.63 -0.83 0.0 -1670.76 0 2hbg iteration_0
297 2001 0.34 14.62 221.00 -987.44 -261.71 -548.24 0.0 0.0 0.0 -49.67 -0.00 0.0 -1626.06 1 2hbg iteration_0
298 2001 0.37 13.64 215.36 -981.27 -342.12 -545.79 0.0 0.0 0.0 -53.28 -0.00 0.0 -1707.10 0 2hbg iteration_1
299 2001 0.37 13.89 190.91 -986.78 -327.62 -539.17 0.0 0.0 0.0 -50.85 -0.00 0.0 -1713.52 1 2hbg iteration_1
300 2001 0.34 13.78 204.98 -679.86 -358.82 -526.84 0.0 0.0 0.0 -46.28 -10.65 0.0 -1417.47 0 1akr iteration_0
301 2001 0.42 14.05 210.94 -654.81 -343.08 -547.83 0.0 0.0 0.0 -49.90 -7.95 0.0 -1392.63 1 1akr iteration_0
302 2001 0.35 13.16 219.74 -672.56 -378.84 -533.94 0.0 0.0 0.0 -71.12 -24.38 0.0 -1461.10 0 1akr iteration_1
303 2001 0.57 13.37 196.27 -678.08 -363.99 -556.98 0.0 0.0 0.0 -78.27 -23.90 0.0 -1504.94 1 1akr iteration_1
304 2001 0.38 19.05 208.71 -990.20 -248.69 -576.43 0.0 0.0 0.0 -49.89 -0.00 0.0 -1656.50 0 1osa iteration_0
305 2001 0.46 19.91 208.04 -996.58 -253.87 -582.04 0.0 0.0 0.0 -48.89 -0.00 0.0 -1673.33 1 1osa iteration_0
306 2001 0.42 18.72 204.42 -998.85 -259.04 -582.27 0.0 0.0 0.0 -48.79 -0.00 0.0 -1684.54 0 1osa iteration_1
307 2001 0.61 21.71 217.39 -997.14 -252.66 -587.59 0.0 0.0 0.0 -54.19 -0.14 0.0 -1674.32 1 1osa iteration_1

308 rows × 17 columns


In [84]:
# Horizontal box plot of Contact for each folder. x/y are passed by keyword:
# recent seaborn releases no longer accept them positionally.
sns.boxplot(x="Contact", y="Folder", data=sub_data)


Out[84]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a20056c18>

In [76]:
# Distribution of Q at the largest recorded Steps value of every
# (Protein, Folder, Run) in the iteration_3 cache, overlaid per folder.
data = pd.read_csv("/Users/weilu/Research/data/openMM/mass_iterative_run_iteration_3_02-17.csv", index_col=0)

# Despite the name, max_Q_data holds the row with the maximum *Steps* per group.
y = "Steps"
d = data
t = d.groupby(["Protein", "Folder", "Run"])[y].idxmax().reset_index()
# idxmax returns index labels, so select by label (.loc); positional .iloc is
# only right when the CSV index happens to be 0..n-1.
max_Q_data = d.loc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data

g = sns.FacetGrid(sub_data, hue="Folder", height=5, aspect=1.618)
g = g.map(sns.distplot, "Q")
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))


Out[76]:
<matplotlib.legend.Legend at 0x1a2c7769e8>

In [79]:
# Horizontal box plot of Fragment for each folder. x/y are passed by keyword:
# recent seaborn releases no longer accept them positionally.
sns.boxplot(x="Fragment", y="Folder", data=sub_data)


Out[79]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2caf0630>

In [77]:
# Horizontal box plot of Contact for each folder. x/y are passed by keyword:
# recent seaborn releases no longer accept them positionally.
sns.boxplot(x="Contact", y="Folder", data=sub_data)


Out[77]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2edd5c50>

In [401]:
# Load the frustration_selection table; the first CSV column becomes the index.
raw_data_all = pd.read_csv(
    "/Users/weilu/Research/frustration_selection/data.csv",
    index_col=0,
)

In [433]:
# Display the run-directory path currently held in `pre` (set by a loading loop).
pre


Out[433]:
'/Users/weilu/Research/server/mar_2020/mass_iterative_run/iter4_shift_well/1osa/1'

In [449]:
# For every (folder, protein, run) under mar_2020, record the number stored in
# time.dat and the length of what read_fasta returns for crystal_structure.fasta.
# Relies on folder_list, pdb_list, run_n and simulationType from earlier cells.
all_data = []
for folder in folder_list:
    for pdb in pdb_list:
        for i in range(run_n):
            pre = f"/Users/weilu/Research/server/mar_2020/{simulationType}/{folder}/{pdb}/{i}"
            timeFile = f"{pre}/time.dat"
            # Named run_time (not `time`) so the imported `time` module is not
            # overwritten in the notebook namespace.
            run_time = float(np.loadtxt(timeFile))
            Length = len(read_fasta(f"{pre}/crystal_structure.fasta"))
            all_data.append([pdb, i, run_time, Length, folder])

In [450]:
# Turn the collected per-run records into a labelled DataFrame.
column_names = ["Protein", "Run", "Time", "Length", "Folder"]
data = pd.DataFrame(all_data, columns=column_names)

In [459]:
# Linear fit of Time against Length for three selected folders. x/y are passed
# by keyword: recent seaborn releases no longer accept them positionally.
data_selected = data.query("Folder=='iter4_shift_well' or Folder=='iteration_4' or Folder=='iter3_shift_well'")
sns.lmplot(x="Length", y="Time", hue="Folder", data=data_selected)


Out[459]:
<seaborn.axisgrid.FacetGrid at 0x1a2d64f7b8>

In [64]:
# Read info.dat for every (folder, protein, run) under apr_2020, tag each table
# with its Run / Protein / Folder, concatenate everything and cache the result
# as a CSV whose name carries the last folder and today's date.
simulationType = "mass_iterative_run"
run_n = 2
# Only the folder set that is actually loaded is kept; the earlier assignments
# in this cell were all overwritten before being used.
folder_list = ["iteration_4_z_weighted_2", "new_iter1_environment_new", "new_iter2_environment_new", "new_iter3_environment_new", "new_iter4_environment_new", "new_iter5_environment_new"]
all_data = []

for folder in folder_list:
    for pdb in pdb_list:
        for i in range(run_n):
            pre = f"/Users/weilu/Research/server/apr_2020/{simulationType}/{folder}/{pdb}/{i}"
            info_file = "info.dat"
            location = f"{pre}/{info_file}"
            try:
                # Raw string: "\s" is an invalid escape sequence in a plain literal.
                tmp = pd.read_csv(location, sep=r"\s+")
                tmp = tmp.assign(Run=i, Protein=pdb, Folder=folder)
                all_data.append(tmp)
            except Exception:
                # Best effort: report the run that could not be loaded and keep
                # going. `Exception` (not a bare except) lets Ctrl-C through.
                print(pdb, i, folder)

data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
# The output is named after the last folder of folder_list; say so explicitly
# instead of relying on the loop variable leaking out of the for-loop.
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{folder_list[-1]}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)


/Users/weilu/Research/data/openMM/mass_iterative_run_new_iter5_environment_new_05-01.csv

In [69]:
# Reload the cached new_iter5_environment_new table and make Protein a
# categorical restricted to, and ordered as, pdb_list.
data = pd.read_csv(
    "/Users/weilu/Research/data/openMM/mass_iterative_run_new_iter5_environment_new_05-01.csv",
    index_col=0,
)

sub_pdb_list = pdb_list
data["Protein"] = pd.Categorical(data["Protein"], categories=sub_pdb_list)

In [70]:
# Distribution of Q at the largest recorded Steps value of every
# (Protein, Folder, Run), overlaid per folder; 'new_iter1_environment_new' is
# left out. Despite the name, max_Q_data holds the maximum-*Steps* rows.
y = "Steps"
d = data
# observed=True: if Protein is categorical, only combinations that actually
# occur are grouped, so idxmax never has to deal with empty groups.
t = d.groupby(["Protein", "Folder", "Run"], observed=True)[y].idxmax().reset_index()
# idxmax returns index labels, so select by label (.loc), not by position.
max_Q_data = d.loc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data.query("Folder !='new_iter1_environment_new'")

g = sns.FacetGrid(sub_data, hue="Folder", height=5, aspect=1.618)
g = g.map(sns.distplot, "Q")
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))


Out[70]:
<matplotlib.legend.Legend at 0x1a34b81cf8>

In [66]:
# Row with the largest Steps value for every (Protein, Folder, Run).
# Despite the name, max_Q_data holds the maximum-*Steps* rows.
y = "Steps"
d = data
# observed=True: if Protein is categorical, only combinations that actually
# occur are grouped, so idxmax never has to deal with empty groups.
t = d.groupby(["Protein", "Folder", "Run"], observed=True)[y].idxmax().reset_index()
# idxmax returns index labels, so select by label (.loc), not by position.
max_Q_data = d.loc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data

In [67]:
# Overlay the Q distribution of every folder in sub_data; legend placed to the
# right of the axes.
g = sns.FacetGrid(data=sub_data, hue="Folder", height=5, aspect=1.618)
g.map(sns.distplot, "Q")
plt.legend(bbox_to_anchor=(1.0, 0.5), loc='center left')


Out[67]:
<matplotlib.legend.Legend at 0x1a20e9e9e8>

In [30]:
# Q distribution per folder with 'iter3_environment' removed; legend placed to
# the right of the axes.
sub_data = max_Q_data.query("Folder != 'iter3_environment'")
g = sns.FacetGrid(data=sub_data, hue="Folder", height=5, aspect=1.618)
g.map(sns.distplot, "Q")
plt.legend(bbox_to_anchor=(1.0, 0.5), loc='center left')


Out[30]:
<matplotlib.legend.Legend at 0x1a30d6fda0>

In [18]:
# Per-folder Q distributions drawn on one axis, legend outside on the right.
g = sns.FacetGrid(data=sub_data, hue="Folder", height=5, aspect=1.618)
g.map(sns.distplot, "Q")
plt.legend(bbox_to_anchor=(1.0, 0.5), loc='center left')


Out[18]:
<matplotlib.legend.Legend at 0x1a1d7f1e48>

In [31]:
# Horizontal box plot of Q for each folder. x/y are passed by keyword: recent
# seaborn releases no longer accept them positionally.
sns.boxplot(x="Q", y="Folder", data=sub_data)


Out[31]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2e99e518>

In [32]:
# Horizontal box plot of Contact for each folder. x/y are passed by keyword:
# recent seaborn releases no longer accept them positionally.
sns.boxplot(x="Contact", y="Folder", data=sub_data)


Out[32]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2ea07630>

In [14]:
# Q distribution of sub_data, one curve per folder; legend to the right.
g = sns.FacetGrid(
    data=sub_data,
    hue="Folder",
    height=5,
    aspect=1.618,
)
g.map(sns.distplot, "Q")
plt.legend(bbox_to_anchor=(1.0, 0.5), loc='center left')


Out[14]:
<matplotlib.legend.Legend at 0x1a1d7f1160>

In [474]:
# One Q distribution per folder on shared axes; legend anchored outside, right.
g = sns.FacetGrid(sub_data, aspect=1.618, height=5, hue="Folder")
g.map(sns.distplot, "Q")
plt.legend(bbox_to_anchor=(1.0, 0.5), loc='center left')


Out[474]:
<matplotlib.legend.Legend at 0x1a29b531d0>

In [432]:
# Horizontal box plot of Contact for each folder. x/y are passed by keyword:
# recent seaborn releases no longer accept them positionally.
sns.boxplot(x="Contact", y="Folder", data=sub_data)


Out[432]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a99ab7be0>

In [393]:
# Folder-coloured Q distributions for the current sub_data; legend right of plot.
g = sns.FacetGrid(data=sub_data, hue="Folder", aspect=1.618, height=5)
g.map(sns.distplot, "Q")
plt.legend(bbox_to_anchor=(1.0, 0.5), loc='center left')


Out[393]:
<matplotlib.legend.Legend at 0x1a28264390>

In [396]:
# Show the iteration_4 rows of sub_data ordered by ascending Contact.
iteration_4_rows = sub_data.query("Folder=='iteration_4'")
iteration_4_rows.sort_values("Contact")


Out[396]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
583 2001 0.61 15.47 164.30 -652.76 -93.28 -609.14 19.65 32.63 6.05 -133.58 -37.42 0.0 -1303.55 1 1aly iteration_4
390 2001 0.54 15.25 135.84 -505.71 -89.33 -562.82 13.52 24.40 3.66 -78.10 -26.05 0.0 -1084.58 0 1rlw iteration_4
502 2001 0.36 15.82 151.17 -554.12 -89.00 -607.98 20.54 34.97 3.01 -85.44 -30.30 0.0 -1157.14 0 1lcl iteration_4
486 2001 0.51 14.41 153.43 -754.23 -81.87 -575.36 15.85 35.66 5.95 -61.80 -11.78 0.0 -1274.14 0 5nul iteration_4
391 2001 0.51 16.30 136.82 -507.48 -80.21 -566.24 14.45 26.13 4.75 -95.56 -31.00 0.0 -1098.34 1 1rlw iteration_4
607 2001 0.81 14.08 146.73 -671.64 -79.52 -629.61 15.48 20.68 7.99 -74.95 -18.13 0.0 -1282.97 1 1akr iteration_4
559 2001 0.81 13.93 145.42 -843.43 -79.17 -627.21 16.96 34.11 8.08 -91.43 -15.81 0.0 -1452.48 1 1jon iteration_4
543 2001 0.61 15.64 148.11 -665.84 -76.87 -588.44 10.34 24.61 6.34 -101.46 -27.80 0.0 -1271.03 1 1pkp iteration_4
606 2001 0.40 15.33 164.73 -686.29 -76.76 -600.54 9.72 28.91 5.99 -61.67 -10.67 0.0 -1226.56 0 1akr iteration_4
487 2001 0.52 14.36 139.06 -760.04 -75.05 -576.28 20.45 24.54 12.45 -70.20 -13.99 0.0 -1299.07 1 5nul iteration_4
542 2001 0.43 15.74 164.19 -678.89 -74.86 -574.57 7.66 16.46 8.17 -81.75 -24.69 0.0 -1238.29 0 1pkp iteration_4
574 2001 0.55 15.72 158.97 -644.25 -73.95 -619.50 11.19 27.29 4.33 -76.20 -6.74 0.0 -1218.86 0 1lba iteration_4
558 2001 0.51 15.08 146.77 -855.52 -71.95 -596.50 13.04 21.92 5.86 -80.02 -15.93 0.0 -1432.31 0 1jon iteration_4
382 2001 0.56 13.74 121.13 -662.68 -70.04 -509.90 15.29 32.14 4.10 -98.31 -24.43 0.0 -1192.69 0 1opy iteration_4
358 2001 0.51 13.61 129.47 -558.81 -69.64 -453.15 14.14 22.95 4.18 -101.17 -24.09 0.0 -1036.11 0 1bqk iteration_4
495 2001 0.77 14.07 148.81 -601.34 -69.07 -580.05 11.62 24.45 8.87 -96.28 -20.77 0.0 -1173.75 1 1pne iteration_4
326 2001 0.53 15.49 139.41 -511.75 -68.99 -529.51 9.12 26.06 3.12 -55.50 -18.59 0.0 -1006.63 0 2sak iteration_4
503 2001 0.36 17.55 146.97 -553.96 -68.98 -619.55 18.53 31.02 2.11 -94.64 -30.90 0.0 -1169.40 1 1lcl iteration_4
599 2001 0.75 14.95 158.07 -972.62 -68.51 -604.25 11.27 22.72 14.80 -55.40 -0.00 0.0 -1493.92 1 2hbg iteration_4
463 2001 0.66 15.50 153.06 -774.39 -67.63 -571.30 11.99 35.07 3.97 -124.61 -41.37 0.0 -1375.22 1 1crb iteration_4
582 2001 0.42 17.01 166.79 -645.70 -67.11 -591.67 9.93 24.64 5.30 -98.49 -26.62 0.0 -1222.93 0 1aly iteration_4
207 2001 0.86 12.60 109.41 -702.56 -66.87 -476.40 10.78 23.19 5.68 -50.87 -10.86 0.0 -1158.48 1 1erv iteration_4
527 2001 0.56 18.12 163.08 -800.88 -66.79 -644.19 14.37 32.87 11.45 -56.67 -8.25 0.0 -1355.01 1 1tfe iteration_4
231 2001 0.65 13.63 118.37 -502.18 -65.78 -425.86 14.31 15.70 4.07 -53.91 -12.37 0.0 -907.66 1 1bkf iteration_4
335 2001 0.73 16.28 134.58 -614.06 -64.45 -503.25 11.22 28.24 7.24 -67.27 -17.05 0.0 -1084.81 1 1dhn iteration_4
462 2001 0.52 15.74 124.43 -773.70 -63.80 -556.77 15.26 38.66 3.67 -130.72 -41.73 0.0 -1384.69 0 1crb iteration_4
311 2001 0.53 14.50 123.23 -407.62 -63.71 -499.38 14.39 28.43 3.48 -92.03 -29.09 0.0 -922.30 1 1neu iteration_4
206 2001 0.87 12.62 110.63 -707.67 -63.40 -466.18 10.72 13.51 4.52 -54.34 -12.15 0.0 -1164.33 0 1erv iteration_4
423 2001 0.65 13.92 123.57 -800.01 -63.28 -554.35 7.86 28.75 9.94 -66.10 -10.32 0.0 -1323.95 1 1pdo iteration_4
455 2001 0.67 13.70 141.94 -693.37 -63.14 -545.83 15.59 26.06 8.99 -57.62 -13.77 0.0 -1181.16 1 1kuh iteration_4
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
150 2001 0.44 14.19 97.44 -603.79 -32.21 -387.87 8.15 20.46 3.78 -49.30 -11.62 0.0 -954.96 0 2acy iteration_4
111 2001 0.73 12.72 87.57 -437.55 -31.59 -382.79 8.82 16.86 4.00 -50.22 -9.29 0.0 -794.20 1 1tig iteration_4
566 2001 0.72 18.88 136.35 -1120.83 -31.25 -663.99 7.67 40.78 16.40 -62.10 0.00 0.0 -1676.97 0 1vls iteration_4
7 2001 0.40 13.29 78.37 -209.66 -31.22 -284.09 2.80 6.41 1.97 -39.88 -11.88 0.0 -487.19 1 1hoe iteration_4
294 2001 0.66 15.38 119.07 -916.25 -30.52 -498.61 6.06 21.08 6.34 -48.31 -0.00 0.0 -1341.13 0 2a0b iteration_4
127 2001 0.53 14.68 101.37 -499.06 -30.35 -367.84 10.84 11.29 2.68 -68.58 -22.45 0.0 -862.10 1 1fna iteration_4
439 2001 0.31 17.06 118.70 -598.81 -30.20 -514.28 7.74 17.50 4.16 -67.90 -11.85 0.0 -1074.93 1 1htp iteration_4
102 2001 0.76 12.51 92.65 -401.16 -29.59 -353.99 6.11 21.32 2.26 -28.06 -4.71 0.0 -695.17 0 1cyo iteration_4
166 2001 0.47 14.88 115.51 -532.24 -29.57 -416.16 11.72 20.19 4.36 -57.33 -9.83 0.0 -893.35 0 1bm8 iteration_4
590 2001 0.34 18.60 147.94 -1117.90 -29.25 -636.10 7.04 23.04 14.52 -54.66 -0.00 0.0 -1645.38 0 1mba iteration_4
167 2001 0.50 15.00 114.78 -542.46 -28.54 -405.37 13.20 27.90 4.39 -47.84 -9.73 0.0 -873.66 1 1bm8 iteration_4
78 2001 0.47 12.56 84.38 -513.27 -28.44 -345.27 7.69 15.98 3.06 -47.12 -9.65 0.0 -832.64 0 1opd iteration_4
47 2001 0.79 11.44 91.05 -449.78 -28.15 -337.56 5.07 14.62 6.17 -31.04 -3.31 0.0 -732.92 1 451c iteration_4
238 2001 0.40 16.67 109.19 -486.81 -27.63 -414.58 6.66 23.22 9.14 -19.76 -0.00 0.0 -800.57 0 1ycc iteration_4
615 2001 0.57 21.55 148.95 -995.60 -27.51 -653.82 11.21 32.04 12.23 -51.47 -0.00 0.0 -1523.97 1 1osa iteration_4
103 2001 0.74 12.75 91.44 -408.05 -27.37 -359.60 12.54 21.30 4.53 -28.09 -4.88 0.0 -698.17 1 1cyo iteration_4
535 2001 0.34 23.09 147.55 -910.66 -26.21 -646.69 6.33 32.85 13.35 -44.42 -0.41 0.0 -1428.32 1 1ax8 iteration_4
534 2001 0.40 18.67 134.96 -908.61 -26.19 -637.70 7.52 32.91 14.22 -47.43 -0.00 0.0 -1430.32 0 1ax8 iteration_4
518 2001 0.34 24.54 117.97 -995.07 -25.97 -577.38 5.79 20.81 11.39 -52.84 -0.00 0.0 -1495.31 0 1flp iteration_4
55 2001 0.62 11.93 93.76 -375.11 -25.73 -309.33 5.14 13.23 6.22 -29.20 -0.00 0.0 -621.03 1 1cc5 iteration_4
15 2001 0.79 13.29 81.03 -424.72 -23.80 -314.00 10.78 12.53 4.29 -50.68 -9.73 0.0 -714.30 1 1tif iteration_4
14 2001 0.41 12.67 86.03 -431.42 -23.76 -299.77 3.53 17.21 3.05 -27.77 -6.73 0.0 -679.65 0 1tif iteration_4
70 2001 0.41 15.22 90.64 -275.79 -23.07 -325.02 2.27 27.39 3.60 -31.45 -9.62 0.0 -541.06 0 1pht iteration_4
71 2001 0.51 14.37 86.15 -278.44 -22.73 -328.10 4.01 21.97 4.69 -45.74 -11.90 0.0 -570.11 1 1pht iteration_4
95 2001 0.42 12.53 85.29 -474.67 -21.68 -359.69 5.59 9.07 5.69 -36.23 -4.01 0.0 -790.66 1 1ptf iteration_4
351 2001 0.39 19.02 130.41 -852.34 -20.69 -548.98 9.37 22.46 8.25 -42.10 0.00 0.0 -1293.62 1 1bgf iteration_4
46 2001 0.51 12.72 81.72 -437.93 -20.13 -331.93 3.20 12.18 6.89 -33.04 -3.59 0.0 -722.62 0 451c iteration_4
30 2001 0.34 22.07 76.70 -375.24 -17.03 -324.94 6.27 19.87 2.38 -40.01 -6.75 0.0 -658.75 0 1by9 iteration_4
87 2001 0.66 18.02 86.62 -637.03 -7.12 -366.89 3.61 18.32 7.28 -34.49 0.00 0.0 -929.69 1 1a32 iteration_4
86 2001 0.61 18.32 77.80 -636.64 -4.29 -363.96 2.74 24.99 7.03 -35.48 0.00 0.0 -927.81 0 1a32 iteration_4

154 rows × 17 columns


In [385]:
# Q distributions for three selected folders.
# Folder membership is tested with isin() so the filter does not rely on a
# backslash-continued string literal (which carries the source indentation
# into the query text).
sub_data = max_Q_data[max_Q_data["Folder"].isin([
    "iteration_0_stronger_exclude_volume_stronger_side_chain",
    "iteration_0_stronger_exclude_volume_stronger_side_chain_k4",
    "iteration_0_stronger_exclude_volume",
])]

# One overlaid Q distribution per Folder (hue).
g = sns.FacetGrid(sub_data, hue="Folder", height=5, aspect=1.618)
g = g.map(sns.distplot, "Q")
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))


Out[385]:
<matplotlib.legend.Legend at 0x1a98176e48>

In [381]:
# Q distributions for three selected folders.
# Folder membership is tested with isin() so the filter does not rely on a
# backslash-continued string literal (which carries the source indentation
# into the query text).
sub_data = max_Q_data[max_Q_data["Folder"].isin([
    "iteration_1_stronger_exclude_withoutBurial_bugfix",
    "iteration_2_bug_fixed",
    "iteration_0_stronger_exclude_volume",
])]

# One overlaid Q distribution per Folder (hue).
g = sns.FacetGrid(sub_data, hue="Folder", height=5, aspect=1.618)
g = g.map(sns.distplot, "Q")
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))


Out[381]:
<matplotlib.legend.Legend at 0x1a288f7da0>

In [379]:
# Keep every folder of max_Q_data except the two named in the query.
sub_data = max_Q_data.query("Folder!='iteration_1_stronger_exclude_withoutBurial' and Folder!='iteration_0_stronger_exclude_volume_k10'")

# One overlaid Q distribution per Folder (hue).
g = sns.FacetGrid(sub_data, hue="Folder", height=5, aspect=1.618)
g = g.map(sns.distplot, "Q")
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))


Out[379]:
<matplotlib.legend.Legend at 0x1a4a80e668>

In [370]:
# Show the rows of sub_data for protein 1cxc.
sub_data.query("Protein == '1cxc'")


Out[370]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
504 2001 0.34 16.90 129.45 -581.40 -176.34 -490.84 11.00 29.51 5.47 -37.36 -4.21 0.0 -1114.71 0 1cxc iteration_0_cbd
505 2001 0.38 15.30 126.61 -591.59 -182.24 -473.92 15.33 21.87 6.76 -45.44 -2.92 0.0 -1125.54 1 1cxc iteration_0_cbd
506 2001 0.36 15.84 120.94 -584.20 -162.74 -471.62 9.97 22.78 10.51 -37.68 -4.27 0.0 -1096.32 0 1cxc iteration_0_stronger_exclude_volume
507 2001 0.42 13.97 127.13 -598.39 -188.00 -447.45 9.78 16.10 10.19 -32.40 -3.93 0.0 -1106.97 1 1cxc iteration_0_stronger_exclude_volume
508 2001 0.43 14.52 132.44 -591.39 -199.72 -470.68 7.79 26.62 9.37 -33.94 -4.24 0.0 -1123.74 0 1cxc iteration_0_stronger_exclude_volume_shift_well
509 2001 0.42 13.40 127.27 -577.91 -206.70 -460.03 8.98 27.00 11.05 -40.71 -0.86 0.0 -1111.92 1 1cxc iteration_0_stronger_exclude_volume_shift_well
510 2001 0.41 12.67 126.64 -594.37 -183.04 -454.81 19.16 18.11 7.77 -40.10 -2.40 0.0 -1103.05 0 1cxc iteration_1_stronger_exclude_withoutBurial
511 2001 0.40 12.85 118.25 -601.09 -187.24 -435.86 16.35 26.24 6.71 -40.48 -5.07 0.0 -1102.19 1 1cxc iteration_1_stronger_exclude_withoutBurial
512 2001 0.44 12.52 114.67 -600.65 -215.52 -434.71 17.67 30.18 5.60 -38.67 -4.39 0.0 -1125.80 0 1cxc iteration_1_stronger_exclude_withoutBurial_bugfix
513 2001 0.38 12.47 126.20 -599.81 -202.05 -435.06 22.24 34.40 8.54 -46.86 -5.69 0.0 -1098.09 1 1cxc iteration_1_stronger_exclude_withoutBurial_bugfix
514 2001 0.36 13.15 130.08 -589.79 -281.85 -422.01 28.01 24.56 7.36 -28.97 -0.28 0.0 -1132.90 0 1cxc iteration_2_stronger_exclude_withoutBurial
515 2001 0.32 13.22 127.12 -595.24 -249.39 -433.36 15.09 24.29 10.39 -42.16 -0.13 0.0 -1143.39 1 1cxc iteration_2_stronger_exclude_withoutBurial

In [367]:
# Box plot of Q grouped by Folder. x/y are passed by keyword: seaborn >= 0.12
# accepts only `data` positionally.
sns.boxplot(x="Q", y="Folder", data=sub_data)


Out[367]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a4a5beb38>

In [349]:
# Box plot of Contact grouped by Folder. x/y are passed by keyword: seaborn >= 0.12
# accepts only `data` positionally.
sns.boxplot(x="Contact", y="Folder", data=sub_data)


Out[349]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a28fb5400>

In [331]:
# Extract the same seven columns for each of two folders; the index of each
# slice is reset to 0..n-1.
a = sub_data.query("Folder == 'iteration_0_stronger_exclude_volume'")[["Q", "Contact", "Fragment", "Exclude_Side", "Run", "Protein", "Folder"]].reset_index(drop=True)
b = sub_data.query("Folder == 'iteration_2_stronger_exclude_withoutBurial'")[["Q", "Contact", "Fragment", "Exclude_Side", "Run", "Protein", "Folder"]].reset_index(drop=True)

In [332]:
# Join the two tables on (Run, Protein); columns present in both get the
# pandas default suffixes _x (from a) and _y (from b).
c = a.merge(b, on=["Run", "Protein"])

In [333]:
# Q_diff > 0 means Q_x (table a) is larger than Q_y (table b).
# Note: this adds a column to c in place.
c["Q_diff"] = c["Q_x"] - c["Q_y"]

In [334]:
# Five rows with the smallest (most negative) Contact_y.
c.sort_values("Contact_y").head()


Out[334]:
Q_x Contact_x Fragment_x Exclude_Side_x Run Protein Folder_x Q_y Contact_y Fragment_y Exclude_Side_y Folder_y Q_diff
112 0.59 -155.38 -542.42 10.74 0 1kuh iteration_0_stronger_exclude_volume 0.27 -448.11 -403.40 27.65 iteration_2_stronger_exclude_withoutBurial 0.32
98 0.33 -222.25 -460.68 12.24 0 1rie iteration_0_stronger_exclude_volume 0.29 -397.16 -377.11 29.10 iteration_2_stronger_exclude_withoutBurial 0.04
124 0.31 -319.96 -550.69 21.14 0 1lcl iteration_0_stronger_exclude_volume 0.33 -392.24 -503.10 34.80 iteration_2_stronger_exclude_withoutBurial -0.02
113 0.35 -115.85 -509.35 7.40 1 1kuh iteration_0_stronger_exclude_volume 0.27 -391.02 -413.79 32.66 iteration_2_stronger_exclude_withoutBurial 0.08
99 0.37 -211.56 -483.88 21.98 1 1rie iteration_0_stronger_exclude_volume 0.23 -388.38 -356.10 18.40 iteration_2_stronger_exclude_withoutBurial 0.14

In [338]:
# Five rows with the largest Q_diff (ascending sort, so the maximum is last).
c.sort_values("Q_diff").tail()


Out[338]:
Q_x Contact_x Fragment_x Exclude_Side_x Run Protein Folder_x Q_y Contact_y Fragment_y Exclude_Side_y Folder_y Q_diff
51 0.84 -226.91 -468.39 13.12 1 1erv iteration_0_stronger_exclude_volume 0.37 -156.67 -413.84 11.19 iteration_2_stronger_exclude_withoutBurial 0.47
138 0.82 -323.19 -630.06 26.13 0 1jon iteration_0_stronger_exclude_volume 0.34 -251.84 -560.16 20.89 iteration_2_stronger_exclude_withoutBurial 0.48
8 0.90 -152.14 -343.97 12.17 0 1bdo iteration_0_stronger_exclude_volume 0.42 -152.10 -280.41 11.09 iteration_2_stronger_exclude_withoutBurial 0.48
104 0.85 -239.43 -557.52 15.13 0 1pdo iteration_0_stronger_exclude_volume 0.35 -204.92 -496.63 15.29 iteration_2_stronger_exclude_withoutBurial 0.50
68 0.86 -226.62 -510.30 15.10 0 1a6f iteration_0_stronger_exclude_volume 0.32 -155.32 -452.98 7.65 iteration_2_stronger_exclude_withoutBurial 0.54

In [339]:
# Show the rows of sub_data for protein 1erv.
sub_data.query("Protein == '1erv'")


Out[339]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
398 2001 0.65 12.55 121.06 -683.69 -228.09 -453.84 20.23 17.05 5.21 -44.26 -12.41 0.0 -1258.73 0 1erv iteration_0_stronger_exclude_volume
399 2001 0.84 12.41 122.04 -702.46 -226.91 -468.39 13.12 19.60 5.37 -47.43 -12.39 0.0 -1297.44 1 1erv iteration_0_stronger_exclude_volume
408 2001 0.44 14.10 107.93 -700.98 -123.43 -435.73 6.35 20.27 5.41 -31.94 -4.70 0.0 -1156.81 0 1erv iteration_2_stronger_exclude_withoutBurial
409 2001 0.37 14.87 107.29 -699.10 -156.67 -413.84 11.19 25.26 6.27 -37.00 -4.26 0.0 -1160.86 1 1erv iteration_2_stronger_exclude_withoutBurial

In [335]:
# Q per protein for the two folders being compared, one line per Folder.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sub_data = max_Q_data.query("Folder=='iteration_0_stronger_exclude_volume' or Folder=='iteration_2_stronger_exclude_withoutBurial'")
sns.lineplot(x="Protein", y="Q", hue="Folder", data=sub_data)


Out[335]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a5a4fb3c8>

In [336]:
# Exclude_Side per protein for the two folders being compared, one line per Folder.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sub_data = max_Q_data.query("Folder=='iteration_0_stronger_exclude_volume' or Folder=='iteration_2_stronger_exclude_withoutBurial'")
sns.lineplot(x="Protein", y="Exclude_Side", hue="Folder", data=sub_data)


Out[336]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a4b6917b8>

In [323]:
# Stack the two selections and plot Exclude_Side per protein, one line per Folder.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
a = pd.concat([selected, selected2])
sns.lineplot(x="Protein", y="Exclude_Side", hue="Folder", data=a)


Out[323]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2a0dd320>

In [321]:
# Contact vs Exclude_Side: the Steps == 0 rows of iteration_native_new_4 are drawn
# in red, and the Steps == 2001 rows of the second folder are overlaid on the same axes.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
selected = data.query("Folder == 'iteration_native_new_4' and Steps == 0").reset_index(drop=True)
selected2 = data.query("Folder == 'iteration_1_stronger_exclude_withoutBurial' and Steps == 2001").reset_index(drop=True)
sns.scatterplot(x="Contact", y="Exclude_Side", data=selected, color="red")
sns.scatterplot(x="Contact", y="Exclude_Side", hue="Folder", data=selected2)


Out[321]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a27cc00b8>

In [307]:
# selected = data.query("Folder == 'iteration_0_stronger_exclude_volume_k10' and Steps == 2001").reset_index(drop=True)
# Contact vs Exclude_Side for sub_data, coloured by Folder.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.scatterplot(x="Contact", y="Exclude_Side", hue="Folder", data=sub_data)


Out[307]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a258479e8>

In [255]:
# Keep the three folders named in the query, then draw one overlaid
# Q distribution per Folder (hue).
sub_data = max_Q_data.query("Folder=='iteration_0_cbd' or Folder=='iteration_0_stronger_exclude_volume' or Folder=='iteration_0_stronger_exclude_volume_k10'")
g = sns.FacetGrid(sub_data, hue="Folder", height=5, aspect=1.618)
g = g.map(sns.distplot, "Q")
plt.legend()


Out[255]:
<matplotlib.legend.Legend at 0x1a4db3c518>

In [259]:
# Show the rows of selected for protein 1vcc.
selected.query("Protein == '1vcc'")


Out[259]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
4 2001 0.41 11.19 76.21 -307.50 -162.17 -295.54 14.55 20.58 3.27 -23.72 -2.2 0.0 -676.51 0 1vcc iteration_0_stronger_exclude_volume
5 2001 0.54 11.37 83.80 -309.74 -135.61 -319.18 14.85 15.85 3.68 -40.33 -8.2 0.0 -694.88 1 1vcc iteration_0_stronger_exclude_volume

In [258]:
# Rows of this folder at Steps == 2001 (index reset), then the five lowest-Q rows.
selected = data.query("Folder == 'iteration_0_stronger_exclude_volume' and Steps == 2001").reset_index(drop=True)
selected.sort_values("Q").head(5)


Out[258]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
126 2001 0.29 20.02 139.44 -569.22 -176.55 -540.28 10.81 35.49 3.97 -40.91 -11.27 0.0 -1148.53 0 2sns iteration_0_stronger_exclude_volume
57 2001 0.29 13.95 108.34 -510.78 -166.90 -367.82 8.28 19.23 3.50 -30.10 -8.22 0.0 -944.47 1 1bkf iteration_0_stronger_exclude_volume
108 2001 0.30 15.17 134.05 -600.10 -182.50 -501.64 7.88 20.45 4.66 -51.80 -12.63 0.0 -1181.64 0 1htp iteration_0_stronger_exclude_volume
64 2001 0.30 14.24 116.53 -579.71 -167.87 -399.65 10.89 11.63 3.67 -66.11 -24.21 0.0 -1094.82 0 2mcm iteration_0_stronger_exclude_volume
129 2001 0.31 14.79 147.77 -992.58 -259.76 -555.86 23.19 29.54 9.57 -52.06 -0.00 0.0 -1650.19 1 1flp iteration_0_stronger_exclude_volume

In [237]:
# Contact vs Exclude_Side at Steps == 2001 for this folder, coloured by Q.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
selected = data.query("Folder == 'iteration_0_stronger_exclude_volume_k10' and Steps == 2001").reset_index(drop=True)
sns.scatterplot(x="Contact", y="Exclude_Side", hue="Q", data=selected)


Out[237]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a715d43c8>

In [239]:
# Five rows with the largest Exclude_Side (ascending sort, so the maximum is last).
selected.sort_values("Exclude_Side").tail(5)


Out[239]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
87 2001 0.50 14.04 131.31 -622.67 -242.89 -501.27 20.78 27.93 2.88 -47.77 -12.43 0.0 -1244.11 1 1mai iteration_0_stronger_exclude_volume_k10
160 2001 0.41 14.84 138.54 -842.09 -290.04 -599.22 20.95 29.81 11.05 -57.18 -8.09 0.0 -1596.26 0 1jon iteration_0_stronger_exclude_volume_k10
116 2001 0.50 13.74 129.96 -650.58 -279.41 -521.13 21.63 20.84 6.89 -51.14 -3.62 0.0 -1326.57 0 3chy iteration_0_stronger_exclude_volume_k10
161 2001 0.34 14.49 148.85 -851.20 -309.61 -552.92 25.06 31.25 8.00 -65.24 -12.75 0.0 -1578.57 1 1jon iteration_0_stronger_exclude_volume_k10
142 2001 0.45 13.34 148.37 -749.66 -318.28 -523.29 28.64 30.52 8.35 -59.29 -8.52 0.0 -1443.17 0 5nul iteration_0_stronger_exclude_volume_k10

In [240]:
# Contact vs Exclude_Side at Steps == 2001 for this folder, coloured by Q.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
selected = data.query("Folder == 'iteration_0_stronger_exclude_volume' and Steps == 2001").reset_index(drop=True)
sns.scatterplot(x="Contact", y="Exclude_Side", hue="Q", data=selected)


Out[240]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a4e52f5c0>

In [241]:
# Five rows with the largest Exclude_Side (ascending sort, so the maximum is last).
selected.sort_values("Exclude_Side").tail(5)


Out[241]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
92 2001 0.34 12.93 123.10 -392.07 -295.82 -464.13 25.13 28.31 5.31 -56.66 -12.04 0.0 -1038.87 0 1b6e iteration_0_stronger_exclude_volume
101 2001 0.44 13.91 133.36 -850.41 -241.22 -545.73 25.55 30.14 10.32 -38.72 -0.00 0.0 -1476.71 1 1bgf iteration_0_stronger_exclude_volume
112 2001 0.60 15.51 134.35 -499.12 -268.50 -564.81 25.85 20.48 2.21 -74.28 -29.11 0.0 -1252.92 0 1rlw iteration_0_stronger_exclude_volume
160 2001 0.82 13.43 143.42 -845.31 -323.19 -630.06 26.13 25.88 7.92 -89.57 -14.78 0.0 -1699.56 0 1jon iteration_0_stronger_exclude_volume
150 2001 0.37 14.05 141.28 -995.48 -271.41 -554.85 29.66 19.20 13.38 -54.14 -0.00 0.0 -1672.36 0 1flp iteration_0_stronger_exclude_volume

In [212]:
# Contact vs Exclude_Side at Steps == 1 for iteration_native_new_4, coloured by Q.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
selected = data.query("Folder == 'iteration_native_new_4' and Steps == 1").reset_index(drop=True)
sns.scatterplot(x="Contact", y="Exclude_Side", hue="Q", data=selected)


Out[212]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a4db7a7f0>

In [213]:
# Row with the largest Exclude_Side.
selected.sort_values("Exclude_Side").tail(1)


Out[213]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
76 1 0.9 11.97 43.24 -366.01 -232.58 -480.27 16.98 -22.6 2.0 -70.34 -10.26 0.0 -1119.84 0 1by2 iteration_native_new_4

In [214]:
# Contact vs Exclude_Side at Steps == 0 for iteration_native_new_4, coloured by Q.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
selected = data.query("Folder == 'iteration_native_new_4' and Steps == 0").reset_index(drop=True)
sns.scatterplot(x="Contact", y="Exclude_Side", hue="Q", data=selected)


Out[214]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a4d9c26a0>

In [215]:
# Row with the largest Exclude_Side.
selected.sort_values("Exclude_Side").tail(1)


Out[215]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
76 0 1.0 12.13 3543.07 -286.47 -170.95 -541.52 21.04 -1.87 447.5 -54.19 -7.52 0.0 2949.09 0 1by2 iteration_native_new_4

In [202]:
# Contact vs Exclude_Side for the Steps > 2000 rows of iteration_0_cbd, coloured by Q.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
selected = data.query("Folder == 'iteration_0_cbd' and Steps > 2000").reset_index(drop=True)
sns.scatterplot(x="Contact", y="Exclude_Side", hue="Q", data=selected)


Out[202]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a281fa978>

In [203]:
# Contact vs Exclude_Side for the Steps > 2000 rows of iteration_new_4, coloured by Q.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
selected = data.query("Folder == 'iteration_new_4' and Steps > 2000").reset_index(drop=True)
sns.scatterplot(x="Contact", y="Exclude_Side", hue="Q", data=selected)


Out[203]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a4a5a5cf8>

In [219]:
# Rows of this folder with Steps > 2000, index reset to 0..n-1.
selected = data.query("Folder == 'iteration_new_4_without_burial' and Steps > 2000").reset_index(drop=True)

In [220]:
# Per-row Contact / Fragment ratio. Note: this adds a column to selected in place.
selected["ratio"] = selected["Contact"] / selected["Fragment"]

In [217]:
gammaFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_optimization/optimization_new_4_withoutBurial/saved_gammas/new_4_cutoff600_impose_Aprime_constraint"

def get_contact_gamma_info(gammaFile):
    """Load a gamma file and tabulate its entries per residue pair.

    The file is read with ``np.loadtxt`` and indexed as a flat sequence made
    of three consecutive sections, each holding one entry per unordered pair
    of the 20 residue letters (20 * 21 / 2 = 210 entries per section):
    indices 0-209 are labelled "Direct", 210-419 "Protein" and 420-629
    "Water".

    Parameters
    ----------
    gammaFile : str or path-like
        Path of the text file to load. It must provide at least 630 entries;
        a shorter file raises ``IndexError`` when the missing entry is read.

    Returns
    -------
    pandas.DataFrame
        Columns ``Interaction``, ``Res1``, ``Res2``, ``Index``, ``Gamma``.
        Each unordered pair yields one row, plus a mirrored row
        (``Res1``/``Res2`` swapped, same ``Index`` and ``Gamma``) when the two
        residues differ. ``Gamma`` is rounded to 3 decimals. All "Direct"
        rows come first; "Protein" and "Water" rows then alternate pair by
        pair.
    """
    gamma = np.loadtxt(gammaFile)

    res_type_map_letters = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
                            'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']
    n_res = len(res_type_map_letters)
    # Number of unordered residue pairs (diagonal included): 210 for 20 letters.
    n_pairs = n_res * (n_res + 1) // 2

    def _pair_rows(interaction, i, j, index):
        """Return the row for (i, j) plus its mirror (j, i) when i != j."""
        value = round(gamma[index], 3)
        first, second = res_type_map_letters[i], res_type_map_letters[j]
        rows = [[interaction, first, second, index, value]]
        if i != j:
            rows.append([interaction, second, first, index, value])
        return rows

    # Upper-triangular enumeration; a pair's position is its offset in a section.
    pairs = [(i, j) for i in range(n_res) for j in range(i, n_res)]

    info_ = []
    for c, (i, j) in enumerate(pairs):
        info_.extend(_pair_rows("Direct", i, j, c))
    # The Protein section starts right after Direct; Water sits one section further.
    for c, (i, j) in enumerate(pairs, start=n_pairs):
        info_.extend(_pair_rows("Protein", i, j, c))
        info_.extend(_pair_rows("Water", i, j, c + n_pairs))

    contact_gammas = pd.DataFrame(info_, columns=["Interaction", "Res1", "Res2", "Index", "Gamma"])
    return contact_gammas

In [187]:
# Check the gamma.
# Read the gamma file and expand it into `info_`, a list of
# [interaction, res1, res2, index, rounded gamma] rows. No sorting happens in
# this cell.
gammaFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_optimization/optimization_new_4_withoutBurial/saved_gammas/new_4_cutoff600_impose_Aprime_constraint"
gamma = np.loadtxt(gammaFile)

res_type_map_letters = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
                        'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']

# NOTE(review): inverse_res_type_map is not used anywhere in this cell.
inverse_res_type_map = dict(list(zip(list(range(20)), res_type_map_letters)))
c = 0  # running index into gamma; carried over from the first loop nest to the second
info_ = []
# First pass: the 210 unordered residue pairs (i <= j) consume gamma[0..209],
# labelled "Direct". Off-diagonal pairs also get a mirrored (j, i) row with the
# same index and value.
for i in range(20):
    for j in range(i, 20):
        info_.append(["Direct", res_type_map_letters[i], res_type_map_letters[j], c, round(gamma[c],3)])
        if i != j:
            info_.append(["Direct", res_type_map_letters[j], res_type_map_letters[i], c, round(gamma[c],3)])
        c += 1
# Second pass: c continues from 210, so "Protein" reads gamma[210..419] and
# "Water" reads the entry 210 positions later (gamma[420..629]).
for i in range(20):
    for j in range(i, 20):
        info_.append(["Protein", res_type_map_letters[i], res_type_map_letters[j], c, round(gamma[c],3)])
        if i != j:
            info_.append(["Protein", res_type_map_letters[j], res_type_map_letters[i], c, round(gamma[c],3)])
        info_.append(["Water", res_type_map_letters[i], res_type_map_letters[j], c+210, round(gamma[c+210],3)])
        if i != j:
            info_.append(["Water", res_type_map_letters[j], res_type_map_letters[i], c+210, round(gamma[c+210],3)])
        c += 1

In [188]:
# Number of entries loaded from the gamma file.
len(gamma)


Out[188]:
630

In [193]:
# One DataFrame row per entry of info_, with named columns.
contact_gammas = pd.DataFrame(info_, columns=["Interaction", "Res1", "Res2", "Index", "Gamma"])

In [216]:
# Five rows with the smallest (most negative) Gamma.
contact_gammas.sort_values("Gamma").head()


Out[216]:
Interaction Res1 Res2 Index Gamma
180 Direct Q H 93 -4.963
181 Direct H Q 93 -4.963
160 Direct M C 82 -4.654
159 Direct C M 82 -4.654
172 Direct Y C 88 -4.012

In [197]:
# Contact vs Exclude_Side for the current `selected` frame, coloured by Q.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.scatterplot(x="Contact", y="Exclude_Side", hue="Q", data=selected)


Out[197]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a4a2e1828>

In [222]:
# Row with the largest value in the ratio column.
selected.sort_values("ratio").tail(1)


Out[222]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder ratio
81 2001 0.22 10.31 153.19 -469.05 -1437.49 -227.44 214.79 78.21 32.92 -60.61 -5.98 0.0 -1721.46 1 1poa iteration_new_4_without_burial 6.320304

In [184]:
# Contact vs Fragment for the current `selected` frame.
# x/y are passed by keyword: seaborn >= 0.12 accepts only `data` positionally.
sns.scatterplot(x="Contact", y="Fragment", data=selected)


Out[184]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a26e3c668>

In [168]:
# Show the rows of sub_data for protein 7rsa.
sub_data.query("Protein == '7rsa'")


Out[168]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
1020 2001 0.31 14.75 148.88 -561.78 -210.33 -483.73 12.19 26.70 6.07 -56.02 -13.29 0.0 -1131.32 0 7rsa iteration_0_cbd
1021 2001 0.74 14.42 132.17 -549.82 -192.81 -543.72 18.93 24.95 4.79 -67.14 -15.43 0.0 -1188.08 1 7rsa iteration_0_cbd
1022 251 0.66 14.93 192.57 -535.22 -289.49 -521.89 17.26 47.07 6.71 -69.05 -15.03 0.0 -1167.08 0 7rsa iteration_native_new_4
1023 251 0.70 14.63 197.82 -531.77 -309.60 -513.80 20.79 50.52 6.93 -73.05 -17.90 0.0 -1170.05 1 7rsa iteration_native_new_4
1024 2001 0.35 17.25 133.94 -558.49 -345.38 -512.81 11.23 21.93 5.05 -55.89 -9.32 0.0 -1309.73 0 7rsa iteration_new_1
1025 2001 0.33 16.99 124.59 -552.37 -338.08 -496.07 13.67 18.02 4.45 -46.84 -10.63 0.0 -1283.25 1 7rsa iteration_new_1
1026 2001 0.38 14.82 130.13 -553.52 -260.42 -502.82 10.56 28.28 2.94 -44.31 -6.85 0.0 -1196.02 0 7rsa iteration_new_2
1027 2001 0.31 15.07 131.80 -558.35 -259.80 -488.27 8.20 24.26 4.64 -20.19 -6.65 0.0 -1164.35 1 7rsa iteration_new_2
1030 2001 0.31 15.05 145.18 -546.51 -407.78 -454.57 21.54 24.82 3.31 -68.11 -11.98 0.0 -1294.10 0 7rsa iteration_new_4
1031 2001 0.40 14.25 129.47 -555.43 -381.02 -474.58 14.99 34.86 5.46 -60.97 -13.99 0.0 -1301.21 1 7rsa iteration_new_4
1032 2001 0.24 11.28 126.59 -534.79 -748.74 -327.80 71.56 42.51 15.57 -59.32 -28.01 0.0 -1442.45 0 7rsa iteration_new_4_without_burial
1033 2001 0.26 11.66 137.10 -532.73 -750.86 -323.30 59.32 41.73 16.04 -77.80 -19.10 0.0 -1449.60 1 7rsa iteration_new_4_without_burial

In [161]:
# Keep only the Steps == 1 rows of selected, with the index reset to 0..n-1.
a = selected.query("Steps == 1").reset_index(drop=True)

In [162]:
# All rows of a, ordered by ascending SideChain.
a.sort_values("SideChain")


Out[162]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
171 1 0.98 14.57 33.92 -977.76 -286.61 -656.86 6.36 -46.21 2.62 -59.78 -0.00 0.0 -1984.31 1 2hbg iteration_native_new_4
170 1 0.98 14.57 33.92 -977.76 -286.61 -656.86 6.36 -46.21 2.62 -59.78 -0.00 0.0 -1984.31 0 2hbg iteration_native_new_4
169 1 0.98 14.76 50.74 -1094.70 -232.93 -689.80 6.85 -46.09 2.92 -59.52 -0.00 0.0 -2062.52 1 1mba iteration_native_new_4
168 1 0.98 14.76 50.74 -1094.70 -232.93 -689.80 6.85 -46.09 2.92 -59.52 -0.00 0.0 -2062.52 0 1mba iteration_native_new_4
121 1 0.98 15.63 37.50 -832.32 -149.63 -588.17 5.04 -44.85 1.54 -55.33 -0.00 0.0 -1626.21 1 1cpq iteration_native_new_4
120 1 0.98 15.63 37.50 -832.32 -149.63 -588.17 5.04 -44.85 1.54 -55.33 -0.00 0.0 -1626.21 0 1cpq iteration_native_new_4
150 1 0.98 14.71 42.26 -954.83 -225.29 -637.66 6.69 -44.76 1.81 -57.08 -0.00 0.0 -1868.86 0 1flp iteration_native_new_4
151 1 0.98 14.71 42.26 -954.83 -225.29 -637.66 6.69 -44.76 1.81 -57.08 -0.00 0.0 -1868.86 1 1flp iteration_native_new_4
70 1 0.97 13.21 28.91 -572.99 -261.64 -484.98 8.77 -41.49 1.53 -106.49 -27.45 0.0 -1455.82 0 2mcm iteration_native_new_4
71 1 0.97 13.21 28.91 -572.99 -261.64 -484.98 8.77 -41.49 1.53 -106.49 -27.45 0.0 -1455.82 1 2mcm iteration_native_new_4
160 1 0.97 13.57 38.22 -827.52 -234.03 -683.51 9.54 -41.00 1.06 -99.32 -14.11 0.0 -1850.69 0 1jon iteration_native_new_4
161 1 0.97 13.57 38.22 -827.52 -234.03 -683.51 9.54 -41.00 1.06 -99.32 -14.11 0.0 -1850.69 1 1jon iteration_native_new_4
157 1 0.98 14.88 38.41 -662.93 -204.49 -651.81 7.30 -39.60 1.35 -94.75 -26.32 0.0 -1632.84 1 1pkp iteration_native_new_4
156 1 0.98 14.88 38.41 -662.93 -204.49 -651.81 7.30 -39.60 1.35 -94.75 -26.32 0.0 -1632.84 0 1pkp iteration_native_new_4
172 1 0.98 13.71 46.49 -688.00 -304.73 -674.93 9.52 -39.27 1.19 -89.19 -18.82 0.0 -1757.74 0 1akr iteration_native_new_4
173 1 0.98 13.71 46.49 -688.00 -304.73 -674.93 9.52 -39.27 1.19 -89.19 -18.82 0.0 -1757.74 1 1akr iteration_native_new_4
163 1 0.98 18.40 39.10 -1128.94 -213.30 -719.42 4.76 -38.93 2.25 -69.21 0.00 0.0 -2123.70 1 1vls iteration_native_new_4
162 1 0.98 18.40 39.10 -1128.94 -213.30 -719.42 4.76 -38.93 2.25 -69.21 0.00 0.0 -2123.70 0 1vls iteration_native_new_4
34 1 0.97 11.55 28.52 -491.97 -160.22 -405.89 2.90 -37.87 1.84 -28.65 -0.00 0.0 -1091.33 0 1rzl iteration_native_new_4
35 1 0.97 11.55 28.52 -491.97 -160.22 -405.89 2.90 -37.87 1.84 -28.65 -0.00 0.0 -1091.33 1 1rzl iteration_native_new_4
105 1 0.97 12.93 39.11 -568.66 -191.26 -560.00 11.46 -37.21 1.79 -79.35 -16.61 0.0 -1400.73 1 1bqk iteration_native_new_4
104 1 0.97 12.93 39.11 -568.66 -191.26 -560.00 11.46 -37.21 1.79 -79.35 -16.61 0.0 -1400.73 0 1bqk iteration_native_new_4
158 1 0.97 16.88 40.47 -893.93 -195.42 -694.00 7.97 -36.28 1.53 -65.47 -5.19 0.0 -1840.33 0 1rss iteration_native_new_4
159 1 0.97 16.88 40.47 -893.93 -195.42 -694.00 7.97 -36.28 1.53 -65.47 -5.19 0.0 -1840.33 1 1rss iteration_native_new_4
131 1 0.97 13.35 30.46 -627.48 -190.34 -595.87 9.72 -36.22 1.73 -53.89 -4.98 0.0 -1466.86 1 1c52 iteration_native_new_4
130 1 0.97 13.35 30.46 -627.48 -190.34 -595.87 9.72 -36.22 1.73 -53.89 -4.98 0.0 -1466.86 0 1c52 iteration_native_new_4
123 1 0.98 13.77 33.82 -796.28 -197.90 -599.94 6.77 -36.17 1.83 -79.29 -9.61 0.0 -1676.77 1 1pdo iteration_native_new_4
122 1 0.98 13.77 33.82 -796.28 -197.90 -599.94 6.77 -36.17 1.83 -79.29 -9.61 0.0 -1676.77 0 1pdo iteration_native_new_4
139 1 0.97 15.85 35.16 -861.54 -253.82 -623.93 8.14 -34.61 0.93 -122.42 -28.01 0.0 -1880.10 1 1aqt iteration_native_new_4
138 1 0.97 15.85 35.16 -861.54 -253.82 -623.93 8.14 -34.61 0.93 -122.42 -28.01 0.0 -1880.10 0 1aqt iteration_native_new_4
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
64 1 0.96 12.74 32.76 -483.51 -158.53 -478.97 5.65 -18.82 0.91 -26.36 -1.41 0.0 -1128.29 0 1ycc iteration_native_new_4
65 1 0.96 12.74 32.76 -483.51 -158.53 -478.97 5.65 -18.82 0.91 -26.36 -1.41 0.0 -1128.29 1 1ycc iteration_native_new_4
87 1 0.96 13.44 33.82 -627.00 -237.23 -573.67 6.94 -18.24 0.84 -70.47 -16.24 0.0 -1501.25 1 1mai iteration_native_new_4
86 1 0.96 13.44 33.82 -627.00 -237.23 -573.67 6.94 -18.24 0.84 -70.47 -16.24 0.0 -1501.25 0 1mai iteration_native_new_4
137 1 0.97 15.57 54.12 -571.41 -305.45 -622.17 9.43 -18.09 1.12 -58.71 -8.34 0.0 -1519.49 1 1poc iteration_native_new_4
136 1 0.97 15.57 54.12 -571.41 -305.45 -622.17 9.43 -18.09 1.12 -58.71 -8.34 0.0 -1519.49 0 1poc iteration_native_new_4
118 1 0.96 14.44 35.19 -804.60 -175.45 -630.29 5.54 -18.08 1.60 -52.07 -0.91 0.0 -1639.06 0 1rcb iteration_native_new_4
119 1 0.96 14.44 35.19 -804.60 -175.45 -630.29 5.54 -18.08 1.60 -52.07 -0.91 0.0 -1639.06 1 1rcb iteration_native_new_4
2 1 0.97 11.21 18.99 -422.02 -125.64 -326.06 1.74 -17.97 1.05 -19.99 -0.00 0.0 -889.89 0 1hyp iteration_native_new_4
3 1 0.97 11.21 18.99 -422.02 -125.64 -326.06 1.74 -17.97 1.05 -19.99 -0.00 0.0 -889.89 1 1hyp iteration_native_new_4
16 1 0.98 11.81 25.77 -299.63 -141.45 -385.26 6.31 -17.23 0.50 -40.89 -10.16 0.0 -862.04 0 1bb9 iteration_native_new_4
17 1 0.98 11.81 25.77 -299.63 -141.45 -385.26 6.31 -17.23 0.50 -40.89 -10.16 0.0 -862.04 1 1bb9 iteration_native_new_4
108 1 0.98 12.66 37.28 -344.63 -217.23 -585.94 8.16 -17.08 0.52 -98.03 -23.10 0.0 -1240.05 0 1bfg iteration_native_new_4
109 1 0.98 12.66 37.28 -344.63 -217.23 -585.94 8.16 -17.08 0.52 -98.03 -23.10 0.0 -1240.05 1 1bfg iteration_native_new_4
92 1 0.95 13.39 50.28 -386.18 -286.80 -582.63 9.59 -16.85 1.48 -64.30 -10.88 0.0 -1286.28 0 1b6e iteration_native_new_4
93 1 0.95 13.39 50.28 -386.18 -286.80 -582.63 9.59 -16.85 1.48 -64.30 -10.88 0.0 -1286.28 1 1b6e iteration_native_new_4
4 1 0.96 13.17 22.99 -410.02 -109.75 -351.72 4.59 -16.84 2.34 -57.58 -9.11 0.0 -925.11 0 1tif iteration_native_new_4
5 1 0.96 13.17 22.99 -410.02 -109.75 -351.72 4.59 -16.84 2.34 -57.58 -9.11 0.0 -925.11 1 1tif iteration_native_new_4
75 1 0.97 12.91 30.93 -584.02 -159.66 -555.46 6.74 -15.37 0.70 -61.08 -10.34 0.0 -1347.55 1 1a6f iteration_native_new_4
74 1 0.97 12.91 30.93 -584.02 -159.66 -555.46 6.74 -15.37 0.70 -61.08 -10.34 0.0 -1347.55 0 1a6f iteration_native_new_4
26 1 0.96 12.32 29.98 -403.14 -115.62 -403.09 5.01 -15.28 0.58 -32.77 -5.34 0.0 -939.67 0 1cyo iteration_native_new_4
27 1 0.96 12.32 29.98 -403.14 -115.62 -403.09 5.01 -15.28 0.58 -32.77 -5.34 0.0 -939.67 1 1cyo iteration_native_new_4
9 1 0.96 12.14 25.47 -362.62 -153.02 -374.25 1.90 -13.40 0.62 -52.84 -7.99 0.0 -936.11 1 1by9 iteration_native_new_4
8 1 0.96 12.14 25.47 -362.62 -153.02 -374.25 1.90 -13.40 0.62 -52.84 -7.99 0.0 -936.11 0 1by9 iteration_native_new_4
19 1 0.97 11.76 22.49 -283.22 -125.62 -360.64 3.67 -13.12 0.29 -62.74 -15.82 0.0 -834.72 1 1pht iteration_native_new_4
18 1 0.97 11.76 22.49 -283.22 -125.62 -360.64 3.67 -13.12 0.29 -62.74 -15.82 0.0 -834.72 0 1pht iteration_native_new_4
7 1 0.97 11.68 29.83 -301.42 -138.94 -361.44 6.36 -12.03 0.61 -38.60 -7.25 0.0 -822.87 1 1vcc iteration_native_new_4
6 1 0.97 11.68 29.83 -301.42 -138.94 -361.44 6.36 -12.03 0.61 -38.60 -7.25 0.0 -822.87 0 1vcc iteration_native_new_4
22 1 0.97 17.60 16.44 -648.42 -77.66 -398.86 2.63 -11.13 1.01 -37.92 0.00 0.0 -1153.91 0 1a32 iteration_native_new_4
23 1 0.97 17.60 16.44 -648.42 -77.66 -398.86 2.63 -11.13 1.01 -37.92 0.00 0.0 -1153.91 1 1a32 iteration_native_new_4

176 rows × 17 columns


In [159]:
# Histogram of the Exclude_Side column of a.
a.hist("Exclude_Side")


Out[159]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a3e1d5390>]],
      dtype=object)

In [108]:
# Rows of sub_data for folder iteration_new_2, ordered by ascending Q.
sub_data.query("Folder == 'iteration_new_2'").sort_values("Q")


Out[108]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
1264 2001 0.28 20.36 150.91 -655.35 -193.59 -609.63 11.19 22.83 8.11 -69.03 -12.66 0.0 -1347.22 0 1div iteration_new_2
677 2001 0.28 15.25 134.16 -388.56 -280.16 -468.93 12.52 32.00 3.86 -52.52 -7.10 0.0 -1014.72 1 1b6e iteration_new_2
901 2001 0.28 14.82 136.72 -556.60 -282.37 -475.42 11.77 27.78 10.23 -50.32 -5.90 0.0 -1184.11 1 3lzt iteration_new_2
425 2001 0.30 15.72 125.60 -533.89 -175.98 -407.92 13.74 26.67 4.61 -82.70 -26.27 0.0 -1056.14 1 1a1x iteration_new_2
593 2001 0.30 15.73 123.31 -626.46 -255.99 -448.48 8.28 33.22 12.48 -40.08 -4.12 0.0 -1197.84 1 1poa iteration_new_2
831 2001 0.30 13.10 135.25 -538.83 -291.93 -460.68 16.96 21.31 6.91 -53.16 -12.71 0.0 -1176.87 1 1rie iteration_new_2
550 2001 0.31 12.87 113.91 -358.49 -257.37 -390.48 11.16 24.15 5.40 -60.50 -7.70 0.0 -919.90 0 1by2 iteration_new_2
1152 2001 0.31 14.19 138.97 -845.44 -268.03 -573.67 18.60 32.77 8.68 -66.89 -8.81 0.0 -1563.83 0 1jon iteration_new_2
676 2001 0.31 15.27 144.01 -385.68 -277.66 -487.08 9.43 27.35 2.12 -40.78 -5.93 0.0 -1014.21 0 1b6e iteration_new_2
747 2001 0.31 15.07 131.80 -558.35 -259.80 -488.27 8.20 24.26 4.64 -20.19 -6.65 0.0 -1164.35 1 7rsa iteration_new_2
1265 2001 0.32 27.33 153.08 -651.18 -205.24 -620.60 10.93 27.84 7.49 -77.22 -17.34 0.0 -1372.24 1 1div iteration_new_2
284 2001 0.32 12.92 95.75 -324.45 -225.63 -334.97 7.30 26.50 5.00 -52.46 -7.79 0.0 -810.75 0 2cbp iteration_new_2
551 2001 0.32 13.55 117.47 -366.72 -250.59 -375.60 11.69 29.72 5.28 -66.59 -14.94 0.0 -910.27 1 1by2 iteration_new_2
565 2001 0.32 13.55 115.09 -604.71 -240.74 -448.69 14.14 15.66 7.81 -38.05 -3.35 0.0 -1182.84 1 1bea iteration_new_2
649 2001 0.32 14.35 127.08 -402.28 -283.69 -465.96 17.23 22.44 3.34 -73.09 -27.35 0.0 -1082.27 1 1neu iteration_new_2
928 2001 0.32 13.99 148.70 -599.40 -317.76 -478.15 14.31 22.55 7.22 -78.74 -21.02 0.0 -1302.29 0 1htp iteration_new_2
900 2001 0.32 15.22 134.79 -551.44 -277.44 -486.00 12.66 33.84 7.58 -45.81 -4.57 0.0 -1176.40 0 3lzt iteration_new_2
998 2001 0.32 15.41 155.50 -851.49 -219.52 -552.79 14.98 27.83 10.35 -79.97 -19.06 0.0 -1514.16 0 1aqt iteration_new_2
1055 2001 0.32 15.90 155.80 -543.07 -257.26 -593.61 15.08 42.15 4.24 -92.98 -36.42 0.0 -1306.05 1 1lcl iteration_new_2
1054 2001 0.32 15.87 153.34 -556.12 -268.05 -582.17 21.34 23.71 3.63 -95.91 -35.78 0.0 -1336.01 0 1lcl iteration_new_2
984 2001 0.33 14.56 141.50 -595.46 -309.20 -490.30 9.11 34.17 8.62 -47.53 -4.40 0.0 -1253.49 0 1poc iteration_new_2
1250 2001 0.33 19.40 134.72 -998.51 -209.49 -620.63 12.06 34.14 12.07 -50.33 -0.00 0.0 -1685.97 0 1osa iteration_new_2
929 2001 0.33 13.05 126.86 -596.24 -327.65 -489.71 16.45 22.96 4.51 -68.32 -16.41 0.0 -1327.55 1 1htp iteration_new_2
481 2001 0.33 14.07 117.53 -477.30 -233.79 -421.60 14.59 26.38 4.79 -57.03 -18.29 0.0 -1044.71 1 1sfp iteration_new_2
1124 2001 0.33 14.45 143.90 -666.23 -353.19 -544.21 21.90 22.84 7.39 -88.69 -27.96 0.0 -1484.26 0 1pkp iteration_new_2
1222 2001 0.34 15.96 147.81 -983.25 -243.76 -582.25 7.66 28.31 16.02 -51.97 -0.00 0.0 -1661.43 0 2hbg iteration_new_2
355 2001 0.34 15.59 106.74 -443.76 -162.74 -398.70 8.52 20.68 3.88 -27.91 -5.44 0.0 -898.73 1 3vub iteration_new_2
1027 2001 0.34 14.22 138.75 -751.87 -313.82 -531.55 16.56 30.44 9.08 -60.57 -8.74 0.0 -1471.73 1 5nul iteration_new_2
1040 2001 0.34 15.47 130.10 -599.19 -300.88 -520.08 14.58 34.27 12.63 -73.60 -19.19 0.0 -1321.35 0 1pne iteration_new_2
1096 2001 0.35 16.72 144.60 -800.89 -197.26 -619.64 9.05 33.03 9.90 -52.14 -8.98 0.0 -1482.34 0 1tfe iteration_new_2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
186 2001 0.67 12.01 83.83 -467.31 -167.39 -347.05 8.33 10.03 7.19 -43.46 -4.48 0.0 -920.31 0 1ptf iteration_new_2
172 2001 0.67 16.90 83.23 -641.79 -63.77 -359.69 4.68 24.76 8.69 -32.68 0.00 0.0 -976.56 0 1a32 iteration_new_2
1237 2001 0.68 14.24 161.41 -682.20 -315.35 -619.88 14.77 30.28 8.21 -79.20 -20.79 0.0 -1502.76 1 1akr iteration_new_2
915 2001 0.68 14.58 130.36 -828.01 -229.79 -557.15 11.83 31.39 7.35 -119.33 -37.25 0.0 -1590.62 1 1hmt iteration_new_2
1166 2001 0.69 18.85 130.76 -1124.92 -170.22 -671.62 8.97 31.78 13.98 -62.96 -0.00 0.0 -1844.24 0 1vls iteration_new_2
1236 2001 0.69 14.02 145.65 -678.39 -325.58 -614.34 18.53 31.58 8.15 -87.81 -22.24 0.0 -1524.44 0 1akr iteration_new_2
158 2001 0.69 11.17 86.99 -507.68 -146.03 -351.33 7.38 14.06 7.19 -58.83 -10.74 0.0 -958.99 0 1opd iteration_new_2
914 2001 0.69 14.29 142.25 -837.88 -219.13 -562.77 12.41 30.72 5.20 -112.34 -37.77 0.0 -1579.32 0 1hmt iteration_new_2
383 2001 0.70 12.69 108.26 -668.03 -189.30 -438.96 9.71 16.68 7.13 -48.38 -5.62 0.0 -1208.51 1 1kte iteration_new_2
187 2001 0.71 11.56 80.02 -466.04 -158.14 -352.50 5.56 13.50 7.43 -45.22 -4.95 0.0 -920.36 1 1ptf iteration_new_2
61 2001 0.71 11.37 82.44 -310.27 -128.46 -327.11 6.56 11.94 4.48 -41.22 -6.42 0.0 -708.06 1 1vcc iteration_new_2
47 2001 0.72 12.93 84.90 -425.25 -84.48 -294.83 7.01 13.97 5.34 -54.32 -11.03 0.0 -758.70 1 1tif iteration_new_2
887 2001 0.72 13.59 127.83 -794.34 -211.99 -563.52 7.19 25.29 12.12 -72.25 -10.86 0.0 -1480.53 1 1pdo iteration_new_2
382 2001 0.72 13.07 99.31 -667.39 -191.80 -440.81 10.83 16.89 9.78 -50.05 -6.14 0.0 -1219.38 0 1kte iteration_new_2
1068 2001 0.73 14.26 148.00 -549.53 -201.81 -577.29 17.55 25.33 6.59 -84.59 -10.77 0.0 -1226.53 0 2sns iteration_new_2
1026 2001 0.74 13.65 133.62 -754.60 -293.32 -578.64 20.18 28.87 6.00 -62.52 -8.52 0.0 -1508.92 0 5nul iteration_new_2
943 2001 0.77 13.51 124.54 -634.02 -196.18 -551.95 13.86 20.82 5.14 -56.81 -4.58 0.0 -1279.20 1 1c52 iteration_new_2
145 2001 0.78 11.98 76.96 -281.84 -159.84 -324.28 5.77 23.52 3.15 -54.63 -17.39 0.0 -728.57 1 1pht iteration_new_2
102 2001 0.78 11.20 94.17 -448.58 -136.40 -336.36 6.10 12.69 2.79 -33.20 -3.04 0.0 -841.85 0 451c iteration_new_2
60 2001 0.80 11.60 86.54 -313.09 -124.90 -328.99 8.90 22.22 2.71 -41.57 -8.71 0.0 -696.89 0 1vcc iteration_new_2
46 2001 0.80 12.77 72.56 -420.59 -87.01 -312.69 7.52 13.14 2.12 -51.74 -10.04 0.0 -786.73 0 1tif iteration_new_2
340 2001 0.80 12.21 105.22 -516.17 -165.76 -414.28 7.19 16.75 4.28 -55.05 -4.87 0.0 -1022.71 0 1opc iteration_new_2
327 2001 0.81 11.86 103.93 -539.89 -154.50 -432.64 13.77 17.28 5.77 -55.15 -12.60 0.0 -1054.03 1 1bm8 iteration_new_2
88 2001 0.82 11.51 86.93 -228.77 -161.91 -334.00 8.47 13.62 0.94 -62.10 -9.71 0.0 -686.53 0 1bdo iteration_new_2
326 2001 0.83 11.94 105.03 -534.61 -152.13 -433.09 11.69 21.78 4.93 -58.55 -12.00 0.0 -1046.96 0 1bm8 iteration_new_2
844 2001 0.83 12.98 136.56 -653.14 -213.01 -556.55 9.76 27.81 7.30 -71.09 -8.55 0.0 -1320.91 0 3chy iteration_new_2
704 2001 0.85 15.71 115.21 -613.44 -200.24 -514.24 13.75 27.50 6.87 -85.75 -20.25 0.0 -1270.59 0 1dhn iteration_new_2
75 2001 0.86 12.00 83.42 -380.98 -131.02 -345.74 6.01 15.72 5.47 -52.85 -9.89 0.0 -809.87 1 1by9 iteration_new_2
214 2001 0.89 12.11 98.94 -441.05 -113.03 -383.18 6.43 19.25 5.03 -60.40 -9.43 0.0 -877.45 0 1tig iteration_new_2
215 2001 0.91 12.28 95.68 -448.23 -112.52 -390.79 5.67 22.91 3.75 -58.99 -9.40 0.0 -891.93 1 1tig iteration_new_2

182 rows × 17 columns


In [100]:
# Rows of the 'iteration_start_native_iter5' folder, ordered from lowest to highest Q.
(
    sub_data
    .query("Folder == 'iteration_start_native_iter5'")
    .sort_values(by="Q")
)


Out[100]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
573 251 0.42 13.96 158.56 -583.02 -227.37 -449.44 10.23 39.96 16.49 -28.72 -2.42 0.0 -1065.73 1 1bea iteration_start_native_iter5
586 251 0.44 13.75 184.31 -535.81 -231.13 -486.63 9.76 44.32 9.28 -50.54 -5.29 0.0 -1061.73 0 1rmd iteration_start_native_iter5
587 251 0.44 13.66 174.37 -523.61 -246.44 -483.52 16.57 49.85 9.09 -41.93 -5.97 0.0 -1051.59 1 1rmd iteration_start_native_iter5
1146 251 0.44 18.67 193.95 -900.15 -154.08 -593.40 13.13 48.63 13.45 -62.46 -3.99 0.0 -1444.92 0 1rss iteration_start_native_iter5
1273 251 0.45 22.08 218.55 -634.58 -182.14 -633.87 14.84 52.40 7.95 -75.20 -15.92 0.0 -1247.98 1 1div iteration_start_native_iter5
1258 251 0.45 22.27 204.68 -977.26 -176.80 -620.43 12.65 45.91 18.66 -53.36 -0.80 0.0 -1546.75 0 1osa iteration_start_native_iter5
628 251 0.45 14.60 158.82 -897.95 -179.39 -461.79 12.30 43.61 13.68 -46.01 -0.00 0.0 -1356.72 0 2a0b iteration_start_native_iter5
265 251 0.46 12.19 130.04 -496.72 -196.82 -356.50 8.19 32.15 15.55 -24.88 -0.23 0.0 -889.21 1 1rzl iteration_start_native_iter5
40 251 0.46 12.86 101.36 -414.63 -135.61 -276.66 3.95 32.38 9.18 -19.29 -0.00 0.0 -699.31 0 1hyp iteration_start_native_iter5
1259 251 0.47 20.97 224.68 -984.72 -184.83 -613.77 13.36 51.76 15.66 -55.55 -0.78 0.0 -1534.20 1 1osa iteration_start_native_iter5
993 251 0.47 14.29 186.91 -584.07 -264.56 -510.26 9.18 64.29 11.61 -40.70 -9.07 0.0 -1136.68 1 1poc iteration_start_native_iter5
992 251 0.47 14.46 206.61 -589.94 -273.28 -501.29 14.40 52.23 18.89 -46.58 -12.25 0.0 -1131.19 0 1poc iteration_start_native_iter5
1272 251 0.48 22.66 205.66 -629.93 -180.74 -626.78 13.83 48.59 11.53 -81.54 -14.25 0.0 -1253.62 0 1div iteration_start_native_iter5
881 251 0.49 16.36 177.90 -836.37 -183.95 -523.29 8.82 35.40 17.43 -45.71 -0.00 0.0 -1349.77 1 1cpq iteration_start_native_iter5
600 251 0.50 14.64 183.64 -613.90 -232.26 -466.55 10.65 51.43 11.68 -36.49 -4.01 0.0 -1095.81 0 1poa iteration_start_native_iter5
601 251 0.50 14.82 185.53 -609.27 -230.04 -453.34 11.02 56.99 9.72 -31.50 -4.14 0.0 -1065.04 1 1poa iteration_start_native_iter5
1119 251 0.50 15.94 198.13 -887.22 -236.46 -614.16 15.52 40.57 16.04 -44.85 -0.01 0.0 -1512.44 1 1ax8 iteration_start_native_iter5
1049 251 0.50 14.42 182.86 -581.99 -266.11 -541.88 16.00 46.68 9.96 -72.90 -18.67 0.0 -1226.03 1 1pne iteration_start_native_iter5
1216 251 0.51 15.50 195.02 -1103.28 -239.44 -612.59 15.45 51.00 17.61 -51.47 -0.00 0.0 -1727.70 0 1mba iteration_start_native_iter5
1118 251 0.51 15.90 205.27 -894.62 -233.84 -608.96 19.95 47.41 19.85 -41.21 -0.33 0.0 -1486.47 0 1ax8 iteration_start_native_iter5
208 251 0.51 12.79 123.71 -401.77 -130.64 -335.93 6.13 31.09 7.00 -17.80 -3.39 0.0 -721.59 0 1cyo iteration_start_native_iter5
741 251 0.52 16.17 192.45 -838.01 -127.06 -543.52 11.59 44.47 20.59 -41.60 -0.00 0.0 -1281.07 1 1bgf iteration_start_native_iter5
726 251 0.52 14.52 176.96 -577.72 -193.99 -491.49 5.39 42.12 12.79 -32.00 -3.88 0.0 -1061.83 0 1cxc iteration_start_native_iter5
1147 251 0.52 17.96 211.19 -875.34 -139.76 -607.03 12.26 60.09 19.69 -59.86 -3.30 0.0 -1382.06 1 1rss iteration_start_native_iter5
41 251 0.52 11.17 108.97 -413.10 -152.09 -265.56 6.22 32.92 9.40 -14.73 -0.00 0.0 -687.97 1 1hyp iteration_start_native_iter5
474 251 0.52 13.56 154.74 -472.51 -185.80 -398.58 9.28 51.23 7.39 -25.85 -0.44 0.0 -860.54 0 1ycc iteration_start_native_iter5
880 251 0.52 15.45 184.65 -829.33 -197.53 -509.28 9.79 32.16 18.06 -42.32 -0.00 0.0 -1333.80 0 1cpq iteration_start_native_iter5
908 251 0.53 13.87 176.12 -531.72 -248.74 -505.45 24.71 61.22 7.75 -51.84 -4.78 0.0 -1072.75 0 3lzt iteration_start_native_iter5
475 251 0.53 12.94 170.79 -464.97 -186.52 -399.72 7.72 38.49 9.09 -27.37 -0.94 0.0 -853.43 1 1ycc iteration_start_native_iter5
629 251 0.54 14.73 171.51 -898.89 -175.24 -470.35 10.94 35.67 17.54 -45.89 -0.00 0.0 -1354.71 1 2a0b iteration_start_native_iter5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
614 251 0.81 12.07 171.06 -777.16 -182.25 -506.70 16.40 44.23 17.87 -69.81 -12.84 0.0 -1299.20 0 1tmy iteration_start_native_iter5
83 251 0.81 12.14 105.40 -365.98 -127.97 -325.27 12.73 39.33 6.16 -53.90 -10.60 0.0 -720.11 1 1by9 iteration_start_native_iter5
699 251 0.81 14.98 180.45 -500.65 -204.19 -527.70 19.40 41.04 11.62 -95.26 -26.80 0.0 -1102.10 1 2sak iteration_start_native_iter5
853 251 0.81 13.12 168.12 -639.39 -199.75 -534.47 21.23 61.64 13.11 -73.46 -8.54 0.0 -1191.50 1 3chy iteration_start_native_iter5
1160 251 0.81 13.43 223.82 -802.57 -234.81 -594.07 20.58 57.39 15.15 -85.20 -14.77 0.0 -1414.49 0 1jon iteration_start_native_iter5
712 251 0.81 15.77 168.63 -595.31 -183.12 -501.13 16.16 48.28 10.91 -70.08 -18.01 0.0 -1123.66 0 1dhn iteration_start_native_iter5
1035 251 0.82 13.68 189.95 -734.43 -273.27 -564.87 15.04 55.39 7.60 -65.92 -13.26 0.0 -1383.76 1 5nul iteration_start_native_iter5
55 251 0.82 13.01 124.59 -406.66 -81.39 -316.46 14.64 32.15 2.26 -48.58 -9.73 0.0 -689.18 1 1tif iteration_start_native_iter5
825 251 0.82 15.36 204.04 -475.87 -209.36 -566.51 21.02 43.88 9.39 -101.26 -35.78 0.0 -1110.45 1 1rlw iteration_start_native_iter5
335 251 0.82 11.80 137.03 -528.68 -143.59 -414.05 18.59 41.13 9.66 -48.03 -12.30 0.0 -940.25 1 1bm8 iteration_start_native_iter5
278 251 0.82 11.99 161.86 -394.46 -157.44 -384.01 16.42 35.30 6.36 -70.01 -20.90 0.0 -806.89 0 1who iteration_start_native_iter5
26 251 0.83 11.06 110.20 -192.55 -152.70 -292.59 10.16 28.16 3.28 -43.03 -16.25 0.0 -545.32 0 1hoe iteration_start_native_iter5
377 251 0.83 13.25 146.89 -352.45 -163.55 -425.76 11.72 43.76 9.52 -82.49 -27.31 0.0 -839.68 1 1tul iteration_start_native_iter5
517 251 0.83 13.22 165.72 -558.56 -269.90 -431.47 14.20 19.15 6.81 -93.48 -30.36 0.0 -1177.90 1 2mcm iteration_start_native_iter5
321 251 0.83 12.42 144.75 -224.82 -237.41 -397.74 14.02 36.99 7.38 -59.27 -12.25 0.0 -728.36 1 1plc iteration_start_native_iter5
657 251 0.84 13.99 176.15 -396.02 -235.20 -491.67 12.43 52.94 4.48 -105.76 -28.50 0.0 -1011.15 1 1neu iteration_start_native_iter5
194 251 0.84 11.24 137.61 -468.31 -150.49 -359.79 9.29 27.90 8.85 -48.25 -9.21 0.0 -852.40 0 1ptf iteration_start_native_iter5
195 251 0.84 11.28 114.96 -468.26 -139.90 -355.54 10.35 15.79 9.24 -50.16 -9.13 0.0 -872.66 1 1ptf iteration_start_native_iter5
82 251 0.84 12.27 123.47 -368.85 -122.05 -323.58 13.50 29.26 9.09 -49.28 -10.21 0.0 -698.65 0 1by9 iteration_start_native_iter5
376 251 0.84 13.12 142.72 -351.82 -168.75 -421.63 13.40 47.83 6.03 -70.06 -27.00 0.0 -829.28 0 1tul iteration_start_native_iter5
1006 251 0.84 15.96 198.67 -852.20 -219.69 -555.81 16.96 47.71 14.44 -113.26 -29.30 0.0 -1492.49 0 1aqt iteration_start_native_iter5
1007 251 0.84 15.79 225.54 -851.39 -218.81 -563.39 19.27 41.04 12.49 -109.92 -27.08 0.0 -1472.25 1 1aqt iteration_start_native_iter5
615 251 0.85 12.39 175.54 -784.90 -183.39 -504.46 13.41 33.29 14.30 -71.34 -13.13 0.0 -1320.67 1 1tmy iteration_start_native_iter5
460 251 0.86 13.16 156.53 -485.45 -213.79 -423.21 14.83 30.71 5.70 -74.27 -21.00 0.0 -1009.95 0 1bkf iteration_start_native_iter5
97 251 0.86 11.44 125.86 -226.79 -164.47 -338.54 8.87 36.08 3.98 -53.72 -10.62 0.0 -619.35 1 1bdo iteration_start_native_iter5
167 251 0.87 11.22 118.96 -503.41 -144.14 -353.10 7.39 23.11 6.76 -48.33 -9.17 0.0 -901.92 1 1opd iteration_start_native_iter5
404 251 0.88 12.50 160.55 -690.28 -187.47 -454.90 8.83 37.92 9.67 -51.33 -11.61 0.0 -1178.64 0 1erv iteration_start_native_iter5
96 251 0.88 11.54 115.40 -223.03 -160.05 -329.77 12.11 22.15 3.28 -51.14 -8.51 0.0 -619.55 0 1bdo iteration_start_native_iter5
223 251 0.88 12.25 136.67 -434.49 -102.61 -368.80 7.73 36.66 8.90 -56.75 -7.77 0.0 -780.47 1 1tig iteration_start_native_iter5
166 251 0.88 11.20 128.87 -505.29 -143.41 -354.88 7.98 30.70 8.92 -44.22 -10.63 0.0 -881.96 0 1opd iteration_start_native_iter5

182 rows × 17 columns


In [80]:
# Q distribution per folder.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
sns.boxplot(x="Folder", y="Q", data=sub_data)


Out[80]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a3081c128>

In [38]:
# Gather the `info.dat` table of every (folder, protein, run) combination into
# one frame and cache it as a date-stamped CSV.
simulationType = "mass_iterative_run"
run_n = 2
# Earlier folder selection, superseded by the list below:
# folder_list = ["iteration_0_cbd", "iteration_1_cbd", "iteration_2_cbd", "iteration_start_native", "iteration_start_native_iter2"]
folder_list = ["iteration_0_cbd", "iteration_start_native", "iteration_start_native_iter2", 
               "iteration_start_native_iter3", "iteration_new_1"]

info_file = "info.dat"
all_data = []

for folder in folder_list:
    for pdb in pdb_list:
        for i in range(run_n):
            pre = f"/Users/weilu/Research/server/mar_2020/{simulationType}/{folder}/{pdb}/{i}"
            location = f"{pre}/{info_file}"
            try:
                # Raw string: "\s" inside a plain literal is an invalid escape sequence.
                tmp = pd.read_csv(location, sep=r"\s+")
            except Exception as err:
                # Best effort: report the run that could not be read and keep going.
                print(pdb, i, folder, repr(err))
                continue
            # Tag every row with where it came from.
            tmp = tmp.assign(Run=i, Protein=pdb, Folder=folder)
            all_data.append(tmp)
data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
# The cache file is named after the last folder in `folder_list`
# (spelled out here instead of relying on the leaked loop variable).
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{folder_list[-1]}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)


/Users/weilu/Research/data/openMM/mass_iterative_run_iteration_new_1_03-10.csv

In [39]:
# Reload the cached table; older snapshots are kept commented out for reference.
# data = pd.read_csv("/Users/weilu/Research/data/openMM/mass_iterative_run_iteration_0_02-07.csv", index_col=0)
# data = pd.read_csv("/Users/weilu/Research/data/openMM/mass_iterative_run_iteration_1_02-10.csv", index_col=0)
# data = pd.read_csv("/Users/weilu/Research/data/openMM/mass_iterative_run_iteration_2_02-11.csv", index_col=0)
data = pd.read_csv(
    "/Users/weilu/Research/data/openMM/mass_iterative_run_iteration_new_1_03-10.csv",
    index_col=0,
)

# Encode Protein as a categorical whose categories are the entries of pdb_list.
sub_pdb_list = pdb_list
data["Protein"] = pd.Categorical(data["Protein"], categories=sub_pdb_list)

In [40]:
# Keep one row per (Protein, Folder, Run): the row in which column `y` is largest.
# With y = "Steps" this is the row with the highest step count, despite the
# `max_Q_data` name that later cells expect.
y = "Steps"
# Fresh 0..n-1 index so the labels returned by idxmax identify rows uniquely.
d = data.reset_index(drop=True)
# observed=True skips categorical Protein values that have no rows at all.
t = d.groupby(["Protein", "Folder", "Run"], observed=True)[y].idxmax().reset_index()
# idxmax yields index labels, so select by label (.loc) rather than by position (.iloc).
max_Q_data = d.loc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data

In [41]:
# Overlay the Q distribution of each folder (one hue per folder) on a single facet.
g = sns.FacetGrid(sub_data, hue="Folder", height=5, aspect=1.618).map(sns.distplot, "Q")
plt.legend()


Out[41]:
<matplotlib.legend.Legend at 0x1a3247cba8>

In [42]:
# Rows of the 'iteration_start_native_iter3' folder, ordered from lowest to highest Q.
(
    sub_data
    .query("Folder == 'iteration_start_native_iter3'")
    .sort_values(by="Q")
)


Out[42]:
Steps Q Rg Backbone Rama Contact Fragment Exclude_Side SideChain Exclude Beta Pap Helical Total Run Protein Folder
859 251 0.35 17.68 202.36 -971.96 -328.98 -549.92 10.71 47.91 15.17 -46.44 0.00 0.0 -1621.15 1 1flp iteration_start_native_iter3
918 251 0.35 19.33 211.52 -1111.01 -261.76 -635.26 5.42 60.10 18.31 -61.37 0.00 0.0 -1774.06 0 1vls iteration_start_native_iter3
599 251 0.35 17.08 172.64 -836.35 -245.93 -512.32 10.37 48.03 15.01 -40.44 -0.00 0.0 -1388.99 1 1bgf iteration_start_native_iter3
968 251 0.35 16.65 204.11 -662.84 -320.27 -580.70 9.58 61.01 10.53 -60.96 -13.19 0.0 -1352.73 0 1akr iteration_start_native_iter3
699 251 0.36 17.54 184.43 -834.48 -245.55 -515.32 6.29 43.76 15.42 -48.01 -0.00 0.0 -1393.47 1 1cpq iteration_start_native_iter3
929 251 0.37 15.54 226.69 -621.79 -353.08 -557.20 14.60 64.45 11.99 -42.77 -4.24 0.0 -1261.36 1 1lba iteration_start_native_iter3
598 251 0.38 16.89 179.09 -840.74 -255.74 -510.26 15.16 65.13 14.57 -41.32 -0.00 0.0 -1374.10 0 1bgf iteration_start_native_iter3
978 251 0.38 21.56 207.30 -984.95 -299.63 -600.31 12.43 55.55 17.14 -49.35 -0.00 0.0 -1641.82 0 1osa iteration_start_native_iter3
858 251 0.38 16.72 187.36 -977.35 -323.86 -557.85 14.11 47.15 19.10 -45.80 -0.00 0.0 -1637.14 0 1flp iteration_start_native_iter3
758 251 0.38 15.09 204.86 -619.74 -248.22 -518.86 9.83 36.57 10.04 -44.75 -2.17 0.0 -1172.44 0 1c52 iteration_start_native_iter3
309 251 0.38 14.20 166.17 -281.61 -304.65 -380.77 5.64 50.35 4.34 -44.08 -9.15 0.0 -793.76 1 1skz iteration_start_native_iter3
979 251 0.38 21.40 220.64 -991.27 -288.93 -607.31 8.68 50.29 19.37 -47.75 -0.00 0.0 -1636.28 1 1osa iteration_start_native_iter3
848 251 0.38 17.24 197.07 -545.07 -227.36 -540.48 17.63 53.71 6.17 -52.61 -6.97 0.0 -1097.89 0 2sns iteration_start_native_iter3
969 251 0.39 15.99 231.09 -663.63 -322.92 -582.23 7.13 56.25 10.40 -53.84 -10.56 0.0 -1328.31 1 1akr iteration_start_native_iter3
928 251 0.39 15.59 213.93 -638.07 -348.46 -565.40 13.94 51.91 10.91 -45.50 -7.14 0.0 -1313.88 0 1lba iteration_start_native_iter3
478 251 0.39 14.85 168.63 -517.59 -293.80 -495.55 14.31 47.46 10.21 -33.75 -5.38 0.0 -1105.45 0 1rmd iteration_start_native_iter3
639 251 0.40 14.91 193.85 -327.84 -273.03 -493.11 11.08 51.76 4.58 -74.57 -16.18 0.0 -923.45 1 1bfg iteration_start_native_iter3
308 251 0.40 14.92 169.85 -278.91 -307.50 -368.07 7.24 40.67 6.14 -42.24 -6.77 0.0 -779.61 0 1skz iteration_start_native_iter3
988 251 0.41 27.72 229.26 -635.55 -193.79 -618.74 8.61 45.25 14.39 -75.20 -12.10 0.0 -1237.88 0 1div iteration_start_native_iter3
269 251 0.41 16.52 157.95 -405.02 -203.58 -412.50 12.81 42.69 9.42 -38.77 -8.11 0.0 -845.11 1 1puc iteration_start_native_iter3
28 251 0.41 13.89 109.28 -413.75 -157.85 -268.19 3.16 34.22 7.23 -15.74 0.00 0.0 -701.65 0 1hyp iteration_start_native_iter3
638 251 0.41 15.00 198.52 -334.31 -271.98 -495.44 7.92 52.49 6.26 -76.29 -16.03 0.0 -928.85 0 1bfg iteration_start_native_iter3
759 251 0.41 15.75 182.87 -620.38 -243.13 -514.87 10.39 43.23 12.73 -48.13 -3.02 0.0 -1180.29 1 1c52 iteration_start_native_iter3
769 251 0.42 15.04 214.21 -675.11 -303.10 -506.46 9.04 56.92 13.06 -54.20 -12.40 0.0 -1258.05 1 1kuh iteration_start_native_iter3
158 251 0.43 13.96 142.19 -393.42 -173.08 -333.01 5.28 42.45 10.93 -19.48 -2.70 0.0 -720.83 0 1cyo iteration_start_native_iter3
508 251 0.43 15.64 153.02 -905.72 -215.82 -473.70 7.52 46.05 20.20 -48.77 -0.00 0.0 -1417.22 0 2a0b iteration_start_native_iter3
849 251 0.43 16.99 185.66 -542.63 -218.94 -544.46 14.64 50.15 5.46 -54.31 -5.16 0.0 -1109.58 1 2sns iteration_start_native_iter3
688 251 0.43 15.57 197.53 -804.43 -231.66 -529.82 9.71 59.01 14.74 -36.81 -0.00 0.0 -1321.72 0 1rcb iteration_start_native_iter3
378 251 0.43 13.28 162.70 -475.37 -237.12 -392.22 6.69 50.01 6.38 -26.87 -0.57 0.0 -906.38 0 1ycc iteration_start_native_iter3
549 251 0.44 13.89 164.43 -380.05 -321.23 -486.64 17.68 46.26 7.76 -44.98 -6.29 0.0 -1003.05 1 1b6e iteration_start_native_iter3
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
439 251 0.74 16.69 160.35 -503.04 -194.99 -468.27 13.83 44.64 5.33 -62.79 -14.58 0.0 -1019.52 1 2pii iteration_start_native_iter3
738 251 0.74 14.42 183.78 -812.24 -233.22 -558.91 15.28 50.14 6.89 -120.88 -36.63 0.0 -1505.77 0 1hmt iteration_start_native_iter3
369 251 0.74 13.23 153.11 -490.06 -207.75 -403.74 13.14 27.32 5.84 -71.52 -20.13 0.0 -993.80 1 1bkf iteration_start_native_iter3
558 251 0.75 15.20 173.24 -503.77 -237.27 -518.25 15.07 60.21 9.17 -98.62 -26.68 0.0 -1126.91 0 2sak iteration_start_native_iter3
419 251 0.75 13.58 157.95 -568.80 -257.29 -416.12 11.05 28.49 3.14 -79.43 -31.01 0.0 -1152.02 1 2mcm iteration_start_native_iter3
418 251 0.75 13.53 157.59 -563.80 -251.70 -433.42 11.23 32.48 2.25 -86.42 -29.74 0.0 -1161.54 0 2mcm iteration_start_native_iter3
218 251 0.75 12.17 140.88 -322.26 -256.78 -362.50 13.94 35.53 8.52 -46.29 -9.25 0.0 -798.22 0 2cbp iteration_start_native_iter3
209 251 0.76 12.48 128.01 -397.74 -199.22 -374.56 8.82 39.49 0.90 -57.67 -13.71 0.0 -865.67 1 1who iteration_start_native_iter3
109 251 0.77 11.95 116.40 -271.55 -153.90 -314.73 7.90 33.59 9.89 -51.20 -15.66 0.0 -639.28 1 1pht iteration_start_native_iter3
208 251 0.78 12.37 137.12 -394.60 -200.36 -378.04 11.62 23.89 2.23 -67.40 -15.91 0.0 -881.46 0 1who iteration_start_native_iter3
399 251 0.78 13.06 167.25 -458.64 -251.10 -452.50 17.92 44.45 8.09 -79.05 -29.92 0.0 -1033.51 1 1sfp iteration_start_native_iter3
938 251 0.78 15.70 212.49 -618.86 -330.90 -587.74 21.64 64.58 9.30 -105.88 -33.81 0.0 -1369.18 0 1aly iteration_start_native_iter3
539 251 0.78 16.53 186.18 -545.87 -219.19 -493.78 9.88 55.81 7.72 -61.13 -23.54 0.0 -1083.92 1 1dun iteration_start_native_iter3
739 251 0.80 14.37 189.10 -809.88 -237.51 -567.03 14.93 60.26 9.51 -116.98 -35.96 0.0 -1493.54 1 1hmt iteration_start_native_iter3
189 251 0.80 13.53 134.66 -478.13 -184.44 -363.47 7.58 20.22 2.72 -62.80 -21.98 0.0 -945.64 1 1fna iteration_start_native_iter3
648 251 0.80 13.90 174.49 -646.58 -285.73 -509.38 14.52 41.09 11.53 -87.65 -22.33 0.0 -1310.05 0 1opy iteration_start_native_iter3
119 251 0.80 11.67 114.07 -506.10 -142.84 -346.21 4.67 37.33 12.33 -47.66 -7.45 0.0 -881.86 1 1opd iteration_start_native_iter3
118 251 0.80 11.73 128.73 -507.52 -146.23 -349.60 5.30 27.61 8.14 -47.07 -9.01 0.0 -889.66 0 1opd iteration_start_native_iter3
649 251 0.81 13.93 197.79 -641.70 -290.61 -508.07 18.05 45.21 15.10 -94.36 -25.23 0.0 -1283.82 1 1opy iteration_start_native_iter3
839 251 0.81 13.90 206.17 -536.05 -368.13 -616.09 26.37 80.77 7.15 -107.44 -44.75 0.0 -1352.00 1 1lcl iteration_start_native_iter3
289 251 0.81 13.35 157.51 -350.02 -225.00 -431.11 8.55 46.07 4.31 -60.99 -26.21 0.0 -876.89 1 1tul iteration_start_native_iter3
188 251 0.81 13.38 161.42 -479.08 -181.83 -360.64 6.22 29.45 2.07 -67.95 -23.75 0.0 -914.08 0 1fna iteration_start_native_iter3
108 251 0.82 11.77 117.64 -277.16 -154.05 -317.97 7.94 39.69 4.09 -54.95 -16.86 0.0 -651.63 0 1pht iteration_start_native_iter3
398 251 0.83 13.13 164.01 -451.68 -251.06 -453.12 11.49 50.15 5.37 -80.33 -26.07 0.0 -1031.23 0 1sfp iteration_start_native_iter3
838 251 0.83 13.92 214.39 -529.80 -361.96 -622.09 18.83 63.55 2.24 -107.64 -41.16 0.0 -1363.63 0 1lcl iteration_start_native_iter3
288 251 0.83 13.40 169.06 -344.28 -216.54 -424.86 13.74 48.21 3.51 -74.24 -27.38 0.0 -852.80 0 1tul iteration_start_native_iter3
68 251 0.84 11.70 118.34 -228.11 -168.64 -322.80 12.62 34.12 3.34 -43.14 -8.17 0.0 -602.43 0 1bdo iteration_start_native_iter3
219 251 0.85 12.09 141.97 -326.27 -246.50 -372.17 14.20 34.24 6.51 -50.85 -9.63 0.0 -808.49 1 2cbp iteration_start_native_iter3
168 251 0.85 12.48 124.33 -438.82 -143.88 -370.09 9.16 35.50 5.57 -48.94 -7.34 0.0 -834.52 0 1tig iteration_start_native_iter3
69 251 0.91 11.65 114.29 -227.58 -169.59 -332.91 8.60 48.54 3.10 -44.70 -7.91 0.0 -608.16 1 1bdo iteration_start_native_iter3

198 rows × 17 columns


In [10]:
# Q distribution per folder.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
sns.boxplot(x="Folder", y="Q", data=sub_data)


Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a22072d68>

In [4]:
# Gather the `info.dat` table of every (folder, protein, run) combination into
# one frame and cache it as a date-stamped CSV.
simulationType = "mass_iterative_run"
run_n = 1
folder_list = ["iteration_0", "iteration_1", "iteration_2", "iteration_3"]
info_file = "info.dat"
all_data = []

for folder in folder_list:
    for pdb in pdb_list:
        for i in range(run_n):
            pre = f"/Users/weilu/Research/server/feb_2020/{simulationType}/{folder}/{pdb}/{i}"
            location = f"{pre}/{info_file}"
            try:
                # Raw string: "\s" inside a plain literal is an invalid escape sequence.
                tmp = pd.read_csv(location, sep=r"\s+")
            except Exception as err:
                # Best effort: report the run that could not be read and keep going.
                print(pdb, i, folder, repr(err))
                continue
            # Tag every row with where it came from.
            tmp = tmp.assign(Run=i, Protein=pdb, Folder=folder)
            all_data.append(tmp)
data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
# The cache file is named after the last folder in `folder_list`
# (spelled out here instead of relying on the leaked loop variable).
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{folder_list[-1]}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)


/Users/weilu/Research/data/openMM/mass_iterative_run_iteration_3_02-17.csv

In [357]:
# Reload the cached table; older snapshots are kept commented out for reference.
# data = pd.read_csv("/Users/weilu/Research/data/openMM/mass_iterative_run_iteration_0_02-07.csv", index_col=0)
# data = pd.read_csv("/Users/weilu/Research/data/openMM/mass_iterative_run_iteration_1_02-10.csv", index_col=0)
# data = pd.read_csv("/Users/weilu/Research/data/openMM/mass_iterative_run_iteration_2_02-11.csv", index_col=0)
data = pd.read_csv(
    "/Users/weilu/Research/data/openMM/mass_iterative_run_iteration_3_02-17.csv",
    index_col=0,
)

# Categorical encoding of Protein is disabled for this dataset:
# sub_pdb_list = pdb_list
# data.Protein = pd.Categorical(data.Protein, 
#                       categories=sub_pdb_list)

In [358]:
# Keep one row per (Protein, Folder): the row in which column `y` is largest.
# With y = "Steps" this is the row with the highest step count, despite the
# `max_Q_data` name that later cells expect.
y = "Steps"
# Fresh 0..n-1 index so the labels returned by idxmax identify rows uniquely.
d = data.reset_index(drop=True)
# observed=True skips categorical Protein values that have no rows at all.
t = d.groupby(["Protein", "Folder"], observed=True)[y].idxmax().reset_index()
# idxmax yields index labels, so select by label (.loc) rather than by position (.iloc).
max_Q_data = d.loc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data

In [359]:
def distplot_fig(data, x, hue=None, row=None, col=None, legend=True, hist=False, **kwargs):
    """Map `sns.distplot` of column `x` over a `sns.FacetGrid`.

    Parameters
    ----------
    data : frame handed to `sns.FacetGrid`; `data[x]` is read when bin edges
        have to be computed.
    x : name of the column to plot.
    hue, row, col : optional facet variables forwarded to `sns.FacetGrid`.
    legend : when true, a legend titled `hue` is added, provided `hue` is set
        and differs from `x`, `row` and `col`.
    hist : forwarded to `sns.distplot`.
    **kwargs : forwarded to `sns.distplot`; a `bins` entry, if present, is
        used instead of the computed edges.

    Returns
    -------
    The `FacetGrid` that was drawn on.
    """
    bins = kwargs.pop('bins', None)
    if hist and bins is None:
        # One set of edges computed from the whole column, shared by all groups.
        bins = np.histogram_bin_edges(data[x].dropna())

    grid = sns.FacetGrid(data, hue=hue, row=row, col=col)
    grid.map(sns.distplot, x, bins=bins, hist=hist, **kwargs)

    wants_legend = legend and hue is not None
    if wants_legend and hue not in [x, row, col]:
        grid.add_legend(title=hue)
    return grid

In [363]:
# Contact column per folder, drawn horizontally (Contact on x, Folder on y).
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
sns.boxplot(x="Contact", y="Folder", data=sub_data)


Out[363]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a288ce470>

In [19]:
# Overlay the Q distribution of each folder (one hue per folder) on a single facet.
g = sns.FacetGrid(sub_data, hue="Folder", height=5, aspect=1.618).map(sns.distplot, "Q")
plt.legend()


Out[19]:
<matplotlib.legend.Legend at 0x1a2d83e390>

In [20]:
# Q distribution per folder.
# x/y are passed by keyword: seaborn >= 0.12 rejects them as positional arguments.
sns.boxplot(x="Folder", y="Q", data=sub_data)


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2c242780>

In [26]:
# Print the protein ordering stored in `new_order`.
# NOTE(review): within this section `new_order` is first assigned in the cell below
# (In [24]), which was executed before this one (In [26]); unless an earlier cell
# also defines it, this cell fails on a top-to-bottom run.
print(new_order)


['1PUC', '1HTP', '2ACY', '2MCM', '1ALY', '1C52', '2PII', '1BB9', '1KUH', '2TGI', '1BQK', '1CYO', '1RLW', '1AX8', '1FMB', '1BY9', '1FNA', '1KPF', '1POC', '1OPD', '1WHO', '1CC5', '1SKZ', '1RSS', '2SNS', '3CYR', '1B6E', '1MSC', '1AKR', '1OSA', '1BY2', '2END', '1BKF', '2CBP', '1BGF', '1CTJ', '1AQE', '3LZT', '1HMT', '1DIV', '1A6F', '1GVP', '1PLC', '1SFP', '1CXC', '1MAI', '2A0B', '1YCC', '1RMD', '1BTN', '1LBA', '1CRB', '1CPQ', '1PTF', '1DUN', '1VCC', '1MBA', '1WHI', '1KTE', '1OPC', '1LCL', '1PKP', '1A32', '451C', '1TIF', '3PYP', '1BEA', '1A1X', '2HBG', '1POA', '1RIE', '7RSA', '1HYP', '1FLP', '1RCB', '1BM8', '1OPY', '1AQT', '1DHN', '1TFE', '5NUL', '1PNE', '1VLS', '1JON', '1RZL', '1BFG', '2SAK', '1PDO', '1PHT', '1HOE', '1BAJ', '3VUB', '3CHY', '1ERV', '1TMY', '1NEU', '1TIG', '1BDO', '1TUL']

In [24]:
# Protein order for the x-axis: ascending Q within the 'iteration_3' folder.
# `.tolist()` is used because `unique()` returns a NumPy array for an object
# column (no `.to_list()` there) and a Categorical for a categorical one;
# both provide `.tolist()`.
new_order = max_Q_data.query("Folder == 'iteration_3'").sort_values("Q")["Protein"].unique().tolist()
sub_data = max_Q_data.sort_values("Q").reset_index(drop=True).reset_index()
# Re-encode Protein as a categorical whose categories are listed in `new_order`.
sub_data["Protein"] = pd.Categorical(sub_data["Protein"].astype(str), categories=new_order)
ax = sns.lineplot(x="Protein", y="Q", markers=True, ms=10, style="Folder", hue="Folder", data=sub_data, dashes=False, sort=True)



In [73]:
# Protein order for the x-axis: ascending Q within the 'iteration_2' folder.
# `.tolist()` is used because `unique()` returns a NumPy array for an object
# column (no `.to_list()` there) and a Categorical for a categorical one;
# both provide `.tolist()`.
new_order = max_Q_data.query("Folder == 'iteration_2'").sort_values("Q")["Protein"].unique().tolist()
sub_data = max_Q_data.sort_values("Q").reset_index(drop=True).reset_index()
# Re-encode Protein as a categorical whose categories are listed in `new_order`.
sub_data["Protein"] = pd.Categorical(sub_data["Protein"].astype(str), categories=new_order)
ax = sns.lineplot(x="Protein", y="Q", markers=True, ms=10, style="Folder", hue="Folder", data=sub_data, dashes=False, sort=True)



In [71]:
# Protein order for the x-axis: ascending Q within the 'iteration_0' folder.
# `.tolist()` is used because `unique()` returns a NumPy array for an object
# column (no `.to_list()` there) and a Categorical for a categorical one;
# both provide `.tolist()`.
new_order = max_Q_data.query("Folder == 'iteration_0'").sort_values("Q")["Protein"].unique().tolist()
sub_data = max_Q_data.sort_values("Q").reset_index(drop=True).reset_index()
# Re-encode Protein as a categorical whose categories are listed in `new_order`.
sub_data["Protein"] = pd.Categorical(sub_data["Protein"].astype(str), categories=new_order)
ax = sns.lineplot(x="Protein", y="Q", markers=True, ms=10, style="Folder", hue="Folder", data=sub_data, dashes=False, sort=True)



In [67]:
# Protein order for the x-axis: ascending Q within the 'iteration_0' folder.
# `.tolist()` is used because `unique()` returns a NumPy array for an object
# column (no `.to_list()` there) and a Categorical for a categorical one;
# both provide `.tolist()`.
new_order = max_Q_data.query("Folder == 'iteration_0'").sort_values("Q")["Protein"].unique().tolist()
sub_data = max_Q_data.sort_values("Q").reset_index(drop=True).reset_index()
# Re-encode Protein as a categorical whose categories are listed in `new_order`.
sub_data["Protein"] = pd.Categorical(sub_data["Protein"].astype(str), categories=new_order)
ax = sns.lineplot(x="Protein", y="Q", markers=True, ms=10, style="Folder", hue="Folder", data=sub_data, dashes=False, sort=True)



In [47]:
# Keep one row per (Protein, Folder): the row in which column `y` (here Q) is largest.
y = "Q"
# Fresh 0..n-1 index so the labels returned by idxmax identify rows uniquely.
d = data.reset_index(drop=True)
# observed=True skips categorical Protein values that have no rows at all.
t = d.groupby(["Protein", "Folder"], observed=True)[y].idxmax().reset_index()
# idxmax yields index labels, so select by label (.loc) rather than by position (.iloc).
max_Q_data = d.loc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data

In [50]:
# Protein order: ascending Q within the 'iteration_0' folder.
# `.tolist()` is used because `unique()` returns a NumPy array for an object
# column (no `.to_list()` there) and a Categorical for a categorical one;
# both provide `.tolist()`.
new_order = max_Q_data.query("Folder == 'iteration_0'").sort_values("Q")["Protein"].unique().tolist()

In [59]:
# Sort by Q, renumber the rows, and expose the new row number as an `index` column.
sub_data = max_Q_data.sort_values("Q").reset_index(drop=True).reset_index()
# Protein becomes a categorical whose categories are listed in `new_order`.
sub_data["Protein"] = pd.Categorical(sub_data["Protein"].astype(str), categories=new_order)

In [61]:
# One line per folder: column `y` against Protein, sorted along x.
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
    sort=True,
)



In [22]:
# sub_data = max_Q_data.query("Scheme in ['hybrid contact', 'contact as in water', 'contact as in membrane']")
# sub_data = max_Q_mem_data
# One line per folder: column `y` against Protein.
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)
# _ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')



In [10]:
# sub_data = max_Q_data.query("Scheme in ['hybrid contact', 'contact as in water', 'contact as in membrane']")
# sub_data = max_Q_mem_data
# One line per folder: column `y` against Protein.
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)
# _ = ax.set_xticklabels(labels=sub_label_list, rotation=0, ha='center')



In [ ]:


In [ ]:


In [165]:
# Gather the `info.dat` table of every (folder, protein, run) combination into
# one frame and cache it as a date-stamped CSV.
pdb_list = dataset["optimization_cath"]
simulationType = "optimization_database"
run_n = 5
folder_list = ["iter0_gpu"]
info_file = "info.dat"
all_data = []

for folder in folder_list:
    for pdb in pdb_list:
        for i in range(run_n):
            pre = f"/Users/weilu/Research/server/dec_2019/{simulationType}/{folder}/{pdb}/{i}"
            location = f"{pre}/{info_file}"
            try:
                # Raw string: "\s" inside a plain literal is an invalid escape sequence.
                tmp = pd.read_csv(location, sep=r"\s+")
            except Exception as err:
                # Best effort: report the run that could not be read and keep going.
                print(pdb, i, folder, repr(err))
                continue
            # Tag every row with where it came from.
            tmp = tmp.assign(Run=i, Protein=pdb, Folder=folder)
            all_data.append(tmp)
data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
# The cache file is named after the last folder in `folder_list`
# (spelled out here instead of relying on the leaked loop variable).
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{folder_list[-1]}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)


/Users/weilu/Research/data/openMM/optimization_database_iter0_gpu_12-29.csv

In [167]:
# Reload the cached table written by the loading cell.
data = pd.read_csv(
    "/Users/weilu/Research/data/openMM/optimization_database_iter0_gpu_12-29.csv",
    index_col=0,
)
# Encode Protein as a categorical whose categories are the entries of pdb_list.
sub_pdb_list = pdb_list
data["Protein"] = pd.Categorical(data["Protein"], categories=sub_pdb_list)

In [170]:
# Display the current `max_Q_data` frame.
# NOTE(review): no cell between the CSV reload above and this display recomputes
# `max_Q_data`; presumably it was rebuilt by a cell run out of order — confirm
# before relying on this output.
max_Q_data


Out[170]:
Steps Q Rg Backbone Rama Contact Fragment Membrane ER TBM_Q Beta Pap Helical Total Run Protein Folder
0 366 0.56 11.60 408.51 -574.28 -220.53 -202.01 0.0 0.0 0.0 -24.93 -0.00 0.0 -613.24 0 1a75A00 iter0_gpu
1 478 0.30 15.25 533.60 -758.93 -330.51 -274.89 0.0 0.0 0.0 -27.71 -1.52 0.0 -859.96 0 1bekA01 iter0_gpu
2 271 0.39 14.98 584.51 -793.09 -181.79 -237.88 0.0 0.0 0.0 -30.24 -0.02 0.0 -658.51 4 1bqbA02 iter0_gpu
3 497 0.41 15.49 556.91 -1226.64 -328.39 -345.68 0.0 0.0 0.0 -59.33 -0.00 0.0 -1403.12 2 1cpcB00 iter0_gpu
4 447 0.46 13.25 369.69 -675.83 -179.42 -183.97 0.0 0.0 0.0 -24.79 -0.00 0.0 -694.33 2 1cscA02 iter0_gpu
5 336 0.69 11.34 361.05 -668.22 -163.57 -194.35 0.0 0.0 0.0 -25.60 -0.00 0.0 -690.68 4 1cy5A00 iter0_gpu
6 478 0.59 10.54 231.20 -352.70 -150.07 -142.83 0.0 0.0 0.0 -19.35 -0.00 0.0 -433.75 0 1dv5A00 iter0_gpu
7 181 0.36 17.51 892.96 -1120.93 -340.00 -323.41 0.0 0.0 0.0 -44.09 -1.14 0.0 -936.62 2 1e8yA05 iter0_gpu
8 407 0.40 13.83 512.47 -855.85 -307.81 -259.17 0.0 0.0 0.0 -40.74 -0.21 0.0 -951.30 4 1evyA02 iter0_gpu
9 484 0.82 10.89 221.51 -556.43 -119.58 -171.95 0.0 0.0 0.0 -28.40 -0.01 0.0 -654.85 3 1in4A03 iter0_gpu
10 446 0.68 12.93 153.87 -275.61 -60.38 -85.50 0.0 0.0 0.0 -11.71 0.00 0.0 -279.33 3 1l1fA03 iter0_gpu
11 500 0.83 9.86 141.58 -301.40 -80.48 -107.35 0.0 0.0 0.0 -18.13 -0.47 0.0 -366.24 0 1vq8P01 iter0_gpu
12 406 0.78 11.94 256.65 -457.75 -105.64 -169.22 0.0 0.0 0.0 -29.10 -1.50 0.0 -506.56 0 1xmkA00 iter0_gpu
13 364 0.40 13.69 437.49 -762.61 -195.02 -222.52 0.0 0.0 0.0 -30.81 -0.01 0.0 -773.48 0 1zcaA02 iter0_gpu
14 442 0.40 14.12 468.02 -1043.29 -260.68 -287.82 0.0 0.0 0.0 -40.31 -0.00 0.0 -1164.09 3 2grhA00 iter0_gpu
15 461 0.73 11.17 263.81 -518.62 -128.63 -160.49 0.0 0.0 0.0 -23.37 -0.00 0.0 -567.30 2 2ii2A04 iter0_gpu
16 485 0.63 11.88 341.95 -605.81 -195.26 -200.22 0.0 0.0 0.0 -25.86 -0.80 0.0 -686.00 1 2q6fB03 iter0_gpu
17 182 0.47 15.13 733.34 -1085.91 -270.03 -313.92 0.0 0.0 0.0 -47.65 -0.00 0.0 -984.17 2 2wh6A00 iter0_gpu
18 495 0.68 11.59 238.12 -436.30 -120.51 -153.13 0.0 0.0 0.0 -17.93 -0.00 0.0 -489.75 0 3g0vA00 iter0_gpu
19 168 0.43 20.60 914.93 -1208.80 -281.06 -355.37 0.0 0.0 0.0 -49.50 -0.00 0.0 -979.79 0 3geuA00 iter0_gpu
20 437 0.65 14.47 493.29 -903.02 -266.94 -290.74 0.0 0.0 0.0 -39.15 -0.00 0.0 -1006.56 4 3h99A03 iter0_gpu
21 401 0.72 11.66 270.38 -484.64 -127.43 -156.23 0.0 0.0 0.0 -21.57 -0.00 0.0 -519.48 2 3hrdD02 iter0_gpu
22 296 0.56 13.19 378.41 -341.56 -135.57 -142.35 0.0 0.0 0.0 -14.94 -0.00 0.0 -256.02 4 3ju5A01 iter0_gpu
23 284 0.44 13.10 434.58 -530.43 -172.48 -176.01 0.0 0.0 0.0 -21.73 -0.00 0.0 -466.07 4 3p1wA03 iter0_gpu
24 382 0.80 12.87 283.79 -612.06 -136.34 -182.44 0.0 0.0 0.0 -26.01 -0.01 0.0 -673.07 1 4cxfA01 iter0_gpu
25 490 0.70 12.60 274.80 -556.95 -149.63 -191.75 0.0 0.0 0.0 -20.00 -0.00 0.0 -643.53 3 4i2aA01 iter0_gpu
26 453 0.84 13.36 169.70 -423.72 -59.95 -117.80 0.0 0.0 0.0 -18.61 -0.00 0.0 -450.38 0 4i4tB03 iter0_gpu
27 290 0.76 11.24 326.89 -536.59 -128.89 -161.98 0.0 0.0 0.0 -26.45 -0.00 0.0 -527.03 1 4i6uB00 iter0_gpu
28 478 0.58 12.52 301.63 -751.65 -191.98 -226.75 0.0 0.0 0.0 -30.85 -0.00 0.0 -899.60 2 5kn9A02 iter0_gpu

In [169]:
# For every (Protein, Folder) pair pick the row with the highest Q and draw
# one marker line per Folder across the proteins.
y = "Q"
d = data
t = (
    d.groupby(["Protein", "Folder"])[y]
    .idxmax()
    .reset_index()
)
# idxmax yields index labels; the positional iloc lookup below therefore
# relies on d carrying a plain 0..n-1 index.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [153]:
# Read the first test set, order it by the Lpdb column, and derive the
# lower-cased PDB id list used by the cells that follow.
d = (
    pd.read_csv(
        "/Users/weilu/Research/server/dec_2019/iterative_optimization/original_pdbs/first_test_set.csv",
        index_col=0,
    )
    .sort_values("Lpdb")
    .reset_index(drop=True)
)
pdb_list = d["PDB"].str.lower().to_list()

In [192]:
# Gather info.dat from every (folder, protein, run) directory of the
# iterative-optimization simulations into one table and cache it as a CSV.
pdb_list = dataset["optimization"]
simulationType = "iterative_optimization"
folder_list = ["iter7_gpu_long", "iter7_gpu", "iter6_gpu", "iter5_gpu", "iter5_withBiased_gpu", "iter0_gpu_less_beta", "iter4_gpu", "first_cpu2", "first_iter1_cpu4", "first_gpu", "iter2_gpu", "iter2_real_gpu", "iter3_gpu"]
all_data = []
for folder in folder_list:
    for pdb in pdb_list:
        for i in range(10):
            pre = f"/Users/weilu/Research/server/dec_2019/{simulationType}/{folder}/{pdb}/{i}"
            info_file = "info.dat"
            location = f"{pre}/{info_file}"
            try:
                # Raw string: "\s" inside a plain literal is an invalid escape sequence.
                tmp = pd.read_csv(location, sep=r"\s+")
            except Exception as err:
                # Best effort: a missing or unreadable run is reported and skipped.
                # Catching Exception (not a bare except) keeps Ctrl-C working.
                print(pdb, i, folder, repr(err))
            else:
                # Tag each row with where it came from before stacking.
                tmp = tmp.assign(Run=i, Protein=pdb, Folder=folder)
                all_data.append(tmp)
data = pd.concat(all_data)
today = datetime.today().strftime('%m-%d')
# NOTE: `folder` is whatever the loop ended on (the last entry of folder_list),
# even though the file contains every folder. The name is kept because a
# later cell reloads the CSV by this pattern.
outFile = f"/Users/weilu/Research/data/openMM/{simulationType}_{folder}_{today}.csv"
data.reset_index(drop=True).to_csv(outFile)
print(outFile)


/Users/weilu/Research/data/openMM/iterative_optimization_iter3_gpu_01-03.csv

In [4]:
# Reload the combined table from the CSV path printed by the collection cell
# above; index_col=0 restores the saved row index.
data = pd.read_csv("/Users/weilu/Research/data/openMM/iterative_optimization_iter3_gpu_01-03.csv", index_col=0)


/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/numpy/lib/arraysetops.py:522: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  mask |= (ar1 == a)

In [194]:
# Make Protein categorical so its category order follows pdb_list.
sub_pdb_list = pdb_list
data["Protein"] = pd.Categorical(data["Protein"], categories=sub_pdb_list)

In [196]:
# Spread of Q over the last 20 rows of every (Protein, Run, Folder) group,
# restricted to five selected folders.
y = "Q"
d = data.query(
    "Folder == 'iter7_gpu_long' or "
    "Folder == 'iter5_withBiased_gpu' or "
    "Folder == 'first_gpu' or "
    "Folder == 'iter6_gpu' or "
    "Folder == 'iter7_gpu'"
).reset_index(drop=True)
t = d.groupby(["Protein", "Run", "Folder"]).tail(20)
ax = sns.boxenplot(data=t, x="Protein", y=y, hue="Folder")



In [5]:
# Best Q per (Protein, Folder) for five selected folders, with 1hcd removed
# and the proteins ordered by the "optimization_v2" list.
y = "Q"
d = data.query(
    "Folder == 'iter7_gpu_long' or "
    "Folder == 'iter5_withBiased_gpu' or "
    "Folder == 'first_gpu' or "
    "Folder == 'iter6_gpu' or "
    "Folder == 'iter7_gpu'"
).reset_index(drop=True)
t = (
    d.groupby(["Protein", "Folder"])[y]
    .idxmax()
    .reset_index()
)
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data.query("Protein != '1hcd'").reset_index(drop=True)

# Register the reduced protein list on the shared `dataset` mapping.
dataset["optimization_v2"] = [
    '1e0m', '1w4e', '1e0g', '2wqg', '1jo8', '1fex',
    '2l6r', '1c8c', '1g6p', '1mjc', '2jmc', '1hdn',
    '1st7', '1n88', '1d6o', '2ga5', '1j5u', '3o4d',
]
sub_data["Protein"] = pd.Categorical(
    sub_data["Protein"],
    categories=dataset["optimization_v2"],
)
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [ ]:
# Display the sub_data frame currently held by the kernel.
sub_data

In [195]:
# Best Q per (Protein, Folder) for five selected folders, one line per Folder.
y = "Q"
d = data.query(
    "Folder == 'iter7_gpu_long' or "
    "Folder == 'iter5_withBiased_gpu' or "
    "Folder == 'first_gpu' or "
    "Folder == 'iter6_gpu' or "
    "Folder == 'iter7_gpu'"
).reset_index(drop=True)
t = (
    d.groupby(["Protein", "Folder"])[y]
    .idxmax()
    .reset_index()
)
# d was re-indexed above, so idxmax labels double as row positions for iloc.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [191]:
# Best Q per (Protein, Folder) for four selected folders, one line per Folder.
y = "Q"
d = data.query(
    "Folder == 'iter5_withBiased_gpu' or "
    "Folder == 'first_gpu' or "
    "Folder == 'iter6_gpu' or "
    "Folder == 'iter7_gpu'"
).reset_index(drop=True)
t = (
    d.groupby(["Protein", "Folder"])[y]
    .idxmax()
    .reset_index()
)
# d was re-indexed above, so idxmax labels double as row positions for iloc.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [185]:
# Best Q per (Protein, Folder) for iter5_withBiased_gpu, first_gpu and iter6_gpu.
y = "Q"
d = data.query(
    "Folder == 'iter5_withBiased_gpu' or "
    "Folder == 'first_gpu' or "
    "Folder == 'iter6_gpu'"
).reset_index(drop=True)
t = (
    d.groupby(["Protein", "Folder"])[y]
    .idxmax()
    .reset_index()
)
# d was re-indexed above, so idxmax labels double as row positions for iloc.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [184]:
# Best Q per (Protein, Folder) for iter5_gpu, iter5_withBiased_gpu and first_gpu.
y = "Q"
d = data.query(
    "Folder=='iter5_gpu' or "
    "Folder == 'iter5_withBiased_gpu' or "
    "Folder == 'first_gpu'"
).reset_index(drop=True)
t = (
    d.groupby(["Protein", "Folder"])[y]
    .idxmax()
    .reset_index()
)
# d was re-indexed above, so idxmax labels double as row positions for iloc.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [160]:
# Best Q per (Protein, Folder), comparing iter0_gpu_less_beta with iter4_gpu.
y = "Q"
d = data.query(
    "Folder=='iter0_gpu_less_beta' or Folder == 'iter4_gpu'"
).reset_index(drop=True)
t = (
    d.groupby(["Protein", "Folder"])[y]
    .idxmax()
    .reset_index()
)
# d was re-indexed above, so idxmax labels double as row positions for iloc.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [161]:
# Best Q per (Protein, Folder) for every folder except iter2_gpu and first_cpu2.
y = "Q"
d = data.query(
    "Folder!='iter2_gpu' and Folder != 'first_cpu2'"
).reset_index(drop=True)
t = (
    d.groupby(["Protein", "Folder"])[y]
    .idxmax()
    .reset_index()
)
# d was re-indexed above, so idxmax labels double as row positions for iloc.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [149]:
# Spread of Q over the last 20 rows of every (Protein, Run, Folder) group,
# leaving out the iter2_gpu and first_cpu2 folders.
y = "Q"
d = data.query(
    "Folder!='iter2_gpu' and Folder != 'first_cpu2'"
).reset_index(drop=True)
t = d.groupby(["Protein", "Run", "Folder"]).tail(20)
ax = sns.boxenplot(data=t, x="Protein", y=y, hue="Folder")



In [144]:
# Best Q per (Protein, Folder), leaving out the iter2_gpu and first_cpu2 folders.
y = "Q"
d = data.query(
    "Folder!='iter2_gpu' and Folder != 'first_cpu2'"
).reset_index(drop=True)
t = (
    d.groupby(["Protein", "Folder"])[y]
    .idxmax()
    .reset_index()
)
# d was re-indexed above, so idxmax labels double as row positions for iloc.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [140]:
# Best Q per (Protein, Folder) for every folder except iter2_gpu.
y = "Q"
# The filtered frame keeps the row labels of `data` (no reset_index here),
# so its labels no longer coincide with row positions.
d = data.query("Folder != 'iter2_gpu'")
t = d.groupby(["Protein", "Folder"])[y].idxmax().reset_index()
# idxmax returns index *labels*; select with .loc. The previous positional
# .iloc lookup picked unrelated rows (or ran past the end of d) once rows
# had been filtered out.
max_Q_data = d.loc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(x="Protein", y=y, markers=True, ms=10, style="Folder", hue="Folder", data=sub_data, dashes=False)



In [121]:
# Best Q per (Protein, Folder) over the whole table, one line per Folder.
y = "Q"
d = data
t = (
    d.groupby(["Protein", "Folder"])[y]
    .idxmax()
    .reset_index()
)
# idxmax yields index labels; the positional iloc lookup below therefore
# relies on d carrying a plain 0..n-1 index.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [ ]:
# Largest Steps value reached by each (Protein, Folder, Run), plotted per
# protein. The frame is still called max_Q_data although it holds the
# max-Steps rows.
y = "Steps"
d = data
t = (
    d.groupby(["Protein", "Folder", "Run"])[y]
    .idxmax()
    .reset_index()
)
# idxmax yields index labels; the positional iloc lookup below therefore
# relies on d carrying a plain 0..n-1 index.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)

CPU, thread 1


In [ ]:
# Load the cached table of the "first" iteration (the first CSV column holds
# the saved index) and order Protein by pdb_list.
data = pd.read_csv(
    "/Users/weilu/Research/data/openMM/iterative_optimization_first_12-14.csv",
    index_col=0,
)
sub_pdb_list = pdb_list
data["Protein"] = pd.Categorical(data["Protein"], categories=sub_pdb_list)

In [24]:
# Largest Steps value reached by each (Protein, Folder, Run), plotted per
# protein. The frame is still called max_Q_data although it holds the
# max-Steps rows.
y = "Steps"
d = data
t = (
    d.groupby(["Protein", "Folder", "Run"])[y]
    .idxmax()
    .reset_index()
)
# idxmax yields index labels; the positional iloc lookup below therefore
# relies on d carrying a plain 0..n-1 index.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [21]:
# Best Q per (Protein, Folder) over the whole table, one line per Folder.
y = "Q"
d = data
t = (
    d.groupby(["Protein", "Folder"])[y]
    .idxmax()
    .reset_index()
)
# idxmax yields index labels; the positional iloc lookup below therefore
# relies on d carrying a plain 0..n-1 index.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [27]:
# Best Q of each individual run: one row per (Protein, Folder, Run), so the
# line plot aggregates several runs for every protein.
y = "Q"
d = data
t = (
    d.groupby(["Protein", "Folder", "Run"])[y]
    .idxmax()
    .reset_index()
)
# idxmax yields index labels; the positional iloc lookup below therefore
# relies on d carrying a plain 0..n-1 index.
max_Q_data = d.iloc[t[y].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [28]:
# Q at the row with the largest Steps value of every (Protein, Folder, Run):
# rows are chosen by Steps, but Q is what gets plotted.
y = "Q"
d = data
t = (
    d.groupby(["Protein", "Folder", "Run"])["Steps"]
    .idxmax()
    .reset_index()
)
# idxmax yields index labels; the positional iloc lookup below therefore
# relies on d carrying a plain 0..n-1 index.
max_Q_data = d.iloc[t["Steps"].to_list()].reset_index(drop=True)
sub_data = max_Q_data
ax = sns.lineplot(
    data=sub_data,
    x="Protein",
    y=y,
    hue="Folder",
    style="Folder",
    markers=True,
    dashes=False,
    ms=10,
)



In [16]:
# Display the loaded table (the recorded output below is truncated by pandas
# and reports 42406 rows x 17 columns).
data


Out[16]:
Steps Q Rg Backbone Rama Contact Fragment Membrane ER TBM_Q Beta Pap Helical Total Run Protein Folder
0 0 0.05 36.04 68.02 -111.94 -21.21 -13.23 0.0 0.0 0.0 -0.00 0.00 0.0 -78.36 0 1e0m first
1 1 0.10 33.51 1.35 -118.56 -33.90 -21.35 0.0 0.0 0.0 -0.00 0.00 0.0 -172.46 0 1e0m first
2 2 0.20 22.52 193.61 -61.70 -28.71 -37.44 0.0 0.0 0.0 -0.00 0.00 0.0 65.76 0 1e0m first
3 3 0.15 20.12 214.98 -83.38 -29.58 -30.61 0.0 0.0 0.0 -0.00 0.00 0.0 71.41 0 1e0m first
4 4 0.26 17.40 202.66 -80.84 -29.53 -42.52 0.0 0.0 0.0 -9.62 0.00 0.0 40.15 0 1e0m first
5 5 0.32 17.36 237.12 -81.77 -30.40 -43.85 0.0 0.0 0.0 -4.70 -0.01 0.0 76.38 0 1e0m first
6 6 0.35 13.83 193.53 -75.48 -29.74 -48.77 0.0 0.0 0.0 -11.08 -3.34 0.0 25.11 0 1e0m first
7 7 0.31 11.38 209.04 -75.46 -30.34 -39.83 0.0 0.0 0.0 -11.80 -2.85 0.0 48.77 0 1e0m first
8 8 0.28 14.80 201.89 -66.88 -31.54 -42.82 0.0 0.0 0.0 -20.81 -8.83 0.0 31.01 0 1e0m first
9 9 0.30 18.44 213.88 -63.19 -30.29 -48.24 0.0 0.0 0.0 -7.52 -0.00 0.0 64.64 0 1e0m first
10 10 0.33 17.25 224.73 -76.77 -29.93 -48.51 0.0 0.0 0.0 -16.29 -4.45 0.0 48.78 0 1e0m first
11 11 0.32 14.94 216.67 -64.24 -29.71 -46.80 0.0 0.0 0.0 -12.07 -8.74 0.0 55.11 0 1e0m first
12 12 0.31 14.53 216.29 -74.71 -30.91 -42.52 0.0 0.0 0.0 -19.72 -0.00 0.0 48.44 0 1e0m first
13 13 0.31 18.14 200.53 -66.42 -28.62 -46.56 0.0 0.0 0.0 -19.72 -3.88 0.0 35.33 0 1e0m first
14 14 0.38 11.11 201.16 -64.05 -32.61 -46.30 0.0 0.0 0.0 -16.94 -0.00 0.0 41.27 0 1e0m first
15 15 0.33 11.73 213.63 -74.41 -32.29 -40.14 0.0 0.0 0.0 -1.77 -2.00 0.0 63.02 0 1e0m first
16 16 0.50 10.78 193.64 -75.89 -35.98 -50.87 0.0 0.0 0.0 -18.44 -8.71 0.0 3.75 0 1e0m first
17 17 0.45 11.55 215.44 -80.24 -33.30 -52.92 0.0 0.0 0.0 -28.88 -7.40 0.0 12.69 0 1e0m first
18 18 0.57 10.33 179.26 -74.30 -29.23 -54.20 0.0 0.0 0.0 -49.27 -8.80 0.0 -36.54 0 1e0m first
19 19 0.53 9.85 159.16 -86.94 -36.67 -51.08 0.0 0.0 0.0 -30.36 -16.46 0.0 -62.36 0 1e0m first
20 20 0.57 9.70 189.18 -84.97 -34.21 -55.71 0.0 0.0 0.0 -36.75 -13.10 0.0 -35.55 0 1e0m first
21 21 0.53 10.87 207.76 -68.66 -32.55 -55.63 0.0 0.0 0.0 -18.35 -8.42 0.0 24.15 0 1e0m first
22 22 0.59 9.72 203.41 -82.07 -35.50 -57.64 0.0 0.0 0.0 -29.05 -9.63 0.0 -10.48 0 1e0m first
23 23 0.68 9.78 188.21 -72.73 -34.19 -55.21 0.0 0.0 0.0 -35.96 -9.59 0.0 -19.48 0 1e0m first
24 24 0.64 9.96 211.36 -79.77 -33.71 -63.32 0.0 0.0 0.0 -50.02 -14.05 0.0 -29.52 0 1e0m first
25 25 0.65 9.89 222.12 -86.93 -35.93 -58.56 0.0 0.0 0.0 -42.27 -10.08 0.0 -11.65 0 1e0m first
26 26 0.68 9.52 168.49 -83.48 -33.21 -60.33 0.0 0.0 0.0 -45.32 -12.64 0.0 -66.48 0 1e0m first
27 27 0.72 9.07 209.63 -84.38 -36.18 -61.44 0.0 0.0 0.0 -47.44 -12.59 0.0 -32.39 0 1e0m first
28 28 0.54 10.83 226.00 -75.01 -34.56 -56.70 0.0 0.0 0.0 -22.22 -8.22 0.0 29.29 0 1e0m first
29 29 0.69 9.86 206.89 -76.79 -36.37 -64.11 0.0 0.0 0.0 -35.56 -13.65 0.0 -19.59 0 1e0m first
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
42376 45 0.19 25.73 780.42 -428.77 -227.53 -234.46 0.0 0.0 0.0 -127.95 -39.57 0.0 -277.85 9 1k0s first
42377 46 0.21 22.63 891.06 -437.32 -234.44 -251.03 0.0 0.0 0.0 -119.45 -42.15 0.0 -193.33 9 1k0s first
42378 47 0.21 20.61 845.80 -442.51 -225.29 -245.23 0.0 0.0 0.0 -125.74 -42.13 0.0 -235.09 9 1k0s first
42379 48 0.18 20.78 872.71 -430.17 -254.79 -236.21 0.0 0.0 0.0 -97.43 -41.01 0.0 -186.89 9 1k0s first
42380 49 0.20 18.76 879.36 -408.75 -260.47 -227.67 0.0 0.0 0.0 -114.41 -38.94 0.0 -170.89 9 1k0s first
42381 50 0.22 18.43 856.17 -439.06 -234.71 -235.84 0.0 0.0 0.0 -118.61 -34.18 0.0 -206.24 9 1k0s first
42382 51 0.22 17.59 796.16 -434.52 -249.91 -237.90 0.0 0.0 0.0 -117.17 -47.37 0.0 -290.70 9 1k0s first
42383 52 0.24 16.10 808.36 -424.59 -264.71 -235.35 0.0 0.0 0.0 -150.40 -50.39 0.0 -317.08 9 1k0s first
42384 53 0.29 14.72 826.68 -431.99 -329.03 -233.93 0.0 0.0 0.0 -113.88 -52.62 0.0 -334.77 9 1k0s first
42385 54 0.30 14.28 842.07 -435.49 -351.71 -234.37 0.0 0.0 0.0 -126.62 -66.65 0.0 -372.78 9 1k0s first
42386 55 0.32 14.28 846.51 -409.18 -363.74 -239.56 0.0 0.0 0.0 -159.77 -71.12 0.0 -396.86 9 1k0s first
42387 56 0.32 14.38 803.47 -452.69 -339.00 -245.28 0.0 0.0 0.0 -146.26 -61.60 0.0 -441.36 9 1k0s first
42388 57 0.32 14.44 866.53 -436.85 -320.88 -249.16 0.0 0.0 0.0 -168.94 -58.65 0.0 -367.94 9 1k0s first
42389 58 0.33 14.94 826.33 -418.03 -313.50 -252.50 0.0 0.0 0.0 -130.43 -48.77 0.0 -336.89 9 1k0s first
42390 59 0.31 14.85 773.55 -395.37 -360.85 -240.87 0.0 0.0 0.0 -143.16 -56.03 0.0 -422.73 9 1k0s first
42391 60 0.31 14.41 774.87 -409.17 -316.67 -249.46 0.0 0.0 0.0 -139.07 -70.98 0.0 -410.48 9 1k0s first
42392 61 0.30 14.63 850.36 -410.72 -370.97 -244.40 0.0 0.0 0.0 -147.42 -63.49 0.0 -386.62 9 1k0s first
42393 62 0.31 14.15 858.83 -427.87 -327.65 -254.44 0.0 0.0 0.0 -123.96 -58.39 0.0 -333.47 9 1k0s first
42394 63 0.35 14.15 810.61 -440.21 -358.33 -252.36 0.0 0.0 0.0 -144.58 -65.40 0.0 -450.27 9 1k0s first
42395 64 0.34 14.54 825.57 -437.31 -320.07 -261.02 0.0 0.0 0.0 -131.32 -63.89 0.0 -388.03 9 1k0s first
42396 65 0.33 14.85 747.03 -435.97 -325.16 -250.14 0.0 0.0 0.0 -145.07 -53.06 0.0 -462.36 9 1k0s first
42397 66 0.36 14.35 790.67 -414.35 -333.56 -260.94 0.0 0.0 0.0 -111.63 -55.45 0.0 -385.27 9 1k0s first
42398 67 0.34 14.58 764.20 -412.78 -327.85 -260.89 0.0 0.0 0.0 -127.18 -56.43 0.0 -420.94 9 1k0s first
42399 68 0.33 14.56 858.28 -417.42 -336.67 -258.01 0.0 0.0 0.0 -149.93 -56.90 0.0 -360.66 9 1k0s first
42400 69 0.34 14.46 813.71 -436.84 -345.83 -258.06 0.0 0.0 0.0 -115.37 -57.28 0.0 -399.67 9 1k0s first
42401 70 0.32 14.96 913.64 -423.38 -331.08 -254.40 0.0 0.0 0.0 -126.57 -66.72 0.0 -288.52 9 1k0s first
42402 71 0.31 15.30 821.02 -422.38 -323.29 -242.96 0.0 0.0 0.0 -134.05 -61.05 0.0 -362.70 9 1k0s first
42403 72 0.31 14.88 770.12 -460.80 -340.11 -265.73 0.0 0.0 0.0 -125.62 -63.45 0.0 -485.60 9 1k0s first
42404 73 0.30 14.77 879.47 -421.11 -338.21 -237.73 0.0 0.0 0.0 -153.01 -65.99 0.0 -336.57 9 1k0s first
42405 74 0.31 14.58 741.65 -436.88 -320.05 -244.28 0.0 0.0 0.0 -146.53 -64.67 0.0 -470.75 9 1k0s first

42406 rows × 17 columns


In [30]:
# Biopython PDB parser instance reused by the structure-loading cells below.
parser = PDBParser()

In [35]:
# Path to the movie.dcd trajectory stored under first/2wqg/4.
movie_dcd = "/Users/weilu/Research/server/dec_2019/iterative_optimization/first/2wqg/4/movie.dcd"

In [36]:
# NOTE: this call fails with the UnicodeDecodeError recorded below -- PDBParser
# reads the file as text and cannot decode movie.dcd. The cells after this one
# load movie.pdb instead.
s = parser.get_structure("X", movie_dcd)


---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-36-891398dda1cd> in <module>
----> 1 s = parser.get_structure("X", movie_dcd)

~/anaconda3/envs/py36/lib/python3.6/site-packages/Bio/PDB/PDBParser.py in get_structure(self, id, file)
     84 
     85             with as_handle(file, mode='rU') as handle:
---> 86                 self._parse(handle.readlines())
     87 
     88             self.structure_builder.set_header(self.header)

~/anaconda3/envs/py36/lib/python3.6/codecs.py in decode(self, input, final)
    319         # decode input (taking the buffer into account)
    320         data = self.buffer + input
--> 321         (result, consumed) = self._buffer_decode(data, self.errors, final)
    322         # keep undecoded input until the next call
    323         self.buffer = data[consumed:]

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x80 in position 20: invalid start byte

In [31]:
# Text-PDB version of the trajectory stored under first/2wqg/4.
movie = "/Users/weilu/Research/server/dec_2019/iterative_optimization/first/2wqg/4/movie.pdb"

In [37]:
# Re-point `movie` at the trajectory stored under first/2wqg/0.
movie = "/Users/weilu/Research/server/dec_2019/iterative_optimization/first/2wqg/0/movie.pdb"

In [38]:
# Parse the movie PDB selected above; "X" is the id given to the structure.
s = parser.get_structure("X", movie)

In [45]:
# Concatenate the models of all ten 2wqg movie files (folders 0..9) into
# one flat list, in run order.
complete_models = []
for i in range(10):
    movie = f"/Users/weilu/Research/server/dec_2019/iterative_optimization/first/2wqg/{i}/movie.pdb"
    s = parser.get_structure("X", movie)
    complete_models.extend(s.get_models())

In [65]:
# Rows of protein 2wqg with Steps > 1, re-indexed, with column Q renamed to Qw.
t = (
    data.query("Protein == '2wqg' and Steps > 1")
    .reset_index(drop=True)
    .rename(columns={"Q": "Qw"})
)

In [49]:
# Number of models collected so far.
len(complete_models)


Out[49]:
3409

In [73]:
# Show the protein ids currently in pdb_list.
print(pdb_list)


['1e0m', '1w4e', '1e0g', '2wqg', '1jo8', '1fex', '2l6r', '1c8c', '1g6p', '1mjc', '2jmc', '1hdn', '1st7', '1n88', '1d6o', '1hcd', '2ga5', '1j5u', '3o4d', '1k0s']

In [ ]:
# For every protein: load the models of all ten movie files, attach them to
# the matching rows of `data`, and pickle the last 50 rows of each run.
folder_list = ["first"]
folder = "first"
pre = f"/scratch/wl45/dec_2019/iterative_optimization/{folder}"
to_folder = "."
# Same effect as `mkdir -p`, without spawning a shell.
os.makedirs(f"{to_folder}/decoys/openMM", exist_ok=True)
for pdb in pdb_list:
    # Start a fresh list for each protein. Accumulating across proteins made
    # the list longer than `t` from the second protein on, so the
    # t["structure"] assignment below failed on the length mismatch.
    complete_models = []
    for i in range(10):
        movie = f"{pre}/{pdb}/{i}/movie.pdb"
        s = parser.get_structure("X", movie)
        complete_models += list(s.get_models())
    # NOTE(review): assumes the Steps > 1 rows line up one-to-one, in order,
    # with the models read from the movie files -- confirm.
    t = data.query(f"Protein == '{pdb}' and Steps > 1").reset_index(drop=True)
    t["structure"] = complete_models
    t = t.rename(columns={"Q":"Qw"})
    last50 = t.groupby("Run").tail(50).reset_index(drop=True)
    last50.to_pickle(f"{to_folder}/decoys/openMM/{folder}_{pdb}")

In [75]:
# Single-protein version: read the models of all ten 1w4e movie files and
# attach them as a "structure" column to the Steps > 1 rows of that protein.
folder_list = ["first"]
folder = "first"
pdb = "1w4e"
pre = f"/Users/weilu/Research/server/dec_2019/iterative_optimization/{folder}"
complete_models = []
for i in range(10):
    movie = f"{pre}/{pdb}/{i}/movie.pdb"
    s = parser.get_structure("X", movie)
    complete_models.extend(s.get_models())
t = data.query(f"Protein == '{pdb}' and Steps > 1").reset_index(drop=True)
# The list must be exactly as long as t for this column assignment to work.
t["structure"] = complete_models
t = t.rename(columns={"Q": "Qw"})

In [76]:
# Row count of t; compare with len(complete_models) in the next cell.
len(t)


Out[76]:
3756

In [77]:
# Number of collected models; compare with len(t) in the previous cell.
len(complete_models)


Out[77]:
3756

In [112]:
# Load a previously pickled decoy table (1c8c_first.pkl).
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
t = pd.read_pickle("/Users/weilu/Research/server/dec_2019/multiDensityOptimization/optimization_iteration1/optimization/decoys/openMM/1c8c_first.pkl")

In [113]:
# Pull the "structure" column out as a plain Python list.
structures = t["structure"].to_list()

In [89]:
# Print the first stored structure object.
print(structures[0])


<Model id=172>

In [114]:
# Materialise the residues of the first stored structure as a list.
all_res = list(structures[0].get_residues())

In [115]:
# Inspect the first residue.
all_res[0]


Out[115]:
<Residue MET het=  resseq=1 icode= >

In [116]:
# Check the first residue with is_hetero.
# NOTE(review): is_hetero is not defined in this notebook; presumably it comes
# from one of the star imports at the top -- confirm.
is_hetero(all_res[0])


Out[116]:
False

In [ ]:


In [ ]:
# Display the whole residue list.
all_res

In [96]:
# First field of the first residue's id.
# NOTE(review): presumably Biopython's hetero flag -- confirm.
all_res[0].id[0]


Out[96]:
'H_NGP'

In [70]:
# Keep the last 50 rows of every run and renumber the index.
last50 = t.groupby("Run").tail(50).reset_index(drop=True)

In [ ]:
# Pickle last50 under ./decoys/openMM, named "<folder>_<pdb>" (no extension).
# `folder` and `pdb` are whatever earlier cells left in the kernel.
to_folder = "."
last50.to_pickle(f"{to_folder}/decoys/openMM/{folder}_{pdb}")

In [55]:
# Attach the collected models as a column; this requires
# len(complete_models) == len(t).
t["structure"] = complete_models

In [ ]:
# Call getStructures on every row (axis=1), passing all_movies through.
# NOTE(review): sampled, getStructures and all_movies are only defined in the
# cell that follows this one in the file.
sampled["structure"] = sampled.apply(getStructures, all_movies=all_movies, axis=1)

In [ ]:
# Sample decoys from a ranked Q table and attach the matching movie frame to
# each sampled row as a parsed Biopython structure, then pickle the result.
# NOTE(review): `args`, `database_location` and `decoy_n` are not defined in
# this cell; presumably it was lifted from a command-line script -- confirm.
import io
from Bio.PDB.PDBParser import PDBParser
simulation_location, name = args.label.split("__")
simulation_location_name = f"{simulation_location}_{name}"

def getStructures(x, all_movies):
    """Parse the movie frame that belongs to one sampled row.

    Parameters
    ----------
    x : row with "index" and "Run" entries (both are converted with int()).
    all_movies : dict mapping a run id to the list of lines of its movie.pdb.

    Returns
    -------
    The structure produced by PDBParser.get_structure for that frame.

    Reads the module-level `size` (lines per frame), which is computed
    further down and must be set before this function is called.
    """
    # Frame number is the row's "index" value shifted by one.
    # NOTE(review): presumably this skips the initial frame -- confirm.
    index = int(x["index"])+1
    run = int(x["Run"])

    # Every frame occupies exactly `size` consecutive lines of the movie.
    start = index * size
    end = (index + 1) * size
    f = io.StringIO("".join(all_movies[run][start:end]))
    parser = PDBParser()
    return parser.get_structure(f"{index}", f)

a = pd.read_csv(f"{database_location}/Q_{simulation_location_name}", index_col=0).query(f"Rank < {decoy_n*3}")
sampled = a.sample(decoy_n)
all_movies = {}
for i in sampled["Run"].unique():
    with open(f"{database_location}/{simulation_location_name}_{i}/movie.pdb") as f:
        movie = f.readlines()
    all_movies[i] = movie
# Lines per frame: count up to and including the first ENDMDL record of the
# last movie that was read.
size = 0
for line in movie:
    size += 1
    if line == "ENDMDL\n":
        break
print(simulation_location_name, size)
sampled["structure"] = sampled.apply(getStructures, all_movies=all_movies, axis=1)
sampled["Qw"] = sampled[" Qw"].round(3)
# DataFrame.drop returns a new frame; the result was discarded before, so the
# " Qw" column was never actually removed from the pickled table.
sampled = sampled.drop(" Qw", axis=1)
sampled.to_pickle(f"decoys/lammps/{name}_{simulation_location}.pkl")

In [ ]:
# Display the rank-filtered Q table loaded in the previous cell.
a