In [1]:
# written in python3
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
from datetime import datetime
import seaborn as sns
%matplotlib inline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression

class DataFrameSelector(BaseEstimator, TransformerMixin):
    def __init__(self, attribute_names):
        self.attribute_names = attribute_names
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return X[self.attribute_names].values
    
plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio


def my_transform(data, label, degree, FEATURES):

    # LABEL = "Qw"
    LABEL = label
    PolynomialDegree = degree

    num_attribs = FEATURES
    cat_attribs = [LABEL]
    num_pipeline = Pipeline([
            ('selector', DataFrameSelector(num_attribs)),
            ('std_scaler', StandardScaler()),
            ('poly', PolynomialFeatures(degree=PolynomialDegree, include_bias=False))
        ])
    cat_pipeline = Pipeline([
            ('selector', DataFrameSelector(cat_attribs))
        ])

    full_pipeline = FeatureUnion(transformer_list=[
            ("num_pipeline", num_pipeline),
            ("cat_pipeline", cat_pipeline),
        ])
    return full_pipeline.fit_transform(data)

functions for reading data


In [70]:
import glob
a = glob.glob("/Users/weilu/Research/frustration_selection/tr*")
print(len(a))
pdb_list = [i.split("/")[-1] for i in a]


20

In [101]:
all_ = []
for pdb in pdb_list:
    #     print(pdb)
    fileLocation = f"/Users/weilu/Research/frustration_selection/{pdb}/q.txt"
    q = np.loadtxt(fileLocation)
    fileLocation = f"/Users/weilu/Research/frustration_selection/{pdb}/rmsd.txt"
    rmsd = np.loadtxt(fileLocation)
    fileLocation = f"/Users/weilu/Research/frustration_selection/{pdb}/gdt.txt"
    gdt = np.loadtxt(fileLocation)
    fileLocation = f"/Users/weilu/Research/frustration_selection/{pdb}/awsem_energy.txt"
    awsem_column = ['Step', 'Chain', 'Shake', 'Chi', 'Rama', 'Excluded', 'DSSP', 'P_AP', 'Water', 'Burial', 'Helix', 'AMH-Go', 'Frag_Mem', 'Vec_FM', 'Membrane', 'SSB', 'Electro', 'QGO', 'VTotal']
    awsem = pd.read_csv(fileLocation, sep="\s+", names=awsem_column)
    fileLocation = f"/Users/weilu/Research/frustration_selection/{pdb}/rosetta_energy.txt"
    rosetta = pd.read_csv(fileLocation, sep="\s+")
    data = pd.concat([awsem, rosetta], axis=1)
    assert len(awsem) == len(rosetta) == len(q) == len(rmsd) == len(gdt)
    data["Q"] = q
    data["RMSD"] = rmsd
    data["GDT"] = gdt
    data["Protein"] = pdb
    all_.append(data)
#     print(pdb, len(data))
data = pd.concat(all_)

drop_col = []
for col in data.columns:
#     print(col, len(data[col].unique()))
    if len(data[col].unique()) == 1:
        drop_col.append(col)
data = data.drop(drop_col, axis=1)

def extract_frame(data):
    return int(data.split("_")[0])
data["description"] = data["description"].apply(extract_frame)

In [135]:
def choose_top(data, col="RMSD", n=5, ascending=True):
    return data.assign(chosen=pd.DataFrame.rank(data[col], ascending=ascending, method='dense')<=n)

In [154]:
folder_list = ["tr894", "tr884", "tr922", "tr882", "tr896", "tr872", "tr594", "tr862", "tr869", "tr898", "tr885", "tr866", "tr868", "tr891", "tr895", "tr870", "tr921", "tr877", "tr948", "tr947"]

In [138]:
raw_data_all = data.groupby("Protein").apply(choose_top, n=100, col="RMSD").reset_index(drop=True)

In [193]:
# train_name_list = ["tr872", "tr885", "tr948"]
# train_name_list = ["tr862", "tr872", "tr885", "tr866", "tr868" , "tr895", "tr896", "tr870", "tr921", "tr891", "tr948"]

# train_name_list = ["tr870"]
# train_name_list = ["tr891"]
# train_name_list = ["tr882"]
# train_name_list = ["tr894"]
# train_name_list = ["tr872"]
# train_name_list = ["tr869"]
# train_name_list = ["tr884"]
# train_name_list = ["tr866", "tr884"]
# train_name_list = ["tr870", "tr872"]
# train_name_list = ["tr866", "tr947"]
# train_name_list = ["tr872"]
# train_name_list = ["tr884", "tr872"]
# train_name_list = ["tr866"]
# train_name_list = ["tr947"]
# train_name_list = ["tr894"]
# train_name_list = ["tr885"]
train_name_list = ["tr884"]
# select for training.
raw_data = raw_data_all.reset_index(drop=True).query(f'Protein in {train_name_list}')

In [194]:
raw_data_all.columns


Out[194]:
Index(['Chain', 'Chi', 'Rama', 'DSSP', 'P_AP', 'Water', 'Burial', 'Helix',
       'Electro', 'VTotal', 'score', 'fa_atr', 'fa_rep', 'fa_sol',
       'fa_intra_rep', 'fa_intra_sol_xover4', 'lk_ball_wtd', 'fa_elec',
       'pro_close', 'hbond_sr_bb', 'hbond_lr_bb', 'hbond_bb_sc', 'hbond_sc',
       'omega', 'fa_dun', 'p_aa_pp', 'ref', 'rama_prepro', 'allatom_rms',
       'maxsub', 'maxsub2.0', 'description', 'Q', 'RMSD', 'GDT', 'Protein',
       'chosen'],
      dtype='object')

In [195]:
# FEATURES = ["eigenvalues", "entropy", "pca"]
# FEATURES = ["eigenvalues", "entropy", "diffRMSD"]
# FEATURES = ["eigenvalues", "entropy"]
FEATURES = [
#     "biasQ",
    'score',
     'VTotal',
    'fa_atr', 'fa_rep', 'fa_sol', 
    'fa_intra_rep', 'fa_intra_sol_xover4', 'lk_ball_wtd', 'fa_elec',
    'pro_close', 'hbond_sr_bb', 'hbond_lr_bb', 'hbond_bb_sc', 'hbond_sc',
    'omega', 'fa_dun', 'p_aa_pp', 'ref', 'rama_prepro', 'allatom_rms', 'maxsub', 'maxsub2.0'
#     'RMSD', # test
#     'Qw',
#      'Burial',
#      'Water',
#      'Rama',
#      'DSSP',
#      'P_AP',
#      'Helix',
#      'Frag_Mem'
               ]
# FEATURES = ["eigenvalues"]
# LABEL = "diffRMSD"
# LABEL = "RMSD"
LABEL = "chosen"
DEGREE = 1

def pred_from_raw(a):
    data = my_transform(a, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
    test_y = data[:,-1]
    test_set = data[:,:-1]
    prob= clf.predict_proba(test_set)[:,1]
    return a.assign(prob=prob)

# data = my_transform(raw_data, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
# data = raw_data.groupby('name').apply(my_transform, label=LABEL, degree=DEGREE, FEATURES=FEATURES)[0]
data = np.concatenate(raw_data.groupby('Protein').apply(my_transform, 
                                                     label=LABEL, degree=DEGREE, FEATURES=FEATURES).values)
train_y = data[:,-1]
train_set = data[:,:-1]

# clf = svm.SVC(probability=True)
# p = 0.01
# clf = LogisticRegression(random_state=27, class_weight={0:p, 1:(1-p)})
clf = LogisticRegression(random_state=27, solver="liblinear")
clf.fit(train_set, train_y)

filtered = raw_data_all.groupby("Protein").apply(pred_from_raw).reset_index(drop=True)


picked_n = 1
best = raw_data_all.groupby("Protein").apply(choose_top, col="RMSD"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
# if True:
picked_1 = filtered.groupby("Protein").apply(choose_top, col="prob"
                                        , n=1, ascending=False).reset_index(drop=True).query("chosen==True")

# if False:
picked_5 = filtered.groupby("Protein").apply(choose_top, col="prob"
                                            , n=5, ascending=False).reset_index(drop=True).query("chosen==True")
picked = picked_5.groupby("Protein").apply(choose_top, col="RMSD"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
worst = filtered.groupby("Protein").apply(choose_top, col="RMSD"
                                            , n=1, ascending=False).reset_index(drop=True).query("chosen==True")
# init = raw_data_all.groupby("Protein").apply(choose_top, col="i"
#                                             , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
all_results = pd.concat([best.assign(result='best'), 
                         picked_1.assign(result='picked'), 
#                          picked.assign(result='picked_5'), 
#                          init.assign(result='init'),
                        worst.assign(result='worst')
                        ], sort=False)
# all_results = pd.concat([best.assign(result='best'), 
#                          picked.assign(result='picked')])
# picked.to_csv("/Users/weilu/Desktop/picked.csv

# sns.set(rc={'figure.figsize':(20,30)})
# plt.figure(figsize=(15,8))
fg = sns.FacetGrid(data=all_results.reset_index(), hue='result', height=8, aspect=1.63)
fg.map(plt.plot, 'Protein', 'RMSD').add_legend(fontsize=20)
# fg.set(ylim=(0, 10))


Out[195]:
<seaborn.axisgrid.FacetGrid at 0x1a36f34748>

In [198]:
for pdb in folder_list:
    print(pdb, round(10*picked_1.query(f"Protein=='{pdb}'")["RMSD"].values[0], 3))


tr894 2.11
tr884 3.08
tr922 2.61
tr882 2.39
tr896 9.19
tr872 4.03
tr594 4.64
tr862 5.28
tr869 12.32
tr898 13.92
tr885 2.74
tr866 2.65
tr868 2.97
tr891 2.65
tr895 4.24
tr870 9.62
tr921 3.98
tr877 3.74
tr948 6.54
tr947 8.53

In [197]:
clf.coef_


Out[197]:
array([[-0.72522265,  0.18643322, -1.71463549, -0.19407362, -1.03379484,
         0.05434562, -0.30980744,  0.197212  , -0.04165204,  0.23477696,
         0.13732412,  0.51681827, -0.3461486 ,  0.83682149, -0.18798525,
         0.55778971, -0.35668253,  0.        , -0.5382404 ,  0.        ,
         0.        ,  0.        ]])

In [ ]:


In [ ]:
# train_name_list = ["tr872", "tr885", "tr948"]
# train_name_list = ["tr862", "tr872", "tr885", "tr866", "tr868" , "tr895", "tr896", "tr870", "tr921", "tr891", "tr948"]

# train_name_list = ["tr870"]
# train_name_list = ["tr891"]
# train_name_list = ["tr882"]
# train_name_list = ["tr894"]
# train_name_list = ["tr872"]
# train_name_list = ["tr869"]
# train_name_list = ["tr884"]
# train_name_list = ["tr866", "tr884"]
# train_name_list = ["tr870", "tr872"]
# train_name_list = ["tr866", "tr947"]
# train_name_list = ["tr872"]
# train_name_list = ["tr884", "tr872"]
train_name_list = ["tr866"]
# train_name_list = ["tr947"]
# select for training.
raw_data = raw_data_all.reset_index(drop=True).query(f'Protein in {train_name_list}')

In [152]:
# FEATURES = ["eigenvalues", "entropy", "pca"]
# FEATURES = ["eigenvalues", "entropy", "diffRMSD"]
# FEATURES = ["eigenvalues", "entropy"]
FEATURES = [
#     "biasQ",
    'score',
     'VTotal',
#     'RMSD', # test
#     'Qw',
#      'Burial',
#      'Water',
#      'Rama',
#      'DSSP',
#      'P_AP',
#      'Helix',
#      'Frag_Mem'
               ]
# FEATURES = ["eigenvalues"]
# LABEL = "diffRMSD"
# LABEL = "RMSD"
LABEL = "chosen"
DEGREE = 1

def pred_from_raw(a):
    data = my_transform(a, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
    test_y = data[:,-1]
    test_set = data[:,:-1]
    prob= clf.predict_proba(test_set)[:,1]
    return a.assign(prob=prob)

# data = my_transform(raw_data, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
# data = raw_data.groupby('name').apply(my_transform, label=LABEL, degree=DEGREE, FEATURES=FEATURES)[0]
data = np.concatenate(raw_data.groupby('Protein').apply(my_transform, 
                                                     label=LABEL, degree=DEGREE, FEATURES=FEATURES).values)
train_y = data[:,-1]
train_set = data[:,:-1]

# clf = svm.SVC(probability=True)
# p = 0.01
# clf = LogisticRegression(random_state=27, class_weight={0:p, 1:(1-p)})
clf = LogisticRegression(random_state=27)
clf.fit(train_set, train_y)

filtered = raw_data_all.groupby("Protein").apply(pred_from_raw).reset_index(drop=True)


picked_n = 1
best = raw_data_all.groupby("Protein").apply(choose_top, col="RMSD"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
# if True:
picked_1 = filtered.groupby("Protein").apply(choose_top, col="prob"
                                        , n=1, ascending=False).reset_index(drop=True).query("chosen==True")

# if False:
picked_5 = filtered.groupby("Protein").apply(choose_top, col="prob"
                                            , n=5, ascending=False).reset_index(drop=True).query("chosen==True")
picked = picked_5.groupby("Protein").apply(choose_top, col="RMSD"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
worst = filtered.groupby("Protein").apply(choose_top, col="RMSD"
                                            , n=1, ascending=False).reset_index(drop=True).query("chosen==True")
# init = raw_data_all.groupby("Protein").apply(choose_top, col="i"
#                                             , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
all_results = pd.concat([best.assign(result='best'), 
                         picked_1.assign(result='picked'), 
#                          picked.assign(result='picked_5'), 
#                          init.assign(result='init'),
                        worst.assign(result='worst')
                        ], sort=False)
# all_results = pd.concat([best.assign(result='best'), 
#                          picked.assign(result='picked')])
# picked.to_csv("/Users/weilu/Desktop/picked.csv

# sns.set(rc={'figure.figsize':(20,30)})
# plt.figure(figsize=(15,8))
fg = sns.FacetGrid(data=all_results.reset_index(), hue='result', height=8, aspect=1.63)
fg.map(plt.plot, 'Protein', 'RMSD').add_legend(fontsize=20)
# fg.set(ylim=(0, 10))


/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.
  FutureWarning)
Out[152]:
<seaborn.axisgrid.FacetGrid at 0x1a240a7588>

In [156]:
picked_1


Out[156]:
Chain Chi Rama DSSP P_AP Water Burial Helix Electro VTotal ... allatom_rms maxsub maxsub2.0 description Q RMSD GDT Protein chosen prob
1698 124.183901 26.964318 -279.614296 -8.362123 -8.223005 -13.198036 -76.663990 -2.463773 -2.390729 -239.767733 ... 0.0 89.0 89.0 1698 0.345749 0.469 54.7750 tr594 True 0.975663
3605 110.428620 26.188862 -468.324714 -0.000021 -3.486636 -24.153081 -86.628249 -27.518606 -5.148007 -478.641832 ... 0.0 101.0 101.0 1100 0.345631 0.555 54.7025 tr862 True 0.959315
5861 141.913497 30.662560 -324.579853 -18.963096 -22.676859 -41.304054 -89.894447 -0.150713 -1.347088 -326.340054 ... 0.0 104.0 104.0 851 0.694104 0.276 82.9350 tr866 True 0.979996
8753 139.769819 23.008111 -499.436469 -7.250086 -12.746448 -47.037619 -99.072989 -21.620603 -2.485716 -526.872002 ... 0.0 116.0 116.0 1238 0.602163 0.258 72.8450 tr868 True 0.977855
10873 128.191708 35.596632 -456.590774 -0.000265 -5.121299 -33.315973 -88.835522 -28.647416 -0.973550 -449.696458 ... 0.0 104.0 104.0 853 0.253706 1.277 37.0225 tr869 True 0.964882
12969 172.430164 45.766535 -499.120228 -0.000000 -2.521616 -43.550806 -102.148723 -26.298814 -3.077160 -458.520649 ... 0.0 123.0 123.0 444 0.232280 0.899 34.7100 tr870 True 0.955580
16349 111.880039 17.229178 -330.195386 -18.781761 -13.516343 -29.764483 -74.969967 -1.134974 1.458206 -337.795491 ... 0.0 88.0 88.0 1463 0.566600 0.564 70.7375 tr872 True 0.980602
18384 208.343802 48.714874 -561.577629 -16.325579 -18.794622 -56.657073 -120.193422 -13.610528 -0.431476 -530.531654 ... 0.0 142.0 142.0 993 0.483540 0.339 64.0825 tr877 True 0.946987
20782 102.076125 17.415742 -370.755389 -0.557303 -11.448396 -28.986179 -67.998863 -10.711915 -0.194794 -371.160971 ... 0.0 79.0 79.0 886 0.643662 0.226 79.7475 tr882 True 0.984530
22893 77.601261 17.163718 -280.236825 -0.000000 -8.314877 -18.437334 -61.632293 -7.561033 -1.898691 -283.316073 ... 0.0 71.0 71.0 492 0.534594 0.370 72.8850 tr884 True 0.939604
26697 137.437784 39.581020 -523.259315 -0.000000 -8.952637 -53.864562 -93.487081 -33.894098 0.883839 -535.555049 ... 0.0 114.0 114.0 1791 0.711750 0.258 83.3350 tr885 True 0.990606
28029 170.407178 26.738415 -451.415795 -29.247597 -24.029337 -49.087487 -104.906014 -1.779287 -6.484034 -469.803959 ... 0.0 119.0 119.0 618 0.710001 0.193 83.2575 tr891 True 0.976945
30808 71.134382 12.673334 -219.850750 -0.617298 -8.194819 -13.433547 -47.365251 -5.936284 -0.990181 -212.580413 ... 0.0 54.0 54.0 892 0.546461 0.266 69.4450 tr894 True 0.959589
32489 145.288160 29.380842 -523.226656 -3.674996 -6.691491 -43.932735 -95.613897 -24.008356 3.873506 -518.605623 ... 0.0 120.0 120.0 68 0.493675 0.686 65.2075 tr895 True 0.950902
35560 115.579696 19.627578 -322.405908 -7.999120 -12.382616 -23.240249 -72.343211 -0.041960 -3.189297 -306.395085 ... 0.0 86.0 86.0 634 0.343373 0.839 56.1050 tr896 True 0.983266
39183 140.273997 32.144592 -475.718856 -0.000000 -0.265804 -22.531791 -92.191658 -22.471398 -3.103841 -443.864759 ... 0.0 106.0 106.0 1752 0.276848 1.450 32.7825 tr898 True 0.948924
41133 195.082170 34.577941 -542.303246 -39.946262 -28.536070 -53.861268 -117.551969 -0.001215 -0.672082 -553.212002 ... 0.0 138.0 138.0 1197 0.526336 0.375 66.4850 tr921 True 0.939680
44203 79.407459 17.342113 -288.784325 -0.000201 -3.888180 -15.643351 -60.248153 -13.580145 1.779713 -283.615071 ... 0.0 74.0 74.0 1923 0.672919 0.335 79.3950 tr922 True 0.938725
45053 272.053802 100.500725 -684.390715 -17.457918 -20.011288 -50.909581 -148.180452 -11.042174 -6.905148 -566.342750 ... 0.0 175.0 175.0 268 0.433293 1.190 63.4300 tr947 True 0.941570
46506 232.275121 75.114731 -746.700027 -0.000000 -4.013227 -55.166690 -134.456059 -57.341636 -2.255953 -692.543741 ... 0.0 161.0 161.0 1083 0.535518 0.673 67.2375 tr948 True 0.947949

20 rows × 38 columns


In [160]:
picked_1["Protein"].unique()


Out[160]:
array(['tr594', 'tr862', 'tr866', 'tr868', 'tr869', 'tr870', 'tr872',
       'tr877', 'tr882', 'tr884', 'tr885', 'tr891', 'tr894', 'tr895',
       'tr896', 'tr898', 'tr921', 'tr922', 'tr947', 'tr948'], dtype=object)

In [161]:
folder_list


Out[161]:
['tr894',
 'tr884',
 'tr922',
 'tr882',
 'tr896',
 'tr872',
 'tr594',
 'tr862',
 'tr869',
 'tr898',
 'tr885',
 'tr866',
 'tr868',
 'tr891',
 'tr895',
 'tr870',
 'tr921',
 'tr877',
 'tr948',
 'tr947']

In [166]:
for pdb in folder_list:
    print(pdb, 10*picked_1.query(f"Protein=='{pdb}'")["RMSD"].values[0])


tr894 2.66
tr884 3.7
tr922 3.35
tr882 2.2600000000000002
tr896 8.39
tr872 5.64
tr594 4.6899999999999995
tr862 5.550000000000001
tr869 12.77
tr898 14.5
tr885 2.58
tr866 2.7600000000000002
tr868 2.58
tr891 1.9300000000000002
tr895 6.86
tr870 8.99
tr921 3.75
tr877 3.39
tr948 6.73
tr947 11.899999999999999

In [153]:
clf.coef_


Out[153]:
array([[ 0.18808263, -0.21367747]])

In [149]:
clf.coef_


Out[149]:
array([[ 0.18808263, -0.21367747]])

In [ ]:


In [132]:
g = sns.FacetGrid(data, col="Protein", col_wrap=4)
g = g.map(plt.hist, "RMSD")



In [128]:


In [129]:
data


Out[129]:
Chain Chi Rama DSSP P_AP Water Burial Helix Electro VTotal ... ref rama_prepro allatom_rms maxsub maxsub2.0 description Q RMSD GDT Protein
0 120.305046 25.030902 -505.107887 -0.000000 -0.107303 -34.178516 -92.446636 -25.172317 -2.622472 -514.299186 ... 22.273 1.322 0.00 106.0 106.0 0 0.283478 1.402 36.5575 tr898
1 147.155070 22.444486 -491.517963 -0.000000 -0.000295 -26.463080 -92.809915 -17.591866 -2.362889 -461.146453 ... 22.273 -7.362 0.00 106.0 106.0 1 0.275945 1.414 34.9050 tr898
2 140.378840 25.081482 -493.970385 -0.000000 -0.417929 -26.765382 -92.506750 -21.973372 -2.641619 -472.815114 ... 22.273 -0.552 0.00 106.0 106.0 2 0.300145 1.387 36.0875 tr898
3 134.625889 30.279010 -501.437458 -0.000000 -0.520035 -26.874046 -93.514969 -20.440403 -2.859405 -480.741417 ... 22.273 -4.366 0.00 106.0 106.0 3 0.281346 1.401 34.9050 tr898
4 151.053641 36.293730 -497.221005 -0.000000 -0.682022 -20.044147 -90.688331 -17.551906 -2.778603 -441.618644 ... 22.273 -3.326 0.00 106.0 106.0 4 0.264499 1.372 33.2550 tr898
5 140.414193 23.658541 -484.762758 -0.000000 -0.626386 -24.541766 -92.988631 -21.578898 -2.244195 -462.669900 ... 22.273 -5.252 0.00 106.0 106.0 5 0.277808 1.429 34.6700 tr898
6 159.340721 28.608793 -483.378835 -0.000000 -0.441569 -20.504922 -92.248760 -20.652650 -2.619187 -431.896409 ... 22.273 -3.087 0.00 106.0 106.0 6 0.267574 1.390 32.7825 tr898
7 147.872491 32.334709 -489.868776 -0.000000 -0.909066 -26.898794 -92.782469 -27.623052 -2.464644 -460.339601 ... 22.273 -3.487 0.00 106.0 106.0 7 0.280151 1.364 34.1950 tr898
8 146.236786 27.978264 -477.439784 -0.000000 -0.870771 -22.715833 -91.274724 -21.917459 -2.583176 -442.586697 ... 22.273 4.919 0.15 106.0 106.0 8 0.276194 1.377 34.2000 tr898
9 143.034875 41.921821 -478.790702 -0.000000 -0.893802 -23.115023 -91.586434 -23.372143 -2.408531 -435.209938 ... 22.273 -4.397 0.00 106.0 106.0 9 0.266198 1.397 32.5450 tr898
10 135.213040 27.732520 -480.987290 -0.000000 -1.028209 -23.427637 -92.011519 -24.904458 -2.529449 -461.943003 ... 22.273 -7.331 0.00 106.0 106.0 10 0.268493 1.389 33.0175 tr898
11 153.232782 35.476262 -493.140806 -0.000000 -1.182479 -21.911948 -90.307486 -20.972645 -2.752883 -441.559203 ... 22.273 -7.982 0.00 106.0 106.0 11 0.280563 1.384 34.6700 tr898
12 150.276648 32.635979 -483.119053 -0.000000 -1.393953 -20.819231 -92.631930 -19.487376 -2.210180 -436.749096 ... 22.273 -7.873 0.00 106.0 106.0 12 0.273912 1.433 33.9625 tr898
13 143.384450 28.317570 -501.934539 -0.000000 -0.791625 -27.083677 -93.122703 -21.525666 -2.283150 -475.039340 ... 22.273 -4.924 0.00 106.0 106.0 13 0.276648 1.452 33.2525 tr898
14 135.644470 27.406200 -504.506680 -0.000000 -0.528258 -23.223932 -92.482201 -25.775818 -2.416920 -485.883138 ... 22.273 -5.650 0.00 106.0 106.0 14 0.279994 1.479 32.0750 tr898
15 143.989256 26.933794 -500.357215 -0.000000 -0.496883 -24.541615 -93.506283 -16.559409 -2.671673 -467.210028 ... 22.273 -13.591 0.00 106.0 106.0 15 0.280511 1.477 32.3100 tr898
16 164.525371 30.930790 -496.284421 -0.000000 -0.679354 -22.719010 -92.583999 -24.173413 -2.312121 -443.296157 ... 22.273 -1.686 0.00 106.0 106.0 16 0.277608 1.473 32.7825 tr898
17 137.528899 38.185541 -488.721707 -0.000000 -0.508331 -19.814349 -93.350337 -24.592710 -2.348784 -453.621778 ... 22.273 -2.947 0.00 106.0 106.0 17 0.278190 1.509 33.4925 tr898
18 143.958009 28.877087 -488.371979 -0.000000 -0.676125 -23.206006 -92.723647 -25.412838 -2.373214 -459.928712 ... 22.273 -3.508 0.00 106.0 106.0 18 0.283341 1.479 33.4900 tr898
19 141.386863 26.174881 -493.309733 -0.000000 -0.712303 -24.678066 -93.300807 -23.259784 -2.225128 -469.924076 ... 22.273 -2.771 0.00 106.0 106.0 19 0.280026 1.463 32.5475 tr898
20 133.635051 33.961600 -504.836761 -0.000000 -0.551922 -24.156508 -91.949129 -20.199186 -2.307536 -476.404390 ... 22.273 1.073 0.00 106.0 106.0 20 0.274937 1.482 33.7275 tr898
21 143.694197 34.527464 -485.833398 -0.000000 -0.870527 -23.981202 -91.800708 -20.188484 -2.318028 -446.770685 ... 22.273 -3.805 0.00 106.0 106.0 21 0.273461 1.475 32.7825 tr898
22 140.442942 26.590152 -498.494194 -0.000000 -0.503264 -23.382847 -92.830819 -24.254626 -2.329103 -474.761760 ... 22.273 0.591 0.00 106.0 106.0 22 0.281044 1.487 33.0200 tr898
23 144.453285 33.293267 -497.078075 -0.000000 -0.522480 -23.797127 -91.605937 -20.968929 -2.302041 -458.528037 ... 22.273 -6.766 0.00 106.0 106.0 23 0.276132 1.449 33.2550 tr898
24 140.248439 31.324129 -498.777073 -0.000000 -0.700141 -19.824562 -91.957134 -24.763273 -2.497942 -466.947556 ... 22.273 -8.620 0.00 106.0 106.0 24 0.280955 1.457 33.0175 tr898
25 160.716713 38.806819 -496.444635 -0.000000 -0.526379 -24.207083 -92.012533 -18.374025 -2.487256 -434.528379 ... 22.273 -8.042 0.00 106.0 106.0 25 0.275130 1.460 32.7825 tr898
26 123.610173 34.483728 -488.702662 -0.000000 -0.630171 -21.838544 -92.192668 -19.826124 -2.716325 -467.812593 ... 22.273 -5.160 0.00 106.0 106.0 26 0.276375 1.422 32.5475 tr898
27 139.025104 31.753790 -477.896577 -0.000000 -0.507182 -24.233222 -91.429011 -21.441210 -2.634183 -447.362493 ... 22.273 -1.055 0.00 106.0 106.0 27 0.268809 1.376 32.3100 tr898
28 146.414796 38.993244 -485.896979 -0.000000 -0.232584 -25.412671 -92.016919 -22.743471 -2.851942 -443.746526 ... 22.273 -4.547 0.00 106.0 106.0 28 0.285740 1.391 34.6700 tr898
29 134.597668 23.498079 -481.905364 -0.000000 -0.513820 -23.836249 -91.356219 -17.669486 -2.602972 -459.788364 ... 22.273 -6.282 0.00 106.0 106.0 29 0.281040 1.380 32.5475 tr898
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2475 114.184312 20.612141 -334.696507 -15.482272 -12.805455 -29.390690 -76.531042 -0.072464 0.791644 -333.390333 ... 27.717 15.472 0.00 88.0 88.0 2475 0.554808 0.479 70.4550 tr872
2476 113.183756 22.429226 -340.700683 -11.230901 -13.343561 -26.159173 -74.616055 0.142715 0.825142 -329.469534 ... 27.717 11.955 0.00 88.0 88.0 2476 0.576930 0.487 71.0200 tr872
2477 117.365680 27.448752 -338.352160 -14.367716 -13.251562 -27.014365 -75.389999 -0.586974 0.865346 -323.282998 ... 27.717 16.867 0.00 88.0 88.0 2477 0.562874 0.480 69.3175 tr872
2478 106.714527 20.888156 -335.746106 -14.362893 -13.556630 -28.531148 -74.774550 0.680315 0.857126 -337.831202 ... 27.717 15.486 0.00 88.0 88.0 2478 0.527556 0.523 67.6125 tr872
2479 101.143000 22.629087 -333.957038 -14.677675 -13.485314 -29.264907 -76.100725 0.233648 0.699695 -342.780229 ... 27.717 23.197 0.00 88.0 88.0 2479 0.555542 0.492 68.1825 tr872
2480 110.205379 23.148222 -336.023308 -16.027287 -14.434788 -26.094789 -75.441194 -0.026334 0.814536 -333.879564 ... 27.717 19.600 0.00 88.0 88.0 2480 0.527158 0.508 68.1825 tr872
2481 122.534127 23.618180 -361.701931 -18.210748 -13.820391 -28.227122 -76.101366 -0.765222 0.552358 -352.122114 ... 27.717 15.724 0.00 88.0 88.0 2481 0.512061 0.524 67.8975 tr872
2482 115.877466 28.515640 -329.528482 -16.093678 -13.579994 -28.122889 -76.212415 -1.363799 0.825131 -319.683019 ... 27.717 13.983 0.00 88.0 88.0 2482 0.514066 0.520 65.9125 tr872
2483 114.171191 21.818982 -362.325245 -18.442345 -13.512973 -28.356791 -75.320840 -0.439890 0.583090 -361.824821 ... 27.717 16.890 0.00 88.0 88.0 2483 0.533812 0.507 72.1575 tr872
2484 133.188266 22.418145 -321.211042 -17.026340 -13.495404 -31.185915 -75.243902 -0.675351 0.517695 -302.713848 ... 27.717 20.939 0.00 88.0 88.0 2484 0.549870 0.494 68.1800 tr872
2485 122.594361 29.658989 -326.806442 -16.455129 -13.488875 -29.273046 -75.563123 -0.005215 0.408823 -308.929655 ... 27.717 30.371 0.00 88.0 88.0 2485 0.580638 0.476 74.1475 tr872
2486 117.840828 20.019533 -316.890299 -18.754850 -13.218852 -28.800832 -75.199657 -1.427760 0.686436 -315.745452 ... 27.717 19.665 0.00 88.0 88.0 2486 0.557980 0.486 71.8750 tr872
2487 125.535805 22.512860 -358.150352 -16.831025 -13.352600 -29.292355 -74.969820 -0.690783 0.455509 -344.782761 ... 27.717 23.536 0.00 88.0 88.0 2487 0.552423 0.497 71.0200 tr872
2488 123.761325 26.815237 -355.679176 -17.110966 -13.708667 -28.001939 -75.673521 -0.678044 0.622157 -339.653594 ... 27.717 18.582 0.00 88.0 88.0 2488 0.556455 0.479 71.0225 tr872
2489 122.107304 18.880054 -348.896859 -13.315039 -13.539133 -31.466034 -74.670406 -1.206328 0.912644 -341.193797 ... 27.717 14.783 0.00 88.0 88.0 2489 0.575515 0.472 72.1600 tr872
2490 116.304651 16.278705 -341.351296 -13.989398 -13.399719 -27.198742 -74.973206 -0.901745 0.629550 -338.601201 ... 27.717 14.076 0.00 88.0 88.0 2490 0.544252 0.485 70.1725 tr872
2491 115.281308 20.611756 -339.311303 -14.284938 -13.631203 -26.510174 -75.646152 -0.204735 0.813533 -332.881908 ... 27.717 22.999 0.00 88.0 88.0 2491 0.543608 0.502 68.7500 tr872
2492 103.461651 21.909182 -333.086730 -13.935617 -13.988378 -25.908047 -73.728835 0.046304 0.839551 -334.390921 ... 27.717 17.342 0.00 88.0 88.0 2492 0.560118 0.495 71.5900 tr872
2493 142.067762 23.515326 -328.611548 -12.732880 -13.091211 -29.676039 -73.819698 -0.645807 0.550437 -292.443658 ... 27.717 16.493 0.00 88.0 88.0 2493 0.557764 0.504 70.7350 tr872
2494 116.028991 25.400170 -314.322704 -15.836599 -13.653371 -23.666254 -75.525264 -0.331192 0.768138 -301.138085 ... 27.717 13.678 0.00 88.0 88.0 2494 0.556864 0.488 70.7375 tr872
2495 109.907230 15.848900 -339.136487 -13.067772 -12.764315 -30.856032 -76.287658 -0.302400 1.358640 -345.299894 ... 27.717 18.681 0.00 88.0 88.0 2495 0.568690 0.460 71.5900 tr872
2496 110.746574 17.587271 -339.428806 -13.249256 -13.158427 -29.208557 -73.798823 -0.239796 0.796010 -339.953810 ... 27.717 14.840 0.00 88.0 88.0 2496 0.570366 0.481 71.5900 tr872
2497 108.008997 19.225796 -334.264341 -13.812031 -13.319553 -26.795614 -74.620614 -0.043568 0.950638 -334.670290 ... 27.717 11.567 0.00 88.0 88.0 2497 0.542623 0.481 68.1800 tr872
2498 128.258815 20.184060 -336.059942 -13.973977 -13.452747 -27.238561 -74.537248 -0.815749 1.033337 -316.602012 ... 27.717 16.917 0.00 88.0 88.0 2498 0.561730 0.447 71.5900 tr872
2499 112.621289 24.383335 -346.467417 -14.327999 -13.496230 -30.011538 -74.291939 -0.647914 0.894787 -341.343625 ... 27.717 22.024 0.00 88.0 88.0 2499 0.542987 0.475 71.3050 tr872
2500 121.937615 20.056964 -336.219744 -17.234241 -13.203055 -30.347517 -76.036384 -0.801038 1.145839 -330.701561 ... 27.717 9.653 0.00 88.0 88.0 2500 0.539795 0.471 70.7375 tr872
2501 103.532949 24.474777 -327.265101 -14.732032 -13.040145 -31.762154 -73.294012 -0.950622 1.047322 -331.989018 ... 27.717 22.449 0.00 88.0 88.0 2501 0.562330 0.475 68.4650 tr872
2502 122.935590 23.591022 -334.764489 -12.766174 -13.346997 -29.116175 -75.296089 -0.548575 0.978441 -318.333445 ... 27.717 14.383 0.00 88.0 88.0 2502 0.559309 0.472 68.4650 tr872
2503 113.239371 22.149895 -314.072260 -17.722685 -13.912935 -28.181979 -74.131900 -0.667344 1.010119 -312.289718 ... 27.717 15.451 0.00 88.0 88.0 2503 0.532867 0.491 68.7500 tr872
2504 118.041435 24.843265 -325.009809 -12.994488 -13.576262 -31.914232 -75.266283 -0.619814 0.937684 -315.558502 ... 27.717 11.517 0.00 88.0 88.0 2504 0.562306 0.473 70.7375 tr872

47077 rows × 36 columns


In [121]:
data.groupby("Protein").head(1)


Out[121]:
Chain Chi Rama DSSP P_AP Water Burial Helix Electro VTotal ... ref rama_prepro allatom_rms maxsub maxsub2.0 description Q RMSD GDT Protein
0 120.305046 25.030902 -505.107887 -0.000000 -0.107303 -34.178516 -92.446636 -25.172317 -2.622472 -514.299186 ... 22.273 1.322 0.0 106.0 106.0 0_0001 0.283478 1.402 36.5575 tr898
0 112.913320 30.524565 -332.532107 -7.125356 -12.138282 -27.317621 -73.145221 -0.660060 -2.910019 -312.390781 ... 32.554 35.315 0.0 86.0 86.0 0_0001 0.377589 0.820 60.4650 tr896
0 129.893163 21.199104 -468.943420 -0.000000 -3.860495 -21.126415 -88.492366 -39.077031 -5.199941 -475.607401 ... 4.235 12.319 0.0 101.0 101.0 0_0001 0.325240 0.572 56.1875 tr862
0 137.067264 24.421757 -479.183988 -27.382336 -27.697501 -43.510377 -104.124806 -1.270136 -6.691340 -528.371463 ... 18.855 21.362 0.0 119.0 119.0 0_0001 0.797348 0.162 89.2875 tr891
0 198.007699 36.011043 -541.535066 -38.317776 -35.796249 -62.207052 -118.152975 -0.176772 -0.808177 -562.975325 ... 38.542 55.786 0.0 138.0 138.0 0_0001 0.531465 0.355 68.3000 tr921
0 151.533700 31.215873 -254.981231 -0.000000 -11.889834 -20.362156 -62.001855 -5.804071 -2.216831 -174.506404 ... 16.023 31.277 0.0 71.0 71.0 0_0001 0.451286 0.379 65.4925 tr884
0 173.522095 34.397842 -541.348701 -0.000000 -6.151931 -41.078064 -100.508252 -29.543585 -1.740115 -512.450711 ... 14.718 59.166 0.0 123.0 123.0 0_0001 0.261670 0.957 37.8100 tr870
0 189.422606 27.511208 -544.903493 -14.188880 -17.148209 -58.429289 -121.192948 -14.066593 -1.213007 -554.208604 ... 59.574 21.756 0.0 142.0 142.0 0_0001 0.538239 0.301 70.5975 tr877
0 79.464029 12.010538 -369.527360 -1.674597 -11.763620 -30.051908 -66.226296 -11.447843 -0.515120 -399.732177 ... 2.767 -4.437 0.0 79.0 79.0 0_0001 0.650360 0.232 81.3275 tr882
0 123.232220 28.551356 -540.216347 -0.000000 -11.237948 -60.850178 -94.438157 -35.575596 0.332164 -590.202487 ... 39.134 11.223 0.0 114.0 114.0 0_0001 0.761612 0.236 87.7200 tr885
0 150.278680 28.190165 -497.229165 -6.249447 -16.511902 -48.171631 -99.267204 -23.067165 -2.226016 -514.253687 ... 33.038 12.482 0.0 116.0 116.0 0_0001 0.662401 0.300 76.7250 tr868
0 165.798611 31.533757 -323.467341 -11.166358 -22.927684 -38.559688 -89.469934 -0.728277 -1.032178 -290.019093 ... 37.788 30.418 0.0 104.0 104.0 0_0001 0.651836 0.325 80.0475 tr866
0 163.062315 26.916240 -507.856411 -2.250993 -10.479302 -45.677259 -102.101553 -25.464097 4.614500 -499.236561 ... -0.748 44.793 0.0 120.0 120.0 0_0001 0.564097 0.433 70.0000 tr895
0 75.658505 8.515404 -221.770669 -0.024260 -9.610843 -14.350910 -47.132503 -10.956137 -1.157751 -220.829164 ... 17.583 18.687 0.0 54.0 54.0 0_0001 0.626163 0.218 75.9250 tr894
0 133.297640 28.285067 -456.010181 -0.000015 -5.211958 -33.347248 -87.683890 -19.686313 -1.223424 -441.580320 ... 30.254 37.752 0.0 104.0 104.0 0_0001 0.267610 1.232 38.2200 tr869
0 227.882252 89.779867 -732.361655 -0.000000 -5.804146 -56.306526 -135.060004 -55.580062 -0.428691 -667.878964 ... 9.860 60.413 0.0 161.0 161.0 0_0001 0.551050 0.672 70.9650 tr948
0 78.052729 16.539156 -313.608667 -0.008484 -3.879275 -14.564000 -57.284547 -14.714966 1.135120 -308.332935 ... 26.579 2.143 0.0 74.0 74.0 0_0001 0.712195 0.249 85.1350 tr922
0 265.735161 88.976112 -672.874332 -17.688782 -20.621786 -49.627074 -147.815901 -7.649920 -6.411544 -567.978066 ... 32.551 39.320 0.0 175.0 175.0 0_0001 0.440554 1.292 66.1450 tr947
0 147.177126 27.717338 -258.493409 -5.697287 -9.250652 -13.557519 -76.220229 -7.931782 -2.157246 -198.413661 ... 13.719 43.123 0.0 89.0 89.0 0_0001 0.352884 0.467 54.4925 tr594
0 84.120983 17.340991 -362.042976 -16.473402 -15.238775 -34.753678 -75.023984 0.154077 1.001555 -400.915208 ... 27.717 11.826 0.0 88.0 88.0 0_0001 0.621918 0.566 74.1475 tr872

20 rows × 36 columns


In [120]:
data.groupby("Protein").apply(choose_top, n=1).query("chosen == True")


Out[120]:
Chain Chi Rama DSSP P_AP Water Burial Helix Electro VTotal ... rama_prepro allatom_rms maxsub maxsub2.0 description Q RMSD GDT Protein chosen
Protein
tr594 1467 156.299121 28.293094 -263.791895 -5.973513 -8.798645 -12.293802 -74.715335 -1.861538 -2.186938 -185.029451 ... 57.481 0.0 89.0 89.0 1467_0001 0.353956 0.456 55.6175 tr594 True
2033 132.141228 31.070281 -304.371204 -7.419159 -9.853358 -10.955557 -74.937769 -3.372238 -2.335354 -250.033130 ... 46.315 0.0 89.0 89.0 2033_0001 0.363837 0.456 56.1800 tr594 True
tr862 1904 104.740312 20.277740 -467.450449 -0.000005 -2.728461 -24.640018 -85.922630 -32.158538 -5.373889 -493.255939 ... 9.787 0.0 101.0 101.0 1904_0001 0.366677 0.487 55.9400 tr862 True
tr866 2348 152.936694 31.683805 -343.113442 -17.236845 -22.454395 -47.006387 -88.657667 -1.045414 -1.413021 -336.306672 ... 31.845 0.0 104.0 104.0 2348_0001 0.731392 0.193 82.4525 tr866 True
tr868 2245 160.072038 35.093146 -483.912502 -7.030704 -10.292623 -42.186127 -97.768990 -26.338273 -1.815745 -474.179780 ... 2.684 0.0 116.0 116.0 2245_0001 0.713344 0.187 81.6800 tr868 True
tr869 1962 142.200146 32.228202 -401.509930 -0.001863 -2.115199 -33.517174 -88.006551 -18.227966 -1.122771 -370.073105 ... 15.266 0.0 104.0 104.0 1962_0001 0.253259 1.178 36.0600 tr869 True
tr870 321 197.302952 37.644259 -509.520496 -0.000000 -2.727696 -40.352249 -103.829016 -21.121046 -3.661269 -446.264562 ... 52.411 0.0 123.0 123.0 321_0001 0.237328 0.824 33.0600 tr870 True
tr872 1729 112.717782 15.775649 -340.183323 -18.274393 -15.297043 -34.297404 -74.486515 0.519748 1.148716 -352.376783 ... 9.392 0.0 88.0 88.0 1729_0001 0.649236 0.338 78.1250 tr872 True
tr877 1769 206.794127 39.500174 -546.157524 -16.999266 -22.337062 -51.568476 -120.683795 -12.959025 -0.838586 -525.249434 ... 30.353 0.0 142.0 142.0 1769_0001 0.555797 0.270 69.5425 tr877 True
tr882 615 108.637613 23.538167 -345.497453 -0.423200 -10.608575 -31.409745 -66.802893 -10.661596 -0.162433 -333.390114 ... 2.159 0.0 79.0 79.0 615_0001 0.664110 0.172 83.5425 tr882 True
tr884 311 83.815141 14.876922 -252.811266 -0.000005 -7.943737 -17.053695 -61.156397 -6.506653 -2.013289 -248.792978 ... 11.796 0.0 71.0 71.0 311_0001 0.583292 0.255 75.7050 tr884 True
tr885 29 146.749889 34.237838 -552.666635 -0.000000 -9.656559 -59.355977 -94.896665 -35.226312 0.549111 -570.265310 ... 9.207 0.0 114.0 114.0 29_0001 0.791411 0.215 89.6950 tr885 True
tr891 464 169.476586 28.975835 -465.145766 -30.074344 -27.503363 -40.552514 -102.326163 -0.534695 -6.661976 -474.346401 ... 24.073 0.0 119.0 119.0 464_0001 0.775169 0.146 88.3925 tr891 True
tr894 1812 75.474129 9.466150 -243.814855 -0.633749 -8.869247 -16.743827 -46.104296 -7.783449 -1.210139 -240.219284 ... 3.980 0.0 54.0 54.0 1812_0001 0.682410 0.147 82.8700 tr894 True
tr895 2061 172.057051 28.757937 -511.371603 -3.248566 -4.751365 -49.675047 -98.469276 -24.394026 3.530895 -487.564001 ... 24.794 0.0 120.0 120.0 2061_0001 0.554760 0.349 69.3775 tr895 True
tr896 547 146.683743 23.363011 -294.888843 -8.385013 -8.905919 -23.198019 -71.747336 0.313481 -2.523215 -239.288109 ... 30.301 0.0 86.0 86.0 547_0001 0.351371 0.789 56.9750 tr896 True
tr898 1169 150.758314 29.731386 -471.407546 -0.000000 -0.490920 -28.891884 -93.246571 -26.458079 -2.936550 -442.941850 ... 10.340 0.0 106.0 106.0 1169_0001 0.253212 1.295 32.7825 tr898 True
tr921 1620 227.200766 28.401003 -497.820830 -40.131447 -32.298104 -61.746688 -117.586230 -0.000363 -0.876987 -494.858879 ... 45.640 0.0 138.0 138.0 1620_0001 0.524412 0.328 68.4750 tr921 True
tr922 1422 107.137908 16.727546 -275.373612 -0.000002 -3.235502 -11.896563 -59.856031 -12.460218 1.815983 -237.140491 ... 23.892 0.0 74.0 74.0 1422_0001 0.761571 0.170 86.8275 tr922 True
tr947 586 269.960826 100.040947 -676.315012 -18.358758 -20.977804 -51.878420 -146.349827 -8.587827 -6.983271 -559.449146 ... 47.231 0.0 175.0 175.0 586_0001 0.446703 0.756 65.1400 tr947 True
tr948 306 241.827185 92.380888 -739.581415 -0.000000 -5.784613 -56.847718 -134.868624 -56.133929 -1.017087 -660.025314 ... 40.936 0.0 161.0 161.0 306_0001 0.549579 0.531 70.8100 tr948 True

21 rows × 37 columns


In [111]:
data.shape


Out[111]:
(47077, 36)

In [112]:
data.columns


Out[112]:
Index(['Chain', 'Chi', 'Rama', 'DSSP', 'P_AP', 'Water', 'Burial', 'Helix',
       'Electro', 'VTotal', 'score', 'fa_atr', 'fa_rep', 'fa_sol',
       'fa_intra_rep', 'fa_intra_sol_xover4', 'lk_ball_wtd', 'fa_elec',
       'pro_close', 'hbond_sr_bb', 'hbond_lr_bb', 'hbond_bb_sc', 'hbond_sc',
       'omega', 'fa_dun', 'p_aa_pp', 'ref', 'rama_prepro', 'allatom_rms',
       'maxsub', 'maxsub2.0', 'description', 'Q', 'RMSD', 'GDT', 'Protein'],
      dtype='object')

In [113]:
drop_col


Out[113]:
['Step',
 'Shake',
 'Excluded',
 'AMH-Go',
 'Frag_Mem',
 'Vec_FM',
 'Membrane',
 'SSB',
 'QGO',
 'SCORE:',
 'dslf_fa13',
 'yhh_planarity',
 'gdtmm',
 'gdtmm1_1',
 'gdtmm2_2',
 'gdtmm3_3',
 'gdtmm4_3',
 'gdtmm7_4',
 'irms',
 'rms']

In [116]:
data["description"].unique()


Out[116]:
array(['0_0001', '1_0001', '2_0001', ..., '2502_0001', '2503_0001',
       '2504_0001'], dtype=object)

In [82]:
rosetta.columns


Out[82]:
Index(['SCORE:', 'score', 'fa_atr', 'fa_rep', 'fa_sol', 'fa_intra_rep',
       'fa_intra_sol_xover4', 'lk_ball_wtd', 'fa_elec', 'pro_close',
       'hbond_sr_bb', 'hbond_lr_bb', 'hbond_bb_sc', 'hbond_sc', 'dslf_fa13',
       'omega', 'fa_dun', 'p_aa_pp', 'yhh_planarity', 'ref', 'rama_prepro',
       'allatom_rms', 'gdtmm', 'gdtmm1_1', 'gdtmm2_2', 'gdtmm3_3', 'gdtmm4_3',
       'gdtmm7_4', 'irms', 'maxsub', 'maxsub2.0', 'rms', 'description'],
      dtype='object')

In [83]:
rosetta.drop("maxsub", axis=1)


Out[83]:
SCORE: score fa_atr fa_rep fa_sol fa_intra_rep fa_intra_sol_xover4 lk_ball_wtd fa_elec pro_close ... gdtmm gdtmm1_1 gdtmm2_2 gdtmm3_3 gdtmm4_3 gdtmm7_4 irms maxsub2.0 rms description
0 SCORE: 48.568 -548.005 83.803 366.540 1.447 21.352 -14.902 -121.662 4.589 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 0_0001
1 SCORE: 105.742 -499.091 73.443 354.679 1.441 21.727 -12.307 -122.473 17.150 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 1_0001
2 SCORE: 128.475 -514.485 73.683 345.232 1.356 23.129 -14.297 -115.764 15.797 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2_0001
3 SCORE: 153.287 -520.665 66.100 352.879 1.368 23.227 -13.489 -116.879 9.704 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 3_0001
4 SCORE: 130.432 -518.515 62.837 360.242 1.414 22.045 -16.393 -115.407 3.600 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 4_0001
5 SCORE: 162.817 -501.489 44.380 341.114 1.443 23.839 -16.331 -115.550 16.551 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 5_0001
6 SCORE: 178.254 -503.541 52.174 348.912 1.441 23.652 -13.313 -112.845 63.561 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 6_0001
7 SCORE: 152.325 -518.002 59.843 346.831 1.425 23.136 -12.448 -115.313 13.194 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 7_0001
8 SCORE: 242.533 -511.413 157.009 354.750 1.487 23.550 -15.617 -111.879 3.815 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 8_0001
9 SCORE: 256.471 -488.543 191.608 332.774 1.443 23.170 -15.242 -102.333 12.342 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 9_0001
10 SCORE: 635.922 -493.408 542.206 343.996 1.448 23.957 -14.935 -117.774 32.872 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 10_0001
11 SCORE: 118.698 -524.962 67.647 365.206 1.492 24.338 -12.665 -123.983 19.740 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 11_0001
12 SCORE: 123.510 -509.417 51.885 362.702 1.430 24.748 -14.024 -122.836 15.591 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 12_0001
13 SCORE: 111.508 -517.019 54.871 364.282 1.409 23.946 -13.460 -120.072 21.853 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 13_0001
14 SCORE: 103.111 -504.414 59.238 368.084 1.444 23.384 -11.535 -129.807 4.864 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 14_0001
15 SCORE: 485.514 -531.906 453.579 363.971 1.470 24.053 -15.419 -128.919 21.991 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 15_0001
16 SCORE: 126.624 -525.340 57.554 374.892 1.456 24.229 -12.806 -122.209 11.320 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 16_0001
17 SCORE: 125.148 -519.086 61.159 374.772 1.378 23.997 -11.730 -127.305 21.136 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 17_0001
18 SCORE: 353.702 -520.555 268.122 374.854 1.450 25.812 -13.194 -129.027 14.087 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 18_0001
19 SCORE: 146.548 -518.801 62.060 363.510 1.444 24.585 -14.534 -131.286 29.164 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 19_0001
20 SCORE: 141.805 -507.389 49.841 369.784 1.359 25.043 -10.061 -128.693 12.521 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 20_0001
21 SCORE: 167.089 -504.258 47.720 355.108 1.424 25.986 -14.133 -116.082 79.885 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 21_0001
22 SCORE: 192.973 -502.515 81.159 359.069 1.524 25.603 -12.032 -129.537 7.171 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 22_0001
23 SCORE: 161.676 -516.132 66.329 368.636 1.461 25.834 -13.355 -130.843 2.868 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 23_0001
24 SCORE: 91.025 -509.672 47.408 355.504 1.395 25.056 -11.709 -120.434 1.751 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 24_0001
25 SCORE: 166.387 -502.012 54.486 357.130 1.420 25.039 -12.424 -121.578 39.372 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 25_0001
26 SCORE: 157.005 -503.901 61.490 357.611 1.365 24.427 -14.566 -118.520 54.332 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 26_0001
27 SCORE: 159.158 -504.448 58.381 358.706 1.466 23.570 -15.369 -123.460 51.229 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 27_0001
28 SCORE: 133.615 -512.803 67.335 357.211 1.482 25.271 -11.800 -127.116 45.132 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 28_0001
29 SCORE: 127.569 -514.512 47.931 367.828 1.461 24.053 -12.028 -126.540 43.516 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 29_0001
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2475 SCORE: 85.804 -551.550 72.603 359.781 1.409 25.397 -11.418 -136.106 12.527 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2475_0001
2476 SCORE: 142.213 -538.429 115.192 352.818 1.382 25.932 -14.704 -123.871 12.836 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2476_0001
2477 SCORE: 101.940 -538.877 72.235 360.509 1.482 23.152 -11.743 -139.460 9.592 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2477_0001
2478 SCORE: 312.942 -537.315 261.867 361.187 1.407 25.067 -10.605 -136.468 22.073 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2478_0001
2479 SCORE: 147.508 -540.631 82.168 360.548 1.470 24.733 -11.341 -134.282 68.954 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2479_0001
2480 SCORE: 559.718 -555.299 532.788 360.213 1.404 24.896 -13.264 -135.365 4.017 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2480_0001
2481 SCORE: 175.366 -547.895 139.958 361.154 1.461 25.118 -8.969 -136.934 10.389 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2481_0001
2482 SCORE: 141.341 -539.494 92.662 357.826 1.406 24.298 -13.873 -128.856 27.064 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2482_0001
2483 SCORE: 119.671 -541.627 70.104 359.500 1.449 23.136 -13.798 -127.914 41.008 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2483_0001
2484 SCORE: 314.686 -543.960 287.066 355.701 1.353 22.343 -11.662 -129.248 5.378 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2484_0001
2485 SCORE: 163.215 -534.009 153.664 356.171 1.428 24.741 -11.607 -132.387 3.010 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2485_0001
2486 SCORE: 148.744 -531.905 93.157 353.798 1.483 24.290 -13.622 -126.148 21.323 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2486_0001
2487 SCORE: 185.999 -534.669 91.416 351.733 1.420 22.618 -15.900 -127.739 15.070 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2487_0001
2488 SCORE: 267.173 -553.870 208.331 359.484 1.454 23.259 -14.744 -132.425 22.821 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2488_0001
2489 SCORE: 380.631 -536.439 341.713 355.433 1.432 23.625 -14.028 -125.152 8.292 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2489_0001
2490 SCORE: 173.626 -549.390 142.400 353.335 1.422 23.399 -12.059 -133.799 26.884 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2490_0001
2491 SCORE: 550.866 -553.855 521.560 359.070 1.442 23.253 -13.752 -124.691 17.765 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2491_0001
2492 SCORE: 228.330 -543.184 213.468 349.507 1.440 24.085 -11.584 -126.901 16.739 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2492_0001
2493 SCORE: 119.278 -541.553 107.349 357.680 1.387 23.405 -11.971 -132.879 5.973 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2493_0001
2494 SCORE: 124.947 -541.117 74.686 350.348 1.473 23.937 -14.763 -133.767 3.150 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2494_0001
2495 SCORE: 129.370 -540.940 139.054 349.906 1.471 23.007 -13.260 -134.282 6.568 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2495_0001
2496 SCORE: 191.824 -557.339 160.014 355.717 1.438 22.532 -12.825 -133.703 26.614 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2496_0001
2497 SCORE: 459.669 -531.770 391.147 343.837 1.384 22.096 -13.611 -118.787 31.368 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2497_0001
2498 SCORE: 161.002 -538.668 89.790 348.134 1.408 21.511 -11.069 -128.865 31.531 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2498_0001
2499 SCORE: 250.513 -543.590 255.802 344.227 1.411 23.230 -12.054 -118.598 3.513 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2499_0001
2500 SCORE: 70.852 -546.891 91.731 353.905 1.431 22.650 -14.120 -123.704 0.990 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2500_0001
2501 SCORE: 119.503 -525.514 71.511 347.320 1.418 22.472 -13.945 -126.748 20.205 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2501_0001
2502 SCORE: 143.897 -523.222 78.136 336.240 1.504 22.708 -17.266 -110.957 13.825 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2502_0001
2503 SCORE: 169.693 -532.175 95.949 352.750 1.501 22.716 -9.544 -123.742 2.407 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2503_0001
2504 SCORE: 266.378 -518.177 195.865 346.735 1.380 24.417 -12.360 -118.735 17.453 ... 1.0 1.0 1.0 1.0 1.0 1.0 0.0 106.0 0.0 2504_0001

2505 rows × 32 columns


In [78]:



Out[78]:
Step Chain Shake Chi Rama Excluded DSSP P_AP Water Burial ... gdtmm1_1 gdtmm2_2 gdtmm3_3 gdtmm4_3 gdtmm7_4 irms maxsub maxsub2.0 rms description
0 0 120.305046 0.0 25.030902 -505.107887 0.0 -0.000000 -0.107303 -34.178516 -92.446636 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 0_0001
1 0 147.155070 0.0 22.444486 -491.517963 0.0 -0.000000 -0.000295 -26.463080 -92.809915 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 1_0001
2 0 140.378840 0.0 25.081482 -493.970385 0.0 -0.000000 -0.417929 -26.765382 -92.506750 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2_0001
3 0 134.625889 0.0 30.279010 -501.437458 0.0 -0.000000 -0.520035 -26.874046 -93.514969 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 3_0001
4 0 151.053641 0.0 36.293730 -497.221005 0.0 -0.000000 -0.682022 -20.044147 -90.688331 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 4_0001
5 0 140.414193 0.0 23.658541 -484.762758 0.0 -0.000000 -0.626386 -24.541766 -92.988631 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 5_0001
6 0 159.340721 0.0 28.608793 -483.378835 0.0 -0.000000 -0.441569 -20.504922 -92.248760 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 6_0001
7 0 147.872491 0.0 32.334709 -489.868776 0.0 -0.000000 -0.909066 -26.898794 -92.782469 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 7_0001
8 0 146.236786 0.0 27.978264 -477.439784 0.0 -0.000000 -0.870771 -22.715833 -91.274724 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 8_0001
9 0 143.034875 0.0 41.921821 -478.790702 0.0 -0.000000 -0.893802 -23.115023 -91.586434 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 9_0001
10 0 135.213040 0.0 27.732520 -480.987290 0.0 -0.000000 -1.028209 -23.427637 -92.011519 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 10_0001
11 0 153.232782 0.0 35.476262 -493.140806 0.0 -0.000000 -1.182479 -21.911948 -90.307486 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 11_0001
12 0 150.276648 0.0 32.635979 -483.119053 0.0 -0.000000 -1.393953 -20.819231 -92.631930 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 12_0001
13 0 143.384450 0.0 28.317570 -501.934539 0.0 -0.000000 -0.791625 -27.083677 -93.122703 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 13_0001
14 0 135.644470 0.0 27.406200 -504.506680 0.0 -0.000000 -0.528258 -23.223932 -92.482201 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 14_0001
15 0 143.989256 0.0 26.933794 -500.357215 0.0 -0.000000 -0.496883 -24.541615 -93.506283 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 15_0001
16 0 164.525371 0.0 30.930790 -496.284421 0.0 -0.000000 -0.679354 -22.719010 -92.583999 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 16_0001
17 0 137.528899 0.0 38.185541 -488.721707 0.0 -0.000000 -0.508331 -19.814349 -93.350337 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 17_0001
18 0 143.958009 0.0 28.877087 -488.371979 0.0 -0.000000 -0.676125 -23.206006 -92.723647 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 18_0001
19 0 141.386863 0.0 26.174881 -493.309733 0.0 -0.000000 -0.712303 -24.678066 -93.300807 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 19_0001
20 0 133.635051 0.0 33.961600 -504.836761 0.0 -0.000000 -0.551922 -24.156508 -91.949129 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 20_0001
21 0 143.694197 0.0 34.527464 -485.833398 0.0 -0.000000 -0.870527 -23.981202 -91.800708 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 21_0001
22 0 140.442942 0.0 26.590152 -498.494194 0.0 -0.000000 -0.503264 -23.382847 -92.830819 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 22_0001
23 0 144.453285 0.0 33.293267 -497.078075 0.0 -0.000000 -0.522480 -23.797127 -91.605937 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 23_0001
24 0 140.248439 0.0 31.324129 -498.777073 0.0 -0.000000 -0.700141 -19.824562 -91.957134 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 24_0001
25 0 160.716713 0.0 38.806819 -496.444635 0.0 -0.000000 -0.526379 -24.207083 -92.012533 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 25_0001
26 0 123.610173 0.0 34.483728 -488.702662 0.0 -0.000000 -0.630171 -21.838544 -92.192668 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 26_0001
27 0 139.025104 0.0 31.753790 -477.896577 0.0 -0.000000 -0.507182 -24.233222 -91.429011 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 27_0001
28 0 146.414796 0.0 38.993244 -485.896979 0.0 -0.000000 -0.232584 -25.412671 -92.016919 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 28_0001
29 0 134.597668 0.0 23.498079 -481.905364 0.0 -0.000000 -0.513820 -23.836249 -91.356219 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 29_0001
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2475 0 143.500410 0.0 27.527275 -461.232850 0.0 -0.000000 -0.496328 -31.392281 -91.969864 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2475_0001
2476 0 141.044413 0.0 25.767333 -495.185025 0.0 -0.000000 -0.572001 -31.699846 -90.647001 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2476_0001
2477 0 151.574700 0.0 38.862934 -473.380933 0.0 -0.000000 -0.595049 -29.363491 -92.076271 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2477_0001
2478 0 156.121112 0.0 34.514658 -473.462135 0.0 -0.000000 -0.756229 -27.299196 -90.498805 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2478_0001
2479 0 138.725212 0.0 33.802057 -494.231397 0.0 -0.000000 -0.651890 -31.802798 -91.415721 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2479_0001
2480 0 141.812658 0.0 37.573935 -498.823848 0.0 -0.000000 -0.766810 -35.878406 -92.404477 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2480_0001
2481 0 153.071895 0.0 43.257861 -504.921250 0.0 -0.000000 -1.004538 -31.014675 -91.588969 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2481_0001
2482 0 144.902039 0.0 37.378434 -490.106863 0.0 -0.000000 -0.569492 -30.155383 -91.915139 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2482_0001
2483 0 121.821980 0.0 30.566567 -496.109948 0.0 -0.000000 -0.562667 -33.384204 -91.474754 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2483_0001
2484 0 163.801342 0.0 32.693539 -450.358949 0.0 -0.000000 -0.681815 -32.612401 -91.524081 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2484_0001
2485 0 142.069217 0.0 41.533407 -477.193173 0.0 -0.000000 -0.401418 -31.187313 -93.230361 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2485_0001
2486 0 153.181285 0.0 28.735831 -470.896436 0.0 -0.000000 -0.558581 -30.446182 -91.919048 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2486_0001
2487 0 145.294538 0.0 32.248756 -474.456634 0.0 -0.000000 -0.483374 -34.891455 -91.478248 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2487_0001
2488 0 163.341806 0.0 38.777808 -481.621529 0.0 -0.000005 -0.614666 -32.676692 -91.379758 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2488_0001
2489 0 147.871144 0.0 29.919083 -461.272216 0.0 -0.000000 -0.479011 -29.467632 -92.693253 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2489_0001
2490 0 147.171435 0.0 30.338722 -470.210311 0.0 -0.000000 -0.624036 -28.305416 -92.226263 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2490_0001
2491 0 124.834730 0.0 29.167205 -469.144052 0.0 -0.000000 -0.407078 -29.854115 -91.960722 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2491_0001
2492 0 158.892175 0.0 32.055676 -484.835729 0.0 -0.000000 -1.177468 -31.819434 -92.945373 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2492_0001
2493 0 161.183421 0.0 39.576143 -473.574855 0.0 -0.000000 -0.838813 -32.279055 -91.726346 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2493_0001
2494 0 156.579598 0.0 34.052063 -483.193473 0.0 -0.000000 -0.611950 -30.753871 -90.980360 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2494_0001
2495 0 154.616061 0.0 36.222479 -481.989325 0.0 -0.000000 -0.388308 -28.731607 -93.913854 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2495_0001
2496 0 157.757965 0.0 36.211323 -496.086541 0.0 -0.000000 -0.888573 -33.491814 -92.682359 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2496_0001
2497 0 152.590300 0.0 38.253588 -492.572440 0.0 -0.000000 -0.464048 -34.674987 -92.580828 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2497_0001
2498 0 160.450183 0.0 47.678975 -495.773880 0.0 -0.000000 -0.561333 -32.541248 -91.723372 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2498_0001
2499 0 135.133600 0.0 32.539568 -475.476939 0.0 -0.000000 -0.641543 -29.339899 -92.026443 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2499_0001
2500 0 131.744406 0.0 31.655785 -498.526057 0.0 -0.000000 -0.532125 -32.711950 -92.320811 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2500_0001
2501 0 154.433789 0.0 35.124787 -497.932267 0.0 -0.000000 -0.632271 -29.144837 -90.975083 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2501_0001
2502 0 130.618464 0.0 27.893502 -469.603429 0.0 -0.000000 -0.448264 -33.330741 -92.033359 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2502_0001
2503 0 163.449228 0.0 32.476593 -487.777045 0.0 -0.000000 -0.361438 -36.387904 -92.240249 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2503_0001
2504 0 174.165986 0.0 34.131403 -482.564129 0.0 -0.000000 -0.362679 -30.477296 -91.326050 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2504_0001

2505 rows × 52 columns


In [73]:
rosetta


Out[73]:
SCORE: score fa_atr fa_rep fa_sol fa_intra_rep fa_intra_sol_xover4 lk_ball_wtd fa_elec pro_close ... gdtmm1_1 gdtmm2_2 gdtmm3_3 gdtmm4_3 gdtmm7_4 irms maxsub maxsub2.0 rms description
0 SCORE: 48.568 -548.005 83.803 366.540 1.447 21.352 -14.902 -121.662 4.589 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 0_0001
1 SCORE: 105.742 -499.091 73.443 354.679 1.441 21.727 -12.307 -122.473 17.150 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 1_0001
2 SCORE: 128.475 -514.485 73.683 345.232 1.356 23.129 -14.297 -115.764 15.797 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2_0001
3 SCORE: 153.287 -520.665 66.100 352.879 1.368 23.227 -13.489 -116.879 9.704 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 3_0001
4 SCORE: 130.432 -518.515 62.837 360.242 1.414 22.045 -16.393 -115.407 3.600 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 4_0001
5 SCORE: 162.817 -501.489 44.380 341.114 1.443 23.839 -16.331 -115.550 16.551 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 5_0001
6 SCORE: 178.254 -503.541 52.174 348.912 1.441 23.652 -13.313 -112.845 63.561 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 6_0001
7 SCORE: 152.325 -518.002 59.843 346.831 1.425 23.136 -12.448 -115.313 13.194 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 7_0001
8 SCORE: 242.533 -511.413 157.009 354.750 1.487 23.550 -15.617 -111.879 3.815 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 8_0001
9 SCORE: 256.471 -488.543 191.608 332.774 1.443 23.170 -15.242 -102.333 12.342 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 9_0001
10 SCORE: 635.922 -493.408 542.206 343.996 1.448 23.957 -14.935 -117.774 32.872 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 10_0001
11 SCORE: 118.698 -524.962 67.647 365.206 1.492 24.338 -12.665 -123.983 19.740 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 11_0001
12 SCORE: 123.510 -509.417 51.885 362.702 1.430 24.748 -14.024 -122.836 15.591 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 12_0001
13 SCORE: 111.508 -517.019 54.871 364.282 1.409 23.946 -13.460 -120.072 21.853 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 13_0001
14 SCORE: 103.111 -504.414 59.238 368.084 1.444 23.384 -11.535 -129.807 4.864 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 14_0001
15 SCORE: 485.514 -531.906 453.579 363.971 1.470 24.053 -15.419 -128.919 21.991 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 15_0001
16 SCORE: 126.624 -525.340 57.554 374.892 1.456 24.229 -12.806 -122.209 11.320 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 16_0001
17 SCORE: 125.148 -519.086 61.159 374.772 1.378 23.997 -11.730 -127.305 21.136 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 17_0001
18 SCORE: 353.702 -520.555 268.122 374.854 1.450 25.812 -13.194 -129.027 14.087 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 18_0001
19 SCORE: 146.548 -518.801 62.060 363.510 1.444 24.585 -14.534 -131.286 29.164 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 19_0001
20 SCORE: 141.805 -507.389 49.841 369.784 1.359 25.043 -10.061 -128.693 12.521 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 20_0001
21 SCORE: 167.089 -504.258 47.720 355.108 1.424 25.986 -14.133 -116.082 79.885 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 21_0001
22 SCORE: 192.973 -502.515 81.159 359.069 1.524 25.603 -12.032 -129.537 7.171 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 22_0001
23 SCORE: 161.676 -516.132 66.329 368.636 1.461 25.834 -13.355 -130.843 2.868 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 23_0001
24 SCORE: 91.025 -509.672 47.408 355.504 1.395 25.056 -11.709 -120.434 1.751 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 24_0001
25 SCORE: 166.387 -502.012 54.486 357.130 1.420 25.039 -12.424 -121.578 39.372 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 25_0001
26 SCORE: 157.005 -503.901 61.490 357.611 1.365 24.427 -14.566 -118.520 54.332 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 26_0001
27 SCORE: 159.158 -504.448 58.381 358.706 1.466 23.570 -15.369 -123.460 51.229 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 27_0001
28 SCORE: 133.615 -512.803 67.335 357.211 1.482 25.271 -11.800 -127.116 45.132 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 28_0001
29 SCORE: 127.569 -514.512 47.931 367.828 1.461 24.053 -12.028 -126.540 43.516 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 29_0001
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2475 SCORE: 85.804 -551.550 72.603 359.781 1.409 25.397 -11.418 -136.106 12.527 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2475_0001
2476 SCORE: 142.213 -538.429 115.192 352.818 1.382 25.932 -14.704 -123.871 12.836 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2476_0001
2477 SCORE: 101.940 -538.877 72.235 360.509 1.482 23.152 -11.743 -139.460 9.592 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2477_0001
2478 SCORE: 312.942 -537.315 261.867 361.187 1.407 25.067 -10.605 -136.468 22.073 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2478_0001
2479 SCORE: 147.508 -540.631 82.168 360.548 1.470 24.733 -11.341 -134.282 68.954 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2479_0001
2480 SCORE: 559.718 -555.299 532.788 360.213 1.404 24.896 -13.264 -135.365 4.017 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2480_0001
2481 SCORE: 175.366 -547.895 139.958 361.154 1.461 25.118 -8.969 -136.934 10.389 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2481_0001
2482 SCORE: 141.341 -539.494 92.662 357.826 1.406 24.298 -13.873 -128.856 27.064 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2482_0001
2483 SCORE: 119.671 -541.627 70.104 359.500 1.449 23.136 -13.798 -127.914 41.008 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2483_0001
2484 SCORE: 314.686 -543.960 287.066 355.701 1.353 22.343 -11.662 -129.248 5.378 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2484_0001
2485 SCORE: 163.215 -534.009 153.664 356.171 1.428 24.741 -11.607 -132.387 3.010 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2485_0001
2486 SCORE: 148.744 -531.905 93.157 353.798 1.483 24.290 -13.622 -126.148 21.323 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2486_0001
2487 SCORE: 185.999 -534.669 91.416 351.733 1.420 22.618 -15.900 -127.739 15.070 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2487_0001
2488 SCORE: 267.173 -553.870 208.331 359.484 1.454 23.259 -14.744 -132.425 22.821 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2488_0001
2489 SCORE: 380.631 -536.439 341.713 355.433 1.432 23.625 -14.028 -125.152 8.292 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2489_0001
2490 SCORE: 173.626 -549.390 142.400 353.335 1.422 23.399 -12.059 -133.799 26.884 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2490_0001
2491 SCORE: 550.866 -553.855 521.560 359.070 1.442 23.253 -13.752 -124.691 17.765 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2491_0001
2492 SCORE: 228.330 -543.184 213.468 349.507 1.440 24.085 -11.584 -126.901 16.739 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2492_0001
2493 SCORE: 119.278 -541.553 107.349 357.680 1.387 23.405 -11.971 -132.879 5.973 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2493_0001
2494 SCORE: 124.947 -541.117 74.686 350.348 1.473 23.937 -14.763 -133.767 3.150 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2494_0001
2495 SCORE: 129.370 -540.940 139.054 349.906 1.471 23.007 -13.260 -134.282 6.568 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2495_0001
2496 SCORE: 191.824 -557.339 160.014 355.717 1.438 22.532 -12.825 -133.703 26.614 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2496_0001
2497 SCORE: 459.669 -531.770 391.147 343.837 1.384 22.096 -13.611 -118.787 31.368 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2497_0001
2498 SCORE: 161.002 -538.668 89.790 348.134 1.408 21.511 -11.069 -128.865 31.531 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2498_0001
2499 SCORE: 250.513 -543.590 255.802 344.227 1.411 23.230 -12.054 -118.598 3.513 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2499_0001
2500 SCORE: 70.852 -546.891 91.731 353.905 1.431 22.650 -14.120 -123.704 0.990 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2500_0001
2501 SCORE: 119.503 -525.514 71.511 347.320 1.418 22.472 -13.945 -126.748 20.205 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2501_0001
2502 SCORE: 143.897 -523.222 78.136 336.240 1.504 22.708 -17.266 -110.957 13.825 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2502_0001
2503 SCORE: 169.693 -532.175 95.949 352.750 1.501 22.716 -9.544 -123.742 2.407 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2503_0001
2504 SCORE: 266.378 -518.177 195.865 346.735 1.380 24.417 -12.360 -118.735 17.453 ... 1.0 1.0 1.0 1.0 1.0 0.0 106.0 106.0 0.0 2504_0001

2505 rows × 33 columns


In [72]:
awsem


Out[72]:
Step Chain Shake Chi Rama Excluded DSSP P_AP Water Burial Helix AMH-Go Frag_Mem Vec_FM Membrane SSB Electro QGO VTotal
0 0 120.305046 0.0 25.030902 -505.107887 0.0 -0.000000 -0.107303 -34.178516 -92.446636 -25.172317 0.0 0.0 0.0 0.0 0.0 -2.622472 0.0 -514.299186
1 0 147.155070 0.0 22.444486 -491.517963 0.0 -0.000000 -0.000295 -26.463080 -92.809915 -17.591866 0.0 0.0 0.0 0.0 0.0 -2.362889 0.0 -461.146453
2 0 140.378840 0.0 25.081482 -493.970385 0.0 -0.000000 -0.417929 -26.765382 -92.506750 -21.973372 0.0 0.0 0.0 0.0 0.0 -2.641619 0.0 -472.815114
3 0 134.625889 0.0 30.279010 -501.437458 0.0 -0.000000 -0.520035 -26.874046 -93.514969 -20.440403 0.0 0.0 0.0 0.0 0.0 -2.859405 0.0 -480.741417
4 0 151.053641 0.0 36.293730 -497.221005 0.0 -0.000000 -0.682022 -20.044147 -90.688331 -17.551906 0.0 0.0 0.0 0.0 0.0 -2.778603 0.0 -441.618644
5 0 140.414193 0.0 23.658541 -484.762758 0.0 -0.000000 -0.626386 -24.541766 -92.988631 -21.578898 0.0 0.0 0.0 0.0 0.0 -2.244195 0.0 -462.669900
6 0 159.340721 0.0 28.608793 -483.378835 0.0 -0.000000 -0.441569 -20.504922 -92.248760 -20.652650 0.0 0.0 0.0 0.0 0.0 -2.619187 0.0 -431.896409
7 0 147.872491 0.0 32.334709 -489.868776 0.0 -0.000000 -0.909066 -26.898794 -92.782469 -27.623052 0.0 0.0 0.0 0.0 0.0 -2.464644 0.0 -460.339601
8 0 146.236786 0.0 27.978264 -477.439784 0.0 -0.000000 -0.870771 -22.715833 -91.274724 -21.917459 0.0 0.0 0.0 0.0 0.0 -2.583176 0.0 -442.586697
9 0 143.034875 0.0 41.921821 -478.790702 0.0 -0.000000 -0.893802 -23.115023 -91.586434 -23.372143 0.0 0.0 0.0 0.0 0.0 -2.408531 0.0 -435.209938
10 0 135.213040 0.0 27.732520 -480.987290 0.0 -0.000000 -1.028209 -23.427637 -92.011519 -24.904458 0.0 0.0 0.0 0.0 0.0 -2.529449 0.0 -461.943003
11 0 153.232782 0.0 35.476262 -493.140806 0.0 -0.000000 -1.182479 -21.911948 -90.307486 -20.972645 0.0 0.0 0.0 0.0 0.0 -2.752883 0.0 -441.559203
12 0 150.276648 0.0 32.635979 -483.119053 0.0 -0.000000 -1.393953 -20.819231 -92.631930 -19.487376 0.0 0.0 0.0 0.0 0.0 -2.210180 0.0 -436.749096
13 0 143.384450 0.0 28.317570 -501.934539 0.0 -0.000000 -0.791625 -27.083677 -93.122703 -21.525666 0.0 0.0 0.0 0.0 0.0 -2.283150 0.0 -475.039340
14 0 135.644470 0.0 27.406200 -504.506680 0.0 -0.000000 -0.528258 -23.223932 -92.482201 -25.775818 0.0 0.0 0.0 0.0 0.0 -2.416920 0.0 -485.883138
15 0 143.989256 0.0 26.933794 -500.357215 0.0 -0.000000 -0.496883 -24.541615 -93.506283 -16.559409 0.0 0.0 0.0 0.0 0.0 -2.671673 0.0 -467.210028
16 0 164.525371 0.0 30.930790 -496.284421 0.0 -0.000000 -0.679354 -22.719010 -92.583999 -24.173413 0.0 0.0 0.0 0.0 0.0 -2.312121 0.0 -443.296157
17 0 137.528899 0.0 38.185541 -488.721707 0.0 -0.000000 -0.508331 -19.814349 -93.350337 -24.592710 0.0 0.0 0.0 0.0 0.0 -2.348784 0.0 -453.621778
18 0 143.958009 0.0 28.877087 -488.371979 0.0 -0.000000 -0.676125 -23.206006 -92.723647 -25.412838 0.0 0.0 0.0 0.0 0.0 -2.373214 0.0 -459.928712
19 0 141.386863 0.0 26.174881 -493.309733 0.0 -0.000000 -0.712303 -24.678066 -93.300807 -23.259784 0.0 0.0 0.0 0.0 0.0 -2.225128 0.0 -469.924076
20 0 133.635051 0.0 33.961600 -504.836761 0.0 -0.000000 -0.551922 -24.156508 -91.949129 -20.199186 0.0 0.0 0.0 0.0 0.0 -2.307536 0.0 -476.404390
21 0 143.694197 0.0 34.527464 -485.833398 0.0 -0.000000 -0.870527 -23.981202 -91.800708 -20.188484 0.0 0.0 0.0 0.0 0.0 -2.318028 0.0 -446.770685
22 0 140.442942 0.0 26.590152 -498.494194 0.0 -0.000000 -0.503264 -23.382847 -92.830819 -24.254626 0.0 0.0 0.0 0.0 0.0 -2.329103 0.0 -474.761760
23 0 144.453285 0.0 33.293267 -497.078075 0.0 -0.000000 -0.522480 -23.797127 -91.605937 -20.968929 0.0 0.0 0.0 0.0 0.0 -2.302041 0.0 -458.528037
24 0 140.248439 0.0 31.324129 -498.777073 0.0 -0.000000 -0.700141 -19.824562 -91.957134 -24.763273 0.0 0.0 0.0 0.0 0.0 -2.497942 0.0 -466.947556
25 0 160.716713 0.0 38.806819 -496.444635 0.0 -0.000000 -0.526379 -24.207083 -92.012533 -18.374025 0.0 0.0 0.0 0.0 0.0 -2.487256 0.0 -434.528379
26 0 123.610173 0.0 34.483728 -488.702662 0.0 -0.000000 -0.630171 -21.838544 -92.192668 -19.826124 0.0 0.0 0.0 0.0 0.0 -2.716325 0.0 -467.812593
27 0 139.025104 0.0 31.753790 -477.896577 0.0 -0.000000 -0.507182 -24.233222 -91.429011 -21.441210 0.0 0.0 0.0 0.0 0.0 -2.634183 0.0 -447.362493
28 0 146.414796 0.0 38.993244 -485.896979 0.0 -0.000000 -0.232584 -25.412671 -92.016919 -22.743471 0.0 0.0 0.0 0.0 0.0 -2.851942 0.0 -443.746526
29 0 134.597668 0.0 23.498079 -481.905364 0.0 -0.000000 -0.513820 -23.836249 -91.356219 -17.669486 0.0 0.0 0.0 0.0 0.0 -2.602972 0.0 -459.788364
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2475 0 143.500410 0.0 27.527275 -461.232850 0.0 -0.000000 -0.496328 -31.392281 -91.969864 -19.678380 0.0 0.0 0.0 0.0 0.0 -3.328134 0.0 -437.070151
2476 0 141.044413 0.0 25.767333 -495.185025 0.0 -0.000000 -0.572001 -31.699846 -90.647001 -21.184993 0.0 0.0 0.0 0.0 0.0 -3.556516 0.0 -476.033636
2477 0 151.574700 0.0 38.862934 -473.380933 0.0 -0.000000 -0.595049 -29.363491 -92.076271 -23.862991 0.0 0.0 0.0 0.0 0.0 -3.561913 0.0 -432.403014
2478 0 156.121112 0.0 34.514658 -473.462135 0.0 -0.000000 -0.756229 -27.299196 -90.498805 -21.250488 0.0 0.0 0.0 0.0 0.0 -3.134541 0.0 -425.765624
2479 0 138.725212 0.0 33.802057 -494.231397 0.0 -0.000000 -0.651890 -31.802798 -91.415721 -23.527033 0.0 0.0 0.0 0.0 0.0 -3.600324 0.0 -472.701894
2480 0 141.812658 0.0 37.573935 -498.823848 0.0 -0.000000 -0.766810 -35.878406 -92.404477 -21.610192 0.0 0.0 0.0 0.0 0.0 -3.457086 0.0 -473.554227
2481 0 153.071895 0.0 43.257861 -504.921250 0.0 -0.000000 -1.004538 -31.014675 -91.588969 -19.885654 0.0 0.0 0.0 0.0 0.0 -3.282370 0.0 -455.367700
2482 0 144.902039 0.0 37.378434 -490.106863 0.0 -0.000000 -0.569492 -30.155383 -91.915139 -22.490429 0.0 0.0 0.0 0.0 0.0 -3.323838 0.0 -456.280669
2483 0 121.821980 0.0 30.566567 -496.109948 0.0 -0.000000 -0.562667 -33.384204 -91.474754 -23.012043 0.0 0.0 0.0 0.0 0.0 -3.390725 0.0 -495.545794
2484 0 163.801342 0.0 32.693539 -450.358949 0.0 -0.000000 -0.681815 -32.612401 -91.524081 -22.763531 0.0 0.0 0.0 0.0 0.0 -3.445103 0.0 -404.891000
2485 0 142.069217 0.0 41.533407 -477.193173 0.0 -0.000000 -0.401418 -31.187313 -93.230361 -24.097599 0.0 0.0 0.0 0.0 0.0 -3.271720 0.0 -445.778961
2486 0 153.181285 0.0 28.735831 -470.896436 0.0 -0.000000 -0.558581 -30.446182 -91.919048 -17.409367 0.0 0.0 0.0 0.0 0.0 -3.038001 0.0 -432.350499
2487 0 145.294538 0.0 32.248756 -474.456634 0.0 -0.000000 -0.483374 -34.891455 -91.478248 -23.461019 0.0 0.0 0.0 0.0 0.0 -3.054188 0.0 -450.281624
2488 0 163.341806 0.0 38.777808 -481.621529 0.0 -0.000005 -0.614666 -32.676692 -91.379758 -23.874395 0.0 0.0 0.0 0.0 0.0 -3.255653 0.0 -431.303084
2489 0 147.871144 0.0 29.919083 -461.272216 0.0 -0.000000 -0.479011 -29.467632 -92.693253 -22.158777 0.0 0.0 0.0 0.0 0.0 -2.865368 0.0 -431.146031
2490 0 147.171435 0.0 30.338722 -470.210311 0.0 -0.000000 -0.624036 -28.305416 -92.226263 -26.635828 0.0 0.0 0.0 0.0 0.0 -3.399094 0.0 -443.890792
2491 0 124.834730 0.0 29.167205 -469.144052 0.0 -0.000000 -0.407078 -29.854115 -91.960722 -21.841655 0.0 0.0 0.0 0.0 0.0 -3.281599 0.0 -462.487286
2492 0 158.892175 0.0 32.055676 -484.835729 0.0 -0.000000 -1.177468 -31.819434 -92.945373 -26.423857 0.0 0.0 0.0 0.0 0.0 -3.415155 0.0 -449.669164
2493 0 161.183421 0.0 39.576143 -473.574855 0.0 -0.000000 -0.838813 -32.279055 -91.726346 -25.072034 0.0 0.0 0.0 0.0 0.0 -3.220874 0.0 -425.952413
2494 0 156.579598 0.0 34.052063 -483.193473 0.0 -0.000000 -0.611950 -30.753871 -90.980360 -24.945622 0.0 0.0 0.0 0.0 0.0 -3.182015 0.0 -443.035630
2495 0 154.616061 0.0 36.222479 -481.989325 0.0 -0.000000 -0.388308 -28.731607 -93.913854 -23.666317 0.0 0.0 0.0 0.0 0.0 -3.230032 0.0 -441.080904
2496 0 157.757965 0.0 36.211323 -496.086541 0.0 -0.000000 -0.888573 -33.491814 -92.682359 -22.236258 0.0 0.0 0.0 0.0 0.0 -3.362179 0.0 -454.778436
2497 0 152.590300 0.0 38.253588 -492.572440 0.0 -0.000000 -0.464048 -34.674987 -92.580828 -24.519963 0.0 0.0 0.0 0.0 0.0 -3.268068 0.0 -457.236447
2498 0 160.450183 0.0 47.678975 -495.773880 0.0 -0.000000 -0.561333 -32.541248 -91.723372 -20.484714 0.0 0.0 0.0 0.0 0.0 -3.270762 0.0 -436.226152
2499 0 135.133600 0.0 32.539568 -475.476939 0.0 -0.000000 -0.641543 -29.339899 -92.026443 -22.255159 0.0 0.0 0.0 0.0 0.0 -3.104812 0.0 -455.171625
2500 0 131.744406 0.0 31.655785 -498.526057 0.0 -0.000000 -0.532125 -32.711950 -92.320811 -23.846600 0.0 0.0 0.0 0.0 0.0 -3.300071 0.0 -487.837424
2501 0 154.433789 0.0 35.124787 -497.932267 0.0 -0.000000 -0.632271 -29.144837 -90.975083 -20.108285 0.0 0.0 0.0 0.0 0.0 -2.966804 0.0 -452.200970
2502 0 130.618464 0.0 27.893502 -469.603429 0.0 -0.000000 -0.448264 -33.330741 -92.033359 -18.188276 0.0 0.0 0.0 0.0 0.0 -3.305820 0.0 -458.397923
2503 0 163.449228 0.0 32.476593 -487.777045 0.0 -0.000000 -0.361438 -36.387904 -92.240249 -23.509497 0.0 0.0 0.0 0.0 0.0 -3.476950 0.0 -447.827262
2504 0 174.165986 0.0 34.131403 -482.564129 0.0 -0.000000 -0.362679 -30.477296 -91.326050 -21.818005 0.0 0.0 0.0 0.0 0.0 -3.435639 0.0 -421.686410

2505 rows × 19 columns


In [4]:
import glob
a = glob.glob("/Users/weilu/Research/frustration_selection/q_and_rmsd/tr*")

In [7]:


In [29]:
pdb = pdb_list[0]
fileLocation = f"/Users/weilu/Research/frustration_selection/{pdb}/q.txt"
q = np.loadtxt(fileLocation)

In [30]:
q


Out[30]:
array([0.28347839, 0.27594457, 0.30014453, ..., 0.27034451, 0.26799367,
       0.26620382])

In [31]:
len(q)


Out[31]:
2505

In [ ]:


In [32]:
fileLocation = f"/Users/weilu/Research/frustration_selection/{pdb}/rmsd.txt"
rmsd = np.loadtxt(fileLocation)

In [34]:
len(rmsd)


Out[34]:
2505

In [35]:
fileLocation = f"/Users/weilu/Research/frustration_selection/{pdb}/gdt.txt"
gdt = np.loadtxt(fileLocation)

In [36]:
len(gdt)


Out[36]:
2505

In [45]:
gdt


Out[45]:
array([36.5575, 34.905 , 36.0875, ..., 34.67  , 34.4325, 34.1975])

In [59]:
fileLocation = f"/Users/weilu/Research/frustration_selection/{pdb}/awsem_energy.txt"
awsem_column = ['Step', 'Chain', 'Shake', 'Chi', 'Rama', 'Excluded', 'DSSP', 'P_AP', 'Water', 'Burial', 'Helix', 'AMH-Go', 'Frag_Mem', 'Vec_FM', 'Membrane', 'SSB', 'Electro', 'QGO', 'VTotal']
awsem = pd.read_csv(fileLocation, sep="\s+", names=awsem_column)

In [60]:
len(awsem)


Out[60]:
2505

In [39]:
fileLocation = f"/Users/weilu/Research/frustration_selection/{pdb}/rosetta_energy.txt"
rosetta = pd.read_csv(fileLocation, sep="\s+")

In [62]:
len(rosetta)


Out[62]:
2505

In [63]:
awsem.columns


Out[63]:
Index(['Step', 'Chain', 'Shake', 'Chi', 'Rama', 'Excluded', 'DSSP', 'P_AP',
       'Water', 'Burial', 'Helix', 'AMH-Go', 'Frag_Mem', 'Vec_FM', 'Membrane',
       'SSB', 'Electro', 'QGO', 'VTotal'],
      dtype='object')

In [64]:
awsem.dtypes


Out[64]:
Step          int64
Chain       float64
Shake       float64
Chi         float64
Rama        float64
Excluded    float64
DSSP        float64
P_AP        float64
Water       float64
Burial      float64
Helix       float64
AMH-Go      float64
Frag_Mem    float64
Vec_FM      float64
Membrane    float64
SSB         float64
Electro     float64
QGO         float64
VTotal      float64
dtype: object

In [65]:
rosetta.columns


Out[65]:
Index(['SCORE:', 'score', 'fa_atr', 'fa_rep', 'fa_sol', 'fa_intra_rep',
       'fa_intra_sol_xover4', 'lk_ball_wtd', 'fa_elec', 'pro_close',
       'hbond_sr_bb', 'hbond_lr_bb', 'hbond_bb_sc', 'hbond_sc', 'dslf_fa13',
       'omega', 'fa_dun', 'p_aa_pp', 'yhh_planarity', 'ref', 'rama_prepro',
       'allatom_rms', 'gdtmm', 'gdtmm1_1', 'gdtmm2_2', 'gdtmm3_3', 'gdtmm4_3',
       'gdtmm7_4', 'irms', 'maxsub', 'maxsub2.0', 'rms', 'description'],
      dtype='object')

In [67]:
rosetta.dtypes


Out[67]:
SCORE:                  object
score                  float64
fa_atr                 float64
fa_rep                 float64
fa_sol                 float64
fa_intra_rep           float64
fa_intra_sol_xover4    float64
lk_ball_wtd            float64
fa_elec                float64
pro_close              float64
hbond_sr_bb            float64
hbond_lr_bb            float64
hbond_bb_sc            float64
hbond_sc               float64
dslf_fa13              float64
omega                  float64
fa_dun                 float64
p_aa_pp                float64
yhh_planarity          float64
ref                    float64
rama_prepro            float64
allatom_rms            float64
gdtmm                  float64
gdtmm1_1               float64
gdtmm2_2               float64
gdtmm3_3               float64
gdtmm4_3               float64
gdtmm7_4               float64
irms                   float64
maxsub                 float64
maxsub2.0              float64
rms                    float64
description             object
dtype: object

In [18]:
len(gdt)


Out[18]:
1319

In [15]:
len(rmsd)


Out[15]:
1319

In [12]:
len(q)


Out[12]:
1319

In [2]:
# read energy, rw, bias, rmsd data from location
def read_data(name):
#     name="tr872"
    name_list = ["Step" , "Chain" , "Shake" , "Chi" , "Rama", "Excluded", "DSSP", "P_AP", "Water" ,"Burial", "Helix", "AMH_Go", "Frag_Mem", "Vec_FM", "Membrane", "SSB","VTotal"]

    # you probably want to change the location below
#     location = f"/Users/weilu/Research/server/sep_2018/03_week/02_week/{name}/"
    location = f"/Users/weilu/Research/server/dec_2018/structure_selection_2/{name}/"
    RMSD = pd.read_table(location+"rmsd.xvg", names=["i", "RMSD"], sep="\s+")
    bias = pd.read_table(location+"bias.log", names=["i", "biasQ", "bias"], sep="\s+").drop("i", axis=1)
    awsem = pd.read_table(location+"awsem.log", names=name_list)
    rw = pd.read_table(location+"rwplusScore.txt", names=["i", "Rw"], sep="\s+").drop("i", axis=1)
    qw = pd.read_table(location+"Qw.out", names=["i", "Qw"], sep="\s+", comment="#").drop("i", axis=1)
    # pc location
#     location = f"/Users/weilu/Research/server/sep_2018/03_week/{name}/"
#     location = f"/Users/weilu/Research/server/oct_2018/01_week/{name}/"
    pc = pd.read_table(location+"pcarmsd_scaled.txt", names=["i", "pc", "pc2"], sep="\s+", comment="#").drop("i", axis=1)
    raw_data = pd.concat([RMSD, rw, bias, qw, awsem, pc], axis=1)
    return raw_data.assign(name=name).reset_index().rename(columns={"index":"folder"})

def choose_top(data,col="RMSD", n=5, ascending=True):
    return data.assign(chosen=pd.DataFrame.rank(data[col], ascending=ascending, method='dense')<=n)

# read the pmf, rc. 
# def read_data_2(name):
# #     name = "tr894"
# #     location = f"/Users/weilu/Research/server/sep_2018/03_week/{name}/"
# #     location = f"/Users/weilu/Research/server/oct_2018/01_week/{name}/"
#     location = f"/Users/weilu/Research/server/dec_2018/structure_selection/{name}/"
#     rw = pd.read_table(location+"rc_rwplus", names=["pc","rw"], sep="\s+")
#     rmsd = pd.read_table(location+"rc_rmsdlowerBound", names=["pc", "rmsd"], sep="\s+")
#     awsem = pd.read_table(location+"rc_awsemEne", names=["pc", "awsem"], sep="\s+")
#     qw = pd.read_table(location+"rc_QwhigherBound", names=["pc", "qw"], sep="\s+")
#     freeE = pd.read_table(location+"pmf3000"
#                           , names=["pc", "f", "remove1", "remove2"], sep="\s+").drop(["remove1", "remove2"], axis=1)
#     raw_data = freeE.merge(rw, on="pc").merge(awsem, on="pc").merge(qw, on="pc").merge(rmsd, on="pc").assign(name=name)
#     return raw_data

def read_data_2(name):
#     name = "tr894"
#     location = f"/Users/weilu/Research/server/sep_2018/03_week/{name}/"
#     location = f"/Users/weilu/Research/server/oct_2018/01_week/{name}/"
    location = f"/Users/weilu/Research/server/dec_2018/structure_selection_2/{name}/"
    rw = pd.read_table(location+"rc_rwplus", names=["pc","rw"], sep="\s+")
    rmsd = pd.read_table(location+"rc_rmsdlowerBound", names=["pc", "rmsd"], sep="\s+")
#     awsem = pd.read_table(location+"rc_awsemEne", names=["pc", "awsem"], sep="\s+")
#     qw = pd.read_table(location+"rc_QwhigherBound", names=["pc", "qw"], sep="\s+")
    freeE = pd.read_table(location+"pmf3000"
                          , names=["pc", "f", "remove1", "remove2"], sep="\s+").drop(["remove1", "remove2"], axis=1)
    raw_data = freeE.merge(rw, on="pc").merge(rmsd, on="pc").assign(name=name)
    return raw_data

train based on free energy, rw and awsem.


In [3]:
# folder_list = ["tr894", "tr882", "tr594", "tr898", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
folder_list = ["tr884-halfDIHE", "tr872-halfDIHE", "tr948-halfDIHE", "tr898", "tr947", "tr894", "tr882", "tr594", "tr869", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
# folder_list = ["tr884-halfDIHE", "tr872-halfDIHE", "tr898", "tr947", "tr894", "tr882", "tr594", "tr869", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]


# folder_list = [ "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
# folder_list = ["tr862", "tr872", "tr885", "tr866", "tr868" , "tr895", "tr896", "tr870", "tr921", "tr891", "tr948"]
# "tr877","tr884", "tr922"
# "tr869"
# folder_list = ["tr894"]
# read all data

# tr884-halfDIHE
# tr872-halfDIHE
# tr948-halfDIHE
data_list = []
for name in folder_list:
    tmp = read_data_2(name)
    data_list.append(tmp)
raw_data_all = pd.concat(data_list)
n = 1
raw_data_all = raw_data_all.reset_index(drop=True).groupby("name").apply(choose_top, n=n, col="rmsd").reset_index(drop=True)


# train_name_list = ["tr872", "tr885", "tr948"]
# train_name_list = ["tr862", "tr872", "tr885", "tr866", "tr868" , "tr895", "tr896", "tr870", "tr921", "tr891", "tr948"]

# train_name_list = ["tr870"]
# train_name_list = ["tr891"]
# train_name_list = ["tr882"]
# train_name_list = ["tr894"]
# train_name_list = ["tr872"]
# train_name_list = ["tr869"]
# train_name_list = ["tr884"]
# train_name_list = ["tr866", "tr884"]
# train_name_list = ["tr870", "tr872"]
# train_name_list = ["tr866", "tr947"]
# train_name_list = ["tr872"]
# train_name_list = ["tr884", "tr872"]
train_name_list = ["tr866"]
# train_name_list = ["tr947"]
# select for training.
raw_data = raw_data_all.reset_index(drop=True).query(f'name in {train_name_list}')

In [4]:
# FEATURES = ["eigenvalues", "entropy", "pca"]
# FEATURES = ["eigenvalues", "entropy", "diffRMSD"]
# FEATURES = ["eigenvalues", "entropy"]
FEATURES = ["f",
    'rw',
     'awsem',
#     'RMSD', # test
#      'Burial',
#      'Water',
#      'Rama',
#      'DSSP',
#      'P_AP',
#      'Helix',
#      'Frag_Mem'
               ]
# FEATURES = ["eigenvalues"]
# LABEL = "diffRMSD"
# LABEL = "RMSD"
LABEL = "rmsd"
# LABEL = "qw"
DEGREE = 1

def pred_from_raw(a, clf):
    data = my_transform(a, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
    test_y = data[:,-1]
    test_set = data[:,:-1]
    prediceted_rmsd= clf.predict(test_set)
    return a.assign(prediceted_rmsd=prediceted_rmsd)

def assign_lowest_f(a):
    return a.assign(lowest_f=a["f"].sort_values().iloc[0])

In [5]:
raw_data_all = raw_data_all.reset_index(drop=True).groupby("name").apply(assign_lowest_f).reset_index(drop=True)

In [6]:
# # data = my_transform(raw_data, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
# # data = raw_data.groupby('name').apply(my_transform, label=LABEL, degree=DEGREE, FEATURES=FEATURES)[0]
# data = np.concatenate(raw_data.groupby('name').apply(my_transform, 
#                                                      label=LABEL, degree=DEGREE, FEATURES=FEATURES).values)
# train_y = data[:,-1]
# train_set = data[:,:-1]
# from sklearn import svm
# # clf = svm.SVC(probability=True)
# clf = LinearRegression()
# clf.fit(train_set, train_y)
# y_pred_svm = clf.predict(train_set)

# raw_data_all = raw_data_all.reset_index(drop=True).groupby("name").apply(pred_from_raw, clf).reset_index(drop=True)
# # raw_data_all = raw_data_all.reset_index(drop=True).groupby("name").apply(assign_lowest_f).reset_index(drop=True)



# picked_n = 1
# best = raw_data_all.groupby("name").apply(choose_top, col="rmsd"
#                                             , n=picked_n, ascending=True).reset_index(drop=True).query("chosen==True")
# picked = raw_data_all.groupby("name").apply(choose_top, col="prediceted_rmsd"
#                                             , n=picked_n, ascending=True).reset_index(drop=True).query("chosen==True")
# # init = raw_data_all.query("i == 0.0")
# all_results = pd.concat([best.assign(result='best'), 
#                          picked.assign(result='picked')])

# picked_keep = picked.copy()

In [7]:
from scipy.interpolate import interp1d
f_dic = {}
for name in folder_list:
    a = raw_data_all.query(f"name == '{name}'")
#     print(name ,a.shape)
    x = a["pc"].values
    y = a["f"].values
    f_dic[name] = interp1d(x, y, fill_value="extrapolate")

In [8]:
# g = sns.FacetGrid(raw_data_all, col="name", col_wrap=4)
# g = g.map(plt.plot, "pc", "f")
# plt.ylim([0,1])

In [9]:
# g = sns.FacetGrid(raw_data_all, col="name", col_wrap=4)
# g = g.map(plt.plot, "pc", "f")
# g = g.map(plt.plot, "pc", "prediceted_rmsd")

In [10]:
# raw_data_all.query("name == 'tr594'").plot("pc", "f")

In [11]:
# g = sns.FacetGrid(raw_data_all, col="name", col_wrap=4)
# g = g.map(plt.plot, "pc", "rmsd")
# plt.ylim([0,1])

used picked_keep as to filter the compelete data and select again


In [12]:
f_dic["tr594"](raw_data_all["pc"]).shape


Out[12]:
(608,)

In [13]:
def choose_top(data,col="RMSD", n=5, ascending=True):
    return data.assign(chosen=pd.DataFrame.rank(data[col], ascending=ascending, method='first')<=n)


# WIDTH = 100
# WIDTH = 0.1
# WIDTH = 1
# WIDTH = 0.2
# def with_in_range(data, width=WIDTH):
#     return data.assign(inrange= (data["pc"] < (data["pc_center"]+width)) & (data["pc"] > (data["pc_center"]-width)))

def with_in_range(data, width=5):
    name = data["name"].iloc[0]
    return data.assign(inrange= (0 < (f_dic[name](data["pc"]))) & ((f_dic[name](data["pc"])) < width))

In [14]:
# folder_list = ["tr898", "tr869", "tr947", "tr894", "tr882", "tr594", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
# folder_list = ["tr884-halfDIHE", "tr872-halfDIHE", "tr898", "tr947", "tr894", "tr882", "tr594", "tr869", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
# folder_list = ["tr872-halfDIHE", "tr898", "tr947", "tr894", "tr882", "tr594", "tr869", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]



# "tr898"
# folder_list = ["tr894", "tr882", "tr594", "tr898", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
# folder_list = ["tr894", "tr882", "tr594", "tr869", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
# folder_list = [ "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
# folder_list = ["tr862", "tr872", "tr885", "tr866", "tr868" , "tr895", "tr896", "tr870", "tr921", "tr891", "tr948"]
# "tr877","tr884", "tr922"
# "tr869"
# folder_list = ["tr894"]
# folder_list = ["tr866"]


# define top based on RMSD or Qw
# best_metric = "RMSD"
best_metric = "Qw"
if best_metric == "Qw":
    isAscending = False
else:
    isAscending = True
data_list = []
for name in folder_list:
#     print(name)
    tmp = read_data(name)
    data_list.append(tmp)
raw_data_all_2 = pd.concat(data_list).dropna()
n = 25
raw_data_all_2 = raw_data_all_2.reset_index(drop=True).groupby("name").\
        apply(choose_top, n=n, col=best_metric, ascending=isAscending).reset_index(drop=True)


raw_data = raw_data_all_2.reset_index(drop=True).query(f'name in {train_name_list}').dropna()
# a = raw_data_all_2.dropna().merge(picked_keep[["pc", "name"]].rename(columns={"pc":"pc_center"}),on="name")
a = raw_data_all_2.dropna()
filtered = a.groupby("name").apply(with_in_range).query("inrange == True").reset_index(drop=True)

In [19]:
filtered.reset_index(drop=True).to_csv("/Users/weilu/Research/server/dec_2018/structure_selection_3/filtered.csv")

In [15]:
filtered


Out[15]:
folder i RMSD Rw biasQ bias Qw Step Chain Shake ... Frag_Mem Vec_FM Membrane SSB VTotal pc pc2 name chosen inrange
0 0 0.0 0.465665 -10838.987781 0.590522 33.534424 0.464193 0.0 14.571923 0.0 ... -536.518284 0.0 0.0 0.0 -940.887277 -0.086922 -0.059985 tr594 False True
1 1 100.0 0.509175 -9916.464942 0.557170 39.219723 0.453318 0.0 9.111620 0.0 ... -532.556036 0.0 0.0 0.0 -949.290485 -1.048765 -0.032790 tr594 False True
2 2 200.0 0.522662 -9809.291344 0.562481 38.284555 0.436252 0.0 8.593084 0.0 ... -534.652029 0.0 0.0 0.0 -947.008881 -1.355717 -0.489933 tr594 False True
3 3 300.0 0.529569 -10111.629198 0.523389 45.431684 0.449343 0.0 6.567146 0.0 ... -536.498803 0.0 0.0 0.0 -941.325829 -1.610276 -0.458905 tr594 False True
4 4 400.0 0.541991 -9918.154310 0.525121 45.102051 0.432320 0.0 6.441661 0.0 ... -537.680262 0.0 0.0 0.0 -938.781766 -1.602802 -0.934757 tr594 False True
5 48 4800.0 0.621965 -9303.201390 0.452911 59.861201 0.413571 0.0 6.018852 0.0 ... -529.457606 0.0 0.0 0.0 -919.420140 -1.592254 -0.083006 tr594 False True
6 49 4900.0 0.673337 -9357.594222 0.493835 51.240594 0.397963 0.0 8.191024 0.0 ... -530.349964 0.0 0.0 0.0 -916.146867 -1.303263 -0.233821 tr594 False True
7 50 5000.0 0.662392 -9296.297303 0.493335 51.341927 0.408646 0.0 5.509543 0.0 ... -533.090343 0.0 0.0 0.0 -923.045825 -1.598746 -0.092803 tr594 False True
8 52 5200.0 0.689597 -9356.994993 0.443598 61.916707 0.402738 0.0 7.161900 0.0 ... -530.790559 0.0 0.0 0.0 -928.397126 -1.536486 0.310421 tr594 False True
9 57 5700.0 0.613158 -9420.456138 0.538359 42.622500 0.405299 0.0 5.272804 0.0 ... -538.792399 0.0 0.0 0.0 -944.003112 -1.569597 0.080288 tr594 False True
10 59 5900.0 0.677439 -9021.254091 0.466083 57.013521 0.391027 0.0 5.432674 0.0 ... -534.403954 0.0 0.0 0.0 -933.622474 -1.535101 -0.366646 tr594 False True
11 62 6200.0 0.643459 -9386.781543 0.489386 52.145373 0.408578 0.0 7.253485 0.0 ... -537.773403 0.0 0.0 0.0 -931.586592 -1.470661 -0.145013 tr594 False True
12 63 6300.0 0.614988 -9535.425190 0.463298 57.609865 0.421237 0.0 7.458456 0.0 ... -530.267172 0.0 0.0 0.0 -932.356310 -1.371848 -0.228594 tr594 False True
13 64 6400.0 0.586483 -9399.518607 0.420980 67.052790 0.408612 0.0 8.814095 0.0 ... -528.824428 0.0 0.0 0.0 -913.732435 -1.560224 -0.167094 tr594 False True
14 65 6500.0 0.587374 -9531.648111 0.503773 49.248325 0.402970 0.0 7.560530 0.0 ... -535.444009 0.0 0.0 0.0 -942.207064 -1.622906 -0.309311 tr594 False True
15 66 6600.0 0.576300 -9284.998390 0.520211 46.039471 0.427523 0.0 5.146234 0.0 ... -537.880954 0.0 0.0 0.0 -945.554047 -1.559099 -0.122496 tr594 False True
16 67 6700.0 0.601645 -9346.725491 0.468457 56.507624 0.390246 0.0 6.440939 0.0 ... -529.959657 0.0 0.0 0.0 -923.764972 -1.570806 -0.482668 tr594 False True
17 69 6900.0 0.619247 -9184.666542 0.469153 56.359664 0.402867 0.0 6.209315 0.0 ... -538.792552 0.0 0.0 0.0 -935.687195 -1.432406 -0.169271 tr594 False True
18 70 7000.0 0.639474 -9172.484700 0.470707 56.030184 0.416233 0.0 6.255095 0.0 ... -533.756661 0.0 0.0 0.0 -928.264082 -1.238577 -0.467167 tr594 False True
19 71 7100.0 0.613697 -9246.908827 0.469495 56.287072 0.428673 0.0 6.767947 0.0 ... -530.594660 0.0 0.0 0.0 -932.727427 -1.381525 -0.301255 tr594 False True
20 72 7200.0 0.622031 -9240.500512 0.450103 60.477245 0.443059 0.0 5.361324 0.0 ... -523.295198 0.0 0.0 0.0 -907.426228 -1.217635 0.017708 tr594 False True
21 73 7300.0 0.631637 -8980.089989 0.445487 61.496850 0.402151 0.0 8.629303 0.0 ... -531.891872 0.0 0.0 0.0 -925.915022 -1.199508 -0.347520 tr594 False True
22 74 7400.0 0.604258 -9233.859480 0.497528 50.495535 0.418786 0.0 7.816735 0.0 ... -537.019111 0.0 0.0 0.0 -938.174905 -1.377256 -0.635975 tr594 False True
23 75 7500.0 0.622700 -9427.830408 0.481416 53.785812 0.405511 0.0 5.678720 0.0 ... -531.200870 0.0 0.0 0.0 -931.758163 -1.123728 -0.620915 tr594 False True
24 82 8200.0 0.625078 -9308.236867 0.412060 69.134734 0.400529 0.0 8.187544 0.0 ... -523.173377 0.0 0.0 0.0 -901.131202 -1.375308 -0.228769 tr594 False True
25 83 8300.0 0.662626 -9241.532068 0.440148 62.686749 0.393837 0.0 6.397077 0.0 ... -527.050543 0.0 0.0 0.0 -903.244978 -1.356023 0.070050 tr594 False True
26 84 8400.0 0.602315 -9347.025048 0.488074 52.413745 0.424583 0.0 5.754571 0.0 ... -536.814089 0.0 0.0 0.0 -931.801980 -1.416061 0.055604 tr594 False True
27 85 8500.0 0.607790 -9388.986833 0.462851 57.705818 0.382716 0.0 6.068321 0.0 ... -531.005668 0.0 0.0 0.0 -926.400195 -1.340040 -0.431278 tr594 False True
28 87 8700.0 0.581966 -9437.420735 0.493212 51.366827 0.429686 0.0 7.081513 0.0 ... -532.261105 0.0 0.0 0.0 -932.919436 -1.598950 0.023925 tr594 False True
29 88 8800.0 0.619568 -9243.703066 0.515696 46.910139 0.401120 0.0 5.555758 0.0 ... -538.895297 0.0 0.0 0.0 -937.038712 -1.122959 0.018069 tr594 False True
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
46308 2475 47100.0 0.664531 -29238.158086 0.535266 43.195462 0.627001 0.0 12.720694 0.0 ... -1438.753539 0.0 0.0 0.0 -2474.251192 2.731779 -1.490331 tr948-halfDIHE False True
46309 2476 47200.0 0.645980 -28575.267513 0.553863 39.807586 0.610188 0.0 10.766470 0.0 ... -1489.595256 0.0 0.0 0.0 -2552.101901 2.266848 -1.271441 tr948-halfDIHE False True
46310 2477 47300.0 0.668837 -29100.780481 0.561323 38.487517 0.611376 0.0 10.773895 0.0 ... -1478.281230 0.0 0.0 0.0 -2547.996116 2.529044 -1.879383 tr948-halfDIHE False True
46311 2478 47400.0 0.656312 -29104.086956 0.546157 41.194629 0.614508 0.0 10.540180 0.0 ... -1472.942756 0.0 0.0 0.0 -2538.165947 2.544484 -2.486622 tr948-halfDIHE False True
46312 2479 47500.0 0.648945 -28742.747503 0.545662 41.284672 0.608867 0.0 11.570885 0.0 ... -1474.445657 0.0 0.0 0.0 -2546.821662 2.790202 -2.295615 tr948-halfDIHE False True
46313 2480 47600.0 0.646692 -29295.874974 0.552699 40.015648 0.616515 0.0 11.763866 0.0 ... -1469.248549 0.0 0.0 0.0 -2535.770368 2.403907 -2.410211 tr948-halfDIHE False True
46314 2481 47700.0 0.646120 -29275.488459 0.549853 40.526387 0.631026 0.0 10.460628 0.0 ... -1471.068108 0.0 0.0 0.0 -2538.634377 2.754876 -2.475985 tr948-halfDIHE False True
46315 2482 47800.0 0.650602 -28865.582862 0.572983 36.468727 0.615883 0.0 9.371698 0.0 ... -1476.593163 0.0 0.0 0.0 -2560.703038 2.825274 -2.236798 tr948-halfDIHE False True
46316 2483 47900.0 0.646467 -29047.992545 0.561672 38.426257 0.605711 0.0 11.164019 0.0 ... -1480.239083 0.0 0.0 0.0 -2548.463246 2.776486 -1.734102 tr948-halfDIHE False True
46317 2484 48000.0 0.642090 -29302.210047 0.561277 38.495626 0.605461 0.0 10.654320 0.0 ... -1472.067773 0.0 0.0 0.0 -2542.795485 2.783447 -2.175997 tr948-halfDIHE False True
46318 2485 48100.0 0.644168 -29176.616023 0.547463 40.957936 0.629942 0.0 9.562760 0.0 ... -1461.418923 0.0 0.0 0.0 -2541.440131 2.688586 -2.345910 tr948-halfDIHE False True
46319 2486 48200.0 0.636067 -29472.929644 0.526960 44.753283 0.615342 0.0 11.473004 0.0 ... -1444.956390 0.0 0.0 0.0 -2487.803658 2.578424 -2.021244 tr948-halfDIHE False True
46320 2487 48300.0 0.642547 -29197.443531 0.559664 38.779073 0.621361 0.0 11.602272 0.0 ... -1460.667977 0.0 0.0 0.0 -2525.987471 2.813637 -1.768482 tr948-halfDIHE False True
46321 2488 48400.0 0.637230 -29167.185449 0.548189 40.826610 0.634195 0.0 10.974162 0.0 ... -1454.930095 0.0 0.0 0.0 -2522.441224 2.938245 -1.649847 tr948-halfDIHE False True
46322 2489 48500.0 0.648128 -29399.083299 0.558208 39.036110 0.620118 0.0 10.967527 0.0 ... -1472.947269 0.0 0.0 0.0 -2546.576797 2.952948 -2.021106 tr948-halfDIHE False True
46323 2490 48600.0 0.630125 -29041.728520 0.565502 37.757635 0.621270 0.0 9.809122 0.0 ... -1463.845887 0.0 0.0 0.0 -2533.871645 2.601774 -1.980494 tr948-halfDIHE False True
46324 2491 48700.0 0.647063 -29033.626230 0.556033 39.421369 0.613638 0.0 11.965107 0.0 ... -1470.855796 0.0 0.0 0.0 -2534.020216 2.475865 -2.116861 tr948-halfDIHE False True
46325 2492 48800.0 0.643798 -29200.383270 0.564631 37.909295 0.616066 0.0 11.028881 0.0 ... -1478.423893 0.0 0.0 0.0 -2539.374255 2.767128 -1.720189 tr948-halfDIHE False True
46326 2493 48900.0 0.626581 -29133.246086 0.534863 43.270476 0.617303 0.0 11.230873 0.0 ... -1445.668297 0.0 0.0 0.0 -2484.545584 2.731528 -1.222750 tr948-halfDIHE False True
46327 2494 49000.0 0.648736 -29141.057142 0.554544 39.686199 0.609742 0.0 8.998190 0.0 ... -1463.298878 0.0 0.0 0.0 -2538.488207 2.743604 -2.217587 tr948-halfDIHE False True
46328 2495 49100.0 0.685668 -28671.003442 0.539182 42.470707 0.602916 0.0 9.949813 0.0 ... -1453.218659 0.0 0.0 0.0 -2517.287917 2.604783 -2.730466 tr948-halfDIHE False True
46329 2496 49200.0 0.636802 -29026.657620 0.557262 39.203458 0.620470 0.0 11.087543 0.0 ... -1480.758405 0.0 0.0 0.0 -2551.386257 2.661026 -1.448826 tr948-halfDIHE False True
46330 2497 49300.0 0.638779 -28902.094126 0.542794 41.807488 0.610915 0.0 8.117317 0.0 ... -1461.832169 0.0 0.0 0.0 -2527.610120 2.418022 -1.970964 tr948-halfDIHE False True
46331 2498 49400.0 0.660044 -29059.731620 0.529000 44.368116 0.620130 0.0 10.782999 0.0 ... -1447.017390 0.0 0.0 0.0 -2492.401515 2.737150 -2.048866 tr948-halfDIHE False True
46332 2499 49500.0 0.670794 -28458.474946 0.562937 38.204854 0.615567 0.0 9.404200 0.0 ... -1462.166721 0.0 0.0 0.0 -2538.646214 2.287633 -2.853603 tr948-halfDIHE False True
46333 2500 49600.0 0.636916 -29056.391924 0.531281 43.939560 0.619260 0.0 10.684813 0.0 ... -1451.048965 0.0 0.0 0.0 -2499.850517 2.382571 -2.619119 tr948-halfDIHE False True
46334 2501 49700.0 0.690867 -28405.607817 0.554126 39.760732 0.601165 0.0 12.160203 0.0 ... -1458.215656 0.0 0.0 0.0 -2521.931720 2.367532 -2.860109 tr948-halfDIHE False True
46335 2502 49800.0 0.687250 -28858.046354 0.557573 39.148356 0.619200 0.0 9.398444 0.0 ... -1462.184324 0.0 0.0 0.0 -2530.782884 2.377331 -2.776115 tr948-halfDIHE False True
46336 2503 49900.0 0.674822 -29090.494137 0.545762 41.266472 0.618320 0.0 9.060949 0.0 ... -1463.757249 0.0 0.0 0.0 -2537.671554 2.525844 -2.441068 tr948-halfDIHE False True
46337 2504 50000.0 0.659247 -28802.248293 0.542977 41.774009 0.609452 0.0 11.459437 0.0 ... -1473.040182 0.0 0.0 0.0 -2537.841217 2.442010 -1.905541 tr948-halfDIHE False True

46338 rows × 29 columns


In [ ]:


In [16]:
filtered.shape


Out[16]:
(46338, 29)

In [17]:
a.shape


Out[17]:
(57110, 28)

In [53]:
# FEATURES = ["eigenvalues", "entropy", "pca"]
# FEATURES = ["eigenvalues", "entropy", "diffRMSD"]
# FEATURES = ["eigenvalues", "entropy"]
FEATURES = [
    "biasQ",
    'Rw',
     'VTotal',
#     'RMSD', # test
#     'Qw',
#      'Burial',
#      'Water',
#      'Rama',
#      'DSSP',
#      'P_AP',
#      'Helix',
#      'Frag_Mem'
               ]
# FEATURES = ["eigenvalues"]
# LABEL = "diffRMSD"
# LABEL = "RMSD"
LABEL = "chosen"
DEGREE = 1

def pred_from_raw(a):
    data = my_transform(a, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
    test_y = data[:,-1]
    test_set = data[:,:-1]
    prob= clf.predict_proba(test_set)[:,1]
    return a.assign(prob=prob)

# data = my_transform(raw_data, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
# data = raw_data.groupby('name').apply(my_transform, label=LABEL, degree=DEGREE, FEATURES=FEATURES)[0]
data = np.concatenate(raw_data.groupby('name').apply(my_transform, 
                                                     label=LABEL, degree=DEGREE, FEATURES=FEATURES).values)
train_y = data[:,-1]
train_set = data[:,:-1]

# clf = svm.SVC(probability=True)
# p = 0.01
# clf = LogisticRegression(random_state=27, class_weight={0:p, 1:(1-p)})
clf = LogisticRegression(random_state=27)
clf.fit(train_set, train_y)

filtered = filtered.reset_index(drop=True).groupby("name").apply(pred_from_raw).reset_index(drop=True)


picked_n = 1
best = raw_data_all_2.groupby("name").apply(choose_top, col="RMSD"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
# if True:
picked_1 = filtered.groupby("name").apply(choose_top, col="prob"
                                        , n=1, ascending=False).reset_index(drop=True).query("chosen==True")

# if False:
picked_5 = filtered.groupby("name").apply(choose_top, col="prob"
                                            , n=5, ascending=False).reset_index(drop=True).query("chosen==True")
picked = picked_5.groupby("name").apply(choose_top, col="RMSD"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
worst = filtered.groupby("name").apply(choose_top, col="RMSD"
                                            , n=1, ascending=False).reset_index(drop=True).query("chosen==True")
init = raw_data_all_2.groupby("name").apply(choose_top, col="i"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
all_results = pd.concat([best.assign(result='best'), 
                         picked_1.assign(result='picked'), init.assign(result='init')
                        , worst.assign(result='worst')
                        ], sort=False)
# all_results = pd.concat([best.assign(result='best'), 
#                          picked.assign(result='picked')])
# picked.to_csv("/Users/weilu/Desktop/picked.csv

# sns.set(rc={'figure.figsize':(20,30)})
# plt.figure(figsize=(15,8))
fg = sns.FacetGrid(data=all_results.reset_index(), hue='result', height=8, aspect=1.63)
fg.map(plt.plot, 'name', 'RMSD').add_legend(fontsize=20)
# fg.set(ylim=(0, 10))


Out[53]:
<seaborn.axisgrid.FacetGrid at 0x1a27839358>

In [56]:
all_results.query("name == 'tr872-halfDIHE' or name == 'tr872'").query("result == 'picked_top1'")


Out[56]:
folder i RMSD Rw biasQ bias Qw Step Chain Shake ... Membrane SSB VTotal pc pc2 name chosen inrange prob result
13986 484 48400.0 0.315882 -12625.342314 0.675345 21.080226 0.654423 0.0 5.027838 0.0 ... 0.0 0.0 -1138.137918 -0.849180 1.877866 tr872 True True 0.357519 picked_top1
16566 912 41100.0 0.355024 -12638.637754 0.671949 21.523455 0.773383 0.0 4.560306 0.0 ... 0.0 0.0 -1128.656025 0.212775 1.396896 tr872-halfDIHE True True 0.276979 picked_top1

2 rows × 31 columns


In [55]:
all_results.query("name == 'tr948-halfDIHE' or name == 'tr948'").query("result == 'picked_top1'")


Out[55]:
folder i RMSD Rw biasQ bias Qw Step Chain Shake ... Membrane SSB VTotal pc pc2 name chosen inrange prob result
42499 810 30900.0 0.642138 -28698.131138 0.581505 35.027568 0.603420 0.0 9.830352 0.0 ... 0.0 0.0 -2589.281395 -0.896550 2.553332 tr948 True True 0.638205 picked_top1
46179 2343 33900.0 0.679436 -29090.797025 0.569006 37.151118 0.590497 0.0 8.691268 0.0 ... 0.0 0.0 -2580.851263 1.535779 -0.682960 tr948-halfDIHE True True 0.324637 picked_top1

2 rows × 31 columns


In [52]:
all_results.query("name == 'tr884-halfDIHE' or name == 'tr884'").query("result == 'picked_top1'")


Out[52]:
folder i RMSD Rw biasQ bias Qw Step Chain Shake ... Membrane SSB VTotal pc pc2 name chosen inrange prob result
23305 1448 44600.0 0.542798 -9418.042083 0.530488 44.088316 0.496556 0.0 3.516038 0.0 ... 0.0 0.0 -924.112214 0.739004 -2.026302 tr884 True True 0.245173 picked_top1
25653 1301 29900.0 0.320999 -9330.232483 0.615756 29.528697 0.700275 0.0 3.993937 0.0 ... 0.0 0.0 -899.346439 -1.224431 -1.495779 tr884-halfDIHE True True 0.265686 picked_top1

2 rows × 31 columns


In [54]:
all_results = pd.concat([picked_1.assign(result="picked_top1"), picked_5.assign(result='picked_top5'),
                         picked.assign(result='picked_best'),
                         best.assign(result='best'), 
                          init.assign(result='init'),
                         worst.assign(result='worst'), 
                        ], sort=False)

In [30]:
all_results.reset_index(drop=True).to_csv("/Users/weilu/Research/server/dec_2018/structure_selection_2/all_results.csv")

In [32]:
all_results = pd.read_csv("/Users/weilu/Research/server/dec_2018/structure_selection_2/all_results.csv", index_col=0)

In [62]:
all_results.query("result == 'picked_top5'").groupby("name").apply(pd.sort_values, "prob")


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-62-12b0d03d99dd> in <module>()
----> 1 all_results.query("result == 'picked_top5'").groupby("name").apply(pd.sort_values, "prob")

AttributeError: module 'pandas' has no attribute 'sort_values'

In [69]:
all_results.query("result == 'picked_top5'").sort_values(["name", "prob"], ascending=False)


Out[69]:
folder i RMSD Rw biasQ bias Qw Step Chain Shake ... Membrane SSB VTotal pc pc2 name chosen inrange prob result
46179 2343 33900.0 0.679436 -29090.797025 0.569006 37.151118 0.590497 0.0 8.691268 0.0 ... 0.0 0.0 -2580.851263 1.535779 -0.682960 tr948-halfDIHE True True 0.324637 picked_top5
46095 2259 25500.0 0.685912 -28563.342610 0.574794 36.159959 0.584476 0.0 10.473595 0.0 ... 0.0 0.0 -2575.861825 2.225680 0.153428 tr948-halfDIHE True True 0.277318 picked_top5
45093 1058 5600.0 0.614140 -28463.547489 0.571656 36.695699 0.633013 0.0 7.506891 0.0 ... 0.0 0.0 -2578.116063 0.704460 -0.745613 tr948-halfDIHE True True 0.251004 picked_top5
44653 617 11600.0 0.530739 -28601.483932 0.568418 37.252623 0.635398 0.0 11.437000 0.0 ... 0.0 0.0 -2579.379165 0.932754 0.857359 tr948-halfDIHE True True 0.245794 picked_top5
46189 2353 34900.0 0.677295 -28758.016932 0.575408 36.055693 0.596811 0.0 10.026178 0.0 ... 0.0 0.0 -2565.811382 2.009119 -1.507732 tr948-halfDIHE True True 0.236490 picked_top5
42499 810 30900.0 0.642138 -28698.131138 0.581505 35.027568 0.603420 0.0 9.830352 0.0 ... 0.0 0.0 -2589.281395 -0.896550 2.553332 tr948 True True 0.638205 picked_top5
42677 988 48700.0 0.556396 -28450.977135 0.581825 34.974074 0.609090 0.0 9.318828 0.0 ... 0.0 0.0 -2589.462466 -0.319427 2.296344 tr948 True True 0.603070 picked_top5
42551 862 36100.0 0.660701 -28421.082123 0.574724 36.171940 0.572905 0.0 9.357762 0.0 ... 0.0 0.0 -2589.151368 -0.797111 2.190730 tr948 True True 0.503406 picked_top5
42566 877 37600.0 0.645414 -28593.010397 0.577859 35.640522 0.581267 0.0 9.385711 0.0 ... 0.0 0.0 -2580.545925 -0.431302 2.888052 tr948 True True 0.489552 picked_top5
42662 973 47200.0 0.562767 -28345.005686 0.573871 36.317265 0.612495 0.0 9.712646 0.0 ... 0.0 0.0 -2586.339493 -0.358514 2.007456 tr948 True True 0.451314 picked_top5
41765 481 48100.0 0.956921 -27891.079857 0.610006 30.418991 0.482850 0.0 13.287310 0.0 ... 0.0 0.0 -2108.005624 -2.046083 0.185091 tr947 True True 0.225634 picked_top5
41559 224 22400.0 1.117747 -27211.797425 0.613078 29.941723 0.483026 0.0 14.849488 0.0 ... 0.0 0.0 -2100.315430 -2.620303 -2.236081 tr947 True True 0.151705 picked_top5
41835 1259 25700.0 1.085501 -24839.093022 0.613234 29.917587 0.429476 0.0 10.805366 0.0 ... 0.0 0.0 -2118.051083 -2.118343 0.546546 tr947 True True 0.111876 picked_top5
41687 365 36500.0 1.128184 -26804.835459 0.613135 29.932977 0.466394 0.0 12.563653 0.0 ... 0.0 0.0 -2095.237870 -2.005232 -2.284630 tr947 True True 0.105201 picked_top5
41550 214 21400.0 1.096977 -27340.419432 0.606326 30.995822 0.470749 0.0 14.083092 0.0 ... 0.0 0.0 -2096.288160 -2.527297 -2.366083 tr947 True True 0.096247 picked_top5
40074 1011 900.0 0.368668 -9636.845094 0.651477 24.293703 0.606792 0.0 4.764826 0.0 ... 0.0 0.0 -1010.273657 -0.056487 1.712977 tr922 True True 0.267637 picked_top5
40076 1013 1100.0 0.366574 -9503.266740 0.652070 24.210997 0.583931 0.0 3.950436 0.0 ... 0.0 0.0 -1006.058579 -0.063859 1.446985 tr922 True True 0.203823 picked_top5
40077 1014 1200.0 0.345203 -9535.371379 0.642239 25.598638 0.627784 0.0 3.638171 0.0 ... 0.0 0.0 -1006.189562 0.051855 1.359481 tr922 True True 0.197031 picked_top5
39165 83 8300.0 0.328628 -9463.305731 0.648948 24.647459 0.624558 0.0 4.517219 0.0 ... 0.0 0.0 -1003.368107 -1.002158 -0.550782 tr922 True True 0.174875 picked_top5
40082 1019 1700.0 0.374167 -9506.479143 0.643149 25.468478 0.621836 0.0 4.025615 0.0 ... 0.0 0.0 -1000.178152 0.187237 1.232725 tr922 True True 0.161172 picked_top5
38271 828 32700.0 0.324877 -21881.858026 0.680476 20.419091 0.624908 0.0 23.769130 0.0 ... 0.0 0.0 -2764.790089 0.245506 -0.892383 tr921 True True 0.181894 picked_top5
38071 623 12200.0 0.342607 -22022.468951 0.691004 19.095697 0.642496 0.0 27.640532 0.0 ... 0.0 0.0 -2731.362792 -1.308575 0.152117 tr921 True True 0.167187 picked_top5
38261 818 31700.0 0.321367 -21751.698844 0.677866 20.754120 0.625235 0.0 26.710333 0.0 ... 0.0 0.0 -2770.951548 -0.511215 -1.499475 tr921 True True 0.164029 picked_top5
38278 835 33400.0 0.317155 -21679.300416 0.680030 20.476212 0.659806 0.0 27.965579 0.0 ... 0.0 0.0 -2768.607043 -1.257537 -0.718024 tr921 True True 0.160837 picked_top5
38251 806 30500.0 0.343824 -21742.173855 0.683483 20.036555 0.598756 0.0 30.046051 0.0 ... 0.0 0.0 -2755.489751 -0.647251 -1.098670 tr921 True True 0.152171 picked_top5
37201 729 22800.0 1.346089 -19042.795684 0.568624 37.217128 0.398104 0.0 6.527535 0.0 ... 0.0 0.0 -1469.264433 -2.546374 -2.851364 tr898 True True 0.190190 picked_top5
37171 695 19400.0 1.354056 -18672.018002 0.567845 37.351664 0.387112 0.0 6.121254 0.0 ... 0.0 0.0 -1472.410391 -2.112595 -4.148558 tr898 True True 0.176817 picked_top5
37202 730 22900.0 1.357227 -19095.827437 0.574393 36.228299 0.391478 0.0 6.416127 0.0 ... 0.0 0.0 -1465.542918 -2.541665 -3.160339 tr898 True True 0.173510 picked_top5
37158 677 17600.0 1.459045 -18739.210101 0.563673 38.076315 0.368102 0.0 6.244340 0.0 ... 0.0 0.0 -1471.218067 -2.592507 -3.551837 tr898 True True 0.164555 picked_top5
37147 648 14700.0 1.356008 -18865.221868 0.564519 37.928806 0.396207 0.0 6.473311 0.0 ... 0.0 0.0 -1469.439056 -2.091135 -2.955682 tr898 True True 0.163697 picked_top5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
11267 186 18600.0 0.911107 -20200.859983 0.630429 27.316528 0.326569 80000000.0 89.338784 0.0 ... 0.0 0.0 -1635.238984 -0.829420 0.255606 tr870 True True 0.026845 picked_top5
11210 129 12900.0 0.881563 -20157.706928 0.630429 27.316528 0.324614 80000000.0 89.338784 0.0 ... 0.0 0.0 -1635.238984 -1.121479 -0.345961 tr870 True True 0.026071 picked_top5
11573 499 49900.0 1.028841 -20156.584708 0.630429 27.316528 0.302463 80000000.0 89.338784 0.0 ... 0.0 0.0 -1635.238984 -1.304933 0.221292 tr870 True True 0.026051 picked_top5
11301 220 22000.0 0.945660 -20152.166487 0.630429 27.316528 0.324916 80000000.0 89.338784 0.0 ... 0.0 0.0 -1635.238984 -0.395241 1.395270 tr870 True True 0.025973 picked_top5
11374 293 29300.0 0.933194 -20146.905961 0.630429 27.316528 0.325829 80000000.0 89.338784 0.0 ... 0.0 0.0 -1635.238984 -0.907448 1.260252 tr870 True True 0.025881 picked_top5
8942 0 0.0 1.230950 -17101.278236 0.327521 90.445532 0.340709 0.0 6.684015 0.0 ... 0.0 0.0 -1364.887693 -0.000178 -0.048953 tr869 True True 0.317641 picked_top5
9344 501 0.0 1.230950 -17101.278236 0.327521 90.445532 0.340709 0.0 6.684015 0.0 ... 0.0 0.0 -1364.887693 -0.000178 -0.048953 tr869 True True 0.317641 picked_top5
9844 1002 0.0 1.230950 -17101.278236 0.327521 90.445532 0.340709 0.0 6.684015 0.0 ... 0.0 0.0 -1364.887693 -0.000178 -0.048953 tr869 True True 0.317641 picked_top5
10340 1503 0.0 1.230950 -17101.278236 0.327521 90.445532 0.340709 0.0 6.684015 0.0 ... 0.0 0.0 -1364.887693 -0.000178 -0.048953 tr869 True True 0.317641 picked_top5
10835 2004 0.0 1.230950 -17101.278236 0.327521 90.445532 0.340709 0.0 6.684015 0.0 ... 0.0 0.0 -1364.887693 -0.000178 -0.048953 tr869 True True 0.317641 picked_top5
7725 1009 700.0 0.255439 -19002.022160 0.515777 46.894413 0.759775 0.0 20.229688 0.0 ... 0.0 0.0 -1896.962431 0.139004 -0.381850 tr868 True True 0.434124 picked_top5
7490 774 27300.0 0.198462 -18795.694394 0.545927 41.236478 0.781562 0.0 15.006630 0.0 ... 0.0 0.0 -1850.986468 -0.176000 0.278655 tr868 True True 0.365764 picked_top5
7488 772 27100.0 0.194233 -18689.078432 0.534715 43.298055 0.804022 0.0 19.496037 0.0 ... 0.0 0.0 -1868.985796 -0.338582 -0.103741 tr868 True True 0.359272 picked_top5
7665 949 44800.0 0.201026 -19133.599935 0.525713 44.989571 0.793817 0.0 20.078757 0.0 ... 0.0 0.0 -1864.274740 -0.459874 -0.392121 tr868 True True 0.356494 picked_top5
6999 278 27800.0 0.205594 -18158.038125 0.526918 44.761400 0.775710 0.0 20.924675 0.0 ... 0.0 0.0 -1893.151896 -1.187462 2.812971 tr868 True True 0.327812 picked_top5
5258 436 43600.0 0.165094 -17053.726578 0.628532 27.597682 0.834830 0.0 11.126801 0.0 ... 0.0 0.0 -1442.122133 -0.424255 3.022483 tr866 True True 0.220069 picked_top5
5098 273 27300.0 0.166109 -17127.532661 0.627386 27.768206 0.862580 0.0 10.207970 0.0 ... 0.0 0.0 -1434.004674 -0.505575 5.068936 tr866 True True 0.163754 picked_top5
4962 127 12700.0 0.321337 -17026.444555 0.601809 31.711221 0.661329 0.0 8.726079 0.0 ... 0.0 0.0 -1445.665120 -0.735922 4.109533 tr866 True True 0.162460 picked_top5
5121 296 29600.0 0.152312 -16825.468015 0.627247 27.789014 0.867941 0.0 9.357269 0.0 ... 0.0 0.0 -1438.226706 -0.349591 3.006870 tr866 True True 0.148329 picked_top5
5215 391 39100.0 0.171415 -16686.245967 0.626311 27.928708 0.847414 0.0 8.871145 0.0 ... 0.0 0.0 -1441.508875 -0.515809 3.157518 tr866 True True 0.148148 picked_top5
3098 716 21500.0 0.621427 -14017.862442 0.795115 8.395546 0.461948 0.0 3.268449 0.0 ... 0.0 0.0 -1467.578457 -1.316991 -2.854982 tr862 True True 0.202037 picked_top5
3222 840 33900.0 0.591362 -14298.952979 0.792131 8.641868 0.457493 0.0 3.385219 0.0 ... 0.0 0.0 -1460.246571 -0.883616 -2.363559 tr862 True True 0.195641 picked_top5
3093 711 21000.0 0.628722 -13959.600064 0.804233 7.664932 0.440617 0.0 3.568598 0.0 ... 0.0 0.0 -1463.589674 -1.118755 -3.200069 tr862 True True 0.174863 picked_top5
3096 714 21300.0 0.639278 -14018.020344 0.785619 9.191880 0.443309 0.0 3.733206 0.0 ... 0.0 0.0 -1465.096458 -1.501777 -2.755732 tr862 True True 0.166096 picked_top5
3263 881 38000.0 0.595374 -14371.539225 0.775668 10.064969 0.442423 0.0 3.723739 0.0 ... 0.0 0.0 -1457.753334 -0.606648 -3.508047 tr862 True True 0.162568 picked_top5
1917 2028 2400.0 0.571845 -9655.812091 0.562631 38.258352 0.397447 0.0 5.116114 0.0 ... 0.0 0.0 -976.169211 1.783749 -0.897284 tr594 True True 0.349458 picked_top5
2173 2284 28000.0 0.611227 -9618.903287 0.570485 36.896556 0.424141 0.0 5.176804 0.0 ... 0.0 0.0 -971.931068 2.239968 -1.118442 tr594 True True 0.310490 picked_top5
1969 2080 7600.0 0.580387 -9250.084463 0.529520 44.270342 0.384082 0.0 4.010634 0.0 ... 0.0 0.0 -982.414995 2.497040 -0.160807 tr594 True True 0.242897 picked_top5
2141 2252 24800.0 0.585275 -9856.197753 0.534258 43.383165 0.426288 0.0 4.932738 0.0 ... 0.0 0.0 -973.913129 2.214772 -1.558762 tr594 True True 0.239219 picked_top5
2146 2257 25300.0 0.594052 -9847.422751 0.523620 45.387591 0.440995 0.0 4.988500 0.0 ... 0.0 0.0 -974.961953 1.646261 -1.170395 tr594 True True 0.215441 picked_top5

115 rows × 31 columns


In [41]:
for i, line in all_results.query("result == 'picked_top5'").reset_index(drop=True).iterrows():
    print(i, line["name"], line["folder"])
    os.system("")


0 tr594 2028
1 tr594 2080
2 tr594 2252
3 tr594 2257
4 tr594 2284
5 tr862 711
6 tr862 714
7 tr862 716
8 tr862 840
9 tr862 881
10 tr866 127
11 tr866 273
12 tr866 296
13 tr866 391
14 tr866 436
15 tr868 278
16 tr868 772
17 tr868 774
18 tr868 949
19 tr868 1009
20 tr869 0
21 tr869 501
22 tr869 1002
23 tr869 1503
24 tr869 2004
25 tr870 129
26 tr870 186
27 tr870 220
28 tr870 293
29 tr870 499
30 tr872 130
31 tr872 403
32 tr872 480
33 tr872 484
34 tr872 1597
35 tr872-halfDIHE 0
36 tr872-halfDIHE 501
37 tr872-halfDIHE 912
38 tr872-halfDIHE 940
39 tr872-halfDIHE 1507
40 tr877 0
41 tr877 501
42 tr877 1002
43 tr877 1503
44 tr877 2004
45 tr882 0
46 tr882 501
47 tr882 1002
48 tr882 1585
49 tr882 1965
50 tr884 492
51 tr884 497
52 tr884 1447
53 tr884 1448
54 tr884 1493
55 tr884-halfDIHE 1192
56 tr884-halfDIHE 1194
57 tr884-halfDIHE 1301
58 tr884-halfDIHE 1306
59 tr884-halfDIHE 2335
60 tr885 1065
61 tr885 1091
62 tr885 1111
63 tr885 1128
64 tr885 1267
65 tr891 86
66 tr891 561
67 tr891 570
68 tr891 611
69 tr891 1709
70 tr894 904
71 tr894 918
72 tr894 972
73 tr894 974
74 tr894 989
75 tr895 275
76 tr895 477
77 tr895 478
78 tr895 718
79 tr895 992
80 tr896 3
81 tr896 528
82 tr896 1062
83 tr896 1246
84 tr896 2068
85 tr898 648
86 tr898 677
87 tr898 695
88 tr898 729
89 tr898 730
90 tr921 623
91 tr921 806
92 tr921 818
93 tr921 828
94 tr921 835
95 tr922 83
96 tr922 1011
97 tr922 1013
98 tr922 1014
99 tr922 1019
100 tr947 214
101 tr947 224
102 tr947 365
103 tr947 481
104 tr947 1259
105 tr948 810
106 tr948 862
107 tr948 877
108 tr948 973
109 tr948 988
110 tr948-halfDIHE 617
111 tr948-halfDIHE 1058
112 tr948-halfDIHE 2259
113 tr948-halfDIHE 2343
114 tr948-halfDIHE 2353

In [23]:
all_results = pd.concat([picked_1.assign(result="picked_top1"), picked.assign(result='picked_top5'),
                         best.assign(result='best'), 
                          init.assign(result='init'),
                         worst.assign(result='worst'), 
                        ], sort=False)

In [25]:
picked.shape


Out[25]:
(20, 30)

In [32]:
all_results.reindex(columns=my_reorder(all_results.columns, ["name", "RMSD", "result"])) .reset_index(drop=True).to_csv("/Users/weilu/Desktop/selection_result.csv")

In [28]:
def my_reorder(a, first):
    # move first to the top. and keep the rest
    new_order = first.copy()
    for col in a:
        if col not in first:
            new_order.append(col)
    return new_order

In [31]:
all_results.reindex(columns=my_reorder(all_results.columns, ["name", "RMSD", "result"]))


Out[31]:
name RMSD result folder i Rw biasQ bias Qw Step ... Frag_Mem Vec_FM Membrane SSB VTotal pc pc2 chosen inrange prob
1917 tr594 5.71845 picked_top1 2028 2400.0 -9655.812091 0.562631 38.258352 0.397447 0.0 ... -542.733446 0.0 0.0 0.0 -976.169211 1.783749 -0.897284 True True 0.336318
3098 tr862 6.21427 picked_top1 716 21500.0 -14017.862442 0.795115 8.395546 0.461948 0.0 ... -860.309802 0.0 0.0 0.0 -1467.578457 -1.316991 -2.854982 True True 0.184471
5040 tr866 1.65094 picked_top1 436 43600.0 -17053.726578 0.628532 27.597682 0.834830 0.0 ... -730.772207 0.0 0.0 0.0 -1442.122133 -0.087063 3.699315 True True 0.220490
7964 tr868 2.55439 picked_top1 1009 700.0 -19002.022160 0.515777 46.894413 0.759775 0.0 ... -1184.431369 0.0 0.0 0.0 -1896.962431 0.139004 -0.381850 True True 0.419466
9181 tr869 12.30950 picked_top1 0 0.0 -17101.278236 0.327521 90.445532 0.340709 0.0 ... -745.292489 0.0 0.0 0.0 -1364.887693 -0.000178 -0.048953 True True 0.287940
11347 tr870 8.98989 picked_top1 27 2700.0 -18766.397302 0.366368 80.297996 0.325325 0.0 ... -919.447094 0.0 0.0 0.0 -1663.622985 -1.937195 -0.884915 True True 0.021734
13892 tr872 3.88397 picked_top1 130 13000.0 -12597.510264 0.673965 21.259789 0.654242 0.0 ... -546.317159 0.0 0.0 0.0 -1131.645842 0.635999 -1.004592 True True 0.232302
15145 tr877 3.00711 picked_top1 0 0.0 -24114.108027 0.568868 37.174966 0.648838 0.0 ... -821.245402 0.0 0.0 0.0 -1699.528374 -0.055574 -0.014200 True True 0.773073
18607 tr882 2.75337 picked_top1 1585 8200.0 -12227.807526 0.648724 24.679000 0.790488 0.0 ... -630.005603 0.0 0.0 0.0 -1123.346564 -0.326972 0.008144 True True 0.257559
20532 tr884 5.42798 picked_top1 1448 44600.0 -9418.042083 0.530488 44.088316 0.496556 0.0 ... -492.254269 0.0 0.0 0.0 -924.112214 0.739004 -2.026302 True True 0.231153
22564 tr885 3.18402 picked_top1 1065 6300.0 -19678.210041 0.713528 16.413273 0.723373 0.0 ... -879.121171 0.0 0.0 0.0 -1671.835269 0.400841 -0.236901 True True 0.125852
25261 tr891 1.91208 picked_top1 1709 20600.0 -18636.734867 0.645838 25.086161 0.814518 0.0 ... -811.315247 0.0 0.0 0.0 -1634.043036 0.290187 0.469162 True True 0.623671
26508 tr894 2.00769 picked_top1 904 40300.0 -6759.374211 0.667682 22.087109 0.764641 0.0 ... -339.543577 0.0 0.0 0.0 -671.963552 -0.318981 -0.765288 True True 0.385394
28827 tr895 4.87177 picked_top1 992 49100.0 -20488.493033 0.646876 24.939243 0.571973 0.0 ... -901.913395 0.0 0.0 0.0 -1650.549327 -0.933751 0.277437 True True 0.225033
29502 tr896 8.62961 picked_top1 3 300.0 -11703.621241 0.461786 57.934848 0.405691 0.0 ... -534.883801 0.0 0.0 0.0 -989.943449 -1.047856 0.015758 True True 0.497582
31988 tr898 13.46090 picked_top1 729 22800.0 -19042.795684 0.568624 37.217128 0.398104 0.0 ... -783.469742 0.0 0.0 0.0 -1469.264433 -2.546374 -2.851364 True True 0.170833
33058 tr921 3.24877 picked_top1 828 32700.0 -21881.858026 0.680476 20.419091 0.624908 0.0 ... -1871.646575 0.0 0.0 0.0 -2764.790089 0.245506 -0.892383 True True 0.165306
34861 tr922 3.68668 picked_top1 1011 900.0 -9636.845094 0.651477 24.293703 0.606792 0.0 ... -620.135642 0.0 0.0 0.0 -1010.273657 -0.056487 1.712977 True True 0.241481
36552 tr947 9.56921 picked_top1 481 48100.0 -27891.079857 0.610006 30.418991 0.482850 0.0 ... -1263.925386 0.0 0.0 0.0 -2108.005624 -2.046083 0.185091 True True 0.209786
37373 tr948 6.42138 picked_top1 810 30900.0 -28698.131138 0.581505 35.027568 0.603420 0.0 ... -1488.304814 0.0 0.0 0.0 -2589.281395 -0.825228 0.477354 True True 0.596875
0 tr594 5.71845 picked_top5 2028 2400.0 -9655.812091 0.562631 38.258352 0.397447 0.0 ... -542.733446 0.0 0.0 0.0 -976.169211 1.783749 -0.897284 True True 0.336318
8 tr862 5.91362 picked_top5 840 33900.0 -14298.952979 0.792131 8.641868 0.457493 0.0 ... -857.960775 0.0 0.0 0.0 -1460.246571 -0.883616 -2.363559 True True 0.176140
12 tr866 1.52312 picked_top5 296 29600.0 -16825.468015 0.627247 27.789014 0.867941 0.0 ... -729.065574 0.0 0.0 0.0 -1438.226706 -0.323353 4.008893 True True 0.149921
16 tr868 1.94233 picked_top5 772 27100.0 -18689.078432 0.534715 43.298055 0.804022 0.0 ... -1144.918529 0.0 0.0 0.0 -1868.985796 -0.338582 -0.103741 True True 0.328877
20 tr869 12.30950 picked_top5 0 0.0 -17101.278236 0.327521 90.445532 0.340709 0.0 ... -745.292489 0.0 0.0 0.0 -1364.887693 -0.000178 -0.048953 True True 0.287940
26 tr870 8.81563 picked_top5 129 12900.0 -20157.706928 0.630429 27.316528 0.324614 80000000.0 ... -985.958008 0.0 0.0 0.0 -1635.238984 -1.121479 -0.345961 True True 0.020210
31 tr872 3.38775 picked_top5 141 14100.0 -12447.029453 0.676140 20.977116 0.667752 0.0 ... -546.638457 0.0 0.0 0.0 -1131.691792 0.641274 -0.860320 True True 0.220697
35 tr877 3.00711 picked_top5 0 0.0 -24114.108027 0.568868 37.174966 0.648838 0.0 ... -821.245402 0.0 0.0 0.0 -1699.528374 -0.055574 -0.014200 True True 0.773073
44 tr882 2.28804 picked_top5 1965 46200.0 -11916.930776 0.636862 26.373853 0.759764 0.0 ... -633.406934 0.0 0.0 0.0 -1132.496525 -0.322323 0.313275 True True 0.212781
49 tr884 4.60271 picked_top5 1851 34800.0 -9350.865722 0.537420 42.796012 0.571265 0.0 ... -492.137189 0.0 0.0 0.0 -916.994515 0.261957 -0.655306 True True 0.138139
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
25047 tr885 2.35937 init 0 0.0 -20598.856085 0.685012 19.843456 0.830205 0.0 ... -870.801921 0.0 0.0 0.0 -1645.527867 -0.037832 0.039669 True NaN NaN
27552 tr891 1.60893 init 0 0.0 -18747.275083 0.620600 28.788908 0.860321 0.0 ... -820.247037 0.0 0.0 0.0 -1600.853027 -0.184893 -0.098650 True NaN NaN
30057 tr894 2.24606 init 0 0.0 -6810.073249 0.556208 39.390241 0.731370 0.0 ... -330.532789 0.0 0.0 0.0 -668.558313 -0.065067 -0.283718 True NaN NaN
32562 tr895 4.33382 init 0 0.0 -22028.450058 0.618778 29.065981 0.654401 0.0 ... -889.343989 0.0 0.0 0.0 -1623.103778 -0.010141 -0.051103 True NaN NaN
35067 tr896 8.22359 init 0 0.0 -12136.482066 0.466766 56.867616 0.443834 0.0 ... -525.593994 0.0 0.0 0.0 -947.681062 -0.058300 -0.054690 True NaN NaN
37572 tr898 13.99650 init 0 0.0 -18590.677602 0.528118 44.534476 0.357930 0.0 ... -782.373728 0.0 0.0 0.0 -1446.453004 0.042324 -0.167047 True NaN NaN
40077 tr921 3.55243 init 0 0.0 -22225.052777 0.676982 20.868098 0.633795 0.0 ... -1814.239425 0.0 0.0 0.0 -2679.102572 -0.101087 0.070372 True NaN NaN
42582 tr922 2.55776 init 0 0.0 -9494.179526 0.646615 24.976143 0.778997 0.0 ... -619.584850 0.0 0.0 0.0 -999.248416 0.115988 0.147087 True NaN NaN
45087 tr947 12.92220 init 0 0.0 -26347.737275 0.602246 31.641680 0.498832 0.0 ... -1262.954342 0.0 0.0 0.0 -2074.380569 1.187467 -0.301399 True NaN NaN
47592 tr948 6.72126 init 0 0.0 -29784.167159 0.534029 43.425714 0.623263 0.0 ... -1442.266587 0.0 0.0 0.0 -2474.731767 -0.029092 0.051793 True NaN NaN
2281 tr594 11.66420 worst 2392 38800.0 -7800.180486 0.405731 70.631048 0.293334 0.0 ... -528.484319 0.0 0.0 0.0 -945.059692 2.492546 0.800841 True True 0.000514
2842 tr862 8.64546 worst 459 45900.0 -13675.272843 0.744725 13.033074 0.552184 0.0 ... -836.017899 0.0 0.0 0.0 -1416.508005 -2.115153 0.313762 True True 0.004266
5982 tr866 5.49268 worst 1436 43400.0 -16316.523037 0.460162 58.285112 0.523121 0.0 ... -706.324188 0.0 0.0 0.0 -1342.476061 0.189714 -0.228174 True True 0.000023
8884 tr868 5.50126 worst 2067 6300.0 -18165.315305 0.465461 57.146452 0.608624 0.0 ... -1085.027900 0.0 0.0 0.0 -1783.826243 0.645704 -2.754277 True True 0.004290
9679 tr869 14.43870 worst 598 9700.0 -16097.470589 0.218214 122.237944 0.311339 0.0 ... -729.588674 0.0 0.0 0.0 -1323.833114 -0.808695 -0.587784 True True 0.001463
12776 tr870 11.43280 worst 1484 48200.0 -18635.189014 0.630429 27.316528 0.311343 80000000.0 ... -985.958008 0.0 0.0 0.0 -1635.238984 -1.256596 2.241660 True True 0.006724
15120 tr872 7.00137 worst 2011 700.0 -11677.938128 0.670047 21.773797 0.571122 0.0 ... -545.404119 0.0 0.0 0.0 -1101.906710 0.895718 0.631344 True True 0.007777
16772 tr877 5.97014 worst 1819 31600.0 -23656.864393 0.497143 50.573068 0.572003 0.0 ... -819.115369 0.0 0.0 0.0 -1632.289229 0.220377 3.612046 True True 0.001921
18310 tr882 4.10836 worst 1201 19900.0 -11669.955472 0.613084 29.940872 0.673967 0.0 ... -617.772188 0.0 0.0 0.0 -1084.858365 -0.434148 -0.384838 True True 0.000567
21428 tr884 8.92547 worst 2345 34100.0 -8695.952091 0.456176 59.148866 0.482172 0.0 ... -479.625249 0.0 0.0 0.0 -875.415909 2.096840 0.496279 True True 0.000063
22233 tr885 5.25420 worst 733 23200.0 -19040.407218 0.663800 22.606043 0.762638 0.0 ... -879.497813 0.0 0.0 0.0 -1663.557353 0.193633 -1.313889 True True 0.007403
26049 tr891 4.07153 worst 2497 49300.0 -17308.511883 0.599059 32.150760 0.652094 0.0 ... -791.156171 0.0 0.0 0.0 -1550.446442 2.179495 1.449662 True True 0.000239
27079 tr894 5.09216 worst 1485 48300.0 -6089.793333 0.505490 48.908058 0.487889 0.0 ... -326.511714 0.0 0.0 0.0 -661.998634 -0.766482 -2.990092 True True 0.003307
27974 tr895 7.98054 worst 43 4300.0 -18903.580144 0.583194 34.745474 0.565594 0.0 ... -892.775956 0.0 0.0 0.0 -1630.348760 -1.605658 3.039932 True True 0.001931
30562 tr896 11.04840 worst 1207 20500.0 -10977.597770 0.402719 71.348928 0.333853 0.0 ... -538.589762 0.0 0.0 0.0 -956.501625 -0.912589 -2.112027 True True 0.008942
31528 tr898 15.98600 worst 132 13200.0 -17054.063013 0.483393 53.376536 0.348017 0.0 ... -785.866058 0.0 0.0 0.0 -1457.509460 -2.655997 0.094145 True True 0.005441
32402 tr921 4.78319 worst 164 16400.0 -20379.263146 0.629018 27.525487 0.588459 0.0 ... -1754.727325 0.0 0.0 0.0 -2577.565026 -1.471915 -4.113387 True True 0.000025
36178 tr922 8.54551 worst 2429 42500.0 -8689.220583 0.406243 70.509502 0.408714 0.0 ... -507.886517 0.0 0.0 0.0 -879.592569 1.571574 4.837135 True True 0.000040
36219 tr947 15.48280 worst 15 1500.0 -25124.991722 0.569790 37.016088 0.459361 0.0 ... -1249.896273 0.0 0.0 0.0 -2041.418239 -1.986278 -0.695540 True True 0.000236
38817 tr948 10.08460 worst 2465 46100.0 -26585.016143 0.517020 46.653959 0.518408 0.0 ... -1433.128337 0.0 0.0 0.0 -2468.110974 0.743237 -1.782562 True True 0.000068

100 rows × 31 columns


In [ ]:
picked

In [19]:
count = 0
total = 0
for name in folder_list:
    init_tmp = init.query(f"name == '{name}'")["RMSD"].iloc[0]
    picked_tmp = picked.query(f"name == '{name}'")["RMSD"].iloc[0]
    improved = picked_tmp < init_tmp
    print(name, init_tmp, picked_tmp, round(init_tmp - picked_tmp, 3), improved)
    total += init_tmp - picked_tmp
    count += improved
print("improved: ", count, len(folder_list), total)


tr898 13.9965 13.4609 0.536 True
tr869 12.3095 12.3095 0.0 False
tr947 12.9222 8.10622 4.816 True
tr894 2.24606 1.6856900000000001 0.56 True
tr882 2.34977 2.28804 0.062 True
tr594 4.65665 5.71845 -1.062 False
tr862 5.53469 5.91362 -0.379 False
tr877 3.00711 3.00711 0.0 False
tr872 5.66699 3.38775 2.279 True
tr885 2.35937 2.72348 -0.364 False
tr866 3.23637 1.52312 1.713 True
tr868 1.97032 1.94233 0.028 True
tr884 3.81972 4.60271 -0.783 False
tr895 4.333819999999999 4.87177 -0.538 False
tr896 8.22359 8.01121 0.212 True
tr870 7.65488 8.81563 -1.161 False
tr921 3.55243 3.17155 0.381 True
tr922 2.55776 3.28628 -0.729 False
tr891 1.60893 1.91208 -0.303 False
tr948 6.721260000000001 5.56396 1.157 True
improved:  10 20 6.42652

In [26]:
count = 0
total = 0
for name in folder_list:
    init_tmp = init.query(f"name == '{name}'")["RMSD"].iloc[0]
    picked_tmp = picked_1.query(f"name == '{name}'")["RMSD"].iloc[0]
    improved = picked_tmp < init_tmp
    print(name, init_tmp, picked_tmp, round(init_tmp - picked_tmp, 3), improved)
    total += init_tmp - picked_tmp
    count += improved
print("improved: ", count, len(folder_list), total)


tr898 13.9965 13.4609 0.536 True
tr869 12.3095 12.3095 0.0 False
tr947 12.9222 9.56921 3.353 True
tr894 2.24606 2.00769 0.238 True
tr882 2.34977 2.75337 -0.404 False
tr594 4.65665 5.71845 -1.062 False
tr862 5.53469 6.21427 -0.68 False
tr877 3.00711 3.00711 0.0 False
tr872 5.66699 3.8839699999999997 1.783 True
tr885 2.35937 3.1840200000000003 -0.825 False
tr866 3.23637 1.65094 1.585 True
tr868 1.97032 2.55439 -0.584 False
tr884 3.81972 5.427980000000001 -1.608 False
tr895 4.333819999999999 4.87177 -0.538 False
tr896 8.22359 8.629610000000001 -0.406 False
tr870 7.65488 8.989889999999999 -1.335 False
tr921 3.55243 3.24877 0.304 True
tr922 2.55776 3.6866800000000004 -1.129 False
tr891 1.60893 1.91208 -0.303 False
tr948 6.721260000000001 6.42138 0.3 True
improved:  7 20 -0.7740600000000017

In [181]:
filtered = a
# FEATURES = ["eigenvalues", "entropy", "pca"]
# FEATURES = ["eigenvalues", "entropy", "diffRMSD"]
# FEATURES = ["eigenvalues", "entropy"]
FEATURES = [
    "biasQ",
    'Rw',
     'VTotal',
#     'RMSD', # test
#     'Qw',
#      'Burial',
#      'Water',
#      'Rama',
#      'DSSP',
#      'P_AP',
#      'Helix',
#      'Frag_Mem'
               ]
# FEATURES = ["eigenvalues"]
# LABEL = "diffRMSD"
# LABEL = "RMSD"
LABEL = "chosen"
DEGREE = 1

def pred_from_raw(a):
    data = my_transform(a, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
    test_y = data[:,-1]
    test_set = data[:,:-1]
    prob= clf.predict_proba(test_set)[:,1]
    return a.assign(prob=prob)

# data = my_transform(raw_data, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
# data = raw_data.groupby('name').apply(my_transform, label=LABEL, degree=DEGREE, FEATURES=FEATURES)[0]
data = np.concatenate(raw_data.groupby('name').apply(my_transform, 
                                                     label=LABEL, degree=DEGREE, FEATURES=FEATURES).values)
train_y = data[:,-1]
train_set = data[:,:-1]

# clf = svm.SVC(probability=True)
# p = 0.01
# clf = LogisticRegression(random_state=27, class_weight={0:p, 1:(1-p)})
clf = LogisticRegression(random_state=27)
clf.fit(train_set, train_y)

filtered = filtered.reset_index(drop=True).groupby("name").apply(pred_from_raw).reset_index(drop=True)


picked_n = 1
best = raw_data_all_2.groupby("name").apply(choose_top, col="RMSD"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
if True:
    picked = filtered.groupby("name").apply(choose_top, col="prob"
                                            , n=1, ascending=False).reset_index(drop=True).query("chosen==True")

if False:
    picked = filtered.groupby("name").apply(choose_top, col="prob"
                                                , n=5, ascending=False).reset_index(drop=True).query("chosen==True")
    picked = picked.groupby("name").apply(choose_top, col="RMSD"
                                                , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
worst = filtered.groupby("name").apply(choose_top, col="RMSD"
                                            , n=1, ascending=False).reset_index(drop=True).query("chosen==True")
init = raw_data_all_2.groupby("name").apply(choose_top, col="i"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
all_results = pd.concat([best.assign(result='best'), 
                         picked.assign(result='picked'), init.assign(result='init')
                        , worst.assign(result='worst')
                        ], sort=False)
# all_results = pd.concat([best.assign(result='best'), 
#                          picked.assign(result='picked')])
# picked.to_csv("/Users/weilu/Desktop/picked.csv

# sns.set(rc={'figure.figsize':(20,30)})
# plt.figure(figsize=(15,8))
fg = sns.FacetGrid(data=all_results.reset_index(), hue='result', height=8, aspect=1.63)
fg.map(plt.plot, 'name', 'RMSD').add_legend(fontsize=20)
# fg.set(ylim=(0, 10))


Out[181]:
<seaborn.axisgrid.FacetGrid at 0x1a20ed8e10>

In [182]:
count = 0
total = 0
for name in folder_list:
    init_tmp = init.query(f"name == '{name}'")["RMSD"].iloc[0]
    picked_tmp = picked.query(f"name == '{name}'")["RMSD"].iloc[0]
    improved = picked_tmp < init_tmp
    print(name, init_tmp, picked_tmp, round(init_tmp - picked_tmp, 3), improved)
    total += init_tmp - picked_tmp
    count += improved
print("improved: ", count, len(folder_list), total)


tr898 13.9965 12.8408 1.156 True
tr869 12.3095 12.3095 0.0 False
tr947 12.9222 12.1452 0.777 True
tr894 2.24606 2.00769 0.238 True
tr882 2.34977 2.75337 -0.404 False
tr594 4.65665 5.71845 -1.062 False
tr862 5.53469 5.91362 -0.379 False
tr877 3.00711 3.00711 0.0 False
tr872 5.66699 4.53573 1.131 True
tr885 2.35937 3.1840200000000003 -0.825 False
tr866 3.23637 1.65094 1.585 True
tr868 1.97032 2.55439 -0.584 False
tr884 3.81972 4.81 -0.99 False
tr895 4.333819999999999 5.72387 -1.39 False
tr896 8.22359 8.629610000000001 -0.406 False
tr870 7.65488 9.11107 -1.456 False
tr921 3.55243 3.24877 0.304 True
tr922 2.55776 3.6866800000000004 -1.129 False
tr891 1.60893 1.91208 -0.303 False
tr948 6.721260000000001 6.42138 0.3 True
improved:  7 20 -3.436360000000002

In [339]:
all_results.query("name == 'tr594'")


Out[339]:
folder i RMSD Rw biasQ bias Qw Step Chain Shake ... SSB VTotal pc pc2 name chosen result pc_center inrange prob
0 0 0.0 4.65665 -10838.987781 0.590522 33.534424 0.464193 0.0 14.571923 0.0 ... 0.0 -940.887277 -0.086922 -0.059985 tr594 True best NaN NaN NaN
0 0 0.0 4.65665 -10838.987781 0.590522 33.534424 0.464193 0.0 14.571923 0.0 ... 0.0 -940.887277 -0.086922 -0.059985 tr594 True picked 0.703 True 0.042606
0 0 0.0 4.65665 -10838.987781 0.590522 33.534424 0.464193 0.0 14.571923 0.0 ... 0.0 -940.887277 -0.086922 -0.059985 tr594 True init NaN NaN NaN
1020 2380 37600.0 11.55900 -8163.823349 0.491720 51.669743 0.349195 0.0 4.380186 0.0 ... 0.0 -960.110987 1.421570 -0.122716 tr594 True worst 0.703 True 0.003102

4 rows × 32 columns


In [35]:
clf.coef_


Out[35]:
array([[ 0.24185815, -0.37308731, -0.43718463]])

In [221]:
Plot_Metric = "Qw"
if Plot_Metric:
    isAscending = False
else:
    isAscending = True

picked_n = 1
best = raw_data_all_2.groupby("name").apply(choose_top, col=Plot_Metric
                                            , n=1, ascending=isAscending).reset_index(drop=True).query("chosen==True")
picked = filtered.groupby("name").apply(choose_top, col="prob"
                                            , n=1, ascending=False).reset_index(drop=True).query("chosen==True")
worst = filtered.groupby("name").apply(choose_top, col=Plot_Metric
                                            , n=1, ascending=False).reset_index(drop=True).query("chosen==True")
init = raw_data_all_2.groupby("name").apply(choose_top, col="i"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
all_results = pd.concat([best.assign(result='best'), 
                         picked.assign(result='picked'), init.assign(result='init')
#                         , worst.assign(result='worst')
                        ], sort=False)
# all_results = pd.concat([best.assign(result='best'), 
#                          picked.assign(result='picked')])
# picked.to_csv("/Users/weilu/Desktop/picked.csv

# sns.set(rc={'figure.figsize':(20,30)})
# plt.figure(figsize=(15,8))
fg = sns.FacetGrid(data=all_results.reset_index(), hue='result', height=8, aspect=1.63)
fg.map(plt.plot, 'name', Plot_Metric).add_legend(fontsize=20)
fg.set(ylim=(0, 1))


Out[221]:
<seaborn.axisgrid.FacetGrid at 0x1a2d7cfd30>

In [170]:
picked["init_RMSD"] = init["RMSD"].values
picked["diff_RMSD"] = init["RMSD"].values - picked["RMSD"].values
out = picked[["name", "RMSD", "init_RMSD", "diff_RMSD", "folder"]].reset_index(drop=True)

In [206]:
fg = sns.FacetGrid(data=filtered, hue='name', height=8, aspect=1.63)
fg.map(plt.scatter, 'Qw', 'RMSD').add_legend(fontsize=20)


Out[206]:
<seaborn.axisgrid.FacetGrid at 0x1a2c9cb780>

In [35]:
filtered.plot.scatter("prob", "RMSD")


Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a27569080>

In [25]:
out


Out[25]:
name RMSD init_RMSD diff_RMSD folder
0 tr894 1.36968 2.24606 0.87638 980

In [13]:
raw_data_all_2.plot("RMSD", "Rw")


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a23707f60>

In [14]:
raw_data_all_2.plot("RMSD", "pc")


Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a23783828>

In [15]:
out


Out[15]:
name RMSD init_RMSD diff_RMSD folder
0 tr894 1.36968 2.24606 0.87638 980

In [16]:
out


Out[16]:
name RMSD init_RMSD diff_RMSD folder
0 tr894 1.36968 2.24606 0.87638 980

In [70]:
all_results


Out[70]:
folder i RMSD Rw biasQ bias Step Chain Shake Chi ... SSB VTotal pc pc2 name chosen result pc_center inrange prob
980 980 47900.0 1.36968 -6834.949362 0.645772 25.095468 0 3.685220 0.0 0.588609 ... 0.0 -656.184416 0.944122 -0.804111 tr894 True best NaN NaN NaN
981 981 48000.0 1.57229 -6850.297788 0.673878 21.271087 0 4.750551 0.0 0.512601 ... 0.0 -659.909501 0.762314 -0.531746 tr894 True picked 2.03 True 0.523816
0 0 0.0 2.24606 -6810.073249 0.556208 39.390241 0 5.546380 0.0 1.160888 ... 0.0 -668.558313 -0.065067 -0.283718 tr894 True init NaN NaN NaN

3 rows × 31 columns


In [13]:
# out.to_csv("/Users/weilu/Desktop/picked_3.csv")

In [26]:
clf.coef_


Out[26]:
array([[ 0.31865771, -0.24574338, -0.00429271, -0.15621297,  0.12086065,
         0.03529636,  0.05114406,  0.06779384,  0.23049113, -0.0941187 ]])

In [14]:
clf.coef_


Out[14]:
array([[ 0.20157408, -0.69485223,  0.04456798]])

In [15]:
fg = sns.FacetGrid(data=all_results.reset_index(), hue='result', height=8, aspect=1.63)
fg.map(plt.plot, 'name', 'RMSD').add_legend(fontsize=20)
fg.set(ylim=(0, 10))


Out[15]:
<seaborn.axisgrid.FacetGrid at 0x1a22576f60>

In [16]:
filtered["name"].unique().shape


Out[16]:
(17,)

In [17]:
picked[["RMSD", "name"]]


Out[17]:
RMSD name
0 5.14331 tr594
1136 4.31091 tr862
1890 3.14792 tr866
2586 1.61058 tr868
3123 8.69691 tr870
3576 2.81351 tr872
4796 3.07719 tr877
5446 2.25775 tr882
6372 4.06919 tr884
6989 2.35937 tr885
7922 1.60893 tr891
9265 1.18208 tr894
10424 4.67184 tr895
10781 8.22359 tr896
14297 3.25991 tr921
14558 3.84193 tr922
16140 5.40097 tr948

In [18]:
# picked.to_csv("/Users/weilu/Desktop/picked_2.csv")

In [19]:
name ="tr894"
name_list = ["Step" , "Chain" , "Shake" , "Chi" , "Rama", "Excluded", "DSSP", "P_AP", "Water" ,"Burial", "Helix", "AMH_Go", "Frag_Mem", "Vec_FM", "Membrane", "SSB","VTotal"]

# you probably want to change the location below
#     location = f"/Users/weilu/Research/server/sep_2018/03_week/02_week/{name}/"
location = f"/Users/weilu/Research/server/nov_2018/structure_selection/{name}/"
RMSD = pd.read_table(location+"rmsd-angstrom.xvg", names=["i", "RMSD"], sep="\s+")
bias = pd.read_table(location+"bias.log", names=["i", "biasQ", "bias"], sep="\s+").drop("i", axis=1)
awsem = pd.read_table(location+"awsem.log", names=name_list)
rw = pd.read_table(location+"rwplusScore.txt", names=["i", "Rw"], sep="\s+").drop("i", axis=1)
# pc location
#     location = f"/Users/weilu/Research/server/sep_2018/03_week/{name}/"
#     location = f"/Users/weilu/Research/server/oct_2018/01_week/{name}/"
pc = pd.read_table(location+"pcarmsd_scaled.txt", names=["i", "pc", "pc2"], sep="\s+", comment="#").drop("i", axis=1)
raw_data = pd.concat([RMSD, rw, bias, awsem, pc], axis=1)
raw_data.assign(name=name).reset_index().rename(columns={"index":"folder"})


Out[19]:
folder i RMSD Rw biasQ bias Step Chain Shake Chi ... Helix AMH_Go Frag_Mem Vec_FM Membrane SSB VTotal pc pc2 name
0 0 0.0 2.24606 -6810.073249 0.556208 39.390241 0 5.546380 0.0 1.160888 ... -11.494550 0.0 -330.532789 0.0 0.0 0.0 -668.558313 -0.065067 -0.283718 tr894
1 1 100.0 3.23173 -6286.940968 0.551209 40.282725 0 2.470745 0.0 0.544233 ... -12.663186 0.0 -332.365779 0.0 0.0 0.0 -661.611254 -2.030413 -1.872569 tr894
2 2 200.0 3.21661 -6289.430903 0.497013 50.599184 0 4.764399 0.0 0.562483 ... -13.188086 0.0 -332.765589 0.0 0.0 0.0 -658.149297 -2.028462 -1.644785 tr894
3 3 300.0 3.15813 -6190.501191 0.528574 44.448412 0 2.255989 0.0 0.468350 ... -13.341891 0.0 -333.111932 0.0 0.0 0.0 -662.850984 -1.765059 -1.880311 tr894
4 4 400.0 3.48106 -6070.736394 0.507673 48.477271 0 4.563115 0.0 0.358890 ... -13.054297 0.0 -332.096669 0.0 0.0 0.0 -655.505067 -2.050421 -0.862438 tr894
5 5 500.0 3.21603 -6207.393138 0.507665 48.478663 0 3.079867 0.0 0.562905 ... -12.983445 0.0 -334.065069 0.0 0.0 0.0 -654.514531 -2.024328 -0.505672 tr894
6 6 600.0 3.26742 -6270.166177 0.515849 46.880384 0 3.598324 0.0 0.689476 ... -12.473662 0.0 -329.046856 0.0 0.0 0.0 -661.071468 -2.225011 -1.368251 tr894
7 7 700.0 3.45545 -6275.141716 0.521223 45.845485 0 3.142972 0.0 0.471721 ... -12.879093 0.0 -330.526926 0.0 0.0 0.0 -660.649964 -1.917079 -0.934712 tr894
8 8 800.0 2.84899 -6353.542762 0.521744 45.745735 0 4.776124 0.0 0.537145 ... -12.874610 0.0 -330.107068 0.0 0.0 0.0 -666.482035 -1.836351 -1.092572 tr894
9 9 900.0 3.43413 -6114.807297 0.567139 37.473775 0 3.616298 0.0 0.553106 ... -13.392355 0.0 -332.670193 0.0 0.0 0.0 -664.643019 -1.984028 -1.763320 tr894
10 10 1000.0 3.39099 -6010.453477 0.536807 42.909507 0 3.596885 0.0 0.391673 ... -13.353049 0.0 -332.451981 0.0 0.0 0.0 -659.813810 -2.215176 0.442220 tr894
11 11 1100.0 3.48153 -6114.438719 0.581163 35.084917 0 2.720963 0.0 0.310693 ... -13.200931 0.0 -331.501808 0.0 0.0 0.0 -664.442638 -2.008016 0.629552 tr894
12 12 1200.0 3.23210 -6101.638205 0.519535 46.169300 0 5.605657 0.0 0.721162 ... -10.424754 0.0 -332.022908 0.0 0.0 0.0 -656.524265 -1.985955 -0.727112 tr894
13 13 1300.0 2.92439 -6059.757243 0.529431 44.287121 0 2.323098 0.0 0.547973 ... -13.361106 0.0 -334.500030 0.0 0.0 0.0 -660.984355 -2.111932 -1.343508 tr894
14 14 1400.0 2.90091 -6306.977750 0.545174 41.373291 0 3.194121 0.0 0.466709 ... -13.974103 0.0 -333.122664 0.0 0.0 0.0 -664.931939 -1.862049 -0.307235 tr894
15 15 1500.0 3.44498 -6054.804323 0.481283 53.813425 0 4.393513 0.0 0.413912 ... -10.416986 0.0 -329.519729 0.0 0.0 0.0 -640.413959 -1.964237 -0.545008 tr894
16 16 1600.0 3.72309 -5924.173405 0.517176 46.623769 0 4.403761 0.0 0.530923 ... -12.445575 0.0 -332.027049 0.0 0.0 0.0 -652.482812 -2.177049 -0.822812 tr894
17 17 1700.0 2.94234 -6170.545926 0.605790 31.080307 0 3.403110 0.0 0.394039 ... -13.070255 0.0 -334.764821 0.0 0.0 0.0 -662.858390 -1.977594 -0.605940 tr894
18 18 1800.0 3.14126 -6104.911588 0.565975 37.675492 0 2.148955 0.0 0.416200 ... -14.408324 0.0 -332.427927 0.0 0.0 0.0 -657.525813 -2.012379 -0.666930 tr894
19 19 1900.0 3.03640 -6248.712283 0.549830 40.530526 0 3.192358 0.0 0.373018 ... -12.509658 0.0 -331.034803 0.0 0.0 0.0 -652.148108 -1.860132 -0.951803 tr894
20 20 2000.0 2.80779 -6441.963711 0.556436 39.349743 0 2.222043 0.0 0.423124 ... -12.922997 0.0 -334.503676 0.0 0.0 0.0 -661.244525 -1.888069 -0.825117 tr894
21 21 2100.0 3.31259 -6410.346460 0.513748 47.288295 0 6.191025 0.0 0.604926 ... -12.253130 0.0 -332.203648 0.0 0.0 0.0 -651.160377 -2.001888 -0.592104 tr894
22 22 2200.0 2.91335 -6308.221083 0.593916 32.980882 0 3.234118 0.0 0.664409 ... -13.207077 0.0 -332.990401 0.0 0.0 0.0 -658.201126 -2.152202 -0.717611 tr894
23 23 2300.0 3.19933 -6374.911948 0.539413 42.428153 0 2.538853 0.0 0.341001 ... -13.408200 0.0 -331.572188 0.0 0.0 0.0 -662.264580 -2.190752 -0.847559 tr894
24 24 2400.0 2.94005 -6354.892279 0.603693 31.411886 0 1.888008 0.0 0.590554 ... -13.759548 0.0 -334.756167 0.0 0.0 0.0 -673.536860 -2.020588 -0.800902 tr894
25 25 2500.0 3.24435 -6448.101987 0.570527 36.889392 0 2.534727 0.0 0.389001 ... -12.327469 0.0 -332.104002 0.0 0.0 0.0 -664.744860 -1.970641 -1.751093 tr894
26 26 2600.0 3.21349 -6356.016826 0.552069 40.128420 0 3.240573 0.0 0.340624 ... -14.062783 0.0 -331.951360 0.0 0.0 0.0 -669.486651 -2.017166 -1.114400 tr894
27 27 2700.0 3.08817 -6015.780813 0.543925 41.600811 0 2.849521 0.0 0.563807 ... -11.707489 0.0 -333.438918 0.0 0.0 0.0 -656.923779 -2.079045 -1.713251 tr894
28 28 2800.0 2.91854 -6266.414740 0.570673 36.864257 0 2.305444 0.0 0.542672 ... -13.504228 0.0 -336.875255 0.0 0.0 0.0 -661.990310 -1.959546 -1.258906 tr894
29 29 2900.0 3.51806 -6095.122990 0.575797 35.989700 0 2.400738 0.0 0.481813 ... -12.959278 0.0 -333.359501 0.0 0.0 0.0 -662.430279 -2.148657 -0.842376 tr894
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2475 2475 47100.0 3.04801 -6584.548221 0.528828 44.400564 0 3.443490 0.0 0.689302 ... -8.527353 0.0 -328.333283 0.0 0.0 0.0 -656.799558 1.489679 -1.073833 tr894
2476 2476 47200.0 3.06231 -6524.537951 0.501176 49.765102 0 4.955148 0.0 0.473575 ... -7.623936 0.0 -324.707280 0.0 0.0 0.0 -648.310152 1.433943 -0.830940 tr894
2477 2477 47300.0 3.28581 -6530.410132 0.574789 36.160841 0 4.166641 0.0 0.671689 ... -9.461912 0.0 -327.996908 0.0 0.0 0.0 -661.264516 1.390436 -1.208435 tr894
2478 2478 47400.0 3.23796 -6505.011426 0.455993 59.188681 0 7.585024 0.0 0.630827 ... -6.166685 0.0 -318.313982 0.0 0.0 0.0 -633.543536 1.532387 -1.189597 tr894
2479 2479 47500.0 3.47343 -6412.675255 0.509082 48.200074 0 4.337123 0.0 0.533898 ... -8.175169 0.0 -321.367115 0.0 0.0 0.0 -641.175122 1.170308 -0.850157 tr894
2480 2480 47600.0 3.22195 -6600.284916 0.546451 41.141258 0 4.499017 0.0 0.531768 ... -8.919736 0.0 -328.276578 0.0 0.0 0.0 -656.319864 1.024558 -1.633348 tr894
2481 2481 47700.0 3.29628 -6594.057433 0.450815 60.320799 0 5.516787 0.0 0.532782 ... -6.014002 0.0 -324.793170 0.0 0.0 0.0 -643.313596 1.289813 -1.317772 tr894
2482 2482 47800.0 3.22039 -6538.294794 0.492895 51.431169 0 4.771708 0.0 0.643649 ... -10.536290 0.0 -327.516119 0.0 0.0 0.0 -654.529729 0.546995 -1.410352 tr894
2483 2483 47900.0 3.71419 -6429.439945 0.486252 52.787339 0 4.245413 0.0 0.423871 ... -4.541977 0.0 -327.570965 0.0 0.0 0.0 -650.587467 0.814162 -1.430980 tr894
2484 2484 48000.0 3.19131 -6477.936224 0.522025 45.692109 0 4.294322 0.0 0.558186 ... -7.184502 0.0 -326.483925 0.0 0.0 0.0 -651.265310 1.539578 -1.387833 tr894
2485 2485 48100.0 3.07537 -6498.204684 0.521062 45.876390 0 4.032237 0.0 0.661476 ... -9.748129 0.0 -325.586983 0.0 0.0 0.0 -651.190938 1.369766 -0.825550 tr894
2486 2486 48200.0 3.37094 -6374.215012 0.539856 42.346474 0 3.503117 0.0 0.476007 ... -6.907826 0.0 -326.781573 0.0 0.0 0.0 -650.287189 1.779587 -0.777044 tr894
2487 2487 48300.0 3.28947 -6462.479014 0.529538 44.266976 0 4.450656 0.0 0.778137 ... -9.720084 0.0 -327.426579 0.0 0.0 0.0 -655.430363 1.632520 -1.162894 tr894
2488 2488 48400.0 3.32840 -6504.263804 0.487773 52.475263 0 3.823074 0.0 0.541481 ... -9.621493 0.0 -328.796522 0.0 0.0 0.0 -648.395792 0.591862 -1.299183 tr894
2489 2489 48500.0 3.43751 -6447.190797 0.507673 48.477255 0 5.101223 0.0 0.543194 ... -8.555008 0.0 -326.452097 0.0 0.0 0.0 -648.549022 0.661395 -1.650177 tr894
2490 2490 48600.0 3.82345 -6365.246694 0.495035 50.997870 0 3.198458 0.0 0.629422 ... -9.256821 0.0 -327.410325 0.0 0.0 0.0 -647.431837 -0.195077 -2.383967 tr894
2491 2491 48700.0 3.62716 -6430.212535 0.492129 51.586661 0 4.407783 0.0 0.580651 ... -8.285745 0.0 -323.436894 0.0 0.0 0.0 -646.712158 0.334757 -1.955750 tr894
2492 2492 48800.0 3.25856 -6733.348524 0.528495 44.463321 0 4.332744 0.0 0.787561 ... -10.086368 0.0 -326.230732 0.0 0.0 0.0 -653.059823 0.900004 -1.994071 tr894
2493 2493 48900.0 3.31050 -6597.765990 0.501219 49.756485 0 4.567093 0.0 0.523651 ... -9.982728 0.0 -325.830854 0.0 0.0 0.0 -652.938934 1.253588 -1.430779 tr894
2494 2494 49000.0 3.41650 -6464.407265 0.496882 50.625585 0 4.832824 0.0 0.524553 ... -8.364190 0.0 -325.062802 0.0 0.0 0.0 -650.974420 1.921487 -1.013259 tr894
2495 2495 49100.0 3.10904 -6387.091755 0.548666 40.740404 0 3.316020 0.0 0.581348 ... -10.967092 0.0 -328.988742 0.0 0.0 0.0 -659.262743 1.190546 -1.329242 tr894
2496 2496 49200.0 3.08787 -6544.665697 0.542510 41.859331 0 5.020698 0.0 0.864652 ... -9.712455 0.0 -326.961086 0.0 0.0 0.0 -653.968884 1.103267 -1.117054 tr894
2497 2497 49300.0 3.08903 -6565.835977 0.519321 46.210480 0 4.056949 0.0 0.782764 ... -10.167152 0.0 -325.731478 0.0 0.0 0.0 -649.584897 1.347216 -1.148235 tr894
2498 2498 49400.0 2.86923 -6705.263150 0.478287 54.436844 0 5.751120 0.0 0.550526 ... -8.520623 0.0 -326.361710 0.0 0.0 0.0 -647.484556 0.969805 -1.272144 tr894
2499 2499 49500.0 3.75897 -6463.716890 0.445395 61.517373 0 6.437783 0.0 0.522887 ... -8.618340 0.0 -320.399867 0.0 0.0 0.0 -635.290484 1.147018 -1.649743 tr894
2500 2500 49600.0 3.39286 -6501.433057 0.485665 52.908148 0 4.294195 0.0 0.505823 ... -10.501468 0.0 -323.573172 0.0 0.0 0.0 -647.184033 1.221193 -1.348389 tr894
2501 2501 49700.0 3.32162 -6391.616071 0.532840 43.647727 0 4.949311 0.0 0.640128 ... -9.125481 0.0 -329.320986 0.0 0.0 0.0 -656.508771 0.993919 -0.986276 tr894
2502 2502 49800.0 3.28345 -6563.961634 0.519311 46.212290 0 5.139785 0.0 0.632014 ... -9.679081 0.0 -325.602523 0.0 0.0 0.0 -647.098837 0.616816 -0.979727 tr894
2503 2503 49900.0 3.04609 -6472.654667 0.524059 45.303913 0 5.039938 0.0 0.796512 ... -9.205125 0.0 -324.496863 0.0 0.0 0.0 -647.414385 1.401103 -0.722207 tr894
2504 2504 50000.0 3.99691 -6555.354241 0.470289 56.118701 0 3.803515 0.0 0.692461 ... -8.788567 0.0 -329.318347 0.0 0.0 0.0 -652.710108 0.573119 -1.284276 tr894

2505 rows × 26 columns


In [ ]: