In [1]:
# written in python3
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
from datetime import datetime
import seaborn as sns
%matplotlib inline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression

class DataFrameSelector(BaseEstimator, TransformerMixin):
    """Pipeline step that selects a list of DataFrame columns.

    `transform` returns the selected columns as a plain numpy array
    (via `.values`), which is what the downstream sklearn steps expect.
    """

    def __init__(self, attribute_names):
        # Column names to keep, in the order they should appear.
        self.attribute_names = attribute_names

    def fit(self, X, y=None):
        # Stateless selector: nothing to learn.
        return self

    def transform(self, X):
        # Select columns and strip the pandas index/labels.
        return X[self.attribute_names].values
    
plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio


def my_transform(data, label, degree, FEATURES):
    """Build the model matrix from `data`.

    The numeric FEATURES columns are standardized and polynomial-expanded
    (no bias column); the `label` column is appended untouched as the LAST
    column, so callers split X/y with `[:, :-1]` / `[:, -1]`.
    """
    numeric_branch = Pipeline([
        ('selector', DataFrameSelector(FEATURES)),
        ('std_scaler', StandardScaler()),
        ('poly', PolynomialFeatures(degree=degree, include_bias=False)),
    ])
    label_branch = Pipeline([
        ('selector', DataFrameSelector([label])),
    ])
    combined = FeatureUnion(transformer_list=[
        ("num_pipeline", numeric_branch),
        ("cat_pipeline", label_branch),
    ])
    return combined.fit_transform(data)

Functions for reading the simulation data.


In [2]:
# read energy, rw, bias, rmsd data from location
def read_data(name, location=None):
    """Load one folder's per-frame simulation outputs into a single frame.

    Parameters
    ----------
    name : str
        Folder name, e.g. "tr872".
    location : str, optional
        Directory holding the output files. Defaults to the original
        hard-coded structure_selection_2 path (kept for backward
        compatibility; pass a path to override).

    Returns
    -------
    pd.DataFrame
        Concatenation (column-wise) of RMSD, Rw, bias, Qw, the AWSEM
        energy terms and the PCA coordinates, plus `name` and a `folder`
        column carrying the original row index.
    """
    name_list = ["Step" , "Chain" , "Shake" , "Chi" , "Rama", "Excluded", "DSSP", "P_AP", "Water" ,"Burial", "Helix", "AMH_Go", "Frag_Mem", "Vec_FM", "Membrane", "SSB","VTotal"]

    if location is None:
        # NOTE(review): hard-coded absolute local path — consider a
        # configurable DATA_DIR constant in the setup cell.
        location = f"/Users/weilu/Research/server/dec_2018/structure_selection_2/{name}/"
    # Separators use raw strings: "\s" is an invalid escape sequence and
    # raises a SyntaxWarning on modern Python; the regex is unchanged.
    RMSD = pd.read_table(location+"rmsd.xvg", names=["i", "RMSD"], sep=r"\s+")
    bias = pd.read_table(location+"bias.log", names=["i", "biasQ", "bias"], sep=r"\s+").drop("i", axis=1)
    awsem = pd.read_table(location+"awsem.log", names=name_list)
    rw = pd.read_table(location+"rwplusScore.txt", names=["i", "Rw"], sep=r"\s+").drop("i", axis=1)
    qw = pd.read_table(location+"Qw.out", names=["i", "Qw"], sep=r"\s+", comment="#").drop("i", axis=1)
    pc = pd.read_table(location+"pcarmsd_scaled.txt", names=["i", "pc", "pc2"], sep=r"\s+", comment="#").drop("i", axis=1)
    raw_data = pd.concat([RMSD, rw, bias, qw, awsem, pc], axis=1)
    return raw_data.assign(name=name).reset_index().rename(columns={"index":"folder"})

def choose_top(data, col="RMSD", n=5, ascending=True):
    """Return `data` with a boolean `chosen` column flagging the top-n rows
    ranked by `col` (dense ranking, so ties share a rank)."""
    ranks = data[col].rank(ascending=ascending, method='dense')
    return data.assign(chosen=ranks <= n)

# read the pmf, rc. 
# def read_data_2(name):
# #     name = "tr894"
# #     location = f"/Users/weilu/Research/server/sep_2018/03_week/{name}/"
# #     location = f"/Users/weilu/Research/server/oct_2018/01_week/{name}/"
#     location = f"/Users/weilu/Research/server/dec_2018/structure_selection/{name}/"
#     rw = pd.read_table(location+"rc_rwplus", names=["pc","rw"], sep="\s+")
#     rmsd = pd.read_table(location+"rc_rmsdlowerBound", names=["pc", "rmsd"], sep="\s+")
#     awsem = pd.read_table(location+"rc_awsemEne", names=["pc", "awsem"], sep="\s+")
#     qw = pd.read_table(location+"rc_QwhigherBound", names=["pc", "qw"], sep="\s+")
#     freeE = pd.read_table(location+"pmf3000"
#                           , names=["pc", "f", "remove1", "remove2"], sep="\s+").drop(["remove1", "remove2"], axis=1)
#     raw_data = freeE.merge(rw, on="pc").merge(awsem, on="pc").merge(qw, on="pc").merge(rmsd, on="pc").assign(name=name)
#     return raw_data

def read_data_2(name, location=None):
    """Load one folder's free-energy profile along the pc coordinate.

    Parameters
    ----------
    name : str
        Folder name, e.g. "tr894".
    location : str, optional
        Directory holding the profile files; defaults to the original
        hard-coded structure_selection_2 path (backward compatible).

    Returns
    -------
    pd.DataFrame
        Columns: pc, f (free energy from pmf3000), rw, rmsd, name —
        merged on the pc coordinate.
    """
    if location is None:
        # NOTE(review): hard-coded absolute local path — consider a
        # configurable DATA_DIR constant in the setup cell.
        location = f"/Users/weilu/Research/server/dec_2018/structure_selection_2/{name}/"
    # Raw strings for the regex separators: "\s" is an invalid escape
    # sequence (SyntaxWarning on modern Python); behavior is unchanged.
    rw = pd.read_table(location+"rc_rwplus", names=["pc","rw"], sep=r"\s+")
    rmsd = pd.read_table(location+"rc_rmsdlowerBound", names=["pc", "rmsd"], sep=r"\s+")
    freeE = pd.read_table(location+"pmf3000"
                          , names=["pc", "f", "remove1", "remove2"], sep=r"\s+").drop(["remove1", "remove2"], axis=1)
    raw_data = freeE.merge(rw, on="pc").merge(rmsd, on="pc").assign(name=name)
    return raw_data

Train the selector based on free energy, Rw score and AWSEM energy.


In [3]:
# Candidate target folders; the "-halfDIHE" entries are runs with a halved
# dihedral term. (Earlier experiments used subsets of this list.)
folder_list = ["tr884-halfDIHE", "tr872-halfDIHE", "tr948-halfDIHE", "tr898", "tr947", "tr894", "tr882", "tr594", "tr869", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]

# Load the free-energy profile data for every folder and stack them.
data_list = [read_data_2(name) for name in folder_list]
raw_data_all = pd.concat(data_list)

# Flag, per folder, the n lowest-rmsd profile points.
n = 1
raw_data_all = (
    raw_data_all
    .reset_index(drop=True)
    .groupby("name")
    .apply(choose_top, n=n, col="rmsd")
    .reset_index(drop=True)
)

# Folder(s) reserved for training the selector.
train_name_list = ["tr866"]
raw_data = raw_data_all.reset_index(drop=True).query(f'name in {train_name_list}')

In [4]:
# FEATURES = ["eigenvalues", "entropy", "pca"]
# FEATURES = ["eigenvalues", "entropy", "diffRMSD"]
# FEATURES = ["eigenvalues", "entropy"]
# Regression inputs: free energy (f), Rw score and AWSEM energy along pc.
FEATURES = ["f",
    'rw',
     'awsem',
#     'RMSD', # test
#      'Burial',
#      'Water',
#      'Rama',
#      'DSSP',
#      'P_AP',
#      'Helix',
#      'Frag_Mem'
               ]
# FEATURES = ["eigenvalues"]
# LABEL = "diffRMSD"
# LABEL = "RMSD"
# Regression target: the rmsd column of the free-energy profile data.
LABEL = "rmsd"
# LABEL = "qw"
# Polynomial expansion degree used by my_transform (1 = raw features).
DEGREE = 1

def pred_from_raw(a, clf):
    """Predict RMSD for every row of `a` with the fitted regressor `clf`.

    Uses the module-level LABEL / DEGREE / FEATURES config. The prediction
    is attached as `prediceted_rmsd` (sic — name kept for downstream code).
    """
    transformed = my_transform(a, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
    test_y = transformed[:, -1]
    features = transformed[:, :-1]
    return a.assign(prediceted_rmsd=clf.predict(features))

def assign_lowest_f(a):
    """Broadcast the smallest value of the `f` (free energy) column onto
    every row of `a` as a new `lowest_f` column."""
    lowest = a["f"].sort_values().iloc[0]
    return a.assign(lowest_f=lowest)

In [5]:
raw_data_all = raw_data_all.reset_index(drop=True).groupby("name").apply(assign_lowest_f).reset_index(drop=True)

In [6]:
# # data = my_transform(raw_data, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
# # data = raw_data.groupby('name').apply(my_transform, label=LABEL, degree=DEGREE, FEATURES=FEATURES)[0]
# data = np.concatenate(raw_data.groupby('name').apply(my_transform, 
#                                                      label=LABEL, degree=DEGREE, FEATURES=FEATURES).values)
# train_y = data[:,-1]
# train_set = data[:,:-1]
# from sklearn import svm
# # clf = svm.SVC(probability=True)
# clf = LinearRegression()
# clf.fit(train_set, train_y)
# y_pred_svm = clf.predict(train_set)

# raw_data_all = raw_data_all.reset_index(drop=True).groupby("name").apply(pred_from_raw, clf).reset_index(drop=True)
# # raw_data_all = raw_data_all.reset_index(drop=True).groupby("name").apply(assign_lowest_f).reset_index(drop=True)



# picked_n = 1
# best = raw_data_all.groupby("name").apply(choose_top, col="rmsd"
#                                             , n=picked_n, ascending=True).reset_index(drop=True).query("chosen==True")
# picked = raw_data_all.groupby("name").apply(choose_top, col="prediceted_rmsd"
#                                             , n=picked_n, ascending=True).reset_index(drop=True).query("chosen==True")
# # init = raw_data_all.query("i == 0.0")
# all_results = pd.concat([best.assign(result='best'), 
#                          picked.assign(result='picked')])

# picked_keep = picked.copy()

In [7]:
from scipy.interpolate import interp1d

# One interpolator per folder mapping pc -> free energy f, extrapolating
# outside the sampled pc range; used below to window frames by free energy.
f_dic = {}
for name in folder_list:
    profile = raw_data_all.query(f"name == '{name}'")
    f_dic[name] = interp1d(profile["pc"].values, profile["f"].values,
                           fill_value="extrapolate")

In [8]:
# g = sns.FacetGrid(raw_data_all, col="name", col_wrap=4)
# g = g.map(plt.plot, "pc", "f")
# plt.ylim([0,1])

In [9]:
# g = sns.FacetGrid(raw_data_all, col="name", col_wrap=4)
# g = g.map(plt.plot, "pc", "f")
# g = g.map(plt.plot, "pc", "prediceted_rmsd")

In [10]:
# raw_data_all.query("name == 'tr594'").plot("pc", "f")

In [11]:
# g = sns.FacetGrid(raw_data_all, col="name", col_wrap=4)
# g = g.map(plt.plot, "pc", "rmsd")
# plt.ylim([0,1])

Use picked_keep to filter the complete data and then select again.


In [12]:
f_dic["tr594"](raw_data_all["pc"]).shape


Out[12]:
(608,)

In [13]:
def choose_top(data, col="RMSD", n=5, ascending=True):
    """Flag the top-n rows of `data` by `col` in a boolean `chosen` column.

    NOTE: this redefines the earlier choose_top; here ties are broken by
    row order (method='first'), so exactly n rows are flagged.
    """
    ranks = data[col].rank(ascending=ascending, method='first')
    return data.assign(chosen=ranks <= n)


# WIDTH = 100
# WIDTH = 0.1
# WIDTH = 1
# WIDTH = 0.2
# def with_in_range(data, width=WIDTH):
#     return data.assign(inrange= (data["pc"] < (data["pc_center"]+width)) & (data["pc"] > (data["pc_center"]-width)))

def with_in_range(data, width=5):
    """Flag rows whose interpolated free energy lies strictly in (0, width).

    `data` must belong to a single folder (it is applied per groupby group);
    the folder's interpolator is looked up in the module-level `f_dic`.
    """
    name = data["name"].iloc[0]
    f_values = f_dic[name](data["pc"])  # evaluate once, reuse for both bounds
    return data.assign(inrange=(f_values > 0) & (f_values < width))

In [14]:
# folder_list = ["tr898", "tr869", "tr947", "tr894", "tr882", "tr594", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
# folder_list = ["tr884-halfDIHE", "tr872-halfDIHE", "tr898", "tr947", "tr894", "tr882", "tr594", "tr869", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
# folder_list = ["tr872-halfDIHE", "tr898", "tr947", "tr894", "tr882", "tr594", "tr869", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]



# "tr898"
# folder_list = ["tr894", "tr882", "tr594", "tr898", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
# folder_list = ["tr894", "tr882", "tr594", "tr869", "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
# folder_list = [ "tr862", "tr877", "tr872", "tr885", "tr866", "tr868", "tr884", "tr895", "tr896", "tr870", "tr921", "tr922", "tr891", "tr948"]
# folder_list = ["tr862", "tr872", "tr885", "tr866", "tr868" , "tr895", "tr896", "tr870", "tr921", "tr891", "tr948"]
# "tr877","tr884", "tr922"
# "tr869"
# folder_list = ["tr894"]
# folder_list = ["tr866"]


# Define the "best" frames by RMSD (lower is better) or Qw (higher is
# better); the ranking direction follows from the chosen metric.
best_metric = "Qw"
isAscending = best_metric != "Qw"

# Load per-frame simulation data for every folder; keep complete rows only.
data_list = [read_data(name) for name in folder_list]
raw_data_all_2 = pd.concat(data_list).dropna()

# Flag the top-n frames per folder under the chosen metric.
n = 25
raw_data_all_2 = (
    raw_data_all_2
    .reset_index(drop=True)
    .groupby("name")
    .apply(choose_top, n=n, col=best_metric, ascending=isAscending)
    .reset_index(drop=True)
)

# Training subset, then restrict all frames to those whose interpolated
# free energy falls inside the with_in_range window.
raw_data = raw_data_all_2.reset_index(drop=True).query(f'name in {train_name_list}').dropna()
a = raw_data_all_2.dropna()
filtered = (
    a.groupby("name")
    .apply(with_in_range)
    .query("inrange == True")
    .reset_index(drop=True)
)

In [19]:
filtered.reset_index(drop=True).to_csv("/Users/weilu/Research/server/dec_2018/structure_selection_3/filtered.csv")

In [15]:
filtered


Out[15]:
folder i RMSD Rw biasQ bias Qw Step Chain Shake ... Frag_Mem Vec_FM Membrane SSB VTotal pc pc2 name chosen inrange
0 0 0.0 0.465665 -10838.987781 0.590522 33.534424 0.464193 0.0 14.571923 0.0 ... -536.518284 0.0 0.0 0.0 -940.887277 -0.086922 -0.059985 tr594 False True
1 1 100.0 0.509175 -9916.464942 0.557170 39.219723 0.453318 0.0 9.111620 0.0 ... -532.556036 0.0 0.0 0.0 -949.290485 -1.048765 -0.032790 tr594 False True
2 2 200.0 0.522662 -9809.291344 0.562481 38.284555 0.436252 0.0 8.593084 0.0 ... -534.652029 0.0 0.0 0.0 -947.008881 -1.355717 -0.489933 tr594 False True
3 3 300.0 0.529569 -10111.629198 0.523389 45.431684 0.449343 0.0 6.567146 0.0 ... -536.498803 0.0 0.0 0.0 -941.325829 -1.610276 -0.458905 tr594 False True
4 4 400.0 0.541991 -9918.154310 0.525121 45.102051 0.432320 0.0 6.441661 0.0 ... -537.680262 0.0 0.0 0.0 -938.781766 -1.602802 -0.934757 tr594 False True
5 48 4800.0 0.621965 -9303.201390 0.452911 59.861201 0.413571 0.0 6.018852 0.0 ... -529.457606 0.0 0.0 0.0 -919.420140 -1.592254 -0.083006 tr594 False True
6 49 4900.0 0.673337 -9357.594222 0.493835 51.240594 0.397963 0.0 8.191024 0.0 ... -530.349964 0.0 0.0 0.0 -916.146867 -1.303263 -0.233821 tr594 False True
7 50 5000.0 0.662392 -9296.297303 0.493335 51.341927 0.408646 0.0 5.509543 0.0 ... -533.090343 0.0 0.0 0.0 -923.045825 -1.598746 -0.092803 tr594 False True
8 52 5200.0 0.689597 -9356.994993 0.443598 61.916707 0.402738 0.0 7.161900 0.0 ... -530.790559 0.0 0.0 0.0 -928.397126 -1.536486 0.310421 tr594 False True
9 57 5700.0 0.613158 -9420.456138 0.538359 42.622500 0.405299 0.0 5.272804 0.0 ... -538.792399 0.0 0.0 0.0 -944.003112 -1.569597 0.080288 tr594 False True
10 59 5900.0 0.677439 -9021.254091 0.466083 57.013521 0.391027 0.0 5.432674 0.0 ... -534.403954 0.0 0.0 0.0 -933.622474 -1.535101 -0.366646 tr594 False True
11 62 6200.0 0.643459 -9386.781543 0.489386 52.145373 0.408578 0.0 7.253485 0.0 ... -537.773403 0.0 0.0 0.0 -931.586592 -1.470661 -0.145013 tr594 False True
12 63 6300.0 0.614988 -9535.425190 0.463298 57.609865 0.421237 0.0 7.458456 0.0 ... -530.267172 0.0 0.0 0.0 -932.356310 -1.371848 -0.228594 tr594 False True
13 64 6400.0 0.586483 -9399.518607 0.420980 67.052790 0.408612 0.0 8.814095 0.0 ... -528.824428 0.0 0.0 0.0 -913.732435 -1.560224 -0.167094 tr594 False True
14 65 6500.0 0.587374 -9531.648111 0.503773 49.248325 0.402970 0.0 7.560530 0.0 ... -535.444009 0.0 0.0 0.0 -942.207064 -1.622906 -0.309311 tr594 False True
15 66 6600.0 0.576300 -9284.998390 0.520211 46.039471 0.427523 0.0 5.146234 0.0 ... -537.880954 0.0 0.0 0.0 -945.554047 -1.559099 -0.122496 tr594 False True
16 67 6700.0 0.601645 -9346.725491 0.468457 56.507624 0.390246 0.0 6.440939 0.0 ... -529.959657 0.0 0.0 0.0 -923.764972 -1.570806 -0.482668 tr594 False True
17 69 6900.0 0.619247 -9184.666542 0.469153 56.359664 0.402867 0.0 6.209315 0.0 ... -538.792552 0.0 0.0 0.0 -935.687195 -1.432406 -0.169271 tr594 False True
18 70 7000.0 0.639474 -9172.484700 0.470707 56.030184 0.416233 0.0 6.255095 0.0 ... -533.756661 0.0 0.0 0.0 -928.264082 -1.238577 -0.467167 tr594 False True
19 71 7100.0 0.613697 -9246.908827 0.469495 56.287072 0.428673 0.0 6.767947 0.0 ... -530.594660 0.0 0.0 0.0 -932.727427 -1.381525 -0.301255 tr594 False True
20 72 7200.0 0.622031 -9240.500512 0.450103 60.477245 0.443059 0.0 5.361324 0.0 ... -523.295198 0.0 0.0 0.0 -907.426228 -1.217635 0.017708 tr594 False True
21 73 7300.0 0.631637 -8980.089989 0.445487 61.496850 0.402151 0.0 8.629303 0.0 ... -531.891872 0.0 0.0 0.0 -925.915022 -1.199508 -0.347520 tr594 False True
22 74 7400.0 0.604258 -9233.859480 0.497528 50.495535 0.418786 0.0 7.816735 0.0 ... -537.019111 0.0 0.0 0.0 -938.174905 -1.377256 -0.635975 tr594 False True
23 75 7500.0 0.622700 -9427.830408 0.481416 53.785812 0.405511 0.0 5.678720 0.0 ... -531.200870 0.0 0.0 0.0 -931.758163 -1.123728 -0.620915 tr594 False True
24 82 8200.0 0.625078 -9308.236867 0.412060 69.134734 0.400529 0.0 8.187544 0.0 ... -523.173377 0.0 0.0 0.0 -901.131202 -1.375308 -0.228769 tr594 False True
25 83 8300.0 0.662626 -9241.532068 0.440148 62.686749 0.393837 0.0 6.397077 0.0 ... -527.050543 0.0 0.0 0.0 -903.244978 -1.356023 0.070050 tr594 False True
26 84 8400.0 0.602315 -9347.025048 0.488074 52.413745 0.424583 0.0 5.754571 0.0 ... -536.814089 0.0 0.0 0.0 -931.801980 -1.416061 0.055604 tr594 False True
27 85 8500.0 0.607790 -9388.986833 0.462851 57.705818 0.382716 0.0 6.068321 0.0 ... -531.005668 0.0 0.0 0.0 -926.400195 -1.340040 -0.431278 tr594 False True
28 87 8700.0 0.581966 -9437.420735 0.493212 51.366827 0.429686 0.0 7.081513 0.0 ... -532.261105 0.0 0.0 0.0 -932.919436 -1.598950 0.023925 tr594 False True
29 88 8800.0 0.619568 -9243.703066 0.515696 46.910139 0.401120 0.0 5.555758 0.0 ... -538.895297 0.0 0.0 0.0 -937.038712 -1.122959 0.018069 tr594 False True
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
46308 2475 47100.0 0.664531 -29238.158086 0.535266 43.195462 0.627001 0.0 12.720694 0.0 ... -1438.753539 0.0 0.0 0.0 -2474.251192 2.731779 -1.490331 tr948-halfDIHE False True
46309 2476 47200.0 0.645980 -28575.267513 0.553863 39.807586 0.610188 0.0 10.766470 0.0 ... -1489.595256 0.0 0.0 0.0 -2552.101901 2.266848 -1.271441 tr948-halfDIHE False True
46310 2477 47300.0 0.668837 -29100.780481 0.561323 38.487517 0.611376 0.0 10.773895 0.0 ... -1478.281230 0.0 0.0 0.0 -2547.996116 2.529044 -1.879383 tr948-halfDIHE False True
46311 2478 47400.0 0.656312 -29104.086956 0.546157 41.194629 0.614508 0.0 10.540180 0.0 ... -1472.942756 0.0 0.0 0.0 -2538.165947 2.544484 -2.486622 tr948-halfDIHE False True
46312 2479 47500.0 0.648945 -28742.747503 0.545662 41.284672 0.608867 0.0 11.570885 0.0 ... -1474.445657 0.0 0.0 0.0 -2546.821662 2.790202 -2.295615 tr948-halfDIHE False True
46313 2480 47600.0 0.646692 -29295.874974 0.552699 40.015648 0.616515 0.0 11.763866 0.0 ... -1469.248549 0.0 0.0 0.0 -2535.770368 2.403907 -2.410211 tr948-halfDIHE False True
46314 2481 47700.0 0.646120 -29275.488459 0.549853 40.526387 0.631026 0.0 10.460628 0.0 ... -1471.068108 0.0 0.0 0.0 -2538.634377 2.754876 -2.475985 tr948-halfDIHE False True
46315 2482 47800.0 0.650602 -28865.582862 0.572983 36.468727 0.615883 0.0 9.371698 0.0 ... -1476.593163 0.0 0.0 0.0 -2560.703038 2.825274 -2.236798 tr948-halfDIHE False True
46316 2483 47900.0 0.646467 -29047.992545 0.561672 38.426257 0.605711 0.0 11.164019 0.0 ... -1480.239083 0.0 0.0 0.0 -2548.463246 2.776486 -1.734102 tr948-halfDIHE False True
46317 2484 48000.0 0.642090 -29302.210047 0.561277 38.495626 0.605461 0.0 10.654320 0.0 ... -1472.067773 0.0 0.0 0.0 -2542.795485 2.783447 -2.175997 tr948-halfDIHE False True
46318 2485 48100.0 0.644168 -29176.616023 0.547463 40.957936 0.629942 0.0 9.562760 0.0 ... -1461.418923 0.0 0.0 0.0 -2541.440131 2.688586 -2.345910 tr948-halfDIHE False True
46319 2486 48200.0 0.636067 -29472.929644 0.526960 44.753283 0.615342 0.0 11.473004 0.0 ... -1444.956390 0.0 0.0 0.0 -2487.803658 2.578424 -2.021244 tr948-halfDIHE False True
46320 2487 48300.0 0.642547 -29197.443531 0.559664 38.779073 0.621361 0.0 11.602272 0.0 ... -1460.667977 0.0 0.0 0.0 -2525.987471 2.813637 -1.768482 tr948-halfDIHE False True
46321 2488 48400.0 0.637230 -29167.185449 0.548189 40.826610 0.634195 0.0 10.974162 0.0 ... -1454.930095 0.0 0.0 0.0 -2522.441224 2.938245 -1.649847 tr948-halfDIHE False True
46322 2489 48500.0 0.648128 -29399.083299 0.558208 39.036110 0.620118 0.0 10.967527 0.0 ... -1472.947269 0.0 0.0 0.0 -2546.576797 2.952948 -2.021106 tr948-halfDIHE False True
46323 2490 48600.0 0.630125 -29041.728520 0.565502 37.757635 0.621270 0.0 9.809122 0.0 ... -1463.845887 0.0 0.0 0.0 -2533.871645 2.601774 -1.980494 tr948-halfDIHE False True
46324 2491 48700.0 0.647063 -29033.626230 0.556033 39.421369 0.613638 0.0 11.965107 0.0 ... -1470.855796 0.0 0.0 0.0 -2534.020216 2.475865 -2.116861 tr948-halfDIHE False True
46325 2492 48800.0 0.643798 -29200.383270 0.564631 37.909295 0.616066 0.0 11.028881 0.0 ... -1478.423893 0.0 0.0 0.0 -2539.374255 2.767128 -1.720189 tr948-halfDIHE False True
46326 2493 48900.0 0.626581 -29133.246086 0.534863 43.270476 0.617303 0.0 11.230873 0.0 ... -1445.668297 0.0 0.0 0.0 -2484.545584 2.731528 -1.222750 tr948-halfDIHE False True
46327 2494 49000.0 0.648736 -29141.057142 0.554544 39.686199 0.609742 0.0 8.998190 0.0 ... -1463.298878 0.0 0.0 0.0 -2538.488207 2.743604 -2.217587 tr948-halfDIHE False True
46328 2495 49100.0 0.685668 -28671.003442 0.539182 42.470707 0.602916 0.0 9.949813 0.0 ... -1453.218659 0.0 0.0 0.0 -2517.287917 2.604783 -2.730466 tr948-halfDIHE False True
46329 2496 49200.0 0.636802 -29026.657620 0.557262 39.203458 0.620470 0.0 11.087543 0.0 ... -1480.758405 0.0 0.0 0.0 -2551.386257 2.661026 -1.448826 tr948-halfDIHE False True
46330 2497 49300.0 0.638779 -28902.094126 0.542794 41.807488 0.610915 0.0 8.117317 0.0 ... -1461.832169 0.0 0.0 0.0 -2527.610120 2.418022 -1.970964 tr948-halfDIHE False True
46331 2498 49400.0 0.660044 -29059.731620 0.529000 44.368116 0.620130 0.0 10.782999 0.0 ... -1447.017390 0.0 0.0 0.0 -2492.401515 2.737150 -2.048866 tr948-halfDIHE False True
46332 2499 49500.0 0.670794 -28458.474946 0.562937 38.204854 0.615567 0.0 9.404200 0.0 ... -1462.166721 0.0 0.0 0.0 -2538.646214 2.287633 -2.853603 tr948-halfDIHE False True
46333 2500 49600.0 0.636916 -29056.391924 0.531281 43.939560 0.619260 0.0 10.684813 0.0 ... -1451.048965 0.0 0.0 0.0 -2499.850517 2.382571 -2.619119 tr948-halfDIHE False True
46334 2501 49700.0 0.690867 -28405.607817 0.554126 39.760732 0.601165 0.0 12.160203 0.0 ... -1458.215656 0.0 0.0 0.0 -2521.931720 2.367532 -2.860109 tr948-halfDIHE False True
46335 2502 49800.0 0.687250 -28858.046354 0.557573 39.148356 0.619200 0.0 9.398444 0.0 ... -1462.184324 0.0 0.0 0.0 -2530.782884 2.377331 -2.776115 tr948-halfDIHE False True
46336 2503 49900.0 0.674822 -29090.494137 0.545762 41.266472 0.618320 0.0 9.060949 0.0 ... -1463.757249 0.0 0.0 0.0 -2537.671554 2.525844 -2.441068 tr948-halfDIHE False True
46337 2504 50000.0 0.659247 -28802.248293 0.542977 41.774009 0.609452 0.0 11.459437 0.0 ... -1473.040182 0.0 0.0 0.0 -2537.841217 2.442010 -1.905541 tr948-halfDIHE False True

46338 rows × 29 columns


In [ ]:


In [16]:
filtered.shape


Out[16]:
(46338, 29)

In [17]:
a.shape


Out[17]:
(57110, 28)

In [53]:
# FEATURES = ["eigenvalues", "entropy", "pca"]
# FEATURES = ["eigenvalues", "entropy", "diffRMSD"]
# FEATURES = ["eigenvalues", "entropy"]
# Classifier inputs: bias collective variable, Rw score and total AWSEM energy.
# NOTE(review): this rebinds FEATURES/LABEL/DEGREE from the regression cell
# above — everything below this point uses the classification config.
FEATURES = [
    "biasQ",
    'Rw',
     'VTotal',
#     'RMSD', # test
#     'Qw',
#      'Burial',
#      'Water',
#      'Rama',
#      'DSSP',
#      'P_AP',
#      'Helix',
#      'Frag_Mem'
               ]
# FEATURES = ["eigenvalues"]
# LABEL = "diffRMSD"
# LABEL = "RMSD"
# Classification target: the boolean `chosen` flag (top-n frames by Qw).
LABEL = "chosen"
# Polynomial expansion degree used by my_transform (1 = raw features).
DEGREE = 1

def pred_from_raw(a):
    """Attach P(chosen) for every row of `a` as a `prob` column.

    NOTE: redefines the earlier two-argument pred_from_raw; this version
    reads the fitted classifier `clf` and LABEL/DEGREE/FEATURES from
    module scope.
    """
    transformed = my_transform(a, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
    test_y = transformed[:, -1]
    features = transformed[:, :-1]
    prob = clf.predict_proba(features)[:, 1]  # probability of class 1 (chosen)
    return a.assign(prob=prob)

# data = my_transform(raw_data, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
# data = raw_data.groupby('name').apply(my_transform, label=LABEL, degree=DEGREE, FEATURES=FEATURES)[0]
# Build the training matrix per folder and stack; my_transform appends the
# label as the LAST column, so it is split off just below.
data = np.concatenate(raw_data.groupby('name').apply(my_transform, 
                                                     label=LABEL, degree=DEGREE, FEATURES=FEATURES).values)
train_y = data[:,-1]
train_set = data[:,:-1]

# clf = svm.SVC(probability=True)
# p = 0.01
# clf = LogisticRegression(random_state=27, class_weight={0:p, 1:(1-p)})
# Fit a logistic classifier predicting the `chosen` flag from the features.
clf = LogisticRegression(random_state=27)
clf.fit(train_set, train_y)

# Score every free-energy-filtered frame with P(chosen).
filtered = filtered.reset_index(drop=True).groupby("name").apply(pred_from_raw).reset_index(drop=True)


# NOTE(review): picked_n is defined but the calls below hard-code n=1 / n=5.
picked_n = 1
# Ground truth: the single lowest-RMSD frame per folder (over ALL frames).
best = raw_data_all_2.groupby("name").apply(choose_top, col="RMSD"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
# if True:
# Classifier pick: highest-probability frame per folder.
picked_1 = filtered.groupby("name").apply(choose_top, col="prob"
                                        , n=1, ascending=False).reset_index(drop=True).query("chosen==True")

# if False:
# Top-5 frames by probability, and the lowest-RMSD frame among those five.
picked_5 = filtered.groupby("name").apply(choose_top, col="prob"
                                            , n=5, ascending=False).reset_index(drop=True).query("chosen==True")
picked = picked_5.groupby("name").apply(choose_top, col="RMSD"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
# Baselines: highest-RMSD filtered frame, and the first (smallest i) frame.
worst = filtered.groupby("name").apply(choose_top, col="RMSD"
                                            , n=1, ascending=False).reset_index(drop=True).query("chosen==True")
init = raw_data_all_2.groupby("name").apply(choose_top, col="i"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
all_results = pd.concat([best.assign(result='best'), 
                         picked_1.assign(result='picked'), init.assign(result='init')
                        , worst.assign(result='worst')
                        ], sort=False)
# all_results = pd.concat([best.assign(result='best'), 
#                          picked.assign(result='picked')])
# picked.to_csv("/Users/weilu/Desktop/picked.csv

# sns.set(rc={'figure.figsize':(20,30)})
# plt.figure(figsize=(15,8))
# Compare RMSD across folders for each selection strategy.
fg = sns.FacetGrid(data=all_results.reset_index(), hue='result', height=8, aspect=1.63)
fg.map(plt.plot, 'name', 'RMSD').add_legend(fontsize=20)
# fg.set(ylim=(0, 10))


Out[53]:
<seaborn.axisgrid.FacetGrid at 0x1a27839358>

In [56]:
all_results.query("name == 'tr872-halfDIHE' or name == 'tr872'").query("result == 'picked_top1'")


Out[56]:
folder i RMSD Rw biasQ bias Qw Step Chain Shake ... Membrane SSB VTotal pc pc2 name chosen inrange prob result
13986 484 48400.0 0.315882 -12625.342314 0.675345 21.080226 0.654423 0.0 5.027838 0.0 ... 0.0 0.0 -1138.137918 -0.849180 1.877866 tr872 True True 0.357519 picked_top1
16566 912 41100.0 0.355024 -12638.637754 0.671949 21.523455 0.773383 0.0 4.560306 0.0 ... 0.0 0.0 -1128.656025 0.212775 1.396896 tr872-halfDIHE True True 0.276979 picked_top1

2 rows × 31 columns


In [55]:
all_results.query("name == 'tr948-halfDIHE' or name == 'tr948'").query("result == 'picked_top1'")


Out[55]:
folder i RMSD Rw biasQ bias Qw Step Chain Shake ... Membrane SSB VTotal pc pc2 name chosen inrange prob result
42499 810 30900.0 0.642138 -28698.131138 0.581505 35.027568 0.603420 0.0 9.830352 0.0 ... 0.0 0.0 -2589.281395 -0.896550 2.553332 tr948 True True 0.638205 picked_top1
46179 2343 33900.0 0.679436 -29090.797025 0.569006 37.151118 0.590497 0.0 8.691268 0.0 ... 0.0 0.0 -2580.851263 1.535779 -0.682960 tr948-halfDIHE True True 0.324637 picked_top1

2 rows × 31 columns


In [52]:
all_results.query("name == 'tr884-halfDIHE' or name == 'tr884'").query("result == 'picked_top1'")


Out[52]:
folder i RMSD Rw biasQ bias Qw Step Chain Shake ... Membrane SSB VTotal pc pc2 name chosen inrange prob result
23305 1448 44600.0 0.542798 -9418.042083 0.530488 44.088316 0.496556 0.0 3.516038 0.0 ... 0.0 0.0 -924.112214 0.739004 -2.026302 tr884 True True 0.245173 picked_top1
25653 1301 29900.0 0.320999 -9330.232483 0.615756 29.528697 0.700275 0.0 3.993937 0.0 ... 0.0 0.0 -899.346439 -1.224431 -1.495779 tr884-halfDIHE True True 0.265686 picked_top1

2 rows × 31 columns


In [54]:
# Rebuild the comparison table with a distinct label per selection strategy
# (top-1 / top-5 by probability, best-of-top-5, ground-truth best, initial
# frame, and worst filtered frame).
labelled_frames = [
    picked_1.assign(result="picked_top1"),
    picked_5.assign(result='picked_top5'),
    picked.assign(result='picked_best'),
    best.assign(result='best'),
    init.assign(result='init'),
    worst.assign(result='worst'),
]
all_results = pd.concat(labelled_frames, sort=False)

In [30]:
all_results.reset_index(drop=True).to_csv("/Users/weilu/Research/server/dec_2018/structure_selection_2/all_results.csv")

In [32]:
all_results = pd.read_csv("/Users/weilu/Research/server/dec_2018/structure_selection_2/all_results.csv", index_col=0)

In [62]:
all_results.query("result == 'picked_top5'").groupby("name").apply(pd.sort_values, "prob")


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-62-12b0d03d99dd> in <module>()
----> 1 all_results.query("result == 'picked_top5'").groupby("name").apply(pd.sort_values, "prob")

AttributeError: module 'pandas' has no attribute 'sort_values'

In [69]:
all_results.query("result == 'picked_top5'").sort_values(["name", "prob"], ascending=False)


Out[69]:
folder i RMSD Rw biasQ bias Qw Step Chain Shake ... Membrane SSB VTotal pc pc2 name chosen inrange prob result
46179 2343 33900.0 0.679436 -29090.797025 0.569006 37.151118 0.590497 0.0 8.691268 0.0 ... 0.0 0.0 -2580.851263 1.535779 -0.682960 tr948-halfDIHE True True 0.324637 picked_top5
46095 2259 25500.0 0.685912 -28563.342610 0.574794 36.159959 0.584476 0.0 10.473595 0.0 ... 0.0 0.0 -2575.861825 2.225680 0.153428 tr948-halfDIHE True True 0.277318 picked_top5
45093 1058 5600.0 0.614140 -28463.547489 0.571656 36.695699 0.633013 0.0 7.506891 0.0 ... 0.0 0.0 -2578.116063 0.704460 -0.745613 tr948-halfDIHE True True 0.251004 picked_top5
44653 617 11600.0 0.530739 -28601.483932 0.568418 37.252623 0.635398 0.0 11.437000 0.0 ... 0.0 0.0 -2579.379165 0.932754 0.857359 tr948-halfDIHE True True 0.245794 picked_top5
46189 2353 34900.0 0.677295 -28758.016932 0.575408 36.055693 0.596811 0.0 10.026178 0.0 ... 0.0 0.0 -2565.811382 2.009119 -1.507732 tr948-halfDIHE True True 0.236490 picked_top5
42499 810 30900.0 0.642138 -28698.131138 0.581505 35.027568 0.603420 0.0 9.830352 0.0 ... 0.0 0.0 -2589.281395 -0.896550 2.553332 tr948 True True 0.638205 picked_top5
42677 988 48700.0 0.556396 -28450.977135 0.581825 34.974074 0.609090 0.0 9.318828 0.0 ... 0.0 0.0 -2589.462466 -0.319427 2.296344 tr948 True True 0.603070 picked_top5
42551 862 36100.0 0.660701 -28421.082123 0.574724 36.171940 0.572905 0.0 9.357762 0.0 ... 0.0 0.0 -2589.151368 -0.797111 2.190730 tr948 True True 0.503406 picked_top5
42566 877 37600.0 0.645414 -28593.010397 0.577859 35.640522 0.581267 0.0 9.385711 0.0 ... 0.0 0.0 -2580.545925 -0.431302 2.888052 tr948 True True 0.489552 picked_top5
42662 973 47200.0 0.562767 -28345.005686 0.573871 36.317265 0.612495 0.0 9.712646 0.0 ... 0.0 0.0 -2586.339493 -0.358514 2.007456 tr948 True True 0.451314 picked_top5
41765 481 48100.0 0.956921 -27891.079857 0.610006 30.418991 0.482850 0.0 13.287310 0.0 ... 0.0 0.0 -2108.005624 -2.046083 0.185091 tr947 True True 0.225634 picked_top5
41559 224 22400.0 1.117747 -27211.797425 0.613078 29.941723 0.483026 0.0 14.849488 0.0 ... 0.0 0.0 -2100.315430 -2.620303 -2.236081 tr947 True True 0.151705 picked_top5
41835 1259 25700.0 1.085501 -24839.093022 0.613234 29.917587 0.429476 0.0 10.805366 0.0 ... 0.0 0.0 -2118.051083 -2.118343 0.546546 tr947 True True 0.111876 picked_top5
41687 365 36500.0 1.128184 -26804.835459 0.613135 29.932977 0.466394 0.0 12.563653 0.0 ... 0.0 0.0 -2095.237870 -2.005232 -2.284630 tr947 True True 0.105201 picked_top5
41550 214 21400.0 1.096977 -27340.419432 0.606326 30.995822 0.470749 0.0 14.083092 0.0 ... 0.0 0.0 -2096.288160 -2.527297 -2.366083 tr947 True True 0.096247 picked_top5
40074 1011 900.0 0.368668 -9636.845094 0.651477 24.293703 0.606792 0.0 4.764826 0.0 ... 0.0 0.0 -1010.273657 -0.056487 1.712977 tr922 True True 0.267637 picked_top5
40076 1013 1100.0 0.366574 -9503.266740 0.652070 24.210997 0.583931 0.0 3.950436 0.0 ... 0.0 0.0 -1006.058579 -0.063859 1.446985 tr922 True True 0.203823 picked_top5
40077 1014 1200.0 0.345203 -9535.371379 0.642239 25.598638 0.627784 0.0 3.638171 0.0 ... 0.0 0.0 -1006.189562 0.051855 1.359481 tr922 True True 0.197031 picked_top5
39165 83 8300.0 0.328628 -9463.305731 0.648948 24.647459 0.624558 0.0 4.517219 0.0 ... 0.0 0.0 -1003.368107 -1.002158 -0.550782 tr922 True True 0.174875 picked_top5
40082 1019 1700.0 0.374167 -9506.479143 0.643149 25.468478 0.621836 0.0 4.025615 0.0 ... 0.0 0.0 -1000.178152 0.187237 1.232725 tr922 True True 0.161172 picked_top5
38271 828 32700.0 0.324877 -21881.858026 0.680476 20.419091 0.624908 0.0 23.769130 0.0 ... 0.0 0.0 -2764.790089 0.245506 -0.892383 tr921 True True 0.181894 picked_top5
38071 623 12200.0 0.342607 -22022.468951 0.691004 19.095697 0.642496 0.0 27.640532 0.0 ... 0.0 0.0 -2731.362792 -1.308575 0.152117 tr921 True True 0.167187 picked_top5
38261 818 31700.0 0.321367 -21751.698844 0.677866 20.754120 0.625235 0.0 26.710333 0.0 ... 0.0 0.0 -2770.951548 -0.511215 -1.499475 tr921 True True 0.164029 picked_top5
38278 835 33400.0 0.317155 -21679.300416 0.680030 20.476212 0.659806 0.0 27.965579 0.0 ... 0.0 0.0 -2768.607043 -1.257537 -0.718024 tr921 True True 0.160837 picked_top5
38251 806 30500.0 0.343824 -21742.173855 0.683483 20.036555 0.598756 0.0 30.046051 0.0 ... 0.0 0.0 -2755.489751 -0.647251 -1.098670 tr921 True True 0.152171 picked_top5
37201 729 22800.0 1.346089 -19042.795684 0.568624 37.217128 0.398104 0.0 6.527535 0.0 ... 0.0 0.0 -1469.264433 -2.546374 -2.851364 tr898 True True 0.190190 picked_top5
37171 695 19400.0 1.354056 -18672.018002 0.567845 37.351664 0.387112 0.0 6.121254 0.0 ... 0.0 0.0 -1472.410391 -2.112595 -4.148558 tr898 True True 0.176817 picked_top5
37202 730 22900.0 1.357227 -19095.827437 0.574393 36.228299 0.391478 0.0 6.416127 0.0 ... 0.0 0.0 -1465.542918 -2.541665 -3.160339 tr898 True True 0.173510 picked_top5
37158 677 17600.0 1.459045 -18739.210101 0.563673 38.076315 0.368102 0.0 6.244340 0.0 ... 0.0 0.0 -1471.218067 -2.592507 -3.551837 tr898 True True 0.164555 picked_top5
37147 648 14700.0 1.356008 -18865.221868 0.564519 37.928806 0.396207 0.0 6.473311 0.0 ... 0.0 0.0 -1469.439056 -2.091135 -2.955682 tr898 True True 0.163697 picked_top5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
11267 186 18600.0 0.911107 -20200.859983 0.630429 27.316528 0.326569 80000000.0 89.338784 0.0 ... 0.0 0.0 -1635.238984 -0.829420 0.255606 tr870 True True 0.026845 picked_top5
11210 129 12900.0 0.881563 -20157.706928 0.630429 27.316528 0.324614 80000000.0 89.338784 0.0 ... 0.0 0.0 -1635.238984 -1.121479 -0.345961 tr870 True True 0.026071 picked_top5
11573 499 49900.0 1.028841 -20156.584708 0.630429 27.316528 0.302463 80000000.0 89.338784 0.0 ... 0.0 0.0 -1635.238984 -1.304933 0.221292 tr870 True True 0.026051 picked_top5
11301 220 22000.0 0.945660 -20152.166487 0.630429 27.316528 0.324916 80000000.0 89.338784 0.0 ... 0.0 0.0 -1635.238984 -0.395241 1.395270 tr870 True True 0.025973 picked_top5
11374 293 29300.0 0.933194 -20146.905961 0.630429 27.316528 0.325829 80000000.0 89.338784 0.0 ... 0.0 0.0 -1635.238984 -0.907448 1.260252 tr870 True True 0.025881 picked_top5
8942 0 0.0 1.230950 -17101.278236 0.327521 90.445532 0.340709 0.0 6.684015 0.0 ... 0.0 0.0 -1364.887693 -0.000178 -0.048953 tr869 True True 0.317641 picked_top5
9344 501 0.0 1.230950 -17101.278236 0.327521 90.445532 0.340709 0.0 6.684015 0.0 ... 0.0 0.0 -1364.887693 -0.000178 -0.048953 tr869 True True 0.317641 picked_top5
9844 1002 0.0 1.230950 -17101.278236 0.327521 90.445532 0.340709 0.0 6.684015 0.0 ... 0.0 0.0 -1364.887693 -0.000178 -0.048953 tr869 True True 0.317641 picked_top5
10340 1503 0.0 1.230950 -17101.278236 0.327521 90.445532 0.340709 0.0 6.684015 0.0 ... 0.0 0.0 -1364.887693 -0.000178 -0.048953 tr869 True True 0.317641 picked_top5
10835 2004 0.0 1.230950 -17101.278236 0.327521 90.445532 0.340709 0.0 6.684015 0.0 ... 0.0 0.0 -1364.887693 -0.000178 -0.048953 tr869 True True 0.317641 picked_top5
7725 1009 700.0 0.255439 -19002.022160 0.515777 46.894413 0.759775 0.0 20.229688 0.0 ... 0.0 0.0 -1896.962431 0.139004 -0.381850 tr868 True True 0.434124 picked_top5
7490 774 27300.0 0.198462 -18795.694394 0.545927 41.236478 0.781562 0.0 15.006630 0.0 ... 0.0 0.0 -1850.986468 -0.176000 0.278655 tr868 True True 0.365764 picked_top5
7488 772 27100.0 0.194233 -18689.078432 0.534715 43.298055 0.804022 0.0 19.496037 0.0 ... 0.0 0.0 -1868.985796 -0.338582 -0.103741 tr868 True True 0.359272 picked_top5
7665 949 44800.0 0.201026 -19133.599935 0.525713 44.989571 0.793817 0.0 20.078757 0.0 ... 0.0 0.0 -1864.274740 -0.459874 -0.392121 tr868 True True 0.356494 picked_top5
6999 278 27800.0 0.205594 -18158.038125 0.526918 44.761400 0.775710 0.0 20.924675 0.0 ... 0.0 0.0 -1893.151896 -1.187462 2.812971 tr868 True True 0.327812 picked_top5
5258 436 43600.0 0.165094 -17053.726578 0.628532 27.597682 0.834830 0.0 11.126801 0.0 ... 0.0 0.0 -1442.122133 -0.424255 3.022483 tr866 True True 0.220069 picked_top5
5098 273 27300.0 0.166109 -17127.532661 0.627386 27.768206 0.862580 0.0 10.207970 0.0 ... 0.0 0.0 -1434.004674 -0.505575 5.068936 tr866 True True 0.163754 picked_top5
4962 127 12700.0 0.321337 -17026.444555 0.601809 31.711221 0.661329 0.0 8.726079 0.0 ... 0.0 0.0 -1445.665120 -0.735922 4.109533 tr866 True True 0.162460 picked_top5
5121 296 29600.0 0.152312 -16825.468015 0.627247 27.789014 0.867941 0.0 9.357269 0.0 ... 0.0 0.0 -1438.226706 -0.349591 3.006870 tr866 True True 0.148329 picked_top5
5215 391 39100.0 0.171415 -16686.245967 0.626311 27.928708 0.847414 0.0 8.871145 0.0 ... 0.0 0.0 -1441.508875 -0.515809 3.157518 tr866 True True 0.148148 picked_top5
3098 716 21500.0 0.621427 -14017.862442 0.795115 8.395546 0.461948 0.0 3.268449 0.0 ... 0.0 0.0 -1467.578457 -1.316991 -2.854982 tr862 True True 0.202037 picked_top5
3222 840 33900.0 0.591362 -14298.952979 0.792131 8.641868 0.457493 0.0 3.385219 0.0 ... 0.0 0.0 -1460.246571 -0.883616 -2.363559 tr862 True True 0.195641 picked_top5
3093 711 21000.0 0.628722 -13959.600064 0.804233 7.664932 0.440617 0.0 3.568598 0.0 ... 0.0 0.0 -1463.589674 -1.118755 -3.200069 tr862 True True 0.174863 picked_top5
3096 714 21300.0 0.639278 -14018.020344 0.785619 9.191880 0.443309 0.0 3.733206 0.0 ... 0.0 0.0 -1465.096458 -1.501777 -2.755732 tr862 True True 0.166096 picked_top5
3263 881 38000.0 0.595374 -14371.539225 0.775668 10.064969 0.442423 0.0 3.723739 0.0 ... 0.0 0.0 -1457.753334 -0.606648 -3.508047 tr862 True True 0.162568 picked_top5
1917 2028 2400.0 0.571845 -9655.812091 0.562631 38.258352 0.397447 0.0 5.116114 0.0 ... 0.0 0.0 -976.169211 1.783749 -0.897284 tr594 True True 0.349458 picked_top5
2173 2284 28000.0 0.611227 -9618.903287 0.570485 36.896556 0.424141 0.0 5.176804 0.0 ... 0.0 0.0 -971.931068 2.239968 -1.118442 tr594 True True 0.310490 picked_top5
1969 2080 7600.0 0.580387 -9250.084463 0.529520 44.270342 0.384082 0.0 4.010634 0.0 ... 0.0 0.0 -982.414995 2.497040 -0.160807 tr594 True True 0.242897 picked_top5
2141 2252 24800.0 0.585275 -9856.197753 0.534258 43.383165 0.426288 0.0 4.932738 0.0 ... 0.0 0.0 -973.913129 2.214772 -1.558762 tr594 True True 0.239219 picked_top5
2146 2257 25300.0 0.594052 -9847.422751 0.523620 45.387591 0.440995 0.0 4.988500 0.0 ... 0.0 0.0 -974.961953 1.646261 -1.170395 tr594 True True 0.215441 picked_top5

115 rows × 31 columns


In [41]:
# List every top-5 pick: row index, target name, and snapshot folder.
top5 = all_results.query("result == 'picked_top5'").reset_index(drop=True)
for idx, row in top5.iterrows():
    print(idx, row["name"], row["folder"])
    os.system("")  # no-op shell call (empty command) left in place from an earlier edit


0 tr594 2028
1 tr594 2080
2 tr594 2252
3 tr594 2257
4 tr594 2284
5 tr862 711
6 tr862 714
7 tr862 716
8 tr862 840
9 tr862 881
10 tr866 127
11 tr866 273
12 tr866 296
13 tr866 391
14 tr866 436
15 tr868 278
16 tr868 772
17 tr868 774
18 tr868 949
19 tr868 1009
20 tr869 0
21 tr869 501
22 tr869 1002
23 tr869 1503
24 tr869 2004
25 tr870 129
26 tr870 186
27 tr870 220
28 tr870 293
29 tr870 499
30 tr872 130
31 tr872 403
32 tr872 480
33 tr872 484
34 tr872 1597
35 tr872-halfDIHE 0
36 tr872-halfDIHE 501
37 tr872-halfDIHE 912
38 tr872-halfDIHE 940
39 tr872-halfDIHE 1507
40 tr877 0
41 tr877 501
42 tr877 1002
43 tr877 1503
44 tr877 2004
45 tr882 0
46 tr882 501
47 tr882 1002
48 tr882 1585
49 tr882 1965
50 tr884 492
51 tr884 497
52 tr884 1447
53 tr884 1448
54 tr884 1493
55 tr884-halfDIHE 1192
56 tr884-halfDIHE 1194
57 tr884-halfDIHE 1301
58 tr884-halfDIHE 1306
59 tr884-halfDIHE 2335
60 tr885 1065
61 tr885 1091
62 tr885 1111
63 tr885 1128
64 tr885 1267
65 tr891 86
66 tr891 561
67 tr891 570
68 tr891 611
69 tr891 1709
70 tr894 904
71 tr894 918
72 tr894 972
73 tr894 974
74 tr894 989
75 tr895 275
76 tr895 477
77 tr895 478
78 tr895 718
79 tr895 992
80 tr896 3
81 tr896 528
82 tr896 1062
83 tr896 1246
84 tr896 2068
85 tr898 648
86 tr898 677
87 tr898 695
88 tr898 729
89 tr898 730
90 tr921 623
91 tr921 806
92 tr921 818
93 tr921 828
94 tr921 835
95 tr922 83
96 tr922 1011
97 tr922 1013
98 tr922 1014
99 tr922 1019
100 tr947 214
101 tr947 224
102 tr947 365
103 tr947 481
104 tr947 1259
105 tr948 810
106 tr948 862
107 tr948 877
108 tr948 973
109 tr948 988
110 tr948-halfDIHE 617
111 tr948-halfDIHE 1058
112 tr948-halfDIHE 2259
113 tr948-halfDIHE 2343
114 tr948-halfDIHE 2353

In [23]:
# Stack the selection strategies into one frame, tagging each row's origin
# in a `result` column so they can be compared side by side.
labelled_frames = [
    picked_1.assign(result="picked_top1"),
    picked.assign(result='picked_top5'),
    best.assign(result='best'),
    init.assign(result='init'),
    worst.assign(result='worst'),
]
all_results = pd.concat(labelled_frames, sort=False)

In [25]:
picked.shape


Out[25]:
(20, 30)

In [32]:
all_results.reindex(columns=my_reorder(all_results.columns, ["name", "RMSD", "result"])) .reset_index(drop=True).to_csv("/Users/weilu/Desktop/selection_result.csv")

In [28]:
def my_reorder(a, first):
    """Return a column order that starts with `first`, followed by the
    remaining entries of `a` in their original order.

    a: iterable of column names.
    first: list of column names to move to the front.
    """
    rest = [col for col in a if col not in first]
    return list(first) + rest

In [31]:
all_results.reindex(columns=my_reorder(all_results.columns, ["name", "RMSD", "result"]))


Out[31]:
name RMSD result folder i Rw biasQ bias Qw Step ... Frag_Mem Vec_FM Membrane SSB VTotal pc pc2 chosen inrange prob
1917 tr594 5.71845 picked_top1 2028 2400.0 -9655.812091 0.562631 38.258352 0.397447 0.0 ... -542.733446 0.0 0.0 0.0 -976.169211 1.783749 -0.897284 True True 0.336318
3098 tr862 6.21427 picked_top1 716 21500.0 -14017.862442 0.795115 8.395546 0.461948 0.0 ... -860.309802 0.0 0.0 0.0 -1467.578457 -1.316991 -2.854982 True True 0.184471
5040 tr866 1.65094 picked_top1 436 43600.0 -17053.726578 0.628532 27.597682 0.834830 0.0 ... -730.772207 0.0 0.0 0.0 -1442.122133 -0.087063 3.699315 True True 0.220490
7964 tr868 2.55439 picked_top1 1009 700.0 -19002.022160 0.515777 46.894413 0.759775 0.0 ... -1184.431369 0.0 0.0 0.0 -1896.962431 0.139004 -0.381850 True True 0.419466
9181 tr869 12.30950 picked_top1 0 0.0 -17101.278236 0.327521 90.445532 0.340709 0.0 ... -745.292489 0.0 0.0 0.0 -1364.887693 -0.000178 -0.048953 True True 0.287940
11347 tr870 8.98989 picked_top1 27 2700.0 -18766.397302 0.366368 80.297996 0.325325 0.0 ... -919.447094 0.0 0.0 0.0 -1663.622985 -1.937195 -0.884915 True True 0.021734
13892 tr872 3.88397 picked_top1 130 13000.0 -12597.510264 0.673965 21.259789 0.654242 0.0 ... -546.317159 0.0 0.0 0.0 -1131.645842 0.635999 -1.004592 True True 0.232302
15145 tr877 3.00711 picked_top1 0 0.0 -24114.108027 0.568868 37.174966 0.648838 0.0 ... -821.245402 0.0 0.0 0.0 -1699.528374 -0.055574 -0.014200 True True 0.773073
18607 tr882 2.75337 picked_top1 1585 8200.0 -12227.807526 0.648724 24.679000 0.790488 0.0 ... -630.005603 0.0 0.0 0.0 -1123.346564 -0.326972 0.008144 True True 0.257559
20532 tr884 5.42798 picked_top1 1448 44600.0 -9418.042083 0.530488 44.088316 0.496556 0.0 ... -492.254269 0.0 0.0 0.0 -924.112214 0.739004 -2.026302 True True 0.231153
22564 tr885 3.18402 picked_top1 1065 6300.0 -19678.210041 0.713528 16.413273 0.723373 0.0 ... -879.121171 0.0 0.0 0.0 -1671.835269 0.400841 -0.236901 True True 0.125852
25261 tr891 1.91208 picked_top1 1709 20600.0 -18636.734867 0.645838 25.086161 0.814518 0.0 ... -811.315247 0.0 0.0 0.0 -1634.043036 0.290187 0.469162 True True 0.623671
26508 tr894 2.00769 picked_top1 904 40300.0 -6759.374211 0.667682 22.087109 0.764641 0.0 ... -339.543577 0.0 0.0 0.0 -671.963552 -0.318981 -0.765288 True True 0.385394
28827 tr895 4.87177 picked_top1 992 49100.0 -20488.493033 0.646876 24.939243 0.571973 0.0 ... -901.913395 0.0 0.0 0.0 -1650.549327 -0.933751 0.277437 True True 0.225033
29502 tr896 8.62961 picked_top1 3 300.0 -11703.621241 0.461786 57.934848 0.405691 0.0 ... -534.883801 0.0 0.0 0.0 -989.943449 -1.047856 0.015758 True True 0.497582
31988 tr898 13.46090 picked_top1 729 22800.0 -19042.795684 0.568624 37.217128 0.398104 0.0 ... -783.469742 0.0 0.0 0.0 -1469.264433 -2.546374 -2.851364 True True 0.170833
33058 tr921 3.24877 picked_top1 828 32700.0 -21881.858026 0.680476 20.419091 0.624908 0.0 ... -1871.646575 0.0 0.0 0.0 -2764.790089 0.245506 -0.892383 True True 0.165306
34861 tr922 3.68668 picked_top1 1011 900.0 -9636.845094 0.651477 24.293703 0.606792 0.0 ... -620.135642 0.0 0.0 0.0 -1010.273657 -0.056487 1.712977 True True 0.241481
36552 tr947 9.56921 picked_top1 481 48100.0 -27891.079857 0.610006 30.418991 0.482850 0.0 ... -1263.925386 0.0 0.0 0.0 -2108.005624 -2.046083 0.185091 True True 0.209786
37373 tr948 6.42138 picked_top1 810 30900.0 -28698.131138 0.581505 35.027568 0.603420 0.0 ... -1488.304814 0.0 0.0 0.0 -2589.281395 -0.825228 0.477354 True True 0.596875
0 tr594 5.71845 picked_top5 2028 2400.0 -9655.812091 0.562631 38.258352 0.397447 0.0 ... -542.733446 0.0 0.0 0.0 -976.169211 1.783749 -0.897284 True True 0.336318
8 tr862 5.91362 picked_top5 840 33900.0 -14298.952979 0.792131 8.641868 0.457493 0.0 ... -857.960775 0.0 0.0 0.0 -1460.246571 -0.883616 -2.363559 True True 0.176140
12 tr866 1.52312 picked_top5 296 29600.0 -16825.468015 0.627247 27.789014 0.867941 0.0 ... -729.065574 0.0 0.0 0.0 -1438.226706 -0.323353 4.008893 True True 0.149921
16 tr868 1.94233 picked_top5 772 27100.0 -18689.078432 0.534715 43.298055 0.804022 0.0 ... -1144.918529 0.0 0.0 0.0 -1868.985796 -0.338582 -0.103741 True True 0.328877
20 tr869 12.30950 picked_top5 0 0.0 -17101.278236 0.327521 90.445532 0.340709 0.0 ... -745.292489 0.0 0.0 0.0 -1364.887693 -0.000178 -0.048953 True True 0.287940
26 tr870 8.81563 picked_top5 129 12900.0 -20157.706928 0.630429 27.316528 0.324614 80000000.0 ... -985.958008 0.0 0.0 0.0 -1635.238984 -1.121479 -0.345961 True True 0.020210
31 tr872 3.38775 picked_top5 141 14100.0 -12447.029453 0.676140 20.977116 0.667752 0.0 ... -546.638457 0.0 0.0 0.0 -1131.691792 0.641274 -0.860320 True True 0.220697
35 tr877 3.00711 picked_top5 0 0.0 -24114.108027 0.568868 37.174966 0.648838 0.0 ... -821.245402 0.0 0.0 0.0 -1699.528374 -0.055574 -0.014200 True True 0.773073
44 tr882 2.28804 picked_top5 1965 46200.0 -11916.930776 0.636862 26.373853 0.759764 0.0 ... -633.406934 0.0 0.0 0.0 -1132.496525 -0.322323 0.313275 True True 0.212781
49 tr884 4.60271 picked_top5 1851 34800.0 -9350.865722 0.537420 42.796012 0.571265 0.0 ... -492.137189 0.0 0.0 0.0 -916.994515 0.261957 -0.655306 True True 0.138139
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
25047 tr885 2.35937 init 0 0.0 -20598.856085 0.685012 19.843456 0.830205 0.0 ... -870.801921 0.0 0.0 0.0 -1645.527867 -0.037832 0.039669 True NaN NaN
27552 tr891 1.60893 init 0 0.0 -18747.275083 0.620600 28.788908 0.860321 0.0 ... -820.247037 0.0 0.0 0.0 -1600.853027 -0.184893 -0.098650 True NaN NaN
30057 tr894 2.24606 init 0 0.0 -6810.073249 0.556208 39.390241 0.731370 0.0 ... -330.532789 0.0 0.0 0.0 -668.558313 -0.065067 -0.283718 True NaN NaN
32562 tr895 4.33382 init 0 0.0 -22028.450058 0.618778 29.065981 0.654401 0.0 ... -889.343989 0.0 0.0 0.0 -1623.103778 -0.010141 -0.051103 True NaN NaN
35067 tr896 8.22359 init 0 0.0 -12136.482066 0.466766 56.867616 0.443834 0.0 ... -525.593994 0.0 0.0 0.0 -947.681062 -0.058300 -0.054690 True NaN NaN
37572 tr898 13.99650 init 0 0.0 -18590.677602 0.528118 44.534476 0.357930 0.0 ... -782.373728 0.0 0.0 0.0 -1446.453004 0.042324 -0.167047 True NaN NaN
40077 tr921 3.55243 init 0 0.0 -22225.052777 0.676982 20.868098 0.633795 0.0 ... -1814.239425 0.0 0.0 0.0 -2679.102572 -0.101087 0.070372 True NaN NaN
42582 tr922 2.55776 init 0 0.0 -9494.179526 0.646615 24.976143 0.778997 0.0 ... -619.584850 0.0 0.0 0.0 -999.248416 0.115988 0.147087 True NaN NaN
45087 tr947 12.92220 init 0 0.0 -26347.737275 0.602246 31.641680 0.498832 0.0 ... -1262.954342 0.0 0.0 0.0 -2074.380569 1.187467 -0.301399 True NaN NaN
47592 tr948 6.72126 init 0 0.0 -29784.167159 0.534029 43.425714 0.623263 0.0 ... -1442.266587 0.0 0.0 0.0 -2474.731767 -0.029092 0.051793 True NaN NaN
2281 tr594 11.66420 worst 2392 38800.0 -7800.180486 0.405731 70.631048 0.293334 0.0 ... -528.484319 0.0 0.0 0.0 -945.059692 2.492546 0.800841 True True 0.000514
2842 tr862 8.64546 worst 459 45900.0 -13675.272843 0.744725 13.033074 0.552184 0.0 ... -836.017899 0.0 0.0 0.0 -1416.508005 -2.115153 0.313762 True True 0.004266
5982 tr866 5.49268 worst 1436 43400.0 -16316.523037 0.460162 58.285112 0.523121 0.0 ... -706.324188 0.0 0.0 0.0 -1342.476061 0.189714 -0.228174 True True 0.000023
8884 tr868 5.50126 worst 2067 6300.0 -18165.315305 0.465461 57.146452 0.608624 0.0 ... -1085.027900 0.0 0.0 0.0 -1783.826243 0.645704 -2.754277 True True 0.004290
9679 tr869 14.43870 worst 598 9700.0 -16097.470589 0.218214 122.237944 0.311339 0.0 ... -729.588674 0.0 0.0 0.0 -1323.833114 -0.808695 -0.587784 True True 0.001463
12776 tr870 11.43280 worst 1484 48200.0 -18635.189014 0.630429 27.316528 0.311343 80000000.0 ... -985.958008 0.0 0.0 0.0 -1635.238984 -1.256596 2.241660 True True 0.006724
15120 tr872 7.00137 worst 2011 700.0 -11677.938128 0.670047 21.773797 0.571122 0.0 ... -545.404119 0.0 0.0 0.0 -1101.906710 0.895718 0.631344 True True 0.007777
16772 tr877 5.97014 worst 1819 31600.0 -23656.864393 0.497143 50.573068 0.572003 0.0 ... -819.115369 0.0 0.0 0.0 -1632.289229 0.220377 3.612046 True True 0.001921
18310 tr882 4.10836 worst 1201 19900.0 -11669.955472 0.613084 29.940872 0.673967 0.0 ... -617.772188 0.0 0.0 0.0 -1084.858365 -0.434148 -0.384838 True True 0.000567
21428 tr884 8.92547 worst 2345 34100.0 -8695.952091 0.456176 59.148866 0.482172 0.0 ... -479.625249 0.0 0.0 0.0 -875.415909 2.096840 0.496279 True True 0.000063
22233 tr885 5.25420 worst 733 23200.0 -19040.407218 0.663800 22.606043 0.762638 0.0 ... -879.497813 0.0 0.0 0.0 -1663.557353 0.193633 -1.313889 True True 0.007403
26049 tr891 4.07153 worst 2497 49300.0 -17308.511883 0.599059 32.150760 0.652094 0.0 ... -791.156171 0.0 0.0 0.0 -1550.446442 2.179495 1.449662 True True 0.000239
27079 tr894 5.09216 worst 1485 48300.0 -6089.793333 0.505490 48.908058 0.487889 0.0 ... -326.511714 0.0 0.0 0.0 -661.998634 -0.766482 -2.990092 True True 0.003307
27974 tr895 7.98054 worst 43 4300.0 -18903.580144 0.583194 34.745474 0.565594 0.0 ... -892.775956 0.0 0.0 0.0 -1630.348760 -1.605658 3.039932 True True 0.001931
30562 tr896 11.04840 worst 1207 20500.0 -10977.597770 0.402719 71.348928 0.333853 0.0 ... -538.589762 0.0 0.0 0.0 -956.501625 -0.912589 -2.112027 True True 0.008942
31528 tr898 15.98600 worst 132 13200.0 -17054.063013 0.483393 53.376536 0.348017 0.0 ... -785.866058 0.0 0.0 0.0 -1457.509460 -2.655997 0.094145 True True 0.005441
32402 tr921 4.78319 worst 164 16400.0 -20379.263146 0.629018 27.525487 0.588459 0.0 ... -1754.727325 0.0 0.0 0.0 -2577.565026 -1.471915 -4.113387 True True 0.000025
36178 tr922 8.54551 worst 2429 42500.0 -8689.220583 0.406243 70.509502 0.408714 0.0 ... -507.886517 0.0 0.0 0.0 -879.592569 1.571574 4.837135 True True 0.000040
36219 tr947 15.48280 worst 15 1500.0 -25124.991722 0.569790 37.016088 0.459361 0.0 ... -1249.896273 0.0 0.0 0.0 -2041.418239 -1.986278 -0.695540 True True 0.000236
38817 tr948 10.08460 worst 2465 46100.0 -26585.016143 0.517020 46.653959 0.518408 0.0 ... -1433.128337 0.0 0.0 0.0 -2468.110974 0.743237 -1.782562 True True 0.000068

100 rows × 31 columns


In [ ]:
picked

In [19]:
def _report_rmsd_improvement(baseline, candidate, names):
    """Print per-target RMSD before/after selection; return (n_improved, total_gain).

    baseline / candidate: DataFrames with 'name' and 'RMSD' columns; the first
    matching row per target is compared (same lookup the original loop did).
    names: iterable of target names.
    This cell's logic is repeated verbatim in later cells (In[26], In[182]) —
    factored into a helper so the comparison is defined once.
    """
    n_improved = 0
    total_gain = 0
    for name in names:
        init_tmp = baseline.query(f"name == '{name}'")["RMSD"].iloc[0]
        picked_tmp = candidate.query(f"name == '{name}'")["RMSD"].iloc[0]
        improved = picked_tmp < init_tmp
        print(name, init_tmp, picked_tmp, round(init_tmp - picked_tmp, 3), improved)
        total_gain += init_tmp - picked_tmp
        n_improved += improved  # bool adds as 0/1
    return n_improved, total_gain

count, total = _report_rmsd_improvement(init, picked, folder_list)
print("improved: ", count, len(folder_list), total)


tr898 13.9965 13.4609 0.536 True
tr869 12.3095 12.3095 0.0 False
tr947 12.9222 8.10622 4.816 True
tr894 2.24606 1.6856900000000001 0.56 True
tr882 2.34977 2.28804 0.062 True
tr594 4.65665 5.71845 -1.062 False
tr862 5.53469 5.91362 -0.379 False
tr877 3.00711 3.00711 0.0 False
tr872 5.66699 3.38775 2.279 True
tr885 2.35937 2.72348 -0.364 False
tr866 3.23637 1.52312 1.713 True
tr868 1.97032 1.94233 0.028 True
tr884 3.81972 4.60271 -0.783 False
tr895 4.333819999999999 4.87177 -0.538 False
tr896 8.22359 8.01121 0.212 True
tr870 7.65488 8.81563 -1.161 False
tr921 3.55243 3.17155 0.381 True
tr922 2.55776 3.28628 -0.729 False
tr891 1.60893 1.91208 -0.303 False
tr948 6.721260000000001 5.56396 1.157 True
improved:  10 20 6.42652

In [26]:
# Same RMSD-improvement tally as the previous cell, but against the
# single top-1 pick (`picked_1`) instead of the top-5 pick.
count = 0
total = 0
for name in folder_list:
    before = init.query(f"name == '{name}'")["RMSD"].iloc[0]
    after = picked_1.query(f"name == '{name}'")["RMSD"].iloc[0]
    gain = before - after
    did_improve = after < before
    print(name, before, after, round(gain, 3), did_improve)
    total += gain
    count += did_improve  # bool adds as 0/1
print("improved: ", count, len(folder_list), total)


tr898 13.9965 13.4609 0.536 True
tr869 12.3095 12.3095 0.0 False
tr947 12.9222 9.56921 3.353 True
tr894 2.24606 2.00769 0.238 True
tr882 2.34977 2.75337 -0.404 False
tr594 4.65665 5.71845 -1.062 False
tr862 5.53469 6.21427 -0.68 False
tr877 3.00711 3.00711 0.0 False
tr872 5.66699 3.8839699999999997 1.783 True
tr885 2.35937 3.1840200000000003 -0.825 False
tr866 3.23637 1.65094 1.585 True
tr868 1.97032 2.55439 -0.584 False
tr884 3.81972 5.427980000000001 -1.608 False
tr895 4.333819999999999 4.87177 -0.538 False
tr896 8.22359 8.629610000000001 -0.406 False
tr870 7.65488 8.989889999999999 -1.335 False
tr921 3.55243 3.24877 0.304 True
tr922 2.55776 3.6866800000000004 -1.129 False
tr891 1.60893 1.91208 -0.303 False
tr948 6.721260000000001 6.42138 0.3 True
improved:  7 20 -0.7740600000000017

In [181]:
# Feature/label configuration for the structure-selection classifier.
# Commented-out alternatives record feature sets tried in earlier experiments.
filtered = a
# FEATURES = ["eigenvalues", "entropy", "pca"]
# FEATURES = ["eigenvalues", "entropy", "diffRMSD"]
# FEATURES = ["eigenvalues", "entropy"]
FEATURES = [
    "biasQ",
    'Rw',
     'VTotal',
#     'RMSD', # test
#     'Qw',
#      'Burial',
#      'Water',
#      'Rama',
#      'DSSP',
#      'P_AP',
#      'Helix',
#      'Frag_Mem'
               ]
# FEATURES = ["eigenvalues"]
# LABEL = "diffRMSD"
# LABEL = "RMSD"
LABEL = "chosen"   # binary target column fed through the cat_pipeline
DEGREE = 1         # polynomial expansion degree for the numeric features

def pred_from_raw(a):
    """Score each row of `a` with the fitted classifier `clf`.

    Runs the same feature pipeline used for training (my_transform), takes
    the positive-class probability, and returns `a` with a new `prob` column.
    """
    transformed = my_transform(a, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
    labels = transformed[:, -1]     # last column is the label; kept for parity, unused here
    features = transformed[:, :-1]
    prob = clf.predict_proba(features)[:, 1]
    return a.assign(prob=prob)

# --- Build the training matrix ---------------------------------------------
# Transform each target's frame separately (per-name scaling), then stack.
# data = my_transform(raw_data, label=LABEL, degree=DEGREE, FEATURES=FEATURES)
# data = raw_data.groupby('name').apply(my_transform, label=LABEL, degree=DEGREE, FEATURES=FEATURES)[0]
data = np.concatenate(raw_data.groupby('name').apply(my_transform, 
                                                     label=LABEL, degree=DEGREE, FEATURES=FEATURES).values)
# Label is the last column (cat_pipeline output); features are the rest.
train_y = data[:,-1]
train_set = data[:,:-1]

# --- Fit the classifier -----------------------------------------------------
# Earlier experiments (commented) tried SVC and class-weighted logistic regression.
# clf = svm.SVC(probability=True)
# p = 0.01
# clf = LogisticRegression(random_state=27, class_weight={0:p, 1:(1-p)})
clf = LogisticRegression(random_state=27)
clf.fit(train_set, train_y)

# Score every candidate structure per target with the fitted model.
filtered = filtered.reset_index(drop=True).groupby("name").apply(pred_from_raw).reset_index(drop=True)


# --- Pick structures per strategy -------------------------------------------
picked_n = 1
# Best achievable: lowest RMSD per target (oracle upper bound).
best = raw_data_all_2.groupby("name").apply(choose_top, col="RMSD"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
# Active branch: pick the single highest-probability structure per target.
if True:
    picked = filtered.groupby("name").apply(choose_top, col="prob"
                                            , n=1, ascending=False).reset_index(drop=True).query("chosen==True")

# Disabled alternative: take top-5 by probability, then the best RMSD among them.
if False:
    picked = filtered.groupby("name").apply(choose_top, col="prob"
                                                , n=5, ascending=False).reset_index(drop=True).query("chosen==True")
    picked = picked.groupby("name").apply(choose_top, col="RMSD"
                                                , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
# Worst case: highest RMSD per target.
worst = filtered.groupby("name").apply(choose_top, col="RMSD"
                                            , n=1, ascending=False).reset_index(drop=True).query("chosen==True")
# Initial structure: smallest frame index i per target.
init = raw_data_all_2.groupby("name").apply(choose_top, col="i"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
all_results = pd.concat([best.assign(result='best'), 
                         picked.assign(result='picked'), init.assign(result='init')
                        , worst.assign(result='worst')
                        ], sort=False)
# all_results = pd.concat([best.assign(result='best'), 
#                          picked.assign(result='picked')])
# picked.to_csv("/Users/weilu/Desktop/picked.csv")  # optional export

# --- Plot RMSD per target for each strategy ---------------------------------
# sns.set(rc={'figure.figsize':(20,30)})
# plt.figure(figsize=(15,8))
fg = sns.FacetGrid(data=all_results.reset_index(), hue='result', height=8, aspect=1.63)
fg.map(plt.plot, 'name', 'RMSD').add_legend(fontsize=20)
# fg.set(ylim=(0, 10))


Out[181]:
<seaborn.axisgrid.FacetGrid at 0x1a20ed8e10>

In [182]:
# Re-run the per-target improvement tally after the latest selection pass.
count, total = 0, 0
for name in folder_list:
    rmsd_init = init.query(f"name == '{name}'")["RMSD"].iloc[0]
    rmsd_pick = picked.query(f"name == '{name}'")["RMSD"].iloc[0]
    print(name, rmsd_init, rmsd_pick, round(rmsd_init - rmsd_pick, 3), rmsd_pick < rmsd_init)
    total += rmsd_init - rmsd_pick
    count += rmsd_pick < rmsd_init  # bool adds as 0/1
print("improved: ", count, len(folder_list), total)


tr898 13.9965 12.8408 1.156 True
tr869 12.3095 12.3095 0.0 False
tr947 12.9222 12.1452 0.777 True
tr894 2.24606 2.00769 0.238 True
tr882 2.34977 2.75337 -0.404 False
tr594 4.65665 5.71845 -1.062 False
tr862 5.53469 5.91362 -0.379 False
tr877 3.00711 3.00711 0.0 False
tr872 5.66699 4.53573 1.131 True
tr885 2.35937 3.1840200000000003 -0.825 False
tr866 3.23637 1.65094 1.585 True
tr868 1.97032 2.55439 -0.584 False
tr884 3.81972 4.81 -0.99 False
tr895 4.333819999999999 5.72387 -1.39 False
tr896 8.22359 8.629610000000001 -0.406 False
tr870 7.65488 9.11107 -1.456 False
tr921 3.55243 3.24877 0.304 True
tr922 2.55776 3.6866800000000004 -1.129 False
tr891 1.60893 1.91208 -0.303 False
tr948 6.721260000000001 6.42138 0.3 True
improved:  7 20 -3.436360000000002

In [339]:
all_results.query("name == 'tr594'")


Out[339]:
folder i RMSD Rw biasQ bias Qw Step Chain Shake ... SSB VTotal pc pc2 name chosen result pc_center inrange prob
0 0 0.0 4.65665 -10838.987781 0.590522 33.534424 0.464193 0.0 14.571923 0.0 ... 0.0 -940.887277 -0.086922 -0.059985 tr594 True best NaN NaN NaN
0 0 0.0 4.65665 -10838.987781 0.590522 33.534424 0.464193 0.0 14.571923 0.0 ... 0.0 -940.887277 -0.086922 -0.059985 tr594 True picked 0.703 True 0.042606
0 0 0.0 4.65665 -10838.987781 0.590522 33.534424 0.464193 0.0 14.571923 0.0 ... 0.0 -940.887277 -0.086922 -0.059985 tr594 True init NaN NaN NaN
1020 2380 37600.0 11.55900 -8163.823349 0.491720 51.669743 0.349195 0.0 4.380186 0.0 ... 0.0 -960.110987 1.421570 -0.122716 tr594 True worst 0.703 True 0.003102

4 rows × 32 columns


In [35]:
clf.coef_


Out[35]:
array([[ 0.24185815, -0.37308731, -0.43718463]])

In [221]:
# Compare strategies on a chosen metric and plot it per target.
Plot_Metric = "Qw"
# For similarity scores like Qw, higher is better, so "best" sorts descending.
# BUG FIX: the original guard was `if Plot_Metric:` — always truthy for a
# non-empty string, so the `else` branch was dead. Compare explicitly instead
# (behavior is unchanged for the current value "Qw").
if Plot_Metric == "Qw":
    isAscending = False
else:
    isAscending = True

picked_n = 1
# Oracle best by the plot metric.
best = raw_data_all_2.groupby("name").apply(choose_top, col=Plot_Metric
                                            , n=1, ascending=isAscending).reset_index(drop=True).query("chosen==True")
# Model pick: highest predicted probability per target.
picked = filtered.groupby("name").apply(choose_top, col="prob"
                                            , n=1, ascending=False).reset_index(drop=True).query("chosen==True")
worst = filtered.groupby("name").apply(choose_top, col=Plot_Metric
                                            , n=1, ascending=False).reset_index(drop=True).query("chosen==True")
# Initial structure: smallest frame index i per target.
init = raw_data_all_2.groupby("name").apply(choose_top, col="i"
                                            , n=1, ascending=True).reset_index(drop=True).query("chosen==True")
all_results = pd.concat([best.assign(result='best'), 
                         picked.assign(result='picked'), init.assign(result='init')
#                         , worst.assign(result='worst')
                        ], sort=False)
# all_results = pd.concat([best.assign(result='best'), 
#                          picked.assign(result='picked')])
# picked.to_csv("/Users/weilu/Desktop/picked.csv")  # optional export

# sns.set(rc={'figure.figsize':(20,30)})
# plt.figure(figsize=(15,8))
fg = sns.FacetGrid(data=all_results.reset_index(), hue='result', height=8, aspect=1.63)
fg.map(plt.plot, 'name', Plot_Metric).add_legend(fontsize=20)
fg.set(ylim=(0, 1))   # Qw is bounded in [0, 1]


Out[221]:
<seaborn.axisgrid.FacetGrid at 0x1a2d7cfd30>

In [170]:
# Attach the initial-structure RMSD and the improvement to the picked set.
# NOTE(review): assumes `init` and `picked` rows are aligned positionally
# (same per-target order) — verify before relying on diff_RMSD.
initial_rmsd = init["RMSD"].values
picked["init_RMSD"] = initial_rmsd
picked["diff_RMSD"] = initial_rmsd - picked["RMSD"].values
out = picked[["name", "RMSD", "init_RMSD", "diff_RMSD", "folder"]].reset_index(drop=True)

In [206]:
# Scatter of Qw vs RMSD, colored by target name.
fg = sns.FacetGrid(data=filtered, hue='name', height=8, aspect=1.63)
fg.map(plt.scatter, 'Qw', 'RMSD').add_legend(fontsize=20)


Out[206]:
<seaborn.axisgrid.FacetGrid at 0x1a2c9cb780>

In [35]:
filtered.plot.scatter("prob", "RMSD")


Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a27569080>

In [25]:
out


Out[25]:
name RMSD init_RMSD diff_RMSD folder
0 tr894 1.36968 2.24606 0.87638 980

In [13]:
raw_data_all_2.plot("RMSD", "Rw")


Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a23707f60>

In [14]:
raw_data_all_2.plot("RMSD", "pc")


Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a23783828>

In [15]:
out


Out[15]:
name RMSD init_RMSD diff_RMSD folder
0 tr894 1.36968 2.24606 0.87638 980

In [16]:
out


Out[16]:
name RMSD init_RMSD diff_RMSD folder
0 tr894 1.36968 2.24606 0.87638 980

In [70]:
all_results


Out[70]:
folder i RMSD Rw biasQ bias Step Chain Shake Chi ... SSB VTotal pc pc2 name chosen result pc_center inrange prob
980 980 47900.0 1.36968 -6834.949362 0.645772 25.095468 0 3.685220 0.0 0.588609 ... 0.0 -656.184416 0.944122 -0.804111 tr894 True best NaN NaN NaN
981 981 48000.0 1.57229 -6850.297788 0.673878 21.271087 0 4.750551 0.0 0.512601 ... 0.0 -659.909501 0.762314 -0.531746 tr894 True picked 2.03 True 0.523816
0 0 0.0 2.24606 -6810.073249 0.556208 39.390241 0 5.546380 0.0 1.160888 ... 0.0 -668.558313 -0.065067 -0.283718 tr894 True init NaN NaN NaN

3 rows × 31 columns


In [13]:
# out.to_csv("/Users/weilu/Desktop/picked_3.csv")

In [26]:
clf.coef_


Out[26]:
array([[ 0.31865771, -0.24574338, -0.00429271, -0.15621297,  0.12086065,
         0.03529636,  0.05114406,  0.06779384,  0.23049113, -0.0941187 ]])

In [14]:
clf.coef_


Out[14]:
array([[ 0.20157408, -0.69485223,  0.04456798]])

In [15]:
# RMSD per target for each selection strategy, y-axis clipped to 0-10.
fg = sns.FacetGrid(data=all_results.reset_index(), hue='result', height=8, aspect=1.63)
fg.map(plt.plot, 'name', 'RMSD').add_legend(fontsize=20)
fg.set(ylim=(0, 10))


Out[15]:
<seaborn.axisgrid.FacetGrid at 0x1a22576f60>

In [16]:
filtered["name"].unique().shape


Out[16]:
(17,)

In [17]:
picked[["RMSD", "name"]]


Out[17]:
RMSD name
0 5.14331 tr594
1136 4.31091 tr862
1890 3.14792 tr866
2586 1.61058 tr868
3123 8.69691 tr870
3576 2.81351 tr872
4796 3.07719 tr877
5446 2.25775 tr882
6372 4.06919 tr884
6989 2.35937 tr885
7922 1.60893 tr891
9265 1.18208 tr894
10424 4.67184 tr895
10781 8.22359 tr896
14297 3.25991 tr921
14558 3.84193 tr922
16140 5.40097 tr948

In [18]:
# picked.to_csv("/Users/weilu/Desktop/picked_2.csv")

In [19]:
# Load one target's simulation records (RMSD, bias, AWSEM energy terms, Rw score,
# PCA projection) and column-join them into a single frame, one row per snapshot.
name = "tr894"
name_list = ["Step" , "Chain" , "Shake" , "Chi" , "Rama", "Excluded", "DSSP", "P_AP", "Water" ,"Burial", "Helix", "AMH_Go", "Frag_Mem", "Vec_FM", "Membrane", "SSB","VTotal"]

# NOTE(review): hardcoded absolute path — consider deriving from a configurable
# DATA_DIR so the notebook runs on other machines.
# you probably want to change the location below
#     location = f"/Users/weilu/Research/server/sep_2018/03_week/02_week/{name}/"
location = f"/Users/weilu/Research/server/nov_2018/structure_selection/{name}/"
# Raw strings (r"\s+") for the regex separator: a plain "\s" is an invalid
# escape sequence and raises a SyntaxWarning on Python 3.12+.
RMSD = pd.read_table(location+"rmsd-angstrom.xvg", names=["i", "RMSD"], sep=r"\s+")
bias = pd.read_table(location+"bias.log", names=["i", "biasQ", "bias"], sep=r"\s+").drop("i", axis=1)
awsem = pd.read_table(location+"awsem.log", names=name_list)
rw = pd.read_table(location+"rwplusScore.txt", names=["i", "Rw"], sep=r"\s+").drop("i", axis=1)
# pc location
#     location = f"/Users/weilu/Research/server/sep_2018/03_week/{name}/"
#     location = f"/Users/weilu/Research/server/oct_2018/01_week/{name}/"
pc = pd.read_table(location+"pcarmsd_scaled.txt", names=["i", "pc", "pc2"], sep=r"\s+", comment="#").drop("i", axis=1)
# Column-wise join: assumes all files list snapshots in the same order — TODO confirm.
raw_data = pd.concat([RMSD, rw, bias, awsem, pc], axis=1)
# Tag rows with the target name and expose the snapshot index as a "folder" column;
# this expression is the cell's displayed output (raw_data itself stays untagged).
raw_data.assign(name=name).reset_index().rename(columns={"index":"folder"})


Out[19]:
folder i RMSD Rw biasQ bias Step Chain Shake Chi ... Helix AMH_Go Frag_Mem Vec_FM Membrane SSB VTotal pc pc2 name
0 0 0.0 2.24606 -6810.073249 0.556208 39.390241 0 5.546380 0.0 1.160888 ... -11.494550 0.0 -330.532789 0.0 0.0 0.0 -668.558313 -0.065067 -0.283718 tr894
1 1 100.0 3.23173 -6286.940968 0.551209 40.282725 0 2.470745 0.0 0.544233 ... -12.663186 0.0 -332.365779 0.0 0.0 0.0 -661.611254 -2.030413 -1.872569 tr894
2 2 200.0 3.21661 -6289.430903 0.497013 50.599184 0 4.764399 0.0 0.562483 ... -13.188086 0.0 -332.765589 0.0 0.0 0.0 -658.149297 -2.028462 -1.644785 tr894
3 3 300.0 3.15813 -6190.501191 0.528574 44.448412 0 2.255989 0.0 0.468350 ... -13.341891 0.0 -333.111932 0.0 0.0 0.0 -662.850984 -1.765059 -1.880311 tr894
4 4 400.0 3.48106 -6070.736394 0.507673 48.477271 0 4.563115 0.0 0.358890 ... -13.054297 0.0 -332.096669 0.0 0.0 0.0 -655.505067 -2.050421 -0.862438 tr894
5 5 500.0 3.21603 -6207.393138 0.507665 48.478663 0 3.079867 0.0 0.562905 ... -12.983445 0.0 -334.065069 0.0 0.0 0.0 -654.514531 -2.024328 -0.505672 tr894
6 6 600.0 3.26742 -6270.166177 0.515849 46.880384 0 3.598324 0.0 0.689476 ... -12.473662 0.0 -329.046856 0.0 0.0 0.0 -661.071468 -2.225011 -1.368251 tr894
7 7 700.0 3.45545 -6275.141716 0.521223 45.845485 0 3.142972 0.0 0.471721 ... -12.879093 0.0 -330.526926 0.0 0.0 0.0 -660.649964 -1.917079 -0.934712 tr894
8 8 800.0 2.84899 -6353.542762 0.521744 45.745735 0 4.776124 0.0 0.537145 ... -12.874610 0.0 -330.107068 0.0 0.0 0.0 -666.482035 -1.836351 -1.092572 tr894
9 9 900.0 3.43413 -6114.807297 0.567139 37.473775 0 3.616298 0.0 0.553106 ... -13.392355 0.0 -332.670193 0.0 0.0 0.0 -664.643019 -1.984028 -1.763320 tr894
10 10 1000.0 3.39099 -6010.453477 0.536807 42.909507 0 3.596885 0.0 0.391673 ... -13.353049 0.0 -332.451981 0.0 0.0 0.0 -659.813810 -2.215176 0.442220 tr894
11 11 1100.0 3.48153 -6114.438719 0.581163 35.084917 0 2.720963 0.0 0.310693 ... -13.200931 0.0 -331.501808 0.0 0.0 0.0 -664.442638 -2.008016 0.629552 tr894
12 12 1200.0 3.23210 -6101.638205 0.519535 46.169300 0 5.605657 0.0 0.721162 ... -10.424754 0.0 -332.022908 0.0 0.0 0.0 -656.524265 -1.985955 -0.727112 tr894
13 13 1300.0 2.92439 -6059.757243 0.529431 44.287121 0 2.323098 0.0 0.547973 ... -13.361106 0.0 -334.500030 0.0 0.0 0.0 -660.984355 -2.111932 -1.343508 tr894
14 14 1400.0 2.90091 -6306.977750 0.545174 41.373291 0 3.194121 0.0 0.466709 ... -13.974103 0.0 -333.122664 0.0 0.0 0.0 -664.931939 -1.862049 -0.307235 tr894
15 15 1500.0 3.44498 -6054.804323 0.481283 53.813425 0 4.393513 0.0 0.413912 ... -10.416986 0.0 -329.519729 0.0 0.0 0.0 -640.413959 -1.964237 -0.545008 tr894
16 16 1600.0 3.72309 -5924.173405 0.517176 46.623769 0 4.403761 0.0 0.530923 ... -12.445575 0.0 -332.027049 0.0 0.0 0.0 -652.482812 -2.177049 -0.822812 tr894
17 17 1700.0 2.94234 -6170.545926 0.605790 31.080307 0 3.403110 0.0 0.394039 ... -13.070255 0.0 -334.764821 0.0 0.0 0.0 -662.858390 -1.977594 -0.605940 tr894
18 18 1800.0 3.14126 -6104.911588 0.565975 37.675492 0 2.148955 0.0 0.416200 ... -14.408324 0.0 -332.427927 0.0 0.0 0.0 -657.525813 -2.012379 -0.666930 tr894
19 19 1900.0 3.03640 -6248.712283 0.549830 40.530526 0 3.192358 0.0 0.373018 ... -12.509658 0.0 -331.034803 0.0 0.0 0.0 -652.148108 -1.860132 -0.951803 tr894
20 20 2000.0 2.80779 -6441.963711 0.556436 39.349743 0 2.222043 0.0 0.423124 ... -12.922997 0.0 -334.503676 0.0 0.0 0.0 -661.244525 -1.888069 -0.825117 tr894
21 21 2100.0 3.31259 -6410.346460 0.513748 47.288295 0 6.191025 0.0 0.604926 ... -12.253130 0.0 -332.203648 0.0 0.0 0.0 -651.160377 -2.001888 -0.592104 tr894
22 22 2200.0 2.91335 -6308.221083 0.593916 32.980882 0 3.234118 0.0 0.664409 ... -13.207077 0.0 -332.990401 0.0 0.0 0.0 -658.201126 -2.152202 -0.717611 tr894
23 23 2300.0 3.19933 -6374.911948 0.539413 42.428153 0 2.538853 0.0 0.341001 ... -13.408200 0.0 -331.572188 0.0 0.0 0.0 -662.264580 -2.190752 -0.847559 tr894
24 24 2400.0 2.94005 -6354.892279 0.603693 31.411886 0 1.888008 0.0 0.590554 ... -13.759548 0.0 -334.756167 0.0 0.0 0.0 -673.536860 -2.020588 -0.800902 tr894
25 25 2500.0 3.24435 -6448.101987 0.570527 36.889392 0 2.534727 0.0 0.389001 ... -12.327469 0.0 -332.104002 0.0 0.0 0.0 -664.744860 -1.970641 -1.751093 tr894
26 26 2600.0 3.21349 -6356.016826 0.552069 40.128420 0 3.240573 0.0 0.340624 ... -14.062783 0.0 -331.951360 0.0 0.0 0.0 -669.486651 -2.017166 -1.114400 tr894
27 27 2700.0 3.08817 -6015.780813 0.543925 41.600811 0 2.849521 0.0 0.563807 ... -11.707489 0.0 -333.438918 0.0 0.0 0.0 -656.923779 -2.079045 -1.713251 tr894
28 28 2800.0 2.91854 -6266.414740 0.570673 36.864257 0 2.305444 0.0 0.542672 ... -13.504228 0.0 -336.875255 0.0 0.0 0.0 -661.990310 -1.959546 -1.258906 tr894
29 29 2900.0 3.51806 -6095.122990 0.575797 35.989700 0 2.400738 0.0 0.481813 ... -12.959278 0.0 -333.359501 0.0 0.0 0.0 -662.430279 -2.148657 -0.842376 tr894
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2475 2475 47100.0 3.04801 -6584.548221 0.528828 44.400564 0 3.443490 0.0 0.689302 ... -8.527353 0.0 -328.333283 0.0 0.0 0.0 -656.799558 1.489679 -1.073833 tr894
2476 2476 47200.0 3.06231 -6524.537951 0.501176 49.765102 0 4.955148 0.0 0.473575 ... -7.623936 0.0 -324.707280 0.0 0.0 0.0 -648.310152 1.433943 -0.830940 tr894
2477 2477 47300.0 3.28581 -6530.410132 0.574789 36.160841 0 4.166641 0.0 0.671689 ... -9.461912 0.0 -327.996908 0.0 0.0 0.0 -661.264516 1.390436 -1.208435 tr894
2478 2478 47400.0 3.23796 -6505.011426 0.455993 59.188681 0 7.585024 0.0 0.630827 ... -6.166685 0.0 -318.313982 0.0 0.0 0.0 -633.543536 1.532387 -1.189597 tr894
2479 2479 47500.0 3.47343 -6412.675255 0.509082 48.200074 0 4.337123 0.0 0.533898 ... -8.175169 0.0 -321.367115 0.0 0.0 0.0 -641.175122 1.170308 -0.850157 tr894
2480 2480 47600.0 3.22195 -6600.284916 0.546451 41.141258 0 4.499017 0.0 0.531768 ... -8.919736 0.0 -328.276578 0.0 0.0 0.0 -656.319864 1.024558 -1.633348 tr894
2481 2481 47700.0 3.29628 -6594.057433 0.450815 60.320799 0 5.516787 0.0 0.532782 ... -6.014002 0.0 -324.793170 0.0 0.0 0.0 -643.313596 1.289813 -1.317772 tr894
2482 2482 47800.0 3.22039 -6538.294794 0.492895 51.431169 0 4.771708 0.0 0.643649 ... -10.536290 0.0 -327.516119 0.0 0.0 0.0 -654.529729 0.546995 -1.410352 tr894
2483 2483 47900.0 3.71419 -6429.439945 0.486252 52.787339 0 4.245413 0.0 0.423871 ... -4.541977 0.0 -327.570965 0.0 0.0 0.0 -650.587467 0.814162 -1.430980 tr894
2484 2484 48000.0 3.19131 -6477.936224 0.522025 45.692109 0 4.294322 0.0 0.558186 ... -7.184502 0.0 -326.483925 0.0 0.0 0.0 -651.265310 1.539578 -1.387833 tr894
2485 2485 48100.0 3.07537 -6498.204684 0.521062 45.876390 0 4.032237 0.0 0.661476 ... -9.748129 0.0 -325.586983 0.0 0.0 0.0 -651.190938 1.369766 -0.825550 tr894
2486 2486 48200.0 3.37094 -6374.215012 0.539856 42.346474 0 3.503117 0.0 0.476007 ... -6.907826 0.0 -326.781573 0.0 0.0 0.0 -650.287189 1.779587 -0.777044 tr894
2487 2487 48300.0 3.28947 -6462.479014 0.529538 44.266976 0 4.450656 0.0 0.778137 ... -9.720084 0.0 -327.426579 0.0 0.0 0.0 -655.430363 1.632520 -1.162894 tr894
2488 2488 48400.0 3.32840 -6504.263804 0.487773 52.475263 0 3.823074 0.0 0.541481 ... -9.621493 0.0 -328.796522 0.0 0.0 0.0 -648.395792 0.591862 -1.299183 tr894
2489 2489 48500.0 3.43751 -6447.190797 0.507673 48.477255 0 5.101223 0.0 0.543194 ... -8.555008 0.0 -326.452097 0.0 0.0 0.0 -648.549022 0.661395 -1.650177 tr894
2490 2490 48600.0 3.82345 -6365.246694 0.495035 50.997870 0 3.198458 0.0 0.629422 ... -9.256821 0.0 -327.410325 0.0 0.0 0.0 -647.431837 -0.195077 -2.383967 tr894
2491 2491 48700.0 3.62716 -6430.212535 0.492129 51.586661 0 4.407783 0.0 0.580651 ... -8.285745 0.0 -323.436894 0.0 0.0 0.0 -646.712158 0.334757 -1.955750 tr894
2492 2492 48800.0 3.25856 -6733.348524 0.528495 44.463321 0 4.332744 0.0 0.787561 ... -10.086368 0.0 -326.230732 0.0 0.0 0.0 -653.059823 0.900004 -1.994071 tr894
2493 2493 48900.0 3.31050 -6597.765990 0.501219 49.756485 0 4.567093 0.0 0.523651 ... -9.982728 0.0 -325.830854 0.0 0.0 0.0 -652.938934 1.253588 -1.430779 tr894
2494 2494 49000.0 3.41650 -6464.407265 0.496882 50.625585 0 4.832824 0.0 0.524553 ... -8.364190 0.0 -325.062802 0.0 0.0 0.0 -650.974420 1.921487 -1.013259 tr894
2495 2495 49100.0 3.10904 -6387.091755 0.548666 40.740404 0 3.316020 0.0 0.581348 ... -10.967092 0.0 -328.988742 0.0 0.0 0.0 -659.262743 1.190546 -1.329242 tr894
2496 2496 49200.0 3.08787 -6544.665697 0.542510 41.859331 0 5.020698 0.0 0.864652 ... -9.712455 0.0 -326.961086 0.0 0.0 0.0 -653.968884 1.103267 -1.117054 tr894
2497 2497 49300.0 3.08903 -6565.835977 0.519321 46.210480 0 4.056949 0.0 0.782764 ... -10.167152 0.0 -325.731478 0.0 0.0 0.0 -649.584897 1.347216 -1.148235 tr894
2498 2498 49400.0 2.86923 -6705.263150 0.478287 54.436844 0 5.751120 0.0 0.550526 ... -8.520623 0.0 -326.361710 0.0 0.0 0.0 -647.484556 0.969805 -1.272144 tr894
2499 2499 49500.0 3.75897 -6463.716890 0.445395 61.517373 0 6.437783 0.0 0.522887 ... -8.618340 0.0 -320.399867 0.0 0.0 0.0 -635.290484 1.147018 -1.649743 tr894
2500 2500 49600.0 3.39286 -6501.433057 0.485665 52.908148 0 4.294195 0.0 0.505823 ... -10.501468 0.0 -323.573172 0.0 0.0 0.0 -647.184033 1.221193 -1.348389 tr894
2501 2501 49700.0 3.32162 -6391.616071 0.532840 43.647727 0 4.949311 0.0 0.640128 ... -9.125481 0.0 -329.320986 0.0 0.0 0.0 -656.508771 0.993919 -0.986276 tr894
2502 2502 49800.0 3.28345 -6563.961634 0.519311 46.212290 0 5.139785 0.0 0.632014 ... -9.679081 0.0 -325.602523 0.0 0.0 0.0 -647.098837 0.616816 -0.979727 tr894
2503 2503 49900.0 3.04609 -6472.654667 0.524059 45.303913 0 5.039938 0.0 0.796512 ... -9.205125 0.0 -324.496863 0.0 0.0 0.0 -647.414385 1.401103 -0.722207 tr894
2504 2504 50000.0 3.99691 -6555.354241 0.470289 56.118701 0 3.803515 0.0 0.692461 ... -8.788567 0.0 -329.318347 0.0 0.0 0.0 -652.710108 0.573119 -1.284276 tr894

2505 rows × 26 columns


In [ ]: