notebook.community

Edit and run



In [65]:

    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime

sns.set(rc={'figure.figsize':(10,6.180)})
sns.set_style("whitegrid")

%matplotlib inline

plt.rcParams['figure.figsize'] = [16.18033, 10]    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100



In [84]:

    
def my_reorder(a, first):
    # move first to the top. and keep the rest
    new_order = first.copy()
    for col in a:
        if col not in first:
            new_order.append(col)
    return new_order

def read_pdb(pre, name, run=30, rerun=2):
    all_data = []
    if run == -1:
        run_list = ["native"]
    else:
        run_list = list(range(run))
    for i in run_list:
        if rerun == -1:
            rerun_list = ["rerun"]
        else:
            rerun_list = list(range(rerun))
        for j in rerun_list:
            # pre = "/Users/weilu/Research/server/nov_2018/iterative_optimization_4/all_simulations/"
            location = pre + f"{name}/simulation/{i}/{j}/"
            try:
                wham = pd.read_csv(location+"wham.dat")
            except:
                print(f"PDB: {name}, Run: {i}, Rerun: {j} not exist")
                print(location+"wham.dat")
                continue
            wham.columns = wham.columns.str.strip()
            remove_columns = ['Tc', 'Energy']
            wham = wham.drop(remove_columns, axis=1)
            energy = pd.read_csv(location+"energy.dat")
            energy.columns = energy.columns.str.strip()
            remove_columns = ['Steps', 'Shake', 'Excluded', 'Helix', 'AMH-Go', 'Vec_FM', 'SSB']
            energy = energy.drop(remove_columns, axis=1)
            data = pd.concat([wham, energy], axis=1).assign(Repeat=i, Run=j)
            all_data.append(data)
    data = pd.concat(all_data).reset_index(drop=True)
    data = data.reindex(columns=my_reorder(data.columns, ["Steps", "Qw", "VTotal", "Run", "Repeat"]))
    print(name, len(data))
    return data



In [85]:

    
dataset = {"old":"1R69, 1UTG, 3ICB, 256BA, 4CPV, 1CCR, 2MHR, 1MBA, 2FHA".split(", "),
            "new":"1FC2C, 1ENH, 2GB1, 2CRO, 1CTF, 4ICB".split(", "),
            "test":["t089", "t120", "t251", "top7", "1ubq", "t0766", "t0778", "t0782", "t0792", "t0803", "t0815", "t0833", "t0842", "t0844"]}
dataset["combined"] = dataset["old"] + dataset["new"]

def get_complete_data(pre, folder_list, pdb_list, formatName=True, **kwargs):
    complete_all_data = []
    for folder in folder_list:
        # pre = "/Users/weilu/Research/server/april_2019/iterative_optimization_old_set/"
        pre_folder = f"{pre}{folder}/"
        all_data = []
        for p in pdb_list:
            if formatName:
                name = p.lower()[:4]
            else:
                name = p
            tmp = read_pdb(pre_folder, name, **kwargs)
            all_data.append(tmp.assign(Name=name))
        data = pd.concat(all_data)
        complete_all_data.append(data.assign(Folder=folder))
    data = pd.concat(complete_all_data)
    data = data.reindex(columns=my_reorder(data.columns, ["Name", "Folder"]))
    return data



In [86]:

    
pre = "/Users/weilu/Research/server/april_second_2019/test_set/"
# folder_list = ["multi_iter0", "original"]
folder_list = ["multi_iter0_with_minimization", "original_with_minimization"]
pdb_list = ['T0759-D1', 'T0953s2-D1', 'T0943-D1', 'T0773-D1', 'T0816-D1', 'T0854-D2', 'T0767-D1', 'T0853-D1', 'T0958-D1', 'T0834-D2', 'T0960-D3', 'T0862-D1', 'T0912-D3', 'T0898-D1', 'T0824-D1', 'T0782-D1', 'T0830-D2', 'T0761-D2', 'T0968s1-D1', 'T0870-D1', 'T0838-D1', 'T0803-D1']
data = get_complete_data(pre, folder_list, pdb_list, run=-1, rerun=-1, formatName=False)
data.Steps = data.Steps.astype(int)
native = data









    



T0759-D1 27
T0953s2-D1 27
T0943-D1 27
T0773-D1 27
T0816-D1 27
T0854-D2 27
T0767-D1 27
T0853-D1 27
T0958-D1 27
T0834-D2 27
T0960-D3 27
T0862-D1 27
T0912-D3 27
T0898-D1 27
T0824-D1 27
T0782-D1 27
T0830-D2 27
T0761-D2 27
T0968s1-D1 27
T0870-D1 27
T0838-D1 27
T0803-D1 27
T0759-D1 27
T0953s2-D1 27
T0943-D1 27
T0773-D1 27
T0816-D1 27
T0854-D2 27
T0767-D1 27
T0853-D1 27
T0958-D1 27
T0834-D2 27
T0960-D3 27
T0862-D1 27
T0912-D3 27
T0898-D1 27
T0824-D1 27
T0782-D1 27
T0830-D2 27
T0761-D2 27
T0968s1-D1 27
T0870-D1 27
T0838-D1 27
T0803-D1 27



In [89]:

    
native_energy = native.query("Steps == 0")



In [141]:

    
pre = "/Users/weilu/Research/server/april_second_2019/test_set/"
# folder_list = ["multi_iter0", "original"]
folder_list = ["multi_iter0_with_minimization", "original_with_minimization"]
pdb_list = ['T0759-D1', 'T0953s2-D1', 'T0943-D1', 'T0773-D1', 'T0816-D1', 'T0854-D2', 'T0767-D1', 'T0853-D1', 'T0958-D1', 'T0834-D2', 'T0960-D3', 'T0862-D1', 'T0912-D3', 'T0898-D1', 'T0824-D1', 'T0782-D1', 'T0830-D2', 'T0761-D2', 'T0968s1-D1', 'T0870-D1', 'T0838-D1', 'T0803-D1']
data = get_complete_data(pre, folder_list, pdb_list, run=10, rerun=-1, formatName=False)
data.Steps = data.Steps.astype(int)
subset_data = data.query("Steps % 80000 == 0")









    



T0759-D1 10020
T0953s2-D1 10020
T0943-D1 10020
T0773-D1 10020
T0816-D1 10020
T0854-D2 10020
T0767-D1 10020
T0853-D1 10020
T0958-D1 10020
T0834-D2 10020
T0960-D3 10020
T0862-D1 10020
T0912-D3 10020
T0898-D1 9888
T0824-D1 9765
T0782-D1 8885
T0830-D2 7739
T0761-D2 8281
T0968s1-D1 8142
T0870-D1 6737
T0838-D1 6532
T0803-D1 6236
T0759-D1 10020
T0953s2-D1 10020
T0943-D1 10020
T0773-D1 10020
T0816-D1 10020
T0854-D2 10020
T0767-D1 10020
T0853-D1 10020
T0958-D1 10020
T0834-D2 10020
T0960-D3 10020
T0862-D1 10020
T0912-D3 10020
T0898-D1 10020
T0824-D1 10020
T0782-D1 10020
T0830-D2 9776
T0761-D2 9511
T0968s1-D1 9962
T0870-D1 10020
T0838-D1 9880
T0803-D1 9681



In [142]:

    
data_today = pd.concat([data, native])



In [143]:

    
today = datetime.datetime.today().strftime('%m-%d')
print(today)
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")
# a = pd.read_csv("/Users/weilu/Research/data/optimization/energy_03-31.csv", index_col=0)
# b = pd.read_csv("/Users/weilu/Research/data/optimization/energy_04-01.csv", index_col=0)
# data = pd.concat([a,b])
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")



In [156]:



In [157]:

    
prediction_energy = data.groupby(["Name", "Repeat", "Folder"]).tail(1).query("Steps > 4e6")
y_show = "Water"
g = sns.FacetGrid(prediction_energy, col="Name",col_wrap=4,  hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Qw", y_show, alpha=0.5).add_legend())
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'multi_iter0_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="blue", linewidth=4)
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'original_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="orange", linewidth=4)
for ax in g.axes:
    name= ax.title.get_text().split(" ")[-1]
    # print(name)
    energy = native_energy.query(f"Name == '{name}'")[y_show].iloc[0]
    ax.axhline(energy, ls="--", color="blue", linewidth=4)
    energy = native_energy.query(f"Name == '{name}'")[y_show].iloc[1]
    ax.axhline(energy, ls="--", color="orange", linewidth=4)



In [130]:

    
native_energy.query("Name == 'T0759-D1'")["VTotal"]









    Out[130]:





0   -269.640860
0   -302.509194
Name: VTotal, dtype: float64



In [129]:

    
native_energy.query("Name == 'T0759-D1'")["VTotal"].iloc[1]









    Out[129]:





-302.509193990993



In [135]:

    
native_energy.query("Name == 'T0773-D1'")









    Out[135]:







  
    
      
      Name
      Folder
      Steps
      Qw
      VTotal
      Run
      Repeat
      Rg
      Chain
      Chi
      Rama
      DSSP
      P_AP
      Water
      Burial
      Frag_Mem
      Membrane
      Ebond
      Epair
    
  
  
    
      0
      T0773-D1
      multi_iter0_with_minimization
      0
      0.962184
      -816.800587
      rerun
      native
      10.281000
      5.54499
      2.321355
      -273.582624
      -34.583105
      -20.634047
      -44.846211
      -40.147558
      -410.873387
      0
      0.766555
      0.352036
    
    
      0
      T0773-D1
      original_with_minimization
      0
      0.952982
      -848.641638
      rerun
      native
      10.292924
      4.79076
      2.247894
      -274.704599
      -34.891223
      -19.586545
      -55.332057
      -60.135171
      -411.030696
      0
      0.843432
      0.382778



In [126]:

    
for ax in g.axes:
    name= ax.title.get_text().split(" ")[-1]
    print(name)
    energy = native_energy.query(f"Name == '{name}'")["VTotal"].iloc[0]
    ax.axhline(energy, ls="--", color="blue", linewidth=4)
    energy = native_energy.query(f"Name == '{name}'")["VTotal"].iloc[1]
    ax.axhline(energy, ls="--", color="orange", linewidth=4)









    



T0953s2-D1
T0943-D1
T0773-D1
T0816-D1
T0854-D2
T0767-D1
T0853-D1
T0958-D1
T0834-D2
T0960-D3
T0862-D1
T0912-D3
T0898-D1
T0824-D1
T0782-D1
T0830-D2
T0761-D2
T0968s1-D1
T0870-D1
T0838-D1
T0803-D1



In [140]:

    
y_show = "DSSP"
g = sns.FacetGrid(subset_data, col="Name",col_wrap=4,  hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Qw", y_show, alpha=0.5).add_legend())
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'multi_iter0_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="blue", linewidth=4)
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'original_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="orange", linewidth=4)
for ax in g.axes:
    name= ax.title.get_text().split(" ")[-1]
    # print(name)
    energy = native_energy.query(f"Name == '{name}'")[y_show].iloc[0]
    ax.axhline(energy, ls="--", color="blue", linewidth=4)
    energy = native_energy.query(f"Name == '{name}'")[y_show].iloc[1]
    ax.axhline(energy, ls="--", color="orange", linewidth=4)



In [139]:

    
y_show = "Frag_Mem"
g = sns.FacetGrid(subset_data, col="Name",col_wrap=4,  hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Qw", y_show, alpha=0.5).add_legend())
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'multi_iter0_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="blue", linewidth=4)
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'original_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="orange", linewidth=4)
for ax in g.axes:
    name= ax.title.get_text().split(" ")[-1]
    # print(name)
    energy = native_energy.query(f"Name == '{name}'")[y_show].iloc[0]
    ax.axhline(energy, ls="--", color="blue", linewidth=4)
    energy = native_energy.query(f"Name == '{name}'")[y_show].iloc[1]
    ax.axhline(energy, ls="--", color="orange", linewidth=4)



In [137]:

    
y_show = "Burial"
g = sns.FacetGrid(subset_data, col="Name",col_wrap=4,  hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Qw", y_show, alpha=0.5).add_legend())
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'multi_iter0_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="blue", linewidth=4)
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'original_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="orange", linewidth=4)
for ax in g.axes:
    name= ax.title.get_text().split(" ")[-1]
    # print(name)
    energy = native_energy.query(f"Name == '{name}'")[y_show].iloc[0]
    ax.axhline(energy, ls="--", color="blue", linewidth=4)
    energy = native_energy.query(f"Name == '{name}'")[y_show].iloc[1]
    ax.axhline(energy, ls="--", color="orange", linewidth=4)



In [136]:

    
y_show = "Water"
g = sns.FacetGrid(subset_data, col="Name",col_wrap=4,  hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Qw", y_show, alpha=0.5).add_legend())
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'multi_iter0_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="blue", linewidth=4)
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'original_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="orange", linewidth=4)
for ax in g.axes:
    name= ax.title.get_text().split(" ")[-1]
    # print(name)
    energy = native_energy.query(f"Name == '{name}'")[y_show].iloc[0]
    ax.axhline(energy, ls="--", color="blue", linewidth=4)
    energy = native_energy.query(f"Name == '{name}'")[y_show].iloc[1]
    ax.axhline(energy, ls="--", color="orange", linewidth=4)



In [131]:

    
g = sns.FacetGrid(subset_data, col="Name",col_wrap=4,  hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Qw", "VTotal", alpha=0.5).add_legend())
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'multi_iter0_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="blue", linewidth=4)
# energy = native_energy.query("Name == 'T0759-D1' and Folder == 'original_with_minimization'")["VTotal"][0]
# g.axes[0].axhline(energy, ls="--", color="orange", linewidth=4)
for ax in g.axes:
    name= ax.title.get_text().split(" ")[-1]
    print(name)
    energy = native_energy.query(f"Name == '{name}'")["VTotal"].iloc[0]
    ax.axhline(energy, ls="--", color="blue", linewidth=4)
    energy = native_energy.query(f"Name == '{name}'")["VTotal"].iloc[1]
    ax.axhline(energy, ls="--", color="orange", linewidth=4)









    



T0953s2-D1
T0943-D1
T0773-D1
T0816-D1
T0854-D2
T0767-D1
T0853-D1
T0958-D1
T0834-D2
T0960-D3
T0862-D1
T0912-D3
T0898-D1
T0824-D1
T0782-D1
T0830-D2
T0761-D2
T0968s1-D1
T0870-D1
T0838-D1
T0803-D1



In [ ]:

    
g.axes[0].axhline(native_energy.query("Name == 'T0759-D1' and Folder == 'multi_iter0_with_minimization'")["VTotal"], ls="--")



In [103]:

    
vtotal = native_energy.query("Name == 'T0759-D1' and Folder == 'multi_iter0_with_minimization'")["VTotal"][0]



In [102]:

    
vtotal[0]









    Out[102]:





-269.64085959520503



In [98]:

    
native_energy.query("Name == 'T0759-D1' and Folder == 'multi_iter0_with_minimization'")["VTotal"]









    Out[98]:





0   -269.64086
Name: VTotal, dtype: float64



In [97]:

    
native_energy.query("Name == 'T0759-D1' and Folder == 'original_with_minimization'")









    Out[97]:







  
    
      
      Name
      Folder
      Steps
      Qw
      VTotal
      Run
      Repeat
      Rg
      Chain
      Chi
      Rama
      DSSP
      P_AP
      Water
      Burial
      Frag_Mem
      Membrane
      Ebond
      Epair
    
  
  
    
      0
      T0759-D1
      original_with_minimization
      0
      0.944458
      -302.509194
      rerun
      native
      8.987343
      2.19446
      0.828416
      -125.067232
      0.0
      -1.30635
      -11.179329
      -30.917603
      -137.061555
      0
      0.565642
      0.212432



In [91]:

    
native_energy.shape









    Out[91]:





(44, 19)



In [83]:

    
native_energy









    Out[83]:





[]



In [82]:

    
g = sns.FacetGrid(subset_data, col="Name",col_wrap=4,  hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Qw", "VTotal", alpha=0.5).add_legend())
g.axes[0].axhline(-510)









    Out[82]:





<matplotlib.lines.Line2D at 0x1a30667be0>



In [71]:

    
g = sns.FacetGrid(subset_data, col="Name",col_wrap=4,  hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Qw", "Water", alpha=0.5).add_legend())



In [72]:

    
g = sns.FacetGrid(subset_data, col="Name",col_wrap=4,  hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Frag_Mem", "Water", alpha=0.5).add_legend())



In [6]:

    
pre = "/Users/weilu/Research/server/april_2019/iterative_optimization_combined_train_set_with_frag/"
folder_list = ["multi_iter0"]
pdb_list = dataset["combined"]
data = get_complete_data(pre, folder_list, pdb_list, run=30, rerun=1)
data.Steps = data.Steps.astype(int)
subset_data = data.query("Steps % 80000 == 0")
today = datetime.datetime.today().strftime('%m-%d')
print(today)
# data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")
# a = pd.read_csv("/Users/weilu/Research/data/optimization/energy_03-31.csv", index_col=0)
# b = pd.read_csv("/Users/weilu/Research/data/optimization/energy_04-01.csv", index_col=0)
# data = pd.concat([a,b])
data.reset_index(drop=True).to_csv(f"/Users/weilu/Research/data/optimization/energy_{today}.csv")









    



1r69 30000
1utg 30000
3icb 30000
256b 30000
4cpv 30000
1ccr 30000
2mhr 30000
1mba 19305
2fha 16112
1fc2 60000
1enh 60000
2gb1 60000
2cro 60000
1ctf 60000
4icb 60000






    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-6-6ab97228223d> in <module>
      3 pdb_list = dataset["combined"]
      4 data = get_complete_data(pre, folder_list, pdb_list, run=30, rerun=1)
----> 5 subset_data = data.query("Steps % 80000 == 0")
      6 today = datetime.datetime.today().strftime('%m-%d')
      7 print(today)

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/frame.py in query(self, expr, inplace, **kwargs)
   3086         kwargs['level'] = kwargs.pop('level', 0) + 1
   3087         kwargs['target'] = None
-> 3088         res = self.eval(expr, **kwargs)
   3089 
   3090         try:

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/frame.py in eval(self, expr, inplace, **kwargs)
   3201             kwargs['target'] = self
   3202         kwargs['resolvers'] = kwargs.get('resolvers', ()) + tuple(resolvers)
-> 3203         return _eval(expr, inplace=inplace, **kwargs)
   3204 
   3205     def select_dtypes(self, include=None, exclude=None):

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/eval.py in eval(expr, parser, engine, truediv, local_dict, global_dict, resolvers, level, target, inplace)
    292 
    293         parsed_expr = Expr(expr, engine=engine, parser=parser, env=env,
--> 294                            truediv=truediv)
    295 
    296         # construct the engine and evaluate the parsed expression

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in __init__(self, expr, engine, parser, env, truediv, level)
    747         self.env.scope['truediv'] = truediv
    748         self._visitor = _parsers[parser](self.env, self.engine, self.parser)
--> 749         self.terms = self.parse()
    750 
    751     @property

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in parse(self)
    764     def parse(self):
    765         """Parse an expression"""
--> 766         return self._visitor.visit(self.expr)
    767 
    768     @property

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in visit(self, node, **kwargs)
    329         method = 'visit_' + node.__class__.__name__
    330         visitor = getattr(self, method)
--> 331         return visitor(node, **kwargs)
    332 
    333     def visit_Module(self, node, **kwargs):

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in visit_Module(self, node, **kwargs)
    335             raise SyntaxError('only a single expression is allowed')
    336         expr = node.body[0]
--> 337         return self.visit(expr, **kwargs)
    338 
    339     def visit_Expr(self, node, **kwargs):

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in visit(self, node, **kwargs)
    329         method = 'visit_' + node.__class__.__name__
    330         visitor = getattr(self, method)
--> 331         return visitor(node, **kwargs)
    332 
    333     def visit_Module(self, node, **kwargs):

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in visit_Expr(self, node, **kwargs)
    338 
    339     def visit_Expr(self, node, **kwargs):
--> 340         return self.visit(node.value, **kwargs)
    341 
    342     def _rewrite_membership_op(self, node, left, right):

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in visit(self, node, **kwargs)
    329         method = 'visit_' + node.__class__.__name__
    330         visitor = getattr(self, method)
--> 331         return visitor(node, **kwargs)
    332 
    333     def visit_Module(self, node, **kwargs):

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in visit_Compare(self, node, **kwargs)
    664             op = self.translate_In(ops[0])
    665             binop = ast.BinOp(op=op, left=node.left, right=comps[0])
--> 666             return self.visit(binop)
    667 
    668         # recursive case: we have a chained comparison, a CMP b CMP c, etc.

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in visit(self, node, **kwargs)
    329         method = 'visit_' + node.__class__.__name__
    330         visitor = getattr(self, method)
--> 331         return visitor(node, **kwargs)
    332 
    333     def visit_Module(self, node, **kwargs):

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in visit_BinOp(self, node, **kwargs)
    433 
    434     def visit_BinOp(self, node, **kwargs):
--> 435         op, op_class, left, right = self._maybe_transform_eq_ne(node)
    436         left, right = self._maybe_downcast_constants(left, right)
    437         return self._maybe_evaluate_binop(op, op_class, left, right)

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in _maybe_transform_eq_ne(self, node, left, right)
    370     def _maybe_transform_eq_ne(self, node, left=None, right=None):
    371         if left is None:
--> 372             left = self.visit(node.left, side='left')
    373         if right is None:
    374             right = self.visit(node.right, side='right')

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in visit(self, node, **kwargs)
    329         method = 'visit_' + node.__class__.__name__
    330         visitor = getattr(self, method)
--> 331         return visitor(node, **kwargs)
    332 
    333     def visit_Module(self, node, **kwargs):

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in visit_BinOp(self, node, **kwargs)
    435         op, op_class, left, right = self._maybe_transform_eq_ne(node)
    436         left, right = self._maybe_downcast_constants(left, right)
--> 437         return self._maybe_evaluate_binop(op, op_class, left, right)
    438 
    439     def visit_Div(self, node, **kwargs):

~/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/computation/expr.py in _maybe_evaluate_binop(self, op, op_class, lhs, rhs, eval_in_python, maybe_eval_in_python)
    410                             " '{lhs}' and '{rhs}'".format(op=res.op,
    411                                                           lhs=lhs.type,
--> 412                                                           rhs=rhs.type))
    413 
    414         if self.engine != 'pytables':

TypeError: unsupported operand type(s) for %: 'object' and '<class 'int'>'



In [19]:

    
a = data["Steps"].unique()



In [ ]:



In [39]:

    
from Bio.PDB.PDBParser import PDBParser
pdbFileLocation = '/Users/weilu/Research/database/chosen/T0869-D1.pdb'
structure = PDBParser().get_structure("a", pdbFileLocation)
len(list(structure.get_residues()))



In [47]:

    
data = pd.read_csv("/Users/weilu/Research/server/april_second_2019/test_set/seq_info.dat", names=["i", "protein", "length"], sep=" ")



In [56]:

    
d = data.sort_values("length").reset_index(drop=True).reset_index().drop("i", axis=1)



In [57]:

    
d.to_csv("/Users/weilu/Research/server/april_second_2019/test_set/seq_info.csv")



In [60]:

    
d = pd.read_csv("/Users/weilu/Research/server/april_second_2019/test_set/seq_info.csv", index_col=0)
d.query("length < 150 and index % 2 == 0")



In [64]:

    
d.query("length < 150 and index % 2 == 0")["protein"].tolist()









    Out[64]:





['T0759-D1',
 'T0953s2-D1',
 'T0943-D1',
 'T0773-D1',
 'T0816-D1',
 'T0854-D2',
 'T0767-D1',
 'T0853-D1',
 'T0958-D1',
 'T0834-D2',
 'T0960-D3',
 'T0862-D1',
 'T0912-D3',
 'T0898-D1',
 'T0824-D1',
 'T0782-D1',
 'T0830-D2',
 'T0761-D2',
 'T0968s1-D1',
 'T0870-D1',
 'T0838-D1',
 'T0803-D1']



In [34]:

    
g = sns.FacetGrid(subset_data, col="Name",col_wrap=4,  hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Qw", "Water", alpha=0.5).add_legend())



In [33]:

    
g = sns.FacetGrid(subset_data, col="Name",col_wrap=4,  hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Steps", "Water", alpha=0.5).add_legend())



In [32]:

    
g = sns.FacetGrid(subset_data, col="Name",col_wrap=4,  hue="Folder", sharey=False, sharex=False)
g = (g.map(plt.scatter, "Frag_Mem", "Water", alpha=0.5).add_legend())



In [ ]:

	Name	Folder	Steps	Qw	VTotal	Run	Repeat	Rg	Chain	Chi	Rama	DSSP	P_AP	Water	Burial	Frag_Mem	Membrane	Ebond	Epair
0	T0773-D1	multi_iter0_with_minimization	0	0.962184	-816.800587	rerun	native	10.281000	5.54499	2.321355	-273.582624	-34.583105	-20.634047	-44.846211	-40.147558	-410.873387	0	0.766555	0.352036
0	T0773-D1	original_with_minimization	0	0.952982	-848.641638	rerun	native	10.292924	4.79076	2.247894	-274.704599	-34.891223	-19.586545	-55.332057	-60.135171	-411.030696	0	0.843432	0.382778

	index	protein	length
0	0	T0759-D1	34
2	2	T0953s2-D1	44
4	4	T0943-D1	62
6	6	T0773-D1	67
8	8	T0816-D1	68
10	10	T0854-D2	70
12	12	T0767-D1	76
14	14	T0853-D1	76
16	16	T0958-D1	77
18	18	T0834-D2	86
20	20	T0960-D3	89
22	22	T0862-D1	93
24	24	T0912-D3	103
26	26	T0898-D1	106
28	28	T0824-D1	108
30	30	T0782-D1	110
32	32	T0830-D2	111
34	34	T0761-D2	113
36	36	T0968s1-D1	118
38	38	T0870-D1	123
40	40	T0838-D1	126
42	42	T0803-D1	134