notebook.community

Edit and run



In [1]:

    
from Bio.PDB import *
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
import filecmp
# from .. import notebookFunctions

%matplotlib inline
plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2



In [38]:

    
# Root folder of the figure data; get_info and the later cells build their
# paths by prefixing this string.
pre = "/Users/weilu/Dropbox/GlpG_paper_2018/figures/"
# Both paths are derived from `pre` so the root is spelled out only once.
file1 = pre + "picked_structure_pdbs/complete_out/structure_0.pdb"
file2 = pre + "selected_structure_complete/low_e_jun01_out/structure_13.pdb"
# shallow=False forces a byte-by-byte comparison. The default (shallow) mode
# reports True as soon as file type, size and modification time agree, without
# reading the contents.
filecmp.cmp(file1, file2, shallow=False)









    Out[38]:





True



In [ ]:



In [32]:



In [34]:

    
# Show the row whose "index" column equals 2.
# NOTE(review): `data` is not assigned by any cell visible in this notebook
# (get_info only binds a local of the same name), so this cell relies on a
# frame already present in the kernel -- confirm where it is loaded.
data.query("index == 2")









    Out[34]:







  
    
      
      index
      AMH
      AMH-Go
      AMH_3H
      AMH_4H
      BiasTo
      DisReal
      Dis_h56
      Distance
      Energy
      ...
      z_average
      z_h1
      z_h2
      z_h3
      z_h4
      z_h5
      z_h6
      BiasedEnergy
      BiasEnergy
      Energy_with_all_bias
    
  
  
    
      2
      2
      NaN
      -368.794764
      NaN
      -280.810983
      274.0
      264.425563
      68.20642
      120.60384
      -673.649006
      ...
      -16.978985
      -18.064276
      -14.082236
      -20.113153
      -19.360805
      -11.60792
      -19.682536
      -451.72726
      1.833397
      -449.893863
    
  

1 rows × 52 columns



In [30]:

    
# List the column labels of `data`.
# NOTE(review): `data` is not assigned by any cell visible in this notebook;
# it has to exist in the kernel already.
data.columns









    Out[30]:





Index(['Unnamed: 0', 'level_0', 'AMH', 'AMH-Go', 'AMH_3H', 'AMH_4H', 'BiasTo',
       'DisReal', 'Dis_h56', 'Distance', 'Energy', 'Lipid', 'Lipid1',
       'Lipid10', 'Lipid11', 'Lipid12', 'Lipid13', 'Lipid14', 'Lipid15',
       'Lipid2', 'Lipid3', 'Lipid4', 'Lipid5', 'Lipid6', 'Lipid7', 'Lipid8',
       'Lipid9', 'Membrane', 'Qw', 'Rg', 'Run', 'Step', 'Temp', 'TempT',
       'TotalE', 'abs_z_average', 'index', 'rg1', 'rg2', 'rg3', 'rg4', 'rg5',
       'rg6', 'rg_all', 'z_average', 'z_h1', 'z_h2', 'z_h3', 'z_h4', 'z_h5',
       'z_h6', 'BiasedEnergy', 'BiasEnergy', 'Energy_with_all_bias'],
      dtype='object')



In [202]:



In [251]:

    
def get_info(name=None, n_picked=5, n_candidates=20):
    """Summarise the hand-picked structures of one state as "mean +/- std" text.

    For every picked PDB file of ``name`` the candidate PDB files of the
    corresponding target folder are scanned until a file with identical
    contents is found; the row of the target CSV with the same number is then
    collected. Mean and standard deviation of a fixed set of columns are
    computed over the collected rows and formatted with two decimals, joined
    by the LaTeX plus-minus macro.

    Relies on the notebook-level globals ``pre`` (root folder, string prefix)
    and ``target_dic`` (maps ``name`` to the target folder / CSV stem).

    Parameters
    ----------
    name : str
        Key of ``target_dic``; also the sub-folder of ``picked_structure_pdbs``
        that holds ``structure_<j>.pdb``.
    n_picked : int, default 5
        Number of picked structures (``structure_0`` .. ``structure_<n-1>``).
    n_candidates : int, default 20
        Number of candidate structures scanned in the target folder.

    Returns
    -------
    pandas.Series
        Indexed by DisReal, z_average, Qw, Dis_h56, average_z_h56, Lipid, Rg,
        Membrane and AMH-Go; every value is a formatted string.

    Raises
    ------
    KeyError
        If ``name`` is not a key of ``target_dic``.
    ValueError
        If none of the picked structures matches any candidate.
    """
    import warnings

    target = target_dic[name]
    data = pd.read_csv(pre + f"selected_structure_complete/{target}.csv")
    # Swap the "index" column stored in the CSV for a fresh 0..n-1 counter.
    # Presumably row i then describes structure_<i>.pdb -- verify against the
    # code that wrote the CSV.
    data = data.drop(["index"], axis=1).reset_index()

    chosen_list = []
    for j in range(n_picked):
        file1 = pre + f"picked_structure_pdbs/{name}/structure_{j}.pdb"
        for i in range(n_candidates):
            file2 = pre + f"selected_structure_complete/{target}/structure_{i}.pdb"
            # shallow=False compares the file contents. The default shallow
            # mode accepts two files as equal when only their size and
            # modification time agree, which could pair a pick with the
            # wrong candidate.
            if filecmp.cmp(file1, file2, shallow=False):
                chosen_list.append(data.query(f"index == {i}"))
                break
        else:
            # Previously an unmatched pick was dropped without notice, which
            # silently shrinks the sample the statistics are computed on.
            warnings.warn(
                f"no candidate in {target!r} matches picked structure {j} of {name!r}"
            )

    if not chosen_list:
        raise ValueError(
            f"none of the picked structures of {name!r} matched a candidate in {target!r}"
        )

    chosen = pd.concat(chosen_list)
    # Mean z of the two columns z_h5 and z_h6.
    chosen["average_z_h56"] = (chosen["z_h5"] + chosen["z_h6"]) / 2

    info = chosen[["DisReal", "z_average", "Qw", "Dis_h56", "average_z_h56",
                   "Lipid", "Rg", "Membrane", "AMH-Go"]]
    info = info.describe().loc[["mean", "std"]].T
    # Raw string: "\p" is not a valid escape sequence in a normal literal.
    out = info["mean"].map('{:.2f}'.format) + r"$\pm$" + info["std"].map('{:.2f}'.format)
    return out



In [258]:

    
# Maps the folder name of each set of picked structures to the folder / CSV
# stem of the candidate set it was picked from (used by get_info).
target_dic = {"complete_out":"low_e_jun01_out",
            "low_e_h12":"low_e_jun01_h12",
             "low_e_h34":"low_e_jun01_h34",
             "low_e_h56":"low_e_jun01_h56",
             "low_e_pre_transition":"low_e_jun01_pre",
             "low_e_transition":"low_e_jun01_transition",
             "low_e_post_transition":"low_e_jun01_post_transition",
             "other_path/low_e_path1":"other_path/low_e_path1",
             "other_path/low_e_path2":"other_path/low_e_path2",
             "native":"native"}
# Label shown in the "name" column of the summary table for each folder name.
# All backslashes are written as "\\": a bare "\g" is an invalid escape
# sequence (the resulting text is unchanged).
name_dic = {"complete_out":"U2",
            "low_e_h12":"U1",
             "low_e_h34":"I2",
             "low_e_h56":"I1",
             "low_e_pre_transition":"$\\alpha$",
             "low_e_transition":"$\\beta$",
             "low_e_post_transition":"$\\gamma$",
             "other_path/low_e_path1":"path1",
             "other_path/low_e_path2":"path2",
            "native":"N"}

# States included in the table, in row order.
name_list = ["complete_out", "low_e_h12", "low_e_h34", "low_e_h56", 
             "low_e_post_transition", "low_e_transition","low_e_pre_transition", "native"]
# Alternative selection that swaps "native" for the two other_path entries:
# name_list = ["complete_out", "low_e_h12", "low_e_h34", "low_e_h56", 
#              "low_e_post_transition", "low_e_transition","low_e_pre_transition",
#             "other_path/low_e_path1", "other_path/low_e_path2"]
all_info_list = []
for name in name_list:
    a = get_info(name)
    # Append the display label as one more entry of the per-state Series.
    a["name"] = name_dic[name]
    all_info_list.append(a)
# One column per state -> transpose to one row per state. The
# set_index/reset_index round trip moves "name" to the first column.
all_info = pd.concat(all_info_list, axis=1).T.set_index('name').reset_index()



In [259]:

    
# Display the name column together with DisReal, z_average, Qw, Dis_h56 and
# average_z_h56 for every state in all_info.
all_info[["name", "DisReal", "z_average", "Qw", "Dis_h56", "average_z_h56"]]









    Out[259]:







  
    
      
      name
      DisReal
      z_average
      Qw
      Dis_h56
      average_z_h56
    
  
  
    
      0
      U2
      271.92$\pm$7.33
      -18.89$\pm$1.80
      0.09$\pm$0.00
      73.53$\pm$3.32
      -17.13$\pm$1.18
    
    
      1
      U1
      226.45$\pm$2.65
      -12.60$\pm$1.45
      0.09$\pm$0.01
      73.45$\pm$2.34
      -17.01$\pm$0.66
    
    
      2
      I2
      145.80$\pm$6.41
      -9.66$\pm$0.82
      0.16$\pm$0.01
      67.45$\pm$4.91
      -16.10$\pm$1.33
    
    
      3
      I1
      83.98$\pm$7.36
      -6.58$\pm$0.85
      0.42$\pm$0.02
      61.94$\pm$6.30
      -17.44$\pm$2.68
    
    
      4
      $\gamma$
      67.08$\pm$1.55
      -5.51$\pm$0.37
      0.46$\pm$0.03
      53.40$\pm$5.05
      -13.78$\pm$1.07
    
    
      5
      $\beta$
      60.19$\pm$1.87
      -3.91$\pm$0.59
      0.41$\pm$0.06
      40.16$\pm$17.73
      -10.14$\pm$3.53
    
    
      6
      $\alpha$
      53.38$\pm$1.06
      -3.14$\pm$0.58
      0.59$\pm$0.03
      27.51$\pm$3.53
      -5.60$\pm$1.34
    
    
      7
      N
      34.94$\pm$2.84
      -2.55$\pm$0.35
      0.75$\pm$0.03
      26.52$\pm$2.10
      -5.45$\pm$0.24



In [254]:

    
# Display the name column together with the AMH-Go, Membrane, Rg and Lipid
# columns for every state in all_info.
all_info[["name", "AMH-Go", "Membrane", "Rg", "Lipid"]]









    Out[254]:







  
    
      
      name
      AMH-Go
      Membrane
      Rg
      Lipid
    
  
  
    
      0
      U2
      -369.97$\pm$1.28
      -26.65$\pm$2.83
      4.33$\pm$2.49
      0.00$\pm$0.00
    
    
      1
      U1
      -374.87$\pm$2.37
      -32.25$\pm$1.41
      3.60$\pm$0.93
      0.01$\pm$0.00
    
    
      2
      I2
      -397.57$\pm$4.88
      -35.24$\pm$1.88
      3.83$\pm$1.07
      -1.31$\pm$0.35
    
    
      3
      I1
      -437.50$\pm$2.53
      -35.38$\pm$1.32
      6.37$\pm$1.33
      -4.53$\pm$0.71
    
    
      4
      $\gamma$
      -432.65$\pm$6.00
      -38.59$\pm$2.29
      7.15$\pm$0.63
      -6.17$\pm$1.12
    
    
      5
      $\beta$
      -419.80$\pm$16.07
      -38.45$\pm$2.84
      6.98$\pm$1.35
      -7.04$\pm$2.23
    
    
      6
      $\alpha$
      -456.54$\pm$2.54
      -36.62$\pm$1.39
      6.95$\pm$0.51
      -9.24$\pm$0.67
    
    
      7
      N
      -504.54$\pm$2.73
      -36.85$\pm$0.56
      7.40$\pm$0.58
      -12.11$\pm$1.54



In [255]:

    
# Print the first column subset as a LaTeX tabular without the row index.
# escape=False leaves the "$...$" markup stored in the cells untouched.
print(all_info[["name", "DisReal", "z_average", "Qw", "Dis_h56", "average_z_h56"]].to_latex(index=False, escape=False))









    



\begin{tabular}{llllll}
\toprule
     name &          DisReal &        z_average &             Qw &          Dis_h56 &    average_z_h56 \\
\midrule
       U2 &  271.92$\pm$7.33 &  -18.89$\pm$1.80 &  0.09$\pm$0.00 &   73.53$\pm$3.32 &  -17.13$\pm$1.18 \\
       U1 &  226.45$\pm$2.65 &  -12.60$\pm$1.45 &  0.09$\pm$0.01 &   73.45$\pm$2.34 &  -17.01$\pm$0.66 \\
       I2 &  145.80$\pm$6.41 &   -9.66$\pm$0.82 &  0.16$\pm$0.01 &   67.45$\pm$4.91 &  -16.10$\pm$1.33 \\
       I1 &   83.98$\pm$7.36 &   -6.58$\pm$0.85 &  0.42$\pm$0.02 &   61.94$\pm$6.30 &  -17.44$\pm$2.68 \\
 $\gamma$ &   67.08$\pm$1.55 &   -5.51$\pm$0.37 &  0.46$\pm$0.03 &   53.40$\pm$5.05 &  -13.78$\pm$1.07 \\
  $\beta$ &   60.19$\pm$1.87 &   -3.91$\pm$0.59 &  0.41$\pm$0.06 &  40.16$\pm$17.73 &  -10.14$\pm$3.53 \\
 $\alpha$ &   53.38$\pm$1.06 &   -3.14$\pm$0.58 &  0.59$\pm$0.03 &   27.51$\pm$3.53 &   -5.60$\pm$1.34 \\
        N &   34.94$\pm$2.84 &   -2.55$\pm$0.35 &  0.75$\pm$0.03 &   26.52$\pm$2.10 &   -5.45$\pm$0.24 \\
\bottomrule
\end{tabular}



In [261]:

    
# Print the AMH-Go / Membrane / Rg / Lipid subset as a LaTeX tabular without
# the row index. escape=False leaves the "$...$" markup in the cells untouched.
print(all_info[["name", "AMH-Go", "Membrane", "Rg", "Lipid"]].to_latex(index=False, escape=False))









    



\begin{tabular}{lllll}
\toprule
     name &             AMH-Go &         Membrane &             Rg &            Lipid \\
\midrule
       U2 &   -369.97$\pm$1.28 &  -26.65$\pm$2.83 &  4.33$\pm$2.49 &    0.00$\pm$0.00 \\
       U1 &   -374.87$\pm$2.37 &  -32.25$\pm$1.41 &  3.60$\pm$0.93 &    0.01$\pm$0.00 \\
       I2 &   -397.57$\pm$4.88 &  -35.24$\pm$1.88 &  3.83$\pm$1.07 &   -1.31$\pm$0.35 \\
       I1 &   -437.50$\pm$2.53 &  -35.38$\pm$1.32 &  6.37$\pm$1.33 &   -4.53$\pm$0.71 \\
 $\gamma$ &   -432.65$\pm$6.00 &  -38.59$\pm$2.29 &  7.15$\pm$0.63 &   -6.17$\pm$1.12 \\
  $\beta$ &  -419.80$\pm$16.07 &  -38.45$\pm$2.84 &  6.98$\pm$1.35 &   -7.04$\pm$2.23 \\
 $\alpha$ &   -456.54$\pm$2.54 &  -36.62$\pm$1.39 &  6.95$\pm$0.51 &   -9.24$\pm$0.67 \\
        N &   -504.54$\pm$2.73 &  -36.85$\pm$0.56 &  7.40$\pm$0.58 &  -12.11$\pm$1.54 \\
\bottomrule
\end{tabular}



In [209]:

    
# Print every column of all_info as one LaTeX tabular (row index omitted,
# default escaping).
# NOTE(review): the output saved with this cell uses a "±" separator,
# three-decimal means and different row names, so it does not reflect what
# the get_info / name_dic cells in this notebook currently produce -- re-run
# before reusing the table.
print(all_info.to_latex(index=False))









    



\begin{tabular}{llllllllll}
\toprule
            name &      DisReal &   z\_average &         Qw &      Dis\_h56 & average\_z\_h56 &        Lipid &         Rg &     Membrane &         AMH-Go \\
\midrule
    complete out &  271.923±7.3 &  -1.259±0.1 &  0.087±0.0 &   73.532±3.3 &    -1.142±0.1 &    0.003±0.0 &  4.327±2.5 &  -26.646±2.8 &   -369.970±1.3 \\
             h12 &  226.449±2.7 &  -0.840±0.1 &  0.092±0.0 &   73.454±2.3 &    -1.134±0.0 &    0.005±0.0 &  3.599±0.9 &  -32.248±1.4 &   -374.874±2.4 \\
             h34 &  145.800±6.4 &  -0.644±0.1 &  0.163±0.0 &   67.454±4.9 &    -1.074±0.1 &   -1.306±0.3 &  3.827±1.1 &  -35.244±1.9 &   -397.567±4.9 \\
             h56 &   83.985±7.4 &  -0.438±0.1 &  0.419±0.0 &   61.942±6.3 &    -1.163±0.2 &   -4.529±0.7 &  6.369±1.3 &  -35.380±1.3 &   -437.501±2.5 \\
 post\_transition &   67.076±1.6 &  -0.367±0.0 &  0.460±0.0 &   53.396±5.0 &    -0.919±0.1 &   -6.174±1.1 &  7.150±0.6 &  -38.595±2.3 &   -432.651±6.0 \\
      transition &   60.191±1.9 &  -0.261±0.0 &  0.409±0.1 &  40.160±17.7 &    -0.676±0.2 &   -7.038±2.2 &  6.978±1.3 &  -38.446±2.8 &  -419.796±16.1 \\
  pre transition &   53.385±1.1 &  -0.209±0.0 &  0.594±0.0 &   27.514±3.5 &    -0.373±0.1 &   -9.243±0.7 &  6.954±0.5 &  -36.620±1.4 &   -456.538±2.5 \\
          native &   34.944±2.8 &  -0.170±0.0 &  0.752±0.0 &   26.524±2.1 &    -0.363±0.0 &  -12.110±1.5 &  7.399±0.6 &  -36.848±0.6 &   -504.538±2.7 \\
\bottomrule
\end{tabular}



In [208]:

    
# Print the transposed table (one row per quantity, one column per state) as
# LaTeX, without a header row.
# NOTE(review): the output saved with this cell uses a "±" separator and
# different state names than the current get_info / name_dic cells produce --
# re-run before reusing the table.
print(all_info.T.to_latex(header=False))









    



\begin{tabular}{lllllllll}
\toprule
name          &  complete out &           h12 &           h34 &           h56 &  post\_transition &     transition &  pre transition &        native \\
DisReal       &   271.923±7.3 &   226.449±2.7 &   145.800±6.4 &    83.985±7.4 &       67.076±1.6 &     60.191±1.9 &      53.385±1.1 &    34.944±2.8 \\
z\_average     &    -1.259±0.1 &    -0.840±0.1 &    -0.644±0.1 &    -0.438±0.1 &       -0.367±0.0 &     -0.261±0.0 &      -0.209±0.0 &    -0.170±0.0 \\
Qw            &     0.087±0.0 &     0.092±0.0 &     0.163±0.0 &     0.419±0.0 &        0.460±0.0 &      0.409±0.1 &       0.594±0.0 &     0.752±0.0 \\
Dis\_h56       &    73.532±3.3 &    73.454±2.3 &    67.454±4.9 &    61.942±6.3 &       53.396±5.0 &    40.160±17.7 &      27.514±3.5 &    26.524±2.1 \\
average\_z\_h56 &    -1.142±0.1 &    -1.134±0.0 &    -1.074±0.1 &    -1.163±0.2 &       -0.919±0.1 &     -0.676±0.2 &      -0.373±0.1 &    -0.363±0.0 \\
Lipid         &     0.003±0.0 &     0.005±0.0 &    -1.306±0.3 &    -4.529±0.7 &       -6.174±1.1 &     -7.038±2.2 &      -9.243±0.7 &   -12.110±1.5 \\
Rg            &     4.327±2.5 &     3.599±0.9 &     3.827±1.1 &     6.369±1.3 &        7.150±0.6 &      6.978±1.3 &       6.954±0.5 &     7.399±0.6 \\
Membrane      &   -26.646±2.8 &   -32.248±1.4 &   -35.244±1.9 &   -35.380±1.3 &      -38.595±2.3 &    -38.446±2.8 &     -36.620±1.4 &   -36.848±0.6 \\
AMH-Go        &  -369.970±1.3 &  -374.874±2.4 &  -397.567±4.9 &  -437.501±2.5 &     -432.651±6.0 &  -419.796±16.1 &    -456.538±2.5 &  -504.538±2.7 \\
\bottomrule
\end{tabular}



In [64]:

    
# Step-by-step version of the get_info computation for the single state
# "complete_out".
# `name` is assigned here explicitly: the picked-structure path below is built
# from it, and without this line the cell used whatever value an earlier loop
# had left in the kernel, which need not agree with `target`.
name = "complete_out"
target = target_dic[name]
# NOTE(review): `data` is not assigned in this cell or in any visible cell; it
# must already be loaded in the kernel -- confirm its source.
chosen_list = []
for j in range(5):
    file1 = pre + f"picked_structure_pdbs/{name}/structure_{j}.pdb"
    for i in range(20):
        file2 = pre + f"selected_structure_complete/{target}/structure_{i}.pdb"
        # shallow=False compares contents; the default shallow mode can report
        # equality from matching size and modification time alone.
        if filecmp.cmp(file1, file2, shallow=False):
            chosen_list.append(data.query(f"index == {i}"))
            break
chosen = pd.concat(chosen_list)
print(chosen)
# Sum of the z_h5 and z_h6 columns.
chosen["z_h56"] = chosen["z_h5"] + chosen["z_h6"]
info = chosen[["DisReal", "z_average", "Qw", "Dis_h56", "z_h56", "Lipid", "Rg", "Membrane", "AMH-Go"]]
# Keep only mean and std, one row per quantity.
info = info.describe().loc[["mean","std"]].T
out = info["mean"].map('{:.4f}'.format) + "±" + info["std"].map('{:.1f}'.format)



In [76]:

    
# Add a text column that joins the unrounded mean and the std rounded to two
# decimals with a "±" sign.
mean_text = info["mean"].astype(str)
std_text = info["std"].round(2).astype(str)
info["mean_and_std"] = mean_text + "±" + std_text



In [77]:

    
# Display the per-quantity summary frame (mean, std and the combined
# mean_and_std text column).
info









    Out[77]:







  
    
      
      mean
      std
      mean_and_std
    
  
  
    
      DisReal
      271.923053
      7.331435
      271.923052679±7.33
    
    
      z_average
      -18.891734
      1.803124
      -18.8917337445±1.8
    
    
      Qw
      0.087463
      0.002548
      0.0874632048331±0.0
    
    
      Dis_h56
      73.531906
      3.320595
      73.5319055641±3.32
    
    
      z_h56
      -34.259260
      2.360749
      -34.259260334±2.36
    
    
      Lipid
      0.002629
      0.002888
      0.00262884889888±0.0
    
    
      Rg
      4.326651
      2.494501
      4.3266511143±2.49
    
    
      Membrane
      -26.646285
      2.831294
      -26.6462852962±2.83
    
    
      AMH-Go
      -369.970422
      1.278426
      -369.970421797±1.28



In [12]:

    
# Compare each of the five picked "complete_out" structures against `file2`.
# `file2` is not assigned in this cell (the assignment below is commented
# out), so the value left in the kernel by an earlier cell is used.
for i in range(5):
    file1 = pre + f"picked_structure_pdbs/complete_out/structure_{i}.pdb"
#     file2 = pre + f"selected_structure_complete/low_e_jun01_out/structure_{i}.pdb"
    print(filecmp.cmp(file1, file2))









    



True
False
False
False
False



In [ ]:

	name	DisReal	z_average	Qw	Dis_h56	average_z_h56
0	U2	271.92$\pm$7.33	-18.89$\pm$1.80	0.09$\pm$0.00	73.53$\pm$3.32	-17.13$\pm$1.18
1	U1	226.45$\pm$2.65	-12.60$\pm$1.45	0.09$\pm$0.01	73.45$\pm$2.34	-17.01$\pm$0.66
2	I2	145.80$\pm$6.41	-9.66$\pm$0.82	0.16$\pm$0.01	67.45$\pm$4.91	-16.10$\pm$1.33
3	I1	83.98$\pm$7.36	-6.58$\pm$0.85	0.42$\pm$0.02	61.94$\pm$6.30	-17.44$\pm$2.68
4	$\gamma$	67.08$\pm$1.55	-5.51$\pm$0.37	0.46$\pm$0.03	53.40$\pm$5.05	-13.78$\pm$1.07
5	$\beta$	60.19$\pm$1.87	-3.91$\pm$0.59	0.41$\pm$0.06	40.16$\pm$17.73	-10.14$\pm$3.53
6	$\alpha$	53.38$\pm$1.06	-3.14$\pm$0.58	0.59$\pm$0.03	27.51$\pm$3.53	-5.60$\pm$1.34
7	N	34.94$\pm$2.84	-2.55$\pm$0.35	0.75$\pm$0.03	26.52$\pm$2.10	-5.45$\pm$0.24

	name	AMH-Go	Membrane	Rg	Lipid
0	U2	-369.97$\pm$1.28	-26.65$\pm$2.83	4.33$\pm$2.49	0.00$\pm$0.00
1	U1	-374.87$\pm$2.37	-32.25$\pm$1.41	3.60$\pm$0.93	0.01$\pm$0.00
2	I2	-397.57$\pm$4.88	-35.24$\pm$1.88	3.83$\pm$1.07	-1.31$\pm$0.35
3	I1	-437.50$\pm$2.53	-35.38$\pm$1.32	6.37$\pm$1.33	-4.53$\pm$0.71
4	$\gamma$	-432.65$\pm$6.00	-38.59$\pm$2.29	7.15$\pm$0.63	-6.17$\pm$1.12
5	$\beta$	-419.80$\pm$16.07	-38.45$\pm$2.84	6.98$\pm$1.35	-7.04$\pm$2.23
6	$\alpha$	-456.54$\pm$2.54	-36.62$\pm$1.39	6.95$\pm$0.51	-9.24$\pm$0.67
7	N	-504.54$\pm$2.73	-36.85$\pm$0.56	7.40$\pm$0.58	-12.11$\pm$1.54

	mean	std	mean_and_std
DisReal	271.923053	7.331435	271.923052679±7.33
z_average	-18.891734	1.803124	-18.8917337445±1.8
Qw	0.087463	0.002548	0.0874632048331±0.0
Dis_h56	73.531906	3.320595	73.5319055641±3.32
z_h56	-34.259260	2.360749	-34.259260334±2.36
Lipid	0.002629	0.002888	0.00262884889888±0.0
Rg	4.326651	2.494501	4.3266511143±2.49
Membrane	-26.646285	2.831294	-26.6462852962±2.83
AMH-Go	-369.970422	1.278426	-369.970421797±1.28