In [1]:
from Bio.PDB import *
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
import filecmp
# from .. import notebookFunctions

# Render matplotlib figures inline in the cell output.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10,6.180)    # default figure size; width/height = 10/6.18, i.e. roughly the golden ratio
# %matplotlib notebook
# Load the autoreload extension and reload all modules before each cell is executed.
%load_ext autoreload
%autoreload 2

In [38]:
# Check whether picked structure 0 of "complete_out" is the same file as
# structure 13 of the "low_e_jun01_out" selection.
# `pre` is the common path prefix that later cells also build their paths from.
pre = "/Users/weilu/Dropbox/GlpG_paper_2018/figures/"
file1 = pre + "picked_structure_pdbs/complete_out/structure_0.pdb"
file2 = pre + "selected_structure_complete/low_e_jun01_out/structure_13.pdb"
# shallow=False forces a byte-by-byte comparison; the default (shallow=True) reports
# True for any two files that merely share type, size and modification time.
filecmp.cmp(file1, file2, shallow=False)


Out[38]:
True

In [ ]:


In [32]:


In [34]:
# Show the row(s) of `data` selected by the expression `index == 2`.
# NOTE(review): `data` is not assigned in any cell visible above this one, so this
# cell depends on earlier kernel state — confirm how `data` was loaded.
data.query("index == 2")


Out[34]:
index AMH AMH-Go AMH_3H AMH_4H BiasTo DisReal Dis_h56 Distance Energy ... z_average z_h1 z_h2 z_h3 z_h4 z_h5 z_h6 BiasedEnergy BiasEnergy Energy_with_all_bias
2 2 NaN -368.794764 NaN -280.810983 274.0 264.425563 68.20642 120.60384 -673.649006 ... -16.978985 -18.064276 -14.082236 -20.113153 -19.360805 -11.60792 -19.682536 -451.72726 1.833397 -449.893863

1 rows × 52 columns


In [30]:
# List the column labels of `data`.
# NOTE(review): `data` is not assigned in any visible cell. The listing below has
# 54 labels while the In [34] output reports 52 columns, so `data` presumably held
# a different frame when each cell was executed — TODO confirm.
data.columns


Out[30]:
Index(['Unnamed: 0', 'level_0', 'AMH', 'AMH-Go', 'AMH_3H', 'AMH_4H', 'BiasTo',
       'DisReal', 'Dis_h56', 'Distance', 'Energy', 'Lipid', 'Lipid1',
       'Lipid10', 'Lipid11', 'Lipid12', 'Lipid13', 'Lipid14', 'Lipid15',
       'Lipid2', 'Lipid3', 'Lipid4', 'Lipid5', 'Lipid6', 'Lipid7', 'Lipid8',
       'Lipid9', 'Membrane', 'Qw', 'Rg', 'Run', 'Step', 'Temp', 'TempT',
       'TotalE', 'abs_z_average', 'index', 'rg1', 'rg2', 'rg3', 'rg4', 'rg5',
       'rg6', 'rg_all', 'z_average', 'z_h1', 'z_h2', 'z_h3', 'z_h4', 'z_h5',
       'z_h6', 'BiasedEnergy', 'BiasEnergy', 'Energy_with_all_bias'],
      dtype='object')

In [202]:


In [251]:
def get_info(name=None, n_picked=5, n_candidates=20):
    r"""Summarise the picked structures of one state as ``mean$\pm$std`` strings.

    For every ``picked_structure_pdbs/{name}/structure_{j}.pdb`` with ``j`` in
    ``range(n_picked)``, the first byte-identical file among
    ``selected_structure_complete/{target}/structure_{i}.pdb`` with ``i`` in
    ``range(n_candidates)`` is searched, where ``target = target_dic[name]``.
    For each match, the row with ``index == i`` of
    ``selected_structure_complete/{target}.csv`` is collected; the collected
    rows are then reduced to their mean and standard deviation.

    Relies on the notebook-level names ``pre`` (path prefix string),
    ``target_dic``, ``pd`` and ``filecmp``.

    Parameters
    ----------
    name : str
        Key of ``target_dic`` and sub-folder of ``picked_structure_pdbs``.
        The ``None`` default is only a placeholder; it is looked up in
        ``target_dic`` like any other key.
    n_picked : int, default 5
        Number of picked structure files to look at (``structure_0`` ...).
    n_candidates : int, default 20
        Number of candidate structure files each picked file is compared with.

    Returns
    -------
    pandas.Series
        Indexed by "DisReal", "z_average", "Qw", "Dis_h56", "average_z_h56",
        "Lipid", "Rg", "Membrane" and "AMH-Go"; every value is the string
        ``"{mean:.2f}$\pm${std:.2f}"``.

    Raises
    ------
    KeyError
        If ``name`` is not a key of ``target_dic``.
    OSError
        Propagated from ``filecmp.cmp`` / ``pd.read_csv`` when a file is missing.
    ValueError
        If none of the picked structures matches any candidate file.
    """
    import warnings

    target = target_dic[name]
    data = pd.read_csv(pre + f"selected_structure_complete/{target}.csv")
    # Discard the "index" column stored in the CSV and let reset_index() rebuild it
    # from the freshly read row index, so `index == i` below selects by row number.
    # NOTE(review): this assumes row i of the CSV describes structure_{i}.pdb — confirm.
    data = data.drop(["index"], axis=1).reset_index()

    chosen_list = []
    for j in range(n_picked):
        picked_path = pre + f"picked_structure_pdbs/{name}/structure_{j}.pdb"
        for i in range(n_candidates):
            candidate_path = pre + f"selected_structure_complete/{target}/structure_{i}.pdb"
            # shallow=False compares file contents; the default only compares
            # type/size/mtime when those agree, which can report false matches.
            if filecmp.cmp(picked_path, candidate_path, shallow=False):
                chosen_list.append(data.query(f"index == {i}"))
                break
        else:
            # Keep going (as before), but make the reduced sample size visible.
            warnings.warn(
                f"{picked_path} matches none of the {n_candidates} candidate "
                f"structures of '{target}'; it is left out of the statistics."
            )

    if not chosen_list:
        raise ValueError(
            f"No picked structure of '{name}' matches a candidate structure of '{target}'."
        )

    chosen = pd.concat(chosen_list)
    # Mean of the z_h5 and z_h6 columns.
    chosen["average_z_h56"] = (chosen["z_h5"] + chosen["z_h6"]) / 2

    info = chosen[["DisReal", "z_average", "Qw", "Dis_h56", "average_z_h56",
                   "Lipid", "Rg", "Membrane", "AMH-Go"]]
    info = info.describe().loc[["mean", "std"]].T
    # "\\pm" keeps the literal backslash without relying on an invalid escape sequence.
    out = info["mean"].map('{:.2f}'.format) + "$\\pm$" + info["std"].map('{:.2f}'.format)
    return out

In [258]:
# Maps each folder name under picked_structure_pdbs/ to the folder name / CSV stem
# used for the same state under selected_structure_complete/.
target_dic = {
    "complete_out": "low_e_jun01_out",
    "low_e_h12": "low_e_jun01_h12",
    "low_e_h34": "low_e_jun01_h34",
    "low_e_h56": "low_e_jun01_h56",
    "low_e_pre_transition": "low_e_jun01_pre",
    "low_e_transition": "low_e_jun01_transition",
    "low_e_post_transition": "low_e_jun01_post_transition",
    "other_path/low_e_path1": "other_path/low_e_path1",
    "other_path/low_e_path2": "other_path/low_e_path2",
    "native": "native",
}
# Label shown for each state in the "name" column of the tables.
# Backslashes are doubled so that no entry relies on an invalid escape sequence
# ("\g" is not a recognised escape and triggers a warning on recent Python versions).
name_dic = {
    "complete_out": "U2",
    "low_e_h12": "U1",
    "low_e_h34": "I2",
    "low_e_h56": "I1",
    "low_e_pre_transition": "$\\alpha$",
    "low_e_transition": "$\\beta$",
    "low_e_post_transition": "$\\gamma$",
    "other_path/low_e_path1": "path1",
    "other_path/low_e_path2": "path2",
    "native": "N",
}

# States to tabulate, in row order.
name_list = ["complete_out", "low_e_h12", "low_e_h34", "low_e_h56",
             "low_e_post_transition", "low_e_transition", "low_e_pre_transition", "native"]
# Alternative selection with the two "other_path" states in place of "native":
# name_list = ["complete_out", "low_e_h12", "low_e_h34", "low_e_h56",
#              "low_e_post_transition", "low_e_transition","low_e_pre_transition",
#             "other_path/low_e_path1", "other_path/low_e_path2"]
all_info_list = []
for name in name_list:
    a = get_info(name)
    a["name"] = name_dic[name]
    all_info_list.append(a)
# One row per state; set_index + reset_index moves "name" to the first column.
all_info = pd.concat(all_info_list, axis=1).T.set_index('name').reset_index()

In [259]:
# Show the name plus the DisReal, z_average, Qw, Dis_h56 and average_z_h56 summaries of every state.
all_info[["name", "DisReal", "z_average", "Qw", "Dis_h56", "average_z_h56"]]


Out[259]:
name DisReal z_average Qw Dis_h56 average_z_h56
0 U2 271.92$\pm$7.33 -18.89$\pm$1.80 0.09$\pm$0.00 73.53$\pm$3.32 -17.13$\pm$1.18
1 U1 226.45$\pm$2.65 -12.60$\pm$1.45 0.09$\pm$0.01 73.45$\pm$2.34 -17.01$\pm$0.66
2 I2 145.80$\pm$6.41 -9.66$\pm$0.82 0.16$\pm$0.01 67.45$\pm$4.91 -16.10$\pm$1.33
3 I1 83.98$\pm$7.36 -6.58$\pm$0.85 0.42$\pm$0.02 61.94$\pm$6.30 -17.44$\pm$2.68
4 $\gamma$ 67.08$\pm$1.55 -5.51$\pm$0.37 0.46$\pm$0.03 53.40$\pm$5.05 -13.78$\pm$1.07
5 $\beta$ 60.19$\pm$1.87 -3.91$\pm$0.59 0.41$\pm$0.06 40.16$\pm$17.73 -10.14$\pm$3.53
6 $\alpha$ 53.38$\pm$1.06 -3.14$\pm$0.58 0.59$\pm$0.03 27.51$\pm$3.53 -5.60$\pm$1.34
7 N 34.94$\pm$2.84 -2.55$\pm$0.35 0.75$\pm$0.03 26.52$\pm$2.10 -5.45$\pm$0.24

In [254]:
# Show the name plus the AMH-Go, Membrane, Rg and Lipid summaries of every state.
all_info[["name", "AMH-Go", "Membrane", "Rg", "Lipid"]]


Out[254]:
name AMH-Go Membrane Rg Lipid
0 U2 -369.97$\pm$1.28 -26.65$\pm$2.83 4.33$\pm$2.49 0.00$\pm$0.00
1 U1 -374.87$\pm$2.37 -32.25$\pm$1.41 3.60$\pm$0.93 0.01$\pm$0.00
2 I2 -397.57$\pm$4.88 -35.24$\pm$1.88 3.83$\pm$1.07 -1.31$\pm$0.35
3 I1 -437.50$\pm$2.53 -35.38$\pm$1.32 6.37$\pm$1.33 -4.53$\pm$0.71
4 $\gamma$ -432.65$\pm$6.00 -38.59$\pm$2.29 7.15$\pm$0.63 -6.17$\pm$1.12
5 $\beta$ -419.80$\pm$16.07 -38.45$\pm$2.84 6.98$\pm$1.35 -7.04$\pm$2.23
6 $\alpha$ -456.54$\pm$2.54 -36.62$\pm$1.39 6.95$\pm$0.51 -9.24$\pm$0.67
7 N -504.54$\pm$2.73 -36.85$\pm$0.56 7.40$\pm$0.58 -12.11$\pm$1.54

In [255]:
# Print the selected columns as a LaTeX tabular without the row index.
# escape=False leaves the `$...$` math markup in the cells and labels unescaped.
print(all_info[["name", "DisReal", "z_average", "Qw", "Dis_h56", "average_z_h56"]].to_latex(index=False, escape=False))


\begin{tabular}{llllll}
\toprule
     name &          DisReal &        z_average &             Qw &          Dis_h56 &    average_z_h56 \\
\midrule
       U2 &  271.92$\pm$7.33 &  -18.89$\pm$1.80 &  0.09$\pm$0.00 &   73.53$\pm$3.32 &  -17.13$\pm$1.18 \\
       U1 &  226.45$\pm$2.65 &  -12.60$\pm$1.45 &  0.09$\pm$0.01 &   73.45$\pm$2.34 &  -17.01$\pm$0.66 \\
       I2 &  145.80$\pm$6.41 &   -9.66$\pm$0.82 &  0.16$\pm$0.01 &   67.45$\pm$4.91 &  -16.10$\pm$1.33 \\
       I1 &   83.98$\pm$7.36 &   -6.58$\pm$0.85 &  0.42$\pm$0.02 &   61.94$\pm$6.30 &  -17.44$\pm$2.68 \\
 $\gamma$ &   67.08$\pm$1.55 &   -5.51$\pm$0.37 &  0.46$\pm$0.03 &   53.40$\pm$5.05 &  -13.78$\pm$1.07 \\
  $\beta$ &   60.19$\pm$1.87 &   -3.91$\pm$0.59 &  0.41$\pm$0.06 &  40.16$\pm$17.73 &  -10.14$\pm$3.53 \\
 $\alpha$ &   53.38$\pm$1.06 &   -3.14$\pm$0.58 &  0.59$\pm$0.03 &   27.51$\pm$3.53 &   -5.60$\pm$1.34 \\
        N &   34.94$\pm$2.84 &   -2.55$\pm$0.35 &  0.75$\pm$0.03 &   26.52$\pm$2.10 &   -5.45$\pm$0.24 \\
\bottomrule
\end{tabular}


In [261]:
# Print the selected columns as a LaTeX tabular without the row index.
# escape=False leaves the `$...$` math markup in the cells and labels unescaped.
print(all_info[["name", "AMH-Go", "Membrane", "Rg", "Lipid"]].to_latex(index=False, escape=False))


\begin{tabular}{lllll}
\toprule
     name &             AMH-Go &         Membrane &             Rg &            Lipid \\
\midrule
       U2 &   -369.97$\pm$1.28 &  -26.65$\pm$2.83 &  4.33$\pm$2.49 &    0.00$\pm$0.00 \\
       U1 &   -374.87$\pm$2.37 &  -32.25$\pm$1.41 &  3.60$\pm$0.93 &    0.01$\pm$0.00 \\
       I2 &   -397.57$\pm$4.88 &  -35.24$\pm$1.88 &  3.83$\pm$1.07 &   -1.31$\pm$0.35 \\
       I1 &   -437.50$\pm$2.53 &  -35.38$\pm$1.32 &  6.37$\pm$1.33 &   -4.53$\pm$0.71 \\
 $\gamma$ &   -432.65$\pm$6.00 &  -38.59$\pm$2.29 &  7.15$\pm$0.63 &   -6.17$\pm$1.12 \\
  $\beta$ &  -419.80$\pm$16.07 &  -38.45$\pm$2.84 &  6.98$\pm$1.35 &   -7.04$\pm$2.23 \\
 $\alpha$ &   -456.54$\pm$2.54 &  -36.62$\pm$1.39 &  6.95$\pm$0.51 &   -9.24$\pm$0.67 \\
        N &   -504.54$\pm$2.73 &  -36.85$\pm$0.56 &  7.40$\pm$0.58 &  -12.11$\pm$1.54 \\
\bottomrule
\end{tabular}


In [209]:
# Print the complete table as a LaTeX tabular without the row index.
# NOTE(review): the output stored below comes from an earlier execution (count 209,
# before get_info was last run as In [251]): it shows a plain "±" separator,
# three-decimal means and different row labels than the current code produces.
# Re-run before using it; escape=False is presumably needed for the current
# `$...$` strings — TODO confirm.
print(all_info.to_latex(index=False))


\begin{tabular}{llllllllll}
\toprule
            name &      DisReal &   z\_average &         Qw &      Dis\_h56 & average\_z\_h56 &        Lipid &         Rg &     Membrane &         AMH-Go \\
\midrule
    complete out &  271.923±7.3 &  -1.259±0.1 &  0.087±0.0 &   73.532±3.3 &    -1.142±0.1 &    0.003±0.0 &  4.327±2.5 &  -26.646±2.8 &   -369.970±1.3 \\
             h12 &  226.449±2.7 &  -0.840±0.1 &  0.092±0.0 &   73.454±2.3 &    -1.134±0.0 &    0.005±0.0 &  3.599±0.9 &  -32.248±1.4 &   -374.874±2.4 \\
             h34 &  145.800±6.4 &  -0.644±0.1 &  0.163±0.0 &   67.454±4.9 &    -1.074±0.1 &   -1.306±0.3 &  3.827±1.1 &  -35.244±1.9 &   -397.567±4.9 \\
             h56 &   83.985±7.4 &  -0.438±0.1 &  0.419±0.0 &   61.942±6.3 &    -1.163±0.2 &   -4.529±0.7 &  6.369±1.3 &  -35.380±1.3 &   -437.501±2.5 \\
 post\_transition &   67.076±1.6 &  -0.367±0.0 &  0.460±0.0 &   53.396±5.0 &    -0.919±0.1 &   -6.174±1.1 &  7.150±0.6 &  -38.595±2.3 &   -432.651±6.0 \\
      transition &   60.191±1.9 &  -0.261±0.0 &  0.409±0.1 &  40.160±17.7 &    -0.676±0.2 &   -7.038±2.2 &  6.978±1.3 &  -38.446±2.8 &  -419.796±16.1 \\
  pre transition &   53.385±1.1 &  -0.209±0.0 &  0.594±0.0 &   27.514±3.5 &    -0.373±0.1 &   -9.243±0.7 &  6.954±0.5 &  -36.620±1.4 &   -456.538±2.5 \\
          native &   34.944±2.8 &  -0.170±0.0 &  0.752±0.0 &   26.524±2.1 &    -0.363±0.0 &  -12.110±1.5 &  7.399±0.6 &  -36.848±0.6 &   -504.538±2.7 \\
\bottomrule
\end{tabular}


In [208]:
# Print the transposed table (one column per state) as LaTeX; header=False drops
# the column-label row of the transposed frame.
# NOTE(review): the output stored below comes from an earlier execution (count 208)
# and shows a different format and different labels than the current code
# produces — re-run before using it.
print(all_info.T.to_latex(header=False))


\begin{tabular}{lllllllll}
\toprule
name          &  complete out &           h12 &           h34 &           h56 &  post\_transition &     transition &  pre transition &        native \\
DisReal       &   271.923±7.3 &   226.449±2.7 &   145.800±6.4 &    83.985±7.4 &       67.076±1.6 &     60.191±1.9 &      53.385±1.1 &    34.944±2.8 \\
z\_average     &    -1.259±0.1 &    -0.840±0.1 &    -0.644±0.1 &    -0.438±0.1 &       -0.367±0.0 &     -0.261±0.0 &      -0.209±0.0 &    -0.170±0.0 \\
Qw            &     0.087±0.0 &     0.092±0.0 &     0.163±0.0 &     0.419±0.0 &        0.460±0.0 &      0.409±0.1 &       0.594±0.0 &     0.752±0.0 \\
Dis\_h56       &    73.532±3.3 &    73.454±2.3 &    67.454±4.9 &    61.942±6.3 &       53.396±5.0 &    40.160±17.7 &      27.514±3.5 &    26.524±2.1 \\
average\_z\_h56 &    -1.142±0.1 &    -1.134±0.0 &    -1.074±0.1 &    -1.163±0.2 &       -0.919±0.1 &     -0.676±0.2 &      -0.373±0.1 &    -0.363±0.0 \\
Lipid         &     0.003±0.0 &     0.005±0.0 &    -1.306±0.3 &    -4.529±0.7 &       -6.174±1.1 &     -7.038±2.2 &      -9.243±0.7 &   -12.110±1.5 \\
Rg            &     4.327±2.5 &     3.599±0.9 &     3.827±1.1 &     6.369±1.3 &        7.150±0.6 &      6.978±1.3 &       6.954±0.5 &     7.399±0.6 \\
Membrane      &   -26.646±2.8 &   -32.248±1.4 &   -35.244±1.9 &   -35.380±1.3 &      -38.595±2.3 &    -38.446±2.8 &     -36.620±1.4 &   -36.848±0.6 \\
AMH-Go        &  -369.970±1.3 &  -374.874±2.4 &  -397.567±4.9 &  -437.501±2.5 &     -432.651±6.0 &  -419.796±16.1 &    -456.538±2.5 &  -504.538±2.7 \\
\bottomrule
\end{tabular}


In [64]:
# Step-by-step version of the matching/summary logic for the "complete_out" state.
# Leaves `chosen`, `info` and `out` in the notebook namespace.
# NOTE(review): `data` is not loaded in this cell; it must already hold the table
# whose `index` values correspond to the structure numbers of `target` — confirm.
name = "complete_out"  # set explicitly: the paths below must match `target`, not a leftover value of `name`
target = target_dic[name]
chosen_list = []
for j in range(5):
    file1 = pre + f"picked_structure_pdbs/{name}/structure_{j}.pdb"
    for i in range(20):
        file2 = pre + f"selected_structure_complete/{target}/structure_{i}.pdb"
        # shallow=False compares file contents instead of trusting equal size/mtime.
        if filecmp.cmp(file1, file2, shallow=False):
            chosen_list.append(data.query(f"index == {i}"))
            break
chosen = pd.concat(chosen_list)
print(chosen)
# data["Energy"] = data["Lipid"] + data["Rg"] + data["Membrane"] + data["AMH-Go"]
chosen["z_h56"] = chosen["z_h5"] + chosen["z_h6"]
info = chosen[["DisReal", "z_average", "Qw", "Dis_h56", "z_h56", "Lipid", "Rg", "Membrane", "AMH-Go"]]
# Keep only mean and std, one row per quantity.
info = info.describe().loc[["mean","std"]].T
out = info["mean"].map('{:.4f}'.format) + "±" + info["std"].map('{:.1f}'.format)

In [76]:
# Add a combined text column: the mean at full str() precision, "±", then the
# std rounded to two decimals.
info["mean_and_std"] = info["mean"].astype(str) + "±" + info["std"].round(2).astype(str)

In [77]:
# Display the `info` summary frame.
info


Out[77]:
mean std mean_and_std
DisReal 271.923053 7.331435 271.923052679±7.33
z_average -18.891734 1.803124 -18.8917337445±1.8
Qw 0.087463 0.002548 0.0874632048331±0.0
Dis_h56 73.531906 3.320595 73.5319055641±3.32
z_h56 -34.259260 2.360749 -34.259260334±2.36
Lipid 0.002629 0.002888 0.00262884889888±0.0
Rg 4.326651 2.494501 4.3266511143±2.49
Membrane -26.646285 2.831294 -26.6462852962±2.83
AMH-Go -369.970422 1.278426 -369.970421797±1.28

In [12]:
# Compare each of the five picked "complete_out" structures with `file2` and print the result.
# NOTE(review): the `file2` assignment inside the loop is commented out, so every
# iteration compares against whatever path `file2` already holds in the kernel
# rather than a per-iteration file — confirm that this is intended.
for i in range(5):
    file1 = pre + f"picked_structure_pdbs/complete_out/structure_{i}.pdb"
#     file2 = pre + f"selected_structure_complete/low_e_jun01_out/structure_{i}.pdb"
    print(filecmp.cmp(file1, file2))


True
False
False
False
False

In [ ]: