In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions

%matplotlib inline
plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2

import sys
sys.path.insert(0, '/Users/weilu/openmmawsem/')

from openmmawsem import *

In [2]:
# Switch the working directory so that the relative paths used by later cells
# (e.g. "openmmMovie.pdb", "tmp.pdb", "../../awsem.xml") resolve against this folder.
# NOTE(review): absolute, user-specific path; the notebook will not run elsewhere without editing it.
os.chdir("/Users/weilu/openmmawsem/_local/from_lammps_to_openmm/")

In [6]:
# Run configuration: OpenMM platform name, PDB id / file name and the chain passed to prepare_pdb.
simulation_platform = "OpenCL" # OpenCL, CUDA, CPU, or Reference
pdb_id = '1r69'
pdb = f"{pdb_id}.pdb"
chain='T'

# download(pdb_id)
# Copy the two gamma parameter files into the working directory through the shell.
# NOTE(review): the exit status returned by os.system is discarded, so a failed copy is not detected here.
os.system("cp /Users/weilu/opt/parameters/globular_parameters/burial_gamma.dat .")
os.system("cp /Users/weilu/opt/parameters/globular_parameters/gamma.dat .")
# prepare_pdb returns two file names; presumably the converted input PDB and a cleaned copy — TODO confirm against openmmawsem.
input_pdb_filename, cleaned_pdb_filename = prepare_pdb(pdb, chain)
# ensure_atom_order(input_pdb_filename)
# getSeqFromCleanPdb(input_pdb_filename, chains='A')

In [ ]:
# Run the openmmawsem helper on the prepared PDB.
# NOTE(review): presumably checks or rewrites the per-residue atom ordering — confirm against the helper's source.
ensure_atom_order(input_pdb_filename)

In [158]:
# IPython automagic for %pwd: display the current working directory (sanity check after os.chdir).
pwd


Out[158]:
'/Users/weilu/openmmawsem/_local/from_lammps_to_openmm'

In [13]:
# Read the whole PDB file named by `pdb` and split its text on the literal "END";
# later cells treat each element of `a` as the text of one frame.
# The context manager closes the file handle promptly instead of leaking it
# until the garbage collector happens to run.
with open(pdb) as trajectory_file:
    a = trajectory_file.read().split("END")

In [143]:
# Rebuild openmmMovie.pdb from scratch: one REMARK header line, then for each of
# the first 30 chunks of `a` a "MODEL  <n>" record followed by the file that
# prepare_pdb produces for that chunk.
#
# Plain Python file I/O replaces the former `rm` / `echo >>` / `cat >>` shell
# calls: the bytes written are the same, but the cell no longer needs a POSIX
# shell and a missing or unreadable file raises instead of being silently ignored.
with open("openmmMovie.pdb", "w") as movie:
    movie.write("REMARK converted from awsem lammps output\n")
    for i in range(30):
        # Stage the current frame on disk because prepare_pdb takes a file name.
        with open("tmp.pdb", "w") as out:
            out.write(a[i])
        input_pdb_filename, cleaned_pdb_filename = prepare_pdb("tmp.pdb", chain)
        movie.write(f"MODEL  {i+1}\n")
        # tmp-openmmawsem.pdb is the file the original cell appended after each
        # prepare_pdb call; copy its contents verbatim.
        with open("tmp-openmmawsem.pdb") as converted:
            movie.write(converted.read())

In [159]:
# Load the converted trajectory and build the AWSEM system used to re-score it.
pdb_trajectory = read_trajectory_pdb_positions("openmmMovie.pdb")
oa = OpenMMAWSEMSystem(input_pdb_filename, k_awsem=1.0, xml_filename="../../awsem.xml") # k_awsem is an overall scaling factor that will affect the relevant temperature scales

# Energy-term name -> force group id, or a list of ids whose energies are
# reported together ("Total", "Water"). The energy-report cell looks terms up here.
forceGroupTable = {"Con":11, "Chain":12, "Chi":13, "Excluded":14, "Rama":15, "Direct":16,
                    "Burial":17, "Mediated":18, "Contact":18, "Fragment":19, "Total":list(range(11, 20)),
                    "Water":[16, 18], "Q":1}

# Forces added to the system; the commented-out entries are terms that are
# deliberately left disabled for this comparison.
forces = [
    oa.q_value("crystal_structure-cleaned.pdb", "A"),
#     oa.con_term(),
    oa.chain_term(),
    oa.chi_term(),
#     oa.excl_term(),
    oa.rama_term(),
    oa.rama_proline_term(),
    oa.direct_term(),
    oa.burial_term(),
    oa.mediated_term(),
#     oa.fragment_memory_term(frag_location_pre="./")
]
oa.addForcesWithDefaultForceGroup(forces)

# start simulation
# NOTE(review): collision_rate is defined but not passed to the integrator,
# which uses 1/picosecond as its second argument — confirm which value is intended.
collision_rate = 5.0 / picoseconds

integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 2*femtoseconds)
# Use the platform chosen in the configuration cell instead of a second,
# hard-coded "OpenCL" literal, so changing simulation_platform actually takes effect.
simulation = Simulation(oa.pdb.topology, oa.system, integrator, Platform.getPlatformByName(simulation_platform))

In [160]:
# Print one row per trajectory frame with the potential energy (kcal/mol) of
# each term listed in showEnergy. Every term must be a key of forceGroupTable.
showEnergy = ["Q", "Con", "Chain", "Chi", "Excluded", "Rama", "Water", "Burial", "Fragment", "Total"]
print(" ".join(["{0:<8s}".format(i) for i in ["Steps"] + showEnergy]))
# The loop variable is `frame`, not `pdb`: rebinding `pdb` here would overwrite
# the module-level PDB file name string and break re-running the earlier cells
# that call open(pdb) / prepare_pdb(pdb, chain).
for step, frame in enumerate(pdb_trajectory):
    simulation.context.setPositions(frame.positions)
    e = []
    for term in showEnergy:
        group = forceGroupTable[term]
        if isinstance(group, list):
            # Several force groups reported as one column.
            g = set(group)
        elif group == -1:
            # -1 is passed to getState unchanged rather than wrapped in a set.
            g = -1
        else:
            g = {group}
        state = simulation.context.getState(getEnergy=True, groups=g)
        termEnergy = state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)
        e.append(termEnergy)
    print(" ".join([f"{step:<8}"] +  ["{0:<8.2f}".format(i) for i in e]))


Steps    Q        Con      Chain    Chi      Excluded Rama     Water    Burial   Fragment Total   
0        0.00     0.00     20.00    1.34     0.00     -133.72  0.00     -34.38   0.00     -146.76 
1        0.04     0.00     564.83   104.41   0.00     -64.29   -0.15    -50.97   0.00     553.83  
2        0.07     0.00     498.49   64.70    0.00     -63.21   -6.54    -53.70   0.00     439.74  
3        0.07     0.00     322.47   41.74    0.00     -79.27   -0.94    -51.73   0.00     232.27  
4        0.08     0.00     333.68   65.62    0.00     -99.04   -8.78    -55.24   0.00     236.24  
5        0.09     0.00     296.15   62.99    0.00     -105.74  -5.32    -53.39   0.00     194.70  
6        0.10     0.00     351.38   63.06    0.00     -93.40   -3.81    -54.43   0.00     262.79  
7        0.09     0.00     291.82   41.01    0.00     -107.20  -2.96    -54.26   0.00     168.40  
8        0.11     0.00     269.54   45.22    0.00     -101.12  -2.05    -53.24   0.00     158.35  
9        0.11     0.00     304.17   48.61    0.00     -106.86  -2.13    -55.30   0.00     188.50  
10       0.11     0.00     326.38   54.83    0.00     -105.46  -11.09   -53.89   0.00     210.78  
11       0.10     0.00     266.48   50.02    0.00     -117.17  -11.96   -53.77   0.00     133.60  
12       0.10     0.00     358.88   70.40    0.00     -114.46  -22.17   -53.88   0.00     238.77  
13       0.10     0.00     281.87   43.53    0.00     -109.04  -19.07   -53.68   0.00     143.61  
14       0.10     0.00     266.18   39.57    0.00     -98.52   -21.80   -54.76   0.00     130.68  
15       0.08     0.00     260.10   62.95    0.00     -110.75  -19.95   -55.10   0.00     137.25  
16       0.09     0.00     290.26   64.78    0.00     -96.05   -21.71   -55.83   0.00     181.45  
17       0.10     0.00     292.99   54.96    0.00     -105.50  -18.48   -56.38   0.00     167.58  
18       0.10     0.00     288.73   54.67    0.00     -106.82  -18.19   -56.16   0.00     162.23  
19       0.10     0.00     317.81   72.77    0.00     -110.50  -25.43   -54.50   0.00     200.15  
20       0.10     0.00     306.72   55.61    0.00     -111.04  -26.10   -55.41   0.00     169.79  
21       0.11     0.00     314.85   50.07    0.00     -112.42  -24.14   -54.75   0.00     173.62  
22       0.10     0.00     286.33   54.17    0.00     -118.22  -30.51   -54.82   0.00     136.96  
23       0.10     0.00     215.36   50.61    0.00     -113.86  -25.53   -55.24   0.00     71.34   
24       0.10     0.00     298.30   48.08    0.00     -119.07  -28.34   -55.81   0.00     143.16  
25       0.10     0.00     276.35   57.00    0.00     -108.69  -20.90   -55.57   0.00     148.18  
26       0.11     0.00     309.48   58.95    0.00     -115.02  -19.24   -54.81   0.00     179.36  
27       0.11     0.00     326.61   45.27    0.00     -108.62  -18.41   -54.15   0.00     190.70  
28       0.10     0.00     288.43   53.50    0.00     -114.66  -17.11   -56.56   0.00     153.61  
29       0.11     0.00     284.81   51.49    0.00     -117.40  -21.68   -54.97   0.00     142.24  

In [147]:
# Scratch arithmetic on rounded energy values.
# NOTE(review): 284, 51, -119/-21/-54 resemble the Chain, Chi, Rama, Water and Burial
# columns of the energy table printed above; the meaning of 124, 9 and -305 is not shown here — TODO confirm.
124+284+51+9-119-21-54-305


Out[147]:
-31

In [149]:
# Scratch arithmetic on rounded energy values (variant of the previous sum with 127 in place of 124 and -117 in place of -119).
# NOTE(review): the origin of 127, 9 and -305 is not visible in this notebook — TODO confirm.
284+51-117-21-54-305+127+9


Out[149]:
-26

In [ ]:


In [89]:
# Print the potential energy (kcal/mol) of force group 2 alone for every trajectory frame.
# The loop variable is `frame`, not `pdb`, so the module-level PDB file name
# string is not overwritten by the last trajectory frame.
for frame in pdb_trajectory:
    simulation.context.setPositions(frame.positions)

    state = simulation.context.getState(getEnergy=True, groups={2})
    print(state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))


0.0
0.0
0.0

In [65]:
# Print the potential energy (kcal/mol) of force group 2 alone for every trajectory frame.
# The loop variable is `frame`, not `pdb`, so the module-level PDB file name
# string is not overwritten by the last trajectory frame.
for frame in pdb_trajectory:
    simulation.context.setPositions(frame.positions)

    state = simulation.context.getState(getEnergy=True, groups={2})
    print(state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))


19.998466535462477
564.831239543499
498.48847544216056

In [ ]:
# Name -> force object for three AWSEM terms; building the dict calls each oa.*_term() once.
# The Q-value entry is disabled because qvalue_reference_structure / qvalue_reference_chain are not defined in this notebook.
# NOTE(review): nothing visible in this notebook consumes this dict — confirm whether it is still needed.
order_parameters_to_compute = {
#     "Qvalue": oa.q_value(qvalue_reference_structure, qvalue_reference_chain, min_seq_sep=3, max_seq_sep=np.inf), 
    "Con": oa.con_term(),
    "Chain": oa.chain_term(),
    "Chi": oa.chi_term(),
}

In [15]:
# Inspect the raw text of the second chunk obtained by splitting the PDB file on "END".
a[1]


Out[15]:
'\nATOM      1  CA  SER T   1     -25.844 -25.886   4.088  1.00  0.00           C\nATOM      2  C   SER T   1     -25.118 -27.205   4.034  1.00  0.00           C\nATOM      3  O   SER T   1     -25.635 -28.157   4.488  1.00  0.00           O\nATOM      4  CB  SER T   1     -25.753 -25.059   5.387  1.00  0.00           C\nATOM      5  N   ILE T   2     -23.914 -27.226   3.469  1.00  0.00           N\nATOM      6  CA  ILE T   2     -23.043 -28.392   3.314  1.00  0.00           C\nATOM      7  C   ILE T   2     -22.019 -28.229   4.438  1.00  0.00           C\nATOM      8  O   ILE T   2     -21.345 -27.296    4.45  1.00  0.00           O\nATOM      9  CB  ILE T   2     -22.543 -28.281   1.795  1.00  0.00           C\nATOM     10  N   SER T   3     -21.929 -29.161   5.373  1.00  0.00           N\nATOM     11  CA  SER T   3     -21.007 -29.194   6.543  1.00  0.00           C\nATOM     12  C   SER T   3     -19.659 -28.682   6.455  1.00  0.00           C\nATOM     13  O   SER T   3     -18.929 -28.975    7.33  1.00  0.00           O\nATOM     14  CB  SER T   3     -20.604 -30.613   6.778  1.00  0.00           C\nATOM     15  N   SER T   4     -19.359 -27.915   5.377  1.00  0.00           N\nATOM     16  CA  SER T   4     -18.111 -27.315   5.093  1.00  0.00           C\nATOM     17  C   SER T   4     -17.459 -27.458   3.768  1.00  0.00           C\nATOM     18  O   SER T   4     -16.261 -27.941    3.64  1.00  0.00           O\nATOM     19  CB  SER T   4     -16.802 -28.026   5.854  1.00  0.00           C\nATOM     20  N   ARG T   5     -18.283 -27.026   2.796  1.00  0.00           N\nATOM     21  CA  ARG T   5     -17.865 -27.069   1.441  1.00  0.00           C\nATOM     22  C   ARG T   5     -17.197 -25.807   0.889  1.00  0.00           C\nATOM     23  O   ARG T   5     -17.835 -24.873   0.313  1.00  0.00           O\nATOM     24  CB  ARG T   5     -19.256 -26.557   0.801  1.00  0.00           C\nATOM     25  N   VAL T   6     -15.904 -25.816   1.084  1.00  0.00           
N\nATOM     26  CA  VAL T   6     -15.068 -24.704   0.634  1.00  0.00           C\nATOM     27  C   VAL T   6     -14.246 -25.461  -0.358  1.00  0.00           C\nATOM     28  O   VAL T   6      -13.18 -26.135   0.041  1.00  0.00           O\nATOM     29  CB  VAL T   6     -14.202  -24.17   1.604  1.00  0.00           C\nATOM     30  N   LYS T   7     -14.776 -25.328  -1.651  1.00  0.00           N\nATOM     31  CA  LYS T   7     -14.151 -25.969  -2.773  1.00  0.00           C\nATOM     32  C   LYS T   7     -13.473 -24.785  -3.595  1.00  0.00           C\nATOM     33  O   LYS T   7     -14.066 -24.112  -4.381  1.00  0.00           O\nATOM     34  CB  LYS T   7     -15.246 -26.667   -3.84  1.00  0.00           C\nATOM     35  N   SER T   8      -12.22 -24.559  -3.387  1.00  0.00           N\nATOM     36  CA  SER T   8     -11.382 -23.471  -4.072  1.00  0.00           C\nATOM     37  C   SER T   8     -11.221 -22.435  -2.971  1.00  0.00           C\nATOM     38  O   SER T   8     -12.043 -22.361    -1.8  1.00  0.00           O\nATOM     39  CB  SER T   8     -12.298 -22.481  -4.803  1.00  0.00           C\nATOM     40  N   LYS T   9     -10.141 -21.649  -3.387  1.00  0.00           N\nATOM     41  CA  LYS T   9      -9.793 -20.585  -2.495  1.00  0.00           C\nATOM     42  C   LYS T   9     -10.886 -19.443  -2.752  1.00  0.00           C\nATOM     43  O   LYS T   9     -10.949 -18.809  -3.724  1.00  0.00           O\nATOM     44  CB  LYS T   9      -8.381 -19.752  -2.715  1.00  0.00           C\nATOM     45  N   ARG T  10     -11.735 -19.208  -1.854  1.00  0.00           N\nATOM     46  CA  ARG T  10      -12.86 -18.155  -1.909  1.00  0.00           C\nATOM     47  C   ARG T  10     -12.536  -17.12  -0.786  1.00  0.00           C\nATOM     48  O   ARG T  10      -12.14 -17.426   0.332  1.00  0.00           O\nATOM     49  CB  ARG T  10     -13.925 -19.138  -1.678  1.00  0.00           C\nATOM     50  N   ILE T  11     -12.717 -15.898  -1.122  1.00  0.00           
N\nATOM     51  CA  ILE T  11     -12.464 -14.752  -0.196  1.00  0.00           C\nATOM     52  C   ILE T  11     -13.605 -14.443   0.793  1.00  0.00           C\nATOM     53  O   ILE T  11     -14.816 -14.615    0.51  1.00  0.00           O\nATOM     54  CB  ILE T  11     -12.973 -13.398  -0.863  1.00  0.00           C\nATOM     55  N   GLN T  12     -13.177 -13.986   1.953  1.00  0.00           N\nATOM     56  CA  GLN T  12     -14.101 -13.626   3.047  1.00  0.00           C\nATOM     57  C   GLN T  12     -13.448 -12.217   3.219  1.00  0.00           C\nATOM     58  O   GLN T  12     -12.314 -12.042   3.361  1.00  0.00           O\nATOM     59  CB  GLN T  12     -14.348 -14.139   4.551  1.00  0.00           C\nATOM     60  N   LEU T  13     -14.198 -11.228   3.203  1.00  0.00           N\nATOM     61  CA  LEU T  13     -13.765  -9.796   3.351  1.00  0.00           C\nATOM     62  C   LEU T  13     -15.291  -9.377   3.118  1.00  0.00           C\nATOM     63  O   LEU T  13     -15.725  -9.027   2.137  1.00  0.00           O\nATOM     64  CB  LEU T  13     -13.128  -9.393   1.938  1.00  0.00           C\nATOM     65  N   GLY T  14      -16.08  -9.425   4.048  1.00  0.00           N\nATOM     66  CA  GLY T  14     -17.576  -9.065   4.019  1.00  0.00           C\nATOM     67  C   GLY T  14     -17.687  -7.627   4.377  1.00  0.00           C\nATOM     68  O   GLY T  14     -17.968  -7.357   5.572  1.00  0.00           O\nATOM     69  HB  GLY T  14     -18.128  -9.047   3.351  1.00  0.00           H\nATOM     70  N   LEU T  15      -17.46  -6.724   3.309  1.00  0.00           N\nATOM     71  CA  LEU T  15     -17.514  -5.283    3.42  1.00  0.00           C\nATOM     72  C   LEU T  15     -18.807   -4.44   3.577  1.00  0.00           C\nATOM     73  O   LEU T  15     -19.275  -4.099   4.607  1.00  0.00           O\nATOM     74  CB  LEU T  15     -16.579  -5.055   2.357  1.00  0.00           C\nATOM     75  N   ASN T  16     -19.362  -4.119   2.528  1.00  0.00           
N\nATOM     76  CA  ASN T  16     -20.608  -3.314   2.466  1.00  0.00           C\nATOM     77  C   ASN T  16     -19.961  -1.953    2.21  1.00  0.00           C\nATOM     78  O   ASN T  16      -20.06  -1.047   3.071  1.00  0.00           O\nATOM     79  CB  ASN T  16     -20.877  -2.818   3.915  1.00  0.00           C\nATOM     80  N   GLN T  17     -19.301  -1.846   1.007  1.00  0.00           N\nATOM     81  CA  GLN T  17     -18.603  -0.625   0.551  1.00  0.00           C\nATOM     82  C   GLN T  17     -17.283  -0.536    1.46  1.00  0.00           C\nATOM     83  O   GLN T  17      -16.32   -1.25   1.331  1.00  0.00           O\nATOM     84  CB  GLN T  17     -19.612   0.315   1.143  1.00  0.00           C\nATOM     85  N   ALA T  18     -17.274   0.359   2.375  1.00  0.00           N\nATOM     86  CA  ALA T  18     -16.106   0.608   3.352  1.00  0.00           C\nATOM     87  C   ALA T  18       -15.9  -0.835   3.859  1.00  0.00           C\nATOM     88  O   ALA T  18     -16.807  -1.476   4.371  1.00  0.00           O\nATOM     89  CB  ALA T  18     -16.182   1.754   4.531  1.00  0.00           C\nATOM     90  N   GLU T  19     -14.688  -1.317     3.7  1.00  0.00           N\nATOM     91  CA  GLU T  19     -14.274  -2.682   4.118  1.00  0.00           C\nATOM     92  C   GLU T  19     -14.857   -2.48   5.543  1.00  0.00           C\nATOM     93  O   GLU T  19     -14.931   -3.32   6.356  1.00  0.00           O\nATOM     94  CB  GLU T  19     -12.998  -3.122   4.114  1.00  0.00           C\nATOM     95  N   LEU T  20     -15.264  -1.345   5.813  1.00  0.00           N\nATOM     96  CA  LEU T  20     -15.856   -0.95   7.122  1.00  0.00           C\nATOM     97  C   LEU T  20     -14.576  -0.647   8.128  1.00  0.00           C\nATOM     98  O   LEU T  20     -14.532  -0.538   9.534  1.00  0.00           O\nATOM     99  CB  LEU T  20     -16.831  -2.023   7.078  1.00  0.00           C\nATOM    100  N   ALA T  21     -13.548  -0.518   7.365  1.00  0.00           
N\nATOM    101  CA  ALA T  21     -12.222  -0.227   8.107  1.00  0.00           C\nATOM    102  C   ALA T  21     -11.501    0.04   6.813  1.00  0.00           C\nATOM    103  O   ALA T  21     -11.286   1.078   6.443  1.00  0.00           O\nATOM    104  CB  ALA T  21     -11.238  -1.218    8.67  1.00  0.00           C\nATOM    105  N   GLN T  22     -11.141  -0.926   6.146  1.00  0.00           N\nATOM    106  CA  GLN T  22     -10.436  -0.876   4.878  1.00  0.00           C\nATOM    107  C   GLN T  22      -8.944  -1.074   4.743  1.00  0.00           C\nATOM    108  O   GLN T  22        -8.5  -2.119   4.125  1.00  0.00           O\nATOM    109  CB  GLN T  22     -10.503   0.521   4.622  1.00  0.00           C\nATOM    110  N   LYS T  23      -8.196  -0.043   5.339  1.00  0.00           N\nATOM    111  CA  LYS T  23      -6.738  -0.022   5.334  1.00  0.00           C\nATOM    112  C   LYS T  23      -6.578   1.206   6.416  1.00  0.00           C\nATOM    113  O   LYS T  23      -6.767   2.295   6.335  1.00  0.00           O\nATOM    114  CB  LYS T  23       -6.74   0.077   3.739  1.00  0.00           C\nATOM    115  N   VAL T  24      -6.227   0.994   7.421  1.00  0.00           N\nATOM    116  CA  VAL T  24      -6.019   2.037   8.566  1.00  0.00           C\nATOM    117  C   VAL T  24      -4.621   2.807   8.131  1.00  0.00           C\nATOM    118  O   VAL T  24      -3.568   2.351   7.673  1.00  0.00           O\nATOM    119  CB  VAL T  24      -6.108    0.86   9.626  1.00  0.00           C\nATOM    120  N   GLY T  25       -4.65   3.979    8.29  1.00  0.00           N\nATOM    121  CA  GLY T  25      -3.422   4.882   7.936  1.00  0.00           C\nATOM    122  C   GLY T  25      -3.904   6.137   7.322  1.00  0.00           C\nATOM    123  O   GLY T  25      -3.317   6.647   6.229  1.00  0.00           O\nATOM    124  HB  GLY T  25      -3.339   4.298   8.082  1.00  0.00           H\nATOM    125  N   THR T  26      -4.983   6.611   8.058  1.00  0.00           
N\nATOM    126  CA  THR T  26      -5.613   7.808   7.657  1.00  0.00           C\nATOM    127  C   THR T  26      -5.391   8.813   8.851  1.00  0.00           C\nATOM    128  O   THR T  26      -4.919   8.513   9.918  1.00  0.00           O\nATOM    129  CB  THR T  26      -6.981   8.214   7.363  1.00  0.00           C\nATOM    130  N   THR T  27      -5.743  10.005   8.636  1.00  0.00           N\nATOM    131  CA  THR T  27      -5.614  11.119   9.648  1.00  0.00           C\nATOM    132  C   THR T  27      -6.924  10.747  10.461  1.00  0.00           C\nATOM    133  O   THR T  27      -7.684  11.534  11.369  1.00  0.00           O\nATOM    134  CB  THR T  27      -5.558  12.371   8.913  1.00  0.00           C\nATOM    135  N   GLN T  28      -7.157    9.53  10.105  1.00  0.00           N\nATOM    136  CA  GLN T  28      -8.357    8.97  10.753  1.00  0.00           C\nATOM    137  C   GLN T  28      -9.765   9.381  10.351  1.00  0.00           C\nATOM    138  O   GLN T  28     -10.741   8.702  10.626  1.00  0.00           O\nATOM    139  CB  GLN T  28      -8.254    9.53  12.393  1.00  0.00           C\nATOM    140  N   GLN T  29      -9.833  10.507   9.696  1.00  0.00           N\nATOM    141  CA  GLN T  29     -11.087  11.084   9.216  1.00  0.00           C\nATOM    142  C   GLN T  29     -10.859  11.822   7.909  1.00  0.00           C\nATOM    143  O   GLN T  29     -10.389  12.818   7.899  1.00  0.00           O\nATOM    144  CB  GLN T  29     -11.438  12.422   9.974  1.00  0.00           C\nATOM    145  N   SER T  30     -11.206  11.302   6.818  1.00  0.00           N\nATOM    146  CA  SER T  30     -11.071  11.854   5.457  1.00  0.00           C\nATOM    147  C   SER T  30     -11.784  13.269   5.357  1.00  0.00           C\nATOM    148  O   SER T  30     -11.163  14.303    5.01  1.00  0.00           O\nATOM    149  CB  SER T  30      -11.85  11.351    4.13  1.00  0.00           C\nATOM    150  N   ILE T  31       -13.1  13.276   5.669  1.00  0.00           
N\nATOM    151  CA  ILE T  31     -13.979  14.525    5.64  1.00  0.00           C\nATOM    152  C   ILE T  31     -13.765  14.776   7.171  1.00  0.00           C\nATOM    153  O   ILE T  31     -14.183  14.025   7.879  1.00  0.00           O\nATOM    154  CB  ILE T  31     -15.244  13.923   4.994  1.00  0.00           C\nATOM    155  N   GLU T  32     -13.105  15.849   7.649  1.00  0.00           N\nATOM    156  CA  GLU T  32     -12.789  16.274   9.089  1.00  0.00           C\nATOM    157  C   GLU T  32     -14.343  16.477   9.299  1.00  0.00           C\nATOM    158  O   GLU T  32     -15.012  16.016  10.407  1.00  0.00           O\nATOM    159  CB  GLU T  32     -12.271  17.465   9.492  1.00  0.00           C\nATOM    160  N   GLN T  33     -14.885  17.178   8.204  1.00  0.00           N\nATOM    161  CA  GLN T  33     -16.359  17.491    8.18  1.00  0.00           C\nATOM    162  C   GLN T  33     -16.651  18.463   7.017  1.00  0.00           C\nATOM    163  O   GLN T  33     -17.483  19.442   7.081  1.00  0.00           O\nATOM    164  CB  GLN T  33     -16.278  18.192   9.462  1.00  0.00           C\nATOM    165  N   LEU T  34     -15.943  18.163   5.962  1.00  0.00           N\nATOM    166  CA  LEU T  34     -16.065  18.962   4.735  1.00  0.00           C\nATOM    167  C   LEU T  34     -14.782  18.739   3.924  1.00  0.00           C\nATOM    168  O   LEU T  34     -13.763  18.808   4.317  1.00  0.00           O\nATOM    169  CB  LEU T  34     -16.847   19.87   4.505  1.00  0.00           C\nATOM    170  N   GLU T  35     -14.869  18.472    2.79  1.00  0.00           N\nATOM    171  CA  GLU T  35     -13.754  18.224   1.858  1.00  0.00           C\nATOM    172  C   GLU T  35     -13.494  17.301   0.701  1.00  0.00           C\nATOM    173  O   GLU T  35     -13.514  16.077   0.913  1.00  0.00           O\nATOM    174  CB  GLU T  35     -13.072  16.974   2.373  1.00  0.00           C\nATOM    175  N   ASN T  36     -13.254  17.926  -0.517  1.00  0.00           
N\nATOM    176  CA  ASN T  36     -12.979   17.23   -1.77  1.00  0.00           C\nATOM    177  C   ASN T  36     -11.498  17.652  -2.194  1.00  0.00           C\nATOM    178  O   ASN T  36      -11.03  17.492  -3.255  1.00  0.00           O\nATOM    179  CB  ASN T  36     -14.389  16.986  -2.535  1.00  0.00           C\nATOM    180  N   GLY T  37     -10.788  18.191  -1.336  1.00  0.00           N\nATOM    181  CA  GLY T  37      -9.346  18.664  -1.546  1.00  0.00           C\nATOM    182  C   GLY T  37      -8.918  19.857  -0.449  1.00  0.00           C\nATOM    183  O   GLY T  37      -9.466  20.973  -0.249  1.00  0.00           O\nATOM    184  HB  GLY T  37      -8.736  19.035   -1.34  1.00  0.00           H\nATOM    185  N   LYS T  38      -7.928  19.585   0.246  1.00  0.00           N\nATOM    186  CA  LYS T  38       -7.36  20.584   1.346  1.00  0.00           C\nATOM    187  C   LYS T  38       -6.08  20.876   0.638  1.00  0.00           C\nATOM    188  O   LYS T  38      -5.541  22.036   0.669  1.00  0.00           O\nATOM    189  CB  LYS T  38      -7.069  19.838   2.725  1.00  0.00           C\nATOM    190  N   THR T  39      -5.618  19.791   0.007  1.00  0.00           N\nATOM    191  CA  THR T  39        -4.4  19.841  -0.738  1.00  0.00           C\nATOM    192  C   THR T  39      -2.869  19.823  -0.495  1.00  0.00           C\nATOM    193  O   THR T  39      -2.263   19.06   0.361  1.00  0.00           O\nATOM    194  CB  THR T  39      -4.025  21.588  -0.481  1.00  0.00           C\nATOM    195  N   LYS T  40      -2.271  20.682  -1.271  1.00  0.00           N\nATOM    196  CA  LYS T  40      -0.807  20.829  -1.205  1.00  0.00           C\nATOM    197  C   LYS T  40       0.112  19.708  -1.694  1.00  0.00           C\nATOM    198  O   LYS T  40      -0.122  18.993  -2.577  1.00  0.00           O\nATOM    199  CB  LYS T  40      -0.955  20.146   0.472  1.00  0.00           C\nATOM    200  N   ARG T  41       1.157  19.584  -1.095  1.00  0.00           
N\nATOM    201  CA  ARG T  41       2.166  18.572  -1.412  1.00  0.00           C\nATOM    202  C   ARG T  41       3.191  19.674  -1.254  1.00  0.00           C\nATOM    203  O   ARG T  41       4.417  19.465  -1.064  1.00  0.00           O\nATOM    204  CB  ARG T  41       1.493  18.656  -2.922  1.00  0.00           C\nATOM    205  N   PRO T  42       2.648  20.843  -1.338  1.00  0.00           N\nATOM    206  CA  PRO T  42       3.448  22.037  -1.214  1.00  0.00           C\nATOM    207  C   PRO T  42       3.999  21.863   0.054  1.00  0.00           C\nATOM    208  O   PRO T  42       3.175  21.788   0.968  1.00  0.00           O\nATOM    209  CB  PRO T  42       2.395  22.705  -2.291  1.00  0.00           C\nATOM    210  N   ARG T  43       5.408  21.803   0.074  1.00  0.00           N\nATOM    211  CA  ARG T  43       6.162  21.637   1.196  1.00  0.00           C\nATOM    212  C   ARG T  43       6.832  22.734   2.123  1.00  0.00           C\nATOM    213  O   ARG T  43       7.408  22.541   3.398  1.00  0.00           O\nATOM    214  CB  ARG T  43       7.407  20.511   0.736  1.00  0.00           C\nATOM    215  N   PHE T  44       6.737  23.881   1.445  1.00  0.00           N\nATOM    216  CA  PHE T  44       7.309  25.066   2.134  1.00  0.00           C\nATOM    217  C   PHE T  44       8.774  24.954   2.569  1.00  0.00           C\nATOM    218  O   PHE T  44       9.494  25.776   2.463  1.00  0.00           O\nATOM    219  CB  PHE T  44        6.33  25.059   3.172  1.00  0.00           C\nATOM    220  N   LEU T  45       9.185  23.916   3.056  1.00  0.00           N\nATOM    221  CA  LEU T  45      10.556  23.618   3.533  1.00  0.00           C\nATOM    222  C   LEU T  45      11.805  24.417   3.187  1.00  0.00           C\nATOM    223  O   LEU T  45      12.799  23.935   2.399  1.00  0.00           O\nATOM    224  CB  LEU T  45       10.36    22.6   2.337  1.00  0.00           C\nATOM    225  N   PRO T  46      11.719  25.645     3.8  1.00  0.00           
N\nATOM    226  CA  PRO T  46      12.803  26.582   3.612  1.00  0.00           C\nATOM    227  C   PRO T  46      12.512  27.531   4.744  1.00  0.00           C\nATOM    228  O   PRO T  46      11.432  27.523   5.133  1.00  0.00           O\nATOM    229  CB  PRO T  46      12.377  27.316    2.32  1.00  0.00           C\nATOM    230  N   GLU T  47      13.506  28.341   5.254  1.00  0.00           N\nATOM    231  CA  GLU T  47      13.438  29.331   6.349  1.00  0.00           C\nATOM    232  C   GLU T  47      12.347  29.012   7.461  1.00  0.00           C\nATOM    233  O   GLU T  47      11.922  29.816   8.312  1.00  0.00           O\nATOM    234  CB  GLU T  47      12.822  30.613   5.471  1.00  0.00           C\nATOM    235  N   LEU T  48      11.914  27.821   7.423  1.00  0.00           N\nATOM    236  CA  LEU T  48      10.868  27.311   8.396  1.00  0.00           C\nATOM    237  C   LEU T  48       12.04  26.498   9.129  1.00  0.00           C\nATOM    238  O   LEU T  48      12.148  26.493  10.271  1.00  0.00           O\nATOM    239  CB  LEU T  48      10.298  25.919   7.786  1.00  0.00           C\nATOM    240  N   ALA T  49      12.905   25.82   8.439  1.00  0.00           N\nATOM    241  CA  ALA T  49      14.103  24.972   8.954  1.00  0.00           C\nATOM    242  C   ALA T  49       13.95  24.371  10.303  1.00  0.00           C\nATOM    243  O   ALA T  49      14.548  24.776  11.285  1.00  0.00           O\nATOM    244  CB  ALA T  49      15.208  25.892   8.647  1.00  0.00           C\nATOM    245  N   SER T  50      13.135  23.399  10.314  1.00  0.00           N\nATOM    246  CA  SER T  50      12.843  22.683  11.506  1.00  0.00           C\nATOM    247  C   SER T  50      11.391  22.177  11.616  1.00  0.00           C\nATOM    248  O   SER T  50      10.893  22.135  12.666  1.00  0.00           O\nATOM    249  CB  SER T  50      13.472  23.762  12.073  1.00  0.00           C\nATOM    250  N   ALA T  51      10.737    21.8  10.506  1.00  0.00           
N\nATOM    251  CA  ALA T  51        9.33  21.281  10.391  1.00  0.00           C\nATOM    252  C   ALA T  51       9.181  20.018  11.155  1.00  0.00           C\nATOM    253  O   ALA T  51       8.302  19.394  11.022  1.00  0.00           O\nATOM    254  CB  ALA T  51       9.162  20.868   8.943  1.00  0.00           C\nATOM    255  N   LEU T  52      10.063  19.671  11.953  1.00  0.00           N\nATOM    256  CA  LEU T  52      10.099   18.49  12.778  1.00  0.00           C\nATOM    257  C   LEU T  52      10.394   17.48  11.644  1.00  0.00           C\nATOM    258  O   LEU T  52       9.752  16.501  11.545  1.00  0.00           O\nATOM    259  CB  LEU T  52       8.382   18.86  13.201  1.00  0.00           C\nATOM    260  N   GLY T  53       11.38  17.752    10.8  1.00  0.00           N\nATOM    261  CA  GLY T  53      11.828  16.913   9.638  1.00  0.00           C\nATOM    262  C   GLY T  53      12.742  15.619  10.305  1.00  0.00           C\nATOM    263  O   GLY T  53      12.548  14.323   10.21  1.00  0.00           O\nATOM    264  HB  GLY T  53      11.357  15.161  12.108  1.00  0.00           H\nATOM    265  N   VAL T  54      13.736  16.015  10.977  1.00  0.00           N\nATOM    266  CA  VAL T  54      14.731  14.949  11.692  1.00  0.00           C\nATOM    267  C   VAL T  54      15.745  14.897  10.514  1.00  0.00           C\nATOM    268  O   VAL T  54      16.552  13.966   10.34  1.00  0.00           O\nATOM    269  CB  VAL T  54      13.898  13.664  10.806  1.00  0.00           C\nATOM    270  N   SER T  55      15.676  15.921   9.721  1.00  0.00           N\nATOM    271  CA  SER T  55      16.556   16.07    8.53  1.00  0.00           C\nATOM    272  C   SER T  55      16.658  17.554   8.319  1.00  0.00           C\nATOM    273  O   SER T  55      15.647  18.291   8.025  1.00  0.00           O\nATOM    274  CB  SER T  55      16.318  15.892   7.024  1.00  0.00           C\nATOM    275  N   VAL T  56      17.905   17.96   8.479  1.00  0.00           
N\nATOM    276  CA  VAL T  56      18.232  19.345   8.322  1.00  0.00           C\nATOM    277  C   VAL T  56      18.416  19.517   6.838  1.00  0.00           C\nATOM    278  O   VAL T  56      18.836   18.67   6.284  1.00  0.00           O\nATOM    279  CB  VAL T  56      19.597  19.077   8.829  1.00  0.00           C\nATOM    280  N   ASP T  57      18.091  20.634   6.221  1.00  0.00           N\nATOM    281  CA  ASP T  57      18.191  20.998   4.794  1.00  0.00           C\nATOM    282  C   ASP T  57      18.963  22.207   4.783  1.00  0.00           C\nATOM    283  O   ASP T  57      19.785  22.433   5.668  1.00  0.00           O\nATOM    284  CB  ASP T  57      16.603  21.235   4.217  1.00  0.00           C\nATOM    285  N   TRP T  58      18.671  22.968   3.758  1.00  0.00           N\nATOM    286  CA  TRP T  58      19.296  24.178   3.552  1.00  0.00           C\nATOM    287  C   TRP T  58      20.713  24.273   3.295  1.00  0.00           C\nATOM    288  O   TRP T  58      21.507  25.379   3.757  1.00  0.00           O\nATOM    289  CB  TRP T  58      19.265  25.546   3.791  1.00  0.00           C\nATOM    290  N   LEU T  59      20.997  23.087   2.548  1.00  0.00           N\nATOM    291  CA  LEU T  59        22.3  22.943   2.178  1.00  0.00           C\nATOM    292  C   LEU T  59      23.684  22.469   2.719  1.00  0.00           C\nATOM    293  O   LEU T  59      24.542   23.01   2.734  1.00  0.00           O\nATOM    294  CB  LEU T  59      23.084  24.303   2.047  1.00  0.00           C\nATOM    295  N   LEU T  60      23.867  21.449   3.159  1.00  0.00           N\nATOM    296  CA  LEU T  60      25.121  20.835    3.72  1.00  0.00           C\nATOM    297  C   LEU T  60      24.992  19.279   3.974  1.00  0.00           C\nATOM    298  O   LEU T  60      25.263   18.82    5.04  1.00  0.00           O\nATOM    299  CB  LEU T  60      25.844  22.168   4.385  1.00  0.00           C\nATOM    300  N   ASN T  61      24.574  18.493   2.966  1.00  0.00           
N\nATOM    301  CA  ASN T  61      24.379  16.969   2.998  1.00  0.00           C\nATOM    302  C   ASN T  61      24.171  16.885    1.45  1.00  0.00           C\nATOM    303  O   ASN T  61      24.637  17.589   0.695  1.00  0.00           O\nATOM    304  CB  ASN T  61      22.938  16.796   3.077  1.00  0.00           C\nATOM    305  N   GLY T  62      23.461  16.007    1.01  1.00  0.00           N\nATOM    306  CA  GLY T  62      23.141  15.764  -0.438  1.00  0.00           C\nATOM    307  C   GLY T  62      22.262  16.731  -0.962  1.00  0.00           C\nATOM    308  O   GLY T  62      21.497  16.433  -1.969  1.00  0.00           O\nATOM    309  HB  GLY T  62      20.769  14.435   1.614  1.00  0.00           H\nATOM    310  N   THR T  63      22.399  17.889  -0.248  1.00  0.00           N\nATOM    311  CA  THR T  63       21.65  18.961  -0.573  1.00  0.00           C\nATOM    312  O   THR T  63      20.306  21.262  -1.306  1.00  0.00           O\nATOM    313  CB  THR T  63      20.958  19.565  -1.614  1.00  0.00           C\n'

In [1]:
# Point input_pdb_filename at an already-converted openmmawsem PDB on disk.
# NOTE(review): this rebinds the name that prepare_pdb(...) assigns in earlier cells, and the
# path is absolute and user-specific — confirm which file the cells below are meant to read.
input_pdb_filename = "/Users/weilu/Research/server/dec_2018/T0958_single_chain/openmm_T0958/T0958-openmmawsem.pdb"

In [60]:
# Parse the whitespace-delimited atom records of the PDB file into a DataFrame.
# - The separator is a raw string: "\s" in a normal string literal is an invalid
#   escape sequence and triggers a warning on recent Python versions.
# - Rows with any missing field (records shorter than the 12 named columns) are dropped.
# - reset_index() is kept without drop=True, so the original row number stays
#   available as an "index" column, as in the frame displayed further down.
data = (
    pd.read_table(
        input_pdb_filename,
        sep=r"\s+",
        header=None,
        names=["_", "i", "type", "res", "chain", "res_id", "x", "y", "z", "_1", "_2", "_3"],
    )
    .dropna()
    .reset_index()
)
# Residue numbers are used in integer comparisons by the query in a later cell.
data["res_id"] = data["res_id"].astype(int)

In [56]:
def compute_chi(data, chi0=-0.71, k_chi=60*4.184):
    """Return the total harmonic chirality energy of the residues in ``data``.

    For every residue the chirality value is the normalised triple product

        chi = ((C - CA) x (N - CA)) . (CB - CA) / (|C - CA| * |N - CA| * |CB - CA|)

    and the returned energy is ``sum(k_chi * (chi - chi0)**2)`` over residues.

    Parameters
    ----------
    data : pandas.DataFrame
        Atom table with a ``type`` column (atom name) and numeric ``x``, ``y``,
        ``z`` columns. The i-th ``N``, ``CA``, ``C`` and ``CB`` rows are taken to
        belong to the same residue, so the caller must filter out residues that
        lack any of these four atoms beforehand.
    chi0 : float, optional
        Reference chirality value (default -0.71, the value previously hard-coded).
    k_chi : float, optional
        Force constant (default 60*4.184, the value previously hard-coded).

    Returns
    -------
    float
        Total energy, in the units of ``k_chi``; 0.0 when there are no residues.

    Raises
    ------
    ValueError
        If the numbers of N, CA, C and CB rows differ. Pairing by position would
        then silently mix atoms of different residues.
    """
    def _coords(atom_type):
        # (n_atoms, 3) coordinate array for one atom name, in table order.
        return data.loc[data["type"] == atom_type, ["x", "y", "z"]].to_numpy(dtype=float)

    n_all = _coords("N")
    ca_all = _coords("CA")
    c_all = _coords("C")
    cb_all = _coords("CB")

    counts = {"N": len(n_all), "CA": len(ca_all), "C": len(c_all), "CB": len(cb_all)}
    if len(set(counts.values())) != 1:
        raise ValueError(
            f"compute_chi needs the same number of N, CA, C and CB atoms, got {counts}"
        )
    if len(n_all) == 0:
        return 0.0

    # Bond vectors, one row per residue.
    r_ca_cb = cb_all - ca_all
    r_ca_c = c_all - ca_all
    r_ca_n = n_all - ca_all

    # Normalised triple product; equivalent to the original per-residue formula
    # cross(-(CA - C), N - CA) . (CB - CA) divided by the three bond lengths.
    normal = np.cross(r_ca_c, r_ca_n)
    denom = (
        np.linalg.norm(r_ca_c, axis=1)
        * np.linalg.norm(r_ca_n, axis=1)
        * np.linalg.norm(r_ca_cb, axis=1)
    )
    chi = np.sum(normal * r_ca_cb, axis=1) / denom

    dchi = chi - chi0
    return float(k_chi * np.sum(dchi * dchi))

In [62]:
# compute_chi pairs the i-th N, CA, C and CB rows, so keep only residues that have all four:
# in the atom table shown in this notebook, IGL residues have no CB row, residue 1 has no N row
# and residue 63 has no C row.
chosen = data.query("res != 'IGL' and res_id != 1 and res_id != 63")
# Dividing by 4.184 cancels the 4.184 factor inside k_chi (60*4.184);
# presumably this converts kJ/mol to kcal/mol - TODO confirm units.
compute_chi(chosen)/4.184


Out[62]:
64.70332860251312

In [61]:
# Display the parsed atom table.
data


Out[61]:
index _ i type res chain res_id x y z _1 _2 _3
0 0 ATOM 1.0 CA NGP A 1 -4.514 -24.801 10.243 1.0 0.0 C
1 1 ATOM 2.0 C NGP A 1 -5.909 -25.417 10.716 1.0 0.0 C
2 2 ATOM 3.0 O NGP A 1 -6.285 -25.465 11.924 1.0 0.0 O
3 3 ATOM 4.0 CB NGP A 1 -3.986 -23.638 10.995 1.0 0.0 B
4 4 ATOM 5.0 N NGP A 2 -6.656 -25.883 9.731 1.0 0.0 N
5 5 ATOM 6.0 H NGP A 2 -6.353 -25.845 8.757 1.0 0.0 H
6 6 ATOM 7.0 CA NGP A 2 -8.029 -26.514 9.960 1.0 0.0 C
7 7 ATOM 8.0 C NGP A 2 -8.095 -27.697 8.943 1.0 0.0 C
8 8 ATOM 9.0 O NGP A 2 -7.165 -28.142 8.443 1.0 0.0 O
9 9 ATOM 10.0 CB NGP A 2 -9.303 -25.503 9.633 1.0 0.0 B
10 10 ATOM 11.0 N NGP A 3 -9.219 -28.185 8.658 1.0 0.0 N
11 11 ATOM 12.0 H NGP A 3 -9.969 -27.828 9.061 1.0 0.0 H
12 12 ATOM 13.0 CA NGP A 3 -9.491 -29.321 7.706 1.0 0.0 C
13 13 ATOM 14.0 C NGP A 3 -8.819 -28.888 6.387 1.0 0.0 C
14 14 ATOM 15.0 O NGP A 3 -8.268 -29.683 5.731 1.0 0.0 O
15 15 ATOM 16.0 CB NGP A 3 -11.087 -29.561 7.496 1.0 0.0 B
16 16 ATOM 17.0 N NGP A 4 -8.884 -27.613 6.028 1.0 0.0 N
17 17 ATOM 18.0 H NGP A 4 -9.328 -26.973 6.557 1.0 0.0 H
18 18 ATOM 19.0 CA NGP A 4 -8.303 -26.987 4.796 1.0 0.0 C
19 19 ATOM 20.0 C NGP A 4 -7.904 -25.648 5.451 1.0 0.0 C
20 20 ATOM 21.0 O NGP A 4 -7.163 -25.577 6.228 1.0 0.0 O
21 21 ATOM 22.0 CB NGP A 4 -9.721 -26.916 4.145 1.0 0.0 B
22 22 ATOM 23.0 N NGP A 5 -8.417 -24.601 5.114 1.0 0.0 N
23 23 ATOM 24.0 H NGP A 5 -9.014 -24.660 4.487 1.0 0.0 H
24 24 ATOM 25.0 CA NGP A 5 -8.162 -23.220 5.627 1.0 0.0 C
25 25 ATOM 26.0 C NGP A 5 -9.184 -22.209 5.003 1.0 0.0 C
26 26 ATOM 27.0 O NGP A 5 -9.756 -22.299 3.902 1.0 0.0 O
27 27 ATOM 28.0 CB NGP A 5 -6.512 -23.175 5.304 1.0 0.0 B
28 28 ATOM 29.0 N NGP A 6 -9.389 -21.252 5.738 1.0 0.0 N
29 29 ATOM 30.0 H NGP A 6 -8.928 -21.180 6.626 1.0 0.0 H
... ... ... ... ... ... ... ... ... ... ... ... ... ...
338 338 ATOM 339.0 O NGP A 58 10.776 14.866 11.188 1.0 0.0 O
339 339 ATOM 340.0 CB NGP A 58 13.208 13.085 9.547 1.0 0.0 B
340 340 ATOM 341.0 N NGP A 59 11.638 16.323 9.365 1.0 0.0 N
341 341 ATOM 342.0 H NGP A 59 12.358 16.659 8.628 1.0 0.0 H
342 342 ATOM 343.0 CA NGP A 59 10.530 17.112 9.337 1.0 0.0 C
343 343 ATOM 344.0 C NGP A 59 9.512 17.269 8.172 1.0 0.0 C
344 344 ATOM 345.0 O NGP A 59 8.821 18.057 8.165 1.0 0.0 O
345 345 ATOM 346.0 CB NGP A 59 10.707 18.685 8.991 1.0 0.0 B
346 346 ATOM 347.0 N NGP A 60 9.447 16.499 7.197 1.0 0.0 N
347 347 ATOM 348.0 H NGP A 60 10.004 15.864 7.203 1.0 0.0 H
348 348 ATOM 349.0 CA NGP A 60 8.536 16.490 5.983 1.0 0.0 C
349 349 ATOM 350.0 C NGP A 60 8.567 15.042 5.544 1.0 0.0 C
350 350 ATOM 351.0 O NGP A 60 7.760 14.537 4.835 1.0 0.0 O
351 351 ATOM 352.0 CB NGP A 60 9.137 17.848 5.715 1.0 0.0 B
352 352 ATOM 353.0 N NGP A 61 9.520 14.402 5.986 1.0 0.0 N
353 353 ATOM 354.0 H NGP A 61 10.171 14.810 6.557 1.0 0.0 H
354 354 ATOM 355.0 CA NGP A 61 9.729 13.002 5.682 1.0 0.0 C
355 355 ATOM 356.0 C NGP A 61 9.079 11.608 5.876 1.0 0.0 C
356 356 ATOM 357.0 O NGP A 61 8.187 11.360 6.726 1.0 0.0 O
357 357 ATOM 358.0 CB NGP A 61 10.640 12.562 6.623 1.0 0.0 B
358 358 ATOM 359.0 N IGL A 62 9.555 10.718 5.066 1.0 0.0 N
359 359 ATOM 360.0 H IGL A 62 10.276 10.918 4.381 1.0 0.0 H
360 360 ATOM 361.0 CA IGL A 62 9.073 9.318 5.083 1.0 0.0 C
361 361 ATOM 362.0 C IGL A 62 9.824 8.197 5.814 1.0 0.0 C
362 362 ATOM 363.0 O IGL A 62 9.302 7.213 5.980 1.0 0.0 O
363 363 ATOM 364.0 N NGP A 63 11.056 8.379 6.241 1.0 0.0 N
364 364 ATOM 365.0 H NGP A 63 11.477 9.173 6.107 1.0 0.0 H
365 365 ATOM 366.0 CA NGP A 63 11.953 7.425 6.967 1.0 0.0 C
366 366 ATOM 367.0 O NGP A 63 13.303 8.289 5.804 1.0 0.0 O
367 367 ATOM 368.0 CB NGP A 63 13.106 8.326 7.233 1.0 0.0 B

368 rows × 13 columns


In [162]:
# Scratch version of a native-contact search over one chain of a cleaned PDB structure.
# PDBParser and nanometers are not imported by name in this notebook; presumably they come
# from the `from openmmawsem import *` star import - TODO confirm.
# NOTE(review): nothing is ever appended to structure_interactions here; the code that would
# do so is commented out at the bottom of the loop (it refers to self.ca / self.cb).
pdb_file = "/Users/weilu/openmmawsem/_local/run_openmm_awsem/crystal_structure-cleaned.pdb"
chain_name = "A"
min_seq_sep=3
max_seq_sep=np.inf
contact_threshold=0.8*nanometers
structure_interactions = []
parser = PDBParser()
structure = parser.get_structure('X', pdb_file)
# First model of the structure, selected chain.
chain = structure[0][chain_name]
residues = [x for x in chain]
for i, residue_i in enumerate(residues):
    for j, residue_j in enumerate(residues):
        # Per-pair scratch lists, rebuilt for every (i, j) combination.
        ca_list = []
        cb_list = []
        atom_list_i = []
        atom_list_j = []
        if i-j >= min_seq_sep and i-j <= max_seq_sep:  # taking the signed value to avoid double counting
            # CA is always used; CB is added for every residue that is not named GLY.
            ca_i = residue_i['CA']
            ca_list.append(ca_i)
            atom_list_i.append(ca_i)
            ca_j = residue_j['CA']
            ca_list.append(ca_j)
            atom_list_j.append(ca_j)
            if not residue_i.get_resname() == "GLY":
                cb_i = residue_i['CB']
                cb_list.append(cb_i)
                atom_list_i.append(cb_i)
            if not residue_j.get_resname() == "GLY":
                cb_j = residue_j['CB']
                cb_list.append(cb_j)
                atom_list_j.append(cb_j)
            # Every CA/CB atom of residue i against every CA/CB atom of residue j.
            for atom_i, atom_j in product(atom_list_i, atom_list_j):
                # atom_i - atom_j presumably yields the inter-atom distance in Angstrom
                # (Bio.PDB Atom semantics) - TODO confirm.
                r_ijN = abs(atom_i - atom_j)/10.0*nanometers # convert to nm
                
                if r_ijN <= contact_threshold:
                    # Well width grows with sequence separation; both values are currently unused.
                    sigma_ij = 0.1*abs(i-j)**0.15 # 0.1 nm = 1 A
                    gamma_ij = 1.0
#                     if atom_i in ca_list:
#                         i_index = self.ca[i]
#                     if atom_i in cb_list:
#                         i_index = self.cb[i]
#                     if atom_j in ca_list:
#                         j_index = self.ca[j]
#                     if atom_j in cb_list:
#                         j_index = self.cb[j]
#                     structure_interaction = [i_index, j_index, [gamma_ij, r_ijN, sigma_ij]]
#                     structure_interactions.append(structure_interaction)

In [168]:
# Sanity check: look up the CA atom of every residue and print it
# (a residue without a 'CA' entry would presumably raise here - TODO confirm).
for i, residue_i in enumerate(residues):
    print(residue_i['CA'])


<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>
<Atom CA>

In [5]:
# getSeqFromCleanPdb is not defined in this notebook (presumably provided by openmmawsem);
# judging by its name it returns the sequence of the given PDB file - TODO confirm.
a = getSeqFromCleanPdb("/Users/weilu/openmmawsem/_local/2fha_openmm/2fha/2fha-openmmawsem.pdb")

In [6]:
# Number of items in `a`.
len(a)


Out[6]:
150

In [8]:
# Build the sequence of a cleaned PDB by parsing it as a whitespace-separated table and
# write one FASTA record per chain.
# Relies on names assigned in other cells: cleaned_pdb_filename, fastaFile, chains, pdb, ThreeToOne.
#
# NOTE(review): whitespace splitting mis-parses fixed-width PDB lines whose coordinate
# fields touch (e.g. "195.609-158.530" in the unfiltered table shown in this notebook).
# Such rows end up with a NaN column and are removed by dropna(), so residues can be lost
# from the sequence. The PDBParser-based cell in this notebook does not have this problem.

# determine number of rows to skip
skipRows = 0
found = False
with open(cleaned_pdb_filename) as f:
    for line in f:
        if len(line) > 5:
            if line[:4] == "ATOM":
                found = True
                break
        skipRows += 1
if not found:
    # Only reported; the read below is still attempted.
    print("No ATOM found")
# Raw-string separator: "\s" in a normal string literal is an invalid escape sequence.
a = pd.read_table(cleaned_pdb_filename, skiprows=skipRows, sep=r"\s+", names=["ATOM", "i", "Type", "Res", "Chain", "ResId", "x", "y", "z", "_", "_1", "_2"]).dropna()

# save chain seq to pdb.fasta
import textwrap
with open(fastaFile, "w") as out:
    for chain in chains:
        out.write(f">{pdb.upper()}:{chain.upper()}|PDBID|CHAIN|SEQUENCE\n")
        # One CA row per residue, so the CA rows give the residue names in order.
        threeLetterSeq = a.query(f"Chain == '{chain}' and Type == 'CA'")["Res"]
        chain_seq = "".join([ThreeToOne[i] for i in threeLetterSeq])
        # FASTA body wrapped at 80 characters per line.
        out.write("\n".join(textwrap.wrap(chain_seq, width=80))+"\n")
# Full sequence over all chains, in table order.
threeLetterSeq = a.query("Type == 'CA'")["Res"]
seq = "".join([ThreeToOne[i] for i in threeLetterSeq])

In [11]:
# Structure parser reused by the FASTA-writing cell; PDBParser is not imported by name in
# this notebook (presumably it comes from the openmmawsem star import - TODO confirm).
parser = PDBParser()

In [30]:
# Write a FASTA file for the selected chains of the cleaned PDB, reading residue names
# through the structure parser, and accumulate the concatenated sequence in `seq`.
# Requires `parser` (a PDBParser instance) from another cell.
import textwrap  # imported here so the cell does not depend on an import made in another cell

input_pdb_filename = "/Users/weilu/openmmawsem/_local/2fha_openmm/2fha/2fha-openmmawsem.pdb"
# Iterated character by character below, i.e. one chain id per character.
chains = "A"
cleaned_pdb_filename = input_pdb_filename.replace("openmmawsem.pdb", "cleaned.pdb")
# NOTE: `pdb` is the full path without the "-openmmawsem.pdb" suffix, so the FASTA header
# written below contains the upper-cased path, not just the PDB id.
pdb = input_pdb_filename.replace("-openmmawsem.pdb", "")
fastaFile = pdb + ".fasta"
# Three-letter to one-letter codes for the 20 standard amino acids; any other residue name
# raises KeyError in the loop below.
ThreeToOne = {'ALA':'A','ARG':'R','ASN':'N','ASP':'D','CYS':'C','GLU':'E','GLN':'Q','GLY':'G','HIS':'H',
       'ILE':'I','LEU':'L','LYS':'K','MET':'M','PHE':'F','PRO':'P','SER':'S','THR':'T','TRP':'W',
       'TYR':'Y','VAL':'V'}

s = parser.get_structure("X", cleaned_pdb_filename)
m = s[0] # model 0
seq = ""
with open(fastaFile, "w") as out:
    for chain in chains:
        out.write(f">{pdb.upper()}:{chain.upper()}|PDBID|CHAIN|SEQUENCE\n")
        c = m[chain]
        chain_seq = ""
        for residue in c:
            residue_name = residue.get_resname()
            chain_seq += ThreeToOne[residue_name]
        # FASTA body wrapped at 80 characters per line.
        out.write("\n".join(textwrap.wrap(chain_seq, width=80))+"\n")
        seq += chain_seq

In [31]:


In [32]:
# Number of residues in the concatenated sequence.
len(seq)


Out[32]:
172

In [33]:
# Show the one-letter sequence string.
seq


Out[33]:
'TSQVRQNYHQDSEAAINRQINLELYASYVYLSMSYYFDRDDVALKNFAKYFLHQSHEEREHAEKLMKLQNQRGGRIFLQDIQKPDCDDWESGLNAMECALHLEKNVNQSLLELHKLATDKNDPHLCDFIETHYLNEQVKAIKELGDHVTNLRKMGAPESGLAEYLFDKHTLG'

In [10]:
# Same whitespace parse as used to build `a`, but without dropna(), to inspect the rows that get dropped.
# Raw-string separator: "\s" in a normal string literal is an invalid escape sequence.
pd.read_table(cleaned_pdb_filename, skiprows=skipRows, sep=r"\s+", names=["ATOM", "i", "Type", "Res", "Chain", "ResId", "x", "y", "z", "_", "_1", "_2"])


Out[10]:
ATOM i Type Res Chain ResId x y z _ _1 _2
0 ATOM 1.0 N THR A 1.0 -0.521 -2.002 -0.796 1.0 0.00 N
1 ATOM 2.0 H2 THR A 1.0 -1.473 -2.223 -0.099 1.0 0.00 H
2 ATOM 3.0 H3 THR A 1.0 -0.916 -2.230 -1.909 1.0 0.00 H
3 ATOM 4.0 CA THR A 1.0 0.097 -0.680 -0.796 1.0 0.00 C
4 ATOM 5.0 C THR A 1.0 1.603 -0.786 -0.796 1.0 0.00 C
5 ATOM 6.0 O THR A 1.0 2.193 -1.558 -0.068 1.0 0.00 O
6 ATOM 7.0 CB THR A 1.0 -0.399 0.159 0.429 1.0 0.00 C
7 ATOM 8.0 CG2 THR A 1.0 0.182 1.583 0.564 1.0 0.00 C
8 ATOM 9.0 OG1 THR A 1.0 -1.806 0.353 0.365 1.0 0.00 O
9 ATOM 10.0 H THR A 1.0 0.140 -2.971 -0.533 1.0 0.00 H
10 ATOM 11.0 HA THR A 1.0 -0.364 -0.131 -1.750 1.0 0.00 H
11 ATOM 12.0 HB THR A 1.0 -0.082 -0.410 1.430 1.0 0.00 H
12 ATOM 13.0 HG1 THR A 1.0 -2.296 0.113 1.419 1.0 0.00 H
13 ATOM 14.0 HG21 THR A 1.0 -0.360 2.135 1.481 1.0 0.00 H
14 ATOM 15.0 HG22 THR A 1.0 1.329 1.664 0.893 1.0 0.00 H
15 ATOM 16.0 HG23 THR A 1.0 -0.035 2.324 -0.351 1.0 0.00 H
16 ATOM 17.0 N SER A 2.0 2.283 -0.035 -1.594 1.0 0.00 N
17 ATOM 18.0 CA SER A 2.0 3.739 -0.138 -1.593 1.0 0.00 C
18 ATOM 19.0 C SER A 2.0 4.378 1.227 -1.691 1.0 0.00 C
19 ATOM 20.0 O SER A 2.0 3.982 2.077 -2.489 1.0 0.00 O
20 ATOM 21.0 CB SER A 2.0 4.226 -1.077 -2.725 1.0 0.00 C
21 ATOM 22.0 OG SER A 2.0 4.031 -0.524 -4.030 1.0 0.00 O
22 ATOM 23.0 H SER A 2.0 1.834 0.557 -2.523 1.0 0.00 H
23 ATOM 24.0 HA SER A 2.0 4.157 -0.670 -0.612 1.0 0.00 H
24 ATOM 25.0 HB2 SER A 2.0 3.819 -2.197 -2.619 1.0 0.00 H
25 ATOM 26.0 HB3 SER A 2.0 5.413 -1.224 -2.762 1.0 0.00 H
26 ATOM 27.0 HG SER A 2.0 3.633 -1.360 -4.773 1.0 0.00 H
27 ATOM 28.0 N GLN A 3.0 5.369 1.512 -0.916 1.0 0.00 N
28 ATOM 29.0 CA GLN A 3.0 5.987 2.831 -1.010 1.0 0.00 C
29 ATOM 30.0 C GLN A 3.0 7.491 2.734 -0.912 1.0 0.00 C
... ... ... ... ... ... ... ... ... ... ... ... ...
2844 ATOM 2845.0 N LEU A 171.0 467.037 195.609-158.530 1.000 0.0 N NaN
2845 ATOM 2846.0 CA LEU A 171.0 466.822 196.485-159.679 1.000 0.0 C NaN
2846 ATOM 2847.0 C LEU A 171.0 467.720 196.097-160.829 1.000 0.0 C NaN
2847 ATOM 2848.0 O LEU A 171.0 468.922 195.839-160.667 1.000 0.0 O NaN
2848 ATOM 2849.0 CB LEU A 171.0 467.042 197.972-159.280 1.000 0.0 C NaN
2849 ATOM 2850.0 CG LEU A 171.0 466.177 198.556-158.130 1.000 0.0 C NaN
2850 ATOM 2851.0 CD1 LEU A 171.0 466.462 200.056-157.964 1.000 0.0 C NaN
2851 ATOM 2852.0 CD2 LEU A 171.0 464.671 198.336-158.350 1.000 0.0 C NaN
2852 ATOM 2853.0 H LEU A 171.0 468.186 195.461-158.257 1.000 0.0 H NaN
2853 ATOM 2854.0 HA LEU A 171.0 465.744 196.359-160.170 1.000 0.0 H NaN
2854 ATOM 2855.0 HB2 LEU A 171.0 466.920 198.629-160.273 1.000 0.0 H NaN
2855 ATOM 2856.0 HB3 LEU A 171.0 468.198 198.106-159.003 1.000 0.0 H NaN
2856 ATOM 2857.0 HG LEU A 171.0 466.527 198.124-157.076 1.000 0.0 H NaN
2857 ATOM 2858.0 HD11 LEU A 171.0 467.612 200.304-157.724 1.000 0.0 H NaN
2858 ATOM 2859.0 HD12 LEU A 171.0 466.217 200.780-158.888 1.000 0.0 H NaN
2859 ATOM 2860.0 HD13 LEU A 171.0 465.928 200.627-157.055 1.000 0.0 H NaN
2860 ATOM 2861.0 HD21 LEU A 171.0 464.326 198.931-159.335 1.000 0.0 H NaN
2861 ATOM 2862.0 HD22 LEU A 171.0 463.895 197.428-158.449 1.000 0.0 H NaN
2862 ATOM 2863.0 HD23 LEU A 171.0 464.069 198.891-157.472 1.000 0.0 H NaN
2863 ATOM 2864.0 N GLY A 172.0 467.221 196.033-162.017 1.000 0.0 N NaN
2864 ATOM 2865.0 CA GLY A 172.0 468.089 195.657-163.129 1.000 0.0 C NaN
2865 ATOM 2866.0 HA2 GLY A 172.0 469.253 195.805-162.893 1.000 0.0 H NaN
2866 ATOM 2867.0 C GLY A 172.0 467.799 196.499-164.349 1.000 0.0 C NaN
2867 ATOM 2868.0 O GLY A 172.0 466.931 197.376-164.347 1.000 0.0 O NaN
2868 ATOM 2869.0 H GLY A 172.0 466.095 195.837-162.350 1.000 0.0 H NaN
2869 ATOM 2870.0 HA3 GLY A 172.0 467.851 194.508-163.361 1.000 0.0 H NaN
2870 ATOM 2871.0 HA GLY A 172.0 474.286 195.558-164.836 1.000 0.0 H NaN
2871 ATOM 2872.0 OXT GLY A 172.0 468.671 195.855-164.887 1.000 0.0 O NaN
2872 TER 2873.0 GLY A 172 NaN NaN NaN NaN NaN NaN NaN
2873 END NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

2874 rows × 12 columns


In [9]:
# Display `a` (in the output shown here, the whitespace-parsed atom table after dropna()).
a


Out[9]:
ATOM i Type Res Chain ResId x y z _ _1 _2
0 ATOM 1.0 N THR A 1.0 -0.521 -2.002 -0.796 1.0 0.00 N
1 ATOM 2.0 H2 THR A 1.0 -1.473 -2.223 -0.099 1.0 0.00 H
2 ATOM 3.0 H3 THR A 1.0 -0.916 -2.230 -1.909 1.0 0.00 H
3 ATOM 4.0 CA THR A 1.0 0.097 -0.680 -0.796 1.0 0.00 C
4 ATOM 5.0 C THR A 1.0 1.603 -0.786 -0.796 1.0 0.00 C
5 ATOM 6.0 O THR A 1.0 2.193 -1.558 -0.068 1.0 0.00 O
6 ATOM 7.0 CB THR A 1.0 -0.399 0.159 0.429 1.0 0.00 C
7 ATOM 8.0 CG2 THR A 1.0 0.182 1.583 0.564 1.0 0.00 C
8 ATOM 9.0 OG1 THR A 1.0 -1.806 0.353 0.365 1.0 0.00 O
9 ATOM 10.0 H THR A 1.0 0.140 -2.971 -0.533 1.0 0.00 H
10 ATOM 11.0 HA THR A 1.0 -0.364 -0.131 -1.750 1.0 0.00 H
11 ATOM 12.0 HB THR A 1.0 -0.082 -0.410 1.430 1.0 0.00 H
12 ATOM 13.0 HG1 THR A 1.0 -2.296 0.113 1.419 1.0 0.00 H
13 ATOM 14.0 HG21 THR A 1.0 -0.360 2.135 1.481 1.0 0.00 H
14 ATOM 15.0 HG22 THR A 1.0 1.329 1.664 0.893 1.0 0.00 H
15 ATOM 16.0 HG23 THR A 1.0 -0.035 2.324 -0.351 1.0 0.00 H
16 ATOM 17.0 N SER A 2.0 2.283 -0.035 -1.594 1.0 0.00 N
17 ATOM 18.0 CA SER A 2.0 3.739 -0.138 -1.593 1.0 0.00 C
18 ATOM 19.0 C SER A 2.0 4.378 1.227 -1.691 1.0 0.00 C
19 ATOM 20.0 O SER A 2.0 3.982 2.077 -2.489 1.0 0.00 O
20 ATOM 21.0 CB SER A 2.0 4.226 -1.077 -2.725 1.0 0.00 C
21 ATOM 22.0 OG SER A 2.0 4.031 -0.524 -4.030 1.0 0.00 O
22 ATOM 23.0 H SER A 2.0 1.834 0.557 -2.523 1.0 0.00 H
23 ATOM 24.0 HA SER A 2.0 4.157 -0.670 -0.612 1.0 0.00 H
24 ATOM 25.0 HB2 SER A 2.0 3.819 -2.197 -2.619 1.0 0.00 H
25 ATOM 26.0 HB3 SER A 2.0 5.413 -1.224 -2.762 1.0 0.00 H
26 ATOM 27.0 HG SER A 2.0 3.633 -1.360 -4.773 1.0 0.00 H
27 ATOM 28.0 N GLN A 3.0 5.369 1.512 -0.916 1.0 0.00 N
28 ATOM 29.0 CA GLN A 3.0 5.987 2.831 -1.010 1.0 0.00 C
29 ATOM 30.0 C GLN A 3.0 7.491 2.734 -0.912 1.0 0.00 C
... ... ... ... ... ... ... ... ... ... ... ... ...
2486 ATOM 2487.0 HG22 VAL A 148.0 433.012 170.425 -96.817 1.0 0.00 H
2487 ATOM 2488.0 HG23 VAL A 148.0 434.057 170.853 -95.343 1.0 0.00 H
2488 ATOM 2489.0 N THR A 149.0 435.596 175.352 -96.434 1.0 0.00 N
2489 ATOM 2490.0 CA THR A 149.0 436.062 176.544 -97.135 1.0 0.00 C
2490 ATOM 2491.0 C THR A 149.0 437.340 176.264 -97.888 1.0 0.00 C
2491 ATOM 2492.0 O THR A 149.0 438.260 175.649 -97.388 1.0 0.00 O
2492 ATOM 2493.0 CB THR A 149.0 436.259 177.730 -96.131 1.0 0.00 C
2493 ATOM 2494.0 CG2 THR A 149.0 436.761 179.060 -96.733 1.0 0.00 C
2494 ATOM 2495.0 OG1 THR A 149.0 435.026 178.065 -95.506 1.0 0.00 O
2495 ATOM 2496.0 H THR A 149.0 436.418 174.889 -95.709 1.0 0.00 H
2496 ATOM 2497.0 HA THR A 149.0 435.248 176.946 -97.907 1.0 0.00 H
2497 ATOM 2498.0 HB THR A 149.0 437.061 177.432 -95.298 1.0 0.00 H
2498 ATOM 2499.0 HG1 THR A 149.0 435.216 178.352 -94.371 1.0 0.00 H
2499 ATOM 2500.0 HG21 THR A 149.0 436.673 179.955 -95.938 1.0 0.00 H
2500 ATOM 2501.0 HG22 THR A 149.0 437.944 179.026 -96.896 1.0 0.00 H
2501 ATOM 2502.0 HG23 THR A 149.0 436.091 179.480 -97.630 1.0 0.00 H
2502 ATOM 2503.0 N ASN A 150.0 437.459 176.692 -99.099 1.0 0.00 N
2503 ATOM 2504.0 CA ASN A 150.0 438.695 176.422 -99.827 1.0 0.00 C
2508 ATOM 2509.0 ND2 ASN A 150.0 438.937 173.165 -99.449 1.0 0.00 N
2510 ATOM 2511.0 H ASN A 150.0 436.562 177.025 -99.807 1.0 0.00 H
2511 ATOM 2512.0 HA ASN A 150.0 439.606 176.143 -99.111 1.0 0.00 H
2514 ATOM 2515.0 HD21 ASN A 150.0 438.647 172.077 -99.058 1.0 0.00 H
2515 ATOM 2516.0 HD22 ASN A 150.0 440.127 173.185 -99.400 1.0 0.00 H
2521 ATOM 2522.0 CG LEU A 151.0 439.549 180.842 -99.725 1.0 0.00 C
2522 ATOM 2523.0 CD1 LEU A 151.0 439.781 182.165 -98.982 1.0 0.00 C
2527 ATOM 2528.0 HB3 LEU A 151.0 441.712 180.239 -99.676 1.0 0.00 H
2528 ATOM 2529.0 HG LEU A 151.0 439.352 180.085 -98.827 1.0 0.00 H
2529 ATOM 2530.0 HD11 LEU A 151.0 440.688 182.139 -98.197 1.0 0.00 H
2530 ATOM 2531.0 HD12 LEU A 151.0 440.015 183.150 -99.625 1.0 0.00 H
2531 ATOM 2532.0 HD13 LEU A 151.0 438.890 182.537 -98.269 1.0 0.00 H

2516 rows × 12 columns


In [ ]: