notebook.community

Edit and run



In [1]:

    
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
sys.path.insert(0,'/Users/weilu/Research/opt_server/')
# from notebookFunctions import *
# from .. import notebookFunctions

from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2



In [2]:

    
from Bio.PDB.Polypeptide import d1_to_index
from Bio.PDB.Polypeptide import dindex_to_1
from Bio.PDB.Polypeptide import aa3



In [3]:

    
plt.rcParams['figure.figsize'] = [16.18033, 10]    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100



In [4]:

    
a = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/all_data_no_round_iterative_native_with_cbd_info.csv", index_col=0)
a["D_H"] = a["Density_H_x"] + a["Density_H_y"]
a["D_P"] = a["Density_P_x"] + a["Density_P_y"]
data_native = a
data_native.groupby(["Res1", "Res2"])["Theta"].count().reset_index().sort_values("Theta")









    Out[4]:







  
    
      
      Res1
      Res2
      Theta
    
  
  
    
      83
      CYS
      ASP
      4
    
    
      64
      ASP
      CYS
      4
    
    
      124
      GLU
      CYS
      7
    
    
      85
      CYS
      GLN
      7
    
    
      86
      CYS
      GLU
      7
    
    
      ...
      ...
      ...
      ...
    
    
      199
      ILE
      VAL
      370
    
    
      209
      LEU
      ILE
      424
    
    
      190
      ILE
      LEU
      424
    
    
      219
      LEU
      VAL
      490
    
    
      390
      VAL
      LEU
      490
    
  

400 rows × 3 columns



In [5]:

    
a = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/all_data_no_round_iterative_with_cbd_info.csv", index_col=0)
a["D_H"] = a["Density_H_x"] + a["Density_H_y"]
a["D_P"] = a["Density_P_x"] + a["Density_P_y"]
data_iterative = a
data_iterative.groupby(["Res1", "Res2"])["Theta"].count().reset_index().sort_values("Theta")









    Out[5]:







  
    
      
      Res1
      Res2
      Theta
    
  
  
    
      84
      CYS
      CYS
      1
    
    
      242
      MET
      ASN
      5
    
    
      52
      ASN
      MET
      5
    
    
      105
      GLN
      GLN
      7
    
    
      357
      TRP
      TRP
      7
    
    
      ...
      ...
      ...
      ...
    
    
      209
      LEU
      ILE
      440
    
    
      389
      VAL
      ILE
      512
    
    
      199
      ILE
      VAL
      512
    
    
      219
      LEU
      VAL
      614
    
    
      390
      VAL
      LEU
      614
    
  

400 rows × 3 columns



In [12]:

    
a = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/all_data_no_round_cath.csv", index_col=0)
a["D_H"] = a["Density_H_x"] + a["Density_H_y"]
a["D_P"] = a["Density_P_x"] + a["Density_P_y"]
data_old = a
data_old.groupby(["Res1", "Res2"])["Theta"].count().reset_index().sort_values("Theta")









    Out[12]:







  
    
      
      Res1
      Res2
      Theta
    
  
  
    
      357
      TRP
      TRP
      35
    
    
      168
      HIS
      HIS
      49
    
    
      44
      ASN
      CYS
      54
    
    
      82
      CYS
      ASN
      54
    
    
      64
      ASP
      CYS
      55
    
    
      ...
      ...
      ...
      ...
    
    
      390
      VAL
      LEU
      3135
    
    
      219
      LEU
      VAL
      3135
    
    
      190
      ILE
      LEU
      3583
    
    
      209
      LEU
      ILE
      3583
    
    
      210
      LEU
      LEU
      3753
    
  

400 rows × 3 columns



In [13]:

    
a = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/all_data_no_round_cath_with_cbd_info.csv", index_col=0)
a["D_H"] = a["Density_H_x"] + a["Density_H_y"]
a["D_P"] = a["Density_P_x"] + a["Density_P_y"]
data2 = a
data2.groupby(["Res1", "Res2"])["Theta"].count().reset_index().sort_values("Theta")









    Out[13]:







  
    
      
      Res1
      Res2
      Theta
    
  
  
    
      83
      CYS
      ASP
      31
    
    
      64
      ASP
      CYS
      31
    
    
      104
      GLN
      CYS
      42
    
    
      44
      ASN
      CYS
      42
    
    
      82
      CYS
      ASN
      42
    
    
      ...
      ...
      ...
      ...
    
    
      390
      VAL
      LEU
      3564
    
    
      219
      LEU
      VAL
      3564
    
    
      210
      LEU
      LEU
      4051
    
    
      190
      ILE
      LEU
      4107
    
    
      209
      LEU
      ILE
      4107
    
  

400 rows × 3 columns



In [9]:

    
b = data2.query("Res1=='VAL' and Res2=='LEU'").reset_index(drop=True)
b["D_H"] = b["Density_H_x"] + b["Density_H_y"]
b["D_P"] = b["Density_P_x"] + b["Density_P_y"]
sns.jointplot("D_H", "D_P", data=b, kind="kde")









    Out[9]:





<seaborn.axisgrid.JointGrid at 0x101472320>



In [28]:

    
b = data2.query("Res1=='SER' and Res2=='ILE'").reset_index(drop=True)
b["D_H"] = b["Density_H_x"] + b["Density_H_y"]
b["D_P"] = b["Density_P_x"] + b["Density_P_y"]
sns.jointplot("D_H", "D_P", data=b, kind="kde")









    Out[28]:





<seaborn.axisgrid.JointGrid at 0x12141a358>



In [33]:

    
data2.groupby(["Res1", "Res2"])["Theta"].count().reset_index().sort_values("Theta")[:-100].hist("Theta", bins=50)









    Out[33]:





array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1211ddf60>]],
      dtype=object)



In [16]:

    
b = data2.query("Res1=='GLU' and Res2=='ALA'").reset_index(drop=True)
b["D_H"] = b["Density_H_x"] + b["Density_H_y"]
b["D_P"] = b["Density_P_x"] + b["Density_P_y"]
sns.jointplot("D_H", "D_P", data=b, kind="kde")









    Out[16]:





<seaborn.axisgrid.JointGrid at 0x1202deeb8>



In [14]:

    
data2 = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/all_data_no_round_cath.csv", index_col=0)
data2.groupby(["Res1", "Res2"])["Theta"].count().reset_index().sort_values("Theta")
b = data2.query("Res1=='GLU' and Res2=='ALA'").reset_index(drop=True)
b["D_H"] = b["Density_H_x"] + b["Density_H_y"]
b["D_P"] = b["Density_P_x"] + b["Density_P_y"]
sns.jointplot("D_H", "D_P", data=b, kind="kde")









    Out[14]:





<seaborn.axisgrid.JointGrid at 0x120668550>



In [28]:

    
# data2 = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/all_data_no_round_cath.csv", index_col=0)
# data2.groupby(["Res1", "Res2"])["Theta"].count().reset_index().sort_values("Theta")
a = data_old.query("Res1=='LEU' and Res2=='LEU'").reset_index(drop=True)
sns.jointplot("D_H", "D_P", data=a, kind="kde")









    Out[28]:





<seaborn.axisgrid.JointGrid at 0x11eee8f60>



In [25]:

    
# data2 = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/all_data_no_round_cath.csv", index_col=0)
# data2.groupby(["Res1", "Res2"])["Theta"].count().reset_index().sort_values("Theta")
a = data2.query("Res1=='LEU' and Res2=='LEU'").reset_index(drop=True)
a["D_H"] = a["Density_H_x"] + a["Density_H_y"]
a["D_P"] = a["Density_P_x"] + a["Density_P_y"]
sns.jointplot("D_H", "D_P", data=a, kind="kde")









    Out[25]:





<seaborn.axisgrid.JointGrid at 0x11f340860>



In [30]:

    
a = data_iterative.query("Res1=='LEU' and Res2=='LEU'").reset_index(drop=True)
a["D_H"] = a["Density_H_x"] + a["Density_H_y"]
a["D_P"] = a["Density_P_x"] + a["Density_P_y"]
sns.jointplot("D_H", "D_P", data=a, kind="kde")









    Out[30]:





<seaborn.axisgrid.JointGrid at 0x103b57eb8>



In [30]:

    
info = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/info_collection.csv")



In [33]:

    
info = info.query("Steps==2001")[["Q", "Run", "Protein", "Folder"]].reset_index(drop=True)



In [34]:

    
info.query("Folder")









    Out[34]:







  
    
      
      Q
      Run
      Protein
      Folder
    
  
  
    
      0
      0.36
      0
      1hoe
      iteration_3
    
    
      1
      0.42
      1
      1hoe
      iteration_3
    
    
      2
      0.43
      0
      1tif
      iteration_3
    
    
      3
      0.45
      1
      1tif
      iteration_3
    
    
      4
      0.80
      0
      1vcc
      iteration_3
    
    
      ...
      ...
      ...
      ...
      ...
    
    
      149
      0.47
      1
      2hbg
      iteration_3
    
    
      150
      0.81
      0
      1akr
      iteration_3
    
    
      151
      0.76
      1
      1akr
      iteration_3
    
    
      152
      0.51
      0
      1osa
      iteration_3
    
    
      153
      0.44
      1
      1osa
      iteration_3
    
  

154 rows × 4 columns



In [38]:

    
data_iterative = data_iterative.merge(info, on=["Protein", "Run"])



In [43]:

    
a = data_iterative.query("Res1=='LEU' and Res2=='LEU' and Q < 0.5").reset_index(drop=True)
sns.jointplot("D_H", "D_P", data=a, kind="kde")









    Out[43]:





<seaborn.axisgrid.JointGrid at 0x12041e2b0>



In [44]:

    
a = data_iterative.query("Res1=='LEU' and Res2=='LEU' and Q > 0.5").reset_index(drop=True)
sns.jointplot("D_H", "D_P", data=a, kind="kde")









    Out[44]:





<seaborn.axisgrid.JointGrid at 0x11e60ae10>



In [42]:

    
a = data_iterative.query("Res1=='LEU' and Res2=='LEU'").reset_index(drop=True)
sns.jointplot("D_H", "D_P", data=a, kind="kde")









    Out[42]:





<seaborn.axisgrid.JointGrid at 0x128da7cc0>



In [40]:

    
sns.scatterplot("D_H", "D_P", hue="Q", data=data_iterative.query("Res1=='LEU' and Res2=='LEU'"), alpha=0.5)









    Out[40]:





<matplotlib.axes._subplots.AxesSubplot at 0x11e965780>



In [153]:

    
sns.scatterplot("D_H", "D_P", hue="isTP", data=new_d, alpha=0.5)









    Out[153]:





<matplotlib.axes._subplots.AxesSubplot at 0x12ff224e0>



In [10]:

    
sns.scatterplot("D_H", "D_P", hue="isTP", data=new_d, alpha=0.5)









    Out[10]:





<matplotlib.axes._subplots.AxesSubplot at 0x1109d5e80>



In [101]:

    
pre = "/Users/weilu/Research/server/mar_2020/environmental_information/"
pdb = "1akr"
ii = 1
data = pd.read_csv(f"{pre}/iterative/{pdb}_{ii}.csv", index_col=0)
data_envr = pd.read_csv(f"{pre}/iterative/{pdb}_{ii}_environment.csv", index_col=0)

# data_envr["Density_H"] = data_envr["Density_H"].round()
# data_envr["Density_P"] = data_envr["Density_P"].round()

data_with_info = data.merge(data_envr, how='left', left_on="Index1", right_on="index").merge(data_envr, how='left', left_on="Index2", right_on="index")
data_ = data_with_info.query("Theta > 5e-2 and Type == 'Direct'").reset_index(drop=True)
# data_ = data_with_info



In [102]:

    
data_.query(f"Res1=='{res1}' and Res2=='{res2}'")









    Out[102]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      ResId1
      ResId2
      index_x
      Density_H_x
      Density_P_x
      index_y
      Density_H_y
      Density_P_y
    
  
  
    
      16
      LEU
      LEU
      Direct
      0.475
      3
      44
      6.877109
      4
      45
      3
      2.000
      4.751
      44
      2.011
      1.475
    
    
      83
      LEU
      LEU
      Direct
      1.000
      24
      52
      6.056805
      25
      53
      24
      1.131
      5.574
      52
      2.000
      5.403
    
    
      86
      LEU
      LEU
      Direct
      0.409
      24
      50
      6.903615
      25
      51
      24
      1.131
      5.574
      50
      2.465
      3.409
    
    
      151
      LEU
      LEU
      Direct
      0.207
      53
      76
      7.001105
      54
      77
      53
      0.000
      6.172
      76
      2.000
      4.037
    
    
      205
      LEU
      LEU
      Direct
      1.000
      76
      110
      5.694576
      77
      111
      76
      2.000
      4.037
      110
      1.996
      4.020



In [128]:

    
# true positive
pdb = "1akr"
res1 = "LEU"
res2 = "LEU"
run = 0
a = data_native.query(f"Protein=='{pdb}'and Res1=='{res1}' and Res2=='{res2}'").reset_index(drop=True)
b = data_iterative.query(f"Protein=='{pdb}' and Run=='{run}' and Res1=='{res1}' and Res2=='{res2}'").reset_index(drop=True)



In [119]:

    
contacts = set(b["Index1"].astype(str) + "_" + b["Index2"].astype(str))



In [120]:

    
contacts









    Out[120]:





{'24_50', '24_52', '3_52', '44_72', '76_110'}



In [6]:

    
def isTP(contact, native_contacts):
    if contact in native_contacts:
        return "TP"
    if contact not in native_contacts:
        return "FP"


res1 = "THR"
res2 = "PRO"
run = 0
pdb = "1akr"
pdb_list = data_iterative["Protein"].unique()
new_d = []
for run in range(2):
    for pdb in pdb_list:
        a = data_native.query(f"Protein=='{pdb}'and Res1=='{res1}' and Res2=='{res2}'").reset_index(drop=True)
        b = data_iterative.query(f"Protein=='{pdb}' and Run=='{run}' and Res1=='{res1}' and Res2=='{res2}'").reset_index(drop=True)
        native_contacts = set(a["Index1"].astype(str) + "_" + a["Index2"].astype(str))
        b["Contact"] = b["Index1"].astype(str) + "_" + b["Index2"].astype(str)
        b["isTP"] = b["Contact"].apply(isTP, native_contacts=native_contacts)
        new_d.append(b)
new_d = pd.concat(new_d).reset_index(drop=True)



In [7]:

    
new_d









    Out[7]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      ResId1
      ResId2
      index_x
      ...
      Density_P_x
      index_y
      Density_H_y
      Density_P_y
      Protein
      Run
      D_H
      D_P
      Contact
      isTP
    
  
  
    
      0
      THR
      PRO
      Direct
      1.000
      28
      2
      4.986226
      29
      3
      28
      ...
      0.000
      2
      1.038
      1.000
      1by9
      0
      5.038
      1.000
      28_2
      FP
    
    
      1
      THR
      PRO
      Direct
      0.994
      65
      81
      6.079741
      66
      82
      65
      ...
      2.000
      81
      0.994
      1.000
      1fna
      0
      3.435
      3.000
      65_81
      TP
    
    
      2
      THR
      PRO
      Direct
      0.908
      88
      58
      6.357295
      89
      59
      88
      ...
      1.010
      58
      0.908
      1.129
      1fna
      0
      2.815
      2.139
      88_58
      TP
    
    
      3
      THR
      PRO
      Direct
      0.693
      58
      75
      6.504425
      59
      76
      58
      ...
      1.998
      75
      1.693
      1.000
      1bm8
      0
      3.386
      2.998
      58_75
      TP
    
    
      4
      THR
      PRO
      Direct
      1.000
      64
      51
      5.319895
      65
      52
      64
      ...
      2.000
      51
      1.246
      1.001
      3vub
      0
      3.206
      3.001
      64_51
      TP
    
    
      5
      THR
      PRO
      Direct
      1.000
      47
      19
      5.083985
      48
      20
      47
      ...
      0.000
      19
      2.000
      0.177
      1kte
      0
      5.980
      0.177
      47_19
      TP
    
    
      6
      THR
      PRO
      Direct
      1.000
      95
      77
      5.553338
      96
      78
      95
      ...
      1.028
      77
      4.153
      1.000
      1bkf
      0
      9.006
      2.028
      95_77
      TP
    
    
      7
      THR
      PRO
      Direct
      1.000
      19
      93
      4.892044
      20
      94
      19
      ...
      1.050
      93
      2.000
      0.912
      1sfp
      0
      4.000
      1.962
      19_93
      FP
    
    
      8
      THR
      PRO
      Direct
      1.000
      30
      62
      5.411886
      31
      63
      30
      ...
      2.020
      62
      1.001
      1.000
      1sfp
      0
      3.811
      3.020
      30_62
      TP
    
    
      9
      THR
      PRO
      Direct
      1.000
      22
      1
      5.513818
      23
      2
      22
      ...
      0.598
      1
      1.022
      0.999
      2mcm
      0
      4.372
      1.597
      22_1
      FP
    
    
      10
      THR
      PRO
      Direct
      0.953
      117
      20
      6.284565
      118
      21
      117
      ...
      0.913
      20
      1.198
      2.071
      2a0b
      0
      3.151
      2.984
      117_20
      TP
    
    
      11
      THR
      PRO
      Direct
      1.000
      14
      26
      4.916763
      15
      27
      14
      ...
      1.986
      26
      1.294
      0.856
      2sak
      0
      2.769
      2.842
      14_26
      TP
    
    
      12
      THR
      PRO
      Direct
      0.999
      74
      32
      5.849476
      75
      33
      74
      ...
      0.000
      32
      4.984
      0.999
      2sak
      0
      9.983
      0.999
      74_32
      FP
    
    
      13
      THR
      PRO
      Direct
      0.941
      54
      98
      6.308799
      55
      99
      54
      ...
      0.752
      98
      3.224
      1.024
      1bgf
      0
      6.262
      1.776
      54_98
      TP
    
    
      14
      THR
      PRO
      Direct
      0.992
      35
      2
      6.100786
      36
      3
      35
      ...
      0.000
      2
      2.973
      0.759
      1opy
      0
      5.966
      0.759
      35_2
      FP
    
    
      15
      THR
      PRO
      Direct
      0.430
      69
      83
      6.613797
      70
      84
      69
      ...
      1.000
      83
      3.475
      2.000
      1opy
      0
      4.835
      3.000
      69_83
      TP
    
    
      16
      THR
      PRO
      Direct
      1.000
      12
      28
      4.872084
      13
      29
      12
      ...
      1.102
      28
      2.048
      1.007
      1rlw
      0
      5.040
      2.109
      12_28
      FP
    
    
      17
      THR
      PRO
      Direct
      0.997
      12
      53
      5.991587
      13
      54
      12
      ...
      1.102
      53
      2.999
      2.451
      1rlw
      0
      5.991
      3.553
      12_53
      FP
    
    
      18
      THR
      PRO
      Direct
      0.956
      15
      26
      6.278195
      16
      27
      15
      ...
      0.995
      26
      4.104
      1.000
      1rlw
      0
      7.954
      1.995
      15_26
      TP
    
    
      19
      THR
      PRO
      Direct
      0.725
      52
      37
      6.488734
      53
      38
      52
      ...
      0.997
      37
      3.735
      0.000
      1hmt
      0
      7.908
      0.997
      52_37
      TP
    
    
      20
      THR
      PRO
      Direct
      0.998
      52
      37
      5.943890
      53
      38
      52
      ...
      1.001
      37
      2.819
      1.001
      1crb
      0
      4.817
      2.002
      52_37
      TP
    
    
      21
      THR
      PRO
      Direct
      1.000
      54
      44
      5.338830
      55
      45
      54
      ...
      2.156
      44
      3.000
      0.999
      1ax8
      0
      6.004
      3.155
      54_44
      TP
    
    
      22
      THR
      PRO
      Direct
      0.493
      12
      2
      6.588423
      13
      3
      12
      ...
      2.823
      2
      1.493
      1.316
      1rss
      0
      4.068
      4.139
      12_2
      TP
    
    
      23
      THR
      PRO
      Direct
      1.000
      140
      27
      5.474146
      141
      28
      140
      ...
      4.120
      27
      2.733
      4.004
      1jon
      0
      4.212
      8.124
      140_27
      FP
    
    
      24
      THR
      PRO
      Direct
      1.000
      95
      72
      5.552165
      96
      73
      95
      ...
      0.072
      72
      3.002
      0.278
      1aly
      0
      5.005
      0.350
      95_72
      TP
    
    
      25
      THR
      PRO
      Direct
      1.000
      28
      2
      5.575712
      29
      3
      28
      ...
      0.000
      2
      2.952
      0.000
      1by9
      1
      5.953
      0.000
      28_2
      FP
    
    
      26
      THR
      PRO
      Direct
      0.364
      65
      81
      6.641636
      66
      82
      65
      ...
      1.138
      81
      0.364
      0.139
      1fna
      1
      3.515
      1.277
      65_81
      TP
    
    
      27
      THR
      PRO
      Direct
      0.671
      88
      58
      6.514388
      89
      59
      88
      ...
      2.025
      58
      0.672
      1.011
      1fna
      1
      2.030
      3.036
      88_58
      TP
    
    
      28
      THR
      PRO
      Direct
      0.998
      68
      35
      5.970904
      69
      36
      68
      ...
      1.106
      35
      3.930
      0.000
      1plc
      1
      7.994
      1.106
      68_35
      FP
    
    
      29
      THR
      PRO
      Direct
      1.000
      58
      75
      5.666871
      59
      76
      58
      ...
      0.156
      75
      2.162
      2.000
      1bm8
      1
      3.162
      2.156
      58_75
      TP
    
    
      30
      THR
      PRO
      Direct
      0.782
      64
      51
      6.458181
      65
      52
      64
      ...
      1.054
      51
      1.782
      0.237
      3vub
      1
      2.564
      1.291
      64_51
      TP
    
    
      31
      THR
      PRO
      Direct
      1.000
      47
      19
      4.833291
      48
      20
      47
      ...
      0.000
      19
      2.000
      0.157
      1kte
      1
      5.998
      0.157
      47_19
      TP
    
    
      32
      THR
      PRO
      Direct
      1.000
      95
      77
      5.206377
      96
      78
      95
      ...
      0.986
      77
      3.997
      2.000
      1bkf
      1
      6.993
      2.986
      95_77
      TP
    
    
      33
      THR
      PRO
      Direct
      1.000
      30
      65
      5.047096
      31
      66
      30
      ...
      0.990
      65
      3.001
      1.153
      1sfp
      1
      6.001
      2.143
      30_65
      FP
    
    
      34
      THR
      PRO
      Direct
      0.998
      86
      62
      5.947662
      87
      63
      86
      ...
      1.076
      62
      1.000
      1.000
      1sfp
      1
      2.996
      2.076
      86_62
      TP
    
    
      35
      THR
      PRO
      Direct
      0.691
      94
      40
      6.505275
      95
      41
      94
      ...
      1.008
      40
      1.724
      1.002
      1neu
      1
      7.285
      2.010
      94_40
      FP
    
    
      36
      THR
      PRO
      Direct
      1.000
      14
      26
      5.060741
      15
      27
      14
      ...
      1.989
      26
      2.056
      0.739
      2sak
      1
      4.052
      2.728
      14_26
      TP
    
    
      37
      THR
      PRO
      Direct
      1.000
      54
      98
      5.010488
      55
      99
      54
      ...
      0.954
      98
      1.998
      2.005
      1bgf
      1
      5.871
      2.959
      54_98
      TP
    
    
      38
      THR
      PRO
      Direct
      0.957
      69
      83
      6.274398
      70
      84
      69
      ...
      1.000
      83
      2.776
      2.332
      1opy
      1
      3.764
      3.332
      69_83
      TP
    
    
      39
      THR
      PRO
      Direct
      0.154
      52
      37
      6.756447
      53
      38
      52
      ...
      0.914
      37
      2.203
      0.000
      1hmt
      1
      6.351
      0.914
      52_37
      TP
    
    
      40
      THR
      PRO
      Direct
      1.000
      27
      10
      5.427485
      28
      11
      27
      ...
      0.999
      10
      2.290
      0.000
      1htp
      1
      6.219
      0.999
      27_10
      FP
    
    
      41
      THR
      PRO
      Direct
      1.000
      52
      37
      5.246543
      53
      38
      52
      ...
      1.001
      37
      3.900
      1.906
      1crb
      1
      5.900
      2.907
      52_37
      TP
    
    
      42
      THR
      PRO
      Direct
      0.982
      54
      44
      6.184997
      55
      45
      54
      ...
      2.082
      44
      3.622
      2.991
      1ax8
      1
      6.604
      5.073
      54_44
      TP
    
    
      43
      THR
      PRO
      Direct
      1.000
      95
      72
      4.828091
      96
      73
      95
      ...
      0.000
      72
      2.984
      0.007
      1aly
      1
      5.984
      0.007
      95_72
      TP
    
    
      44
      THR
      PRO
      Direct
      0.996
      16
      128
      6.042614
      17
      129
      16
      ...
      0.610
      128
      2.494
      1.011
      1akr
      1
      6.476
      1.621
      16_128
      TP
    
  

45 rows × 21 columns



In [ ]:



In [ ]:



In [125]:

    
native_contacts









    Out[125]:





{'24_50',
 '24_52',
 '3_44',
 '44_72',
 '53_110',
 '53_72',
 '53_76',
 '76_110',
 '76_113'}



In [131]:



In [143]:



In [8]:

    
gammaFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_optimization/optimization_iter3/saved_gammas/iter3_z_weighted_2_cutoff400_impose_Aprime_constraint"
gamma_info = get_contact_gamma_info(gammaFile)



In [9]:

    
gamma_info.query("Interaction=='Direct'").sort_values("Gamma")









    Out[9]:







  
    
      
      Interaction
      Res1
      Res2
      Index
      Gamma
    
  
  
    
      372
      Direct
      Y
      P
      193
      -1.124
    
    
      371
      Direct
      P
      Y
      193
      -1.124
    
    
      379
      Direct
      W
      S
      197
      -1.121
    
    
      378
      Direct
      S
      W
      197
      -1.121
    
    
      214
      Direct
      K
      E
      110
      -1.080
    
    
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      351
      Direct
      F
      F
      182
      1.058
    
    
      243
      Direct
      F
      G
      125
      1.091
    
    
      242
      Direct
      G
      F
      125
      1.091
    
    
      277
      Direct
      H
      V
      143
      1.524
    
    
      278
      Direct
      V
      H
      143
      1.524
    
  

400 rows × 5 columns



In [11]:

    
gamma_info.query("Interaction=='Direct' and Res1=='L'")









    Out[11]:







  
    
      
      Interaction
      Res1
      Res2
      Index
      Gamma
    
  
  
    
      20
      Direct
      L
      A
      10
      -0.388
    
    
      57
      Direct
      L
      R
      29
      0.280
    
    
      92
      Direct
      L
      N
      47
      0.045
    
    
      125
      Direct
      L
      D
      64
      0.576
    
    
      156
      Direct
      L
      C
      80
      0.056
    
    
      185
      Direct
      L
      Q
      95
      -0.039
    
    
      212
      Direct
      L
      E
      109
      -0.480
    
    
      237
      Direct
      L
      G
      122
      -0.340
    
    
      260
      Direct
      L
      H
      134
      -0.417
    
    
      281
      Direct
      L
      I
      145
      -0.322
    
    
      300
      Direct
      L
      L
      155
      -0.074
    
    
      301
      Direct
      L
      K
      156
      -0.287
    
    
      303
      Direct
      L
      M
      157
      -0.639
    
    
      305
      Direct
      L
      F
      158
      -0.365
    
    
      307
      Direct
      L
      P
      159
      -0.455
    
    
      309
      Direct
      L
      S
      160
      0.464
    
    
      311
      Direct
      L
      T
      161
      -0.118
    
    
      313
      Direct
      L
      W
      162
      -0.176
    
    
      315
      Direct
      L
      Y
      163
      -0.610
    
    
      317
      Direct
      L
      V
      164
      -0.835



In [2]:

    
def get_contact_gamma_info(gammaFile):
    # check the gamma.
    # read in gamma, and sort by size.
    # gammaFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_optimization/optimization_new_4_withoutBurial/saved_gammas/new_4_cutoff600_impose_Aprime_constraint"
    gamma = np.loadtxt(gammaFile)

    res_type_map_letters = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
                            'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']

    inverse_res_type_map = dict(list(zip(list(range(20)), res_type_map_letters)))
    c = 0
    info_ = []
    for i in range(20):
        for j in range(i, 20):
            info_.append(["Direct", res_type_map_letters[i], res_type_map_letters[j], c, round(gamma[c],3)])
            if i != j:
                info_.append(["Direct", res_type_map_letters[j], res_type_map_letters[i], c, round(gamma[c],3)])
            c += 1
    for i in range(20):
        for j in range(i, 20):
            info_.append(["Protein", res_type_map_letters[i], res_type_map_letters[j], c, round(gamma[c],3)])
            if i != j:
                info_.append(["Protein", res_type_map_letters[j], res_type_map_letters[i], c, round(gamma[c],3)])
            info_.append(["Water", res_type_map_letters[i], res_type_map_letters[j], c+210, round(gamma[c+210],3)])
            if i != j:
                info_.append(["Water", res_type_map_letters[j], res_type_map_letters[i], c+210, round(gamma[c+210],3)])
            c += 1
    contact_gammas = pd.DataFrame(info_, columns=["Interaction", "Res1", "Res2", "Index", "Gamma"])
    return contact_gammas



In [59]:

    
data_native









    Out[59]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      ResId1
      ResId2
      index_x
      Density_H_x
      Density_P_x
      index_y
      Density_H_y
      Density_P_y
      Protein
      D_H
      D_P
    
  
  
    
      0
      ASP
      ASP
      Direct
      0.615
      0
      39
      6.387458
      1
      40
      0
      2.615
      0.000
      39
      2.612
      0.001
      1hoe
      5.227
      0.001
    
    
      1
      THR
      ASP
      Direct
      1.000
      1
      39
      3.803764
      2
      40
      1
      1.026
      1.000
      39
      2.612
      0.001
      1hoe
      3.638
      1.001
    
    
      2
      ARG
      ASP
      Direct
      0.997
      67
      39
      5.408635
      68
      40
      67
      3.023
      1.000
      39
      2.612
      0.001
      1hoe
      5.635
      1.001
    
    
      3
      ASP
      ARG
      Direct
      1.000
      0
      67
      4.196109
      1
      68
      0
      2.615
      0.000
      67
      3.023
      1.000
      1hoe
      5.638
      1.000
    
    
      4
      VAL
      ARG
      Direct
      1.000
      35
      67
      5.400923
      36
      68
      35
      4.864
      1.000
      67
      3.023
      1.000
      1hoe
      7.887
      2.000
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      20225
      LEU
      PHE
      Direct
      0.111
      111
      91
      7.342279
      112
      92
      111
      0.000
      2.111
      91
      0.986
      4.953
      1osa
      0.986
      7.064
    
    
      20226
      GLU
      ASP
      Direct
      1.000
      103
      92
      5.176369
      104
      93
      103
      3.997
      1.000
      92
      5.000
      2.004
      1osa
      8.997
      3.004
    
    
      20227
      GLU
      ARG
      Direct
      0.874
      103
      93
      6.003034
      104
      94
      103
      3.997
      1.000
      93
      1.874
      0.076
      1osa
      5.871
      1.076
    
    
      20228
      HIS
      ARG
      Direct
      1.000
      106
      93
      4.551530
      107
      94
      106
      2.970
      0.000
      93
      1.874
      0.076
      1osa
      4.844
      0.076
    
    
      20229
      VAL
      ARG
      Direct
      0.076
      107
      93
      6.888728
      108
      94
      107
      0.687
      3.946
      93
      1.874
      0.076
      1osa
      2.561
      4.022
    
  

20230 rows × 18 columns



In [54]:

    
data_native.query(f"Res1=='{res1}' and Res2=='{res2}'")









    Out[54]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      ResId1
      ResId2
      index_x
      Density_H_x
      Density_P_x
      index_y
      Density_H_y
      Density_P_y
      Protein
      D_H
      D_P
    
  
  
    
      16
      LEU
      LEU
      Direct
      1.0
      13
      69
      4.486654
      14
      70
      13
      2.793
      2.000
      69
      1.999
      5.000
      1hoe
      4.792
      7.000
    
    
      212
      LEU
      LEU
      Direct
      1.0
      15
      42
      6.106652
      16
      43
      15
      2.019
      5.527
      42
      2.988
      5.999
      1tif
      5.007
      11.526
    
    
      214
      LEU
      LEU
      Direct
      1.0
      32
      42
      5.137943
      33
      43
      32
      3.892
      1.886
      42
      2.988
      5.999
      1tif
      6.880
      7.885
    
    
      366
      LEU
      LEU
      Direct
      1.0
      10
      30
      5.106055
      11
      31
      10
      2.003
      5.997
      30
      1.932
      5.996
      1vcc
      3.935
      11.993
    
    
      492
      LEU
      LEU
      Direct
      1.0
      6
      50
      5.629869
      7
      51
      6
      0.000
      9.000
      50
      0.000
      8.000
      1by9
      0.000
      17.000
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      19877
      LEU
      LEU
      Direct
      1.0
      53
      110
      5.940358
      54
      111
      53
      0.000
      9.958
      110
      1.999
      7.008
      1akr
      1.999
      16.966
    
    
      19878
      LEU
      LEU
      Direct
      1.0
      76
      110
      5.347788
      77
      111
      76
      2.279
      6.739
      110
      1.999
      7.008
      1akr
      4.278
      13.747
    
    
      20052
      LEU
      LEU
      Direct
      1.0
      76
      113
      5.790491
      77
      114
      76
      2.279
      6.739
      113
      2.935
      1.959
      1akr
      5.214
      8.698
    
    
      20083
      LEU
      LEU
      Direct
      1.0
      3
      68
      5.984374
      4
      69
      3
      1.000
      3.000
      68
      0.998
      3.996
      1osa
      1.998
      6.996
    
    
      20125
      LEU
      LEU
      Direct
      1.0
      31
      47
      5.299009
      32
      48
      31
      0.784
      6.989
      47
      5.163
      2.328
      1osa
      5.947
      9.317
    
  

289 rows × 18 columns



In [24]:

    
data_native.query("Res1=='LEU' and Res2=='LEU' and D_H > 9")









    Out[24]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      ResId1
      ResId2
      index_x
      Density_H_x
      Density_P_x
      index_y
      Density_H_y
      Density_P_y
      Protein
      D_H
      D_P
    
  
  
    
      7390
      LEU
      LEU
      Direct
      0.999
      4
      44
      6.195842
      5
      45
      4
      5.270
      3.561
      44
      4.985
      0.999
      1tmy
      10.255
      4.560
    
    
      11901
      LEU
      LEU
      Direct
      1.000
      6
      63
      5.423016
      7
      64
      6
      5.892
      1.000
      63
      4.004
      1.000
      1cpq
      9.896
      2.000
    
    
      14426
      LEU
      LEU
      Direct
      0.998
      10
      116
      6.235893
      11
      117
      10
      3.262
      3.131
      116
      5.969
      1.133
      2end
      9.231
      4.264



In [23]:

    
sns.scatterplot("D_H", "D_P", data=data_native.query("Res1=='LEU' and Res2=='LEU'"), alpha=0.5)









    Out[23]:





<matplotlib.axes._subplots.AxesSubplot at 0x11fd9dda0>



In [22]:

    
sns.scatterplot("D_H", "D_P", data=data_native.query("Res1=='LEU' and Res2=='LEU'"), alpha=0.5)
sns.scatterplot("D_H", "D_P", data=data_iterative.query("Res1=='LEU' and Res2=='LEU'"), alpha=0.5)









    



No handles with labels found to put in legend.






    Out[22]:





<matplotlib.legend.Legend at 0x11e58e0b8>



In [39]:

    
a = data_native.query("Res1=='LEU' and Res2=='LEU'").reset_index(drop=True)
a["D_H"] = a["Density_H_x"] + a["Density_H_y"]
a["D_P"] = a["Density_P_x"] + a["Density_P_y"]
sns.jointplot("D_H", "D_P", data=a, kind="reg")









    Out[39]:





<seaborn.axisgrid.JointGrid at 0x120a03da0>



In [20]:

    
cbd_info = pd.read_csv("/Users/weilu/opt/parameters/side_chain/cbd_cbd_real_contact_symmetric.csv")



In [22]:

    
cbd_info.query("ResName1 == 'SER'")



In [ ]:

    
def get_r_min_max(a, res1, res2, type="Direct"):
    res1_name = res1.get_resname()
    res2_name = res2.get_resname()
    if type == "Direct":
        if res1_name == "GLY" or res2_name == "GLY":
            r_min_res1_res2 = 2.5
            r_max_res1_res2 = 6.5
        else:
            b = a.query(f"ResName1=='{res1_name}' and ResName2=='{res2_name}'")
            if len(b) == 0:
                b = a.query(f"ResName1=='{res2_name}' and ResName2=='{res1_name}'")
            try:
                r_min_res1_res2 = float(b["r_min"]) - 0.5
                r_max_res1_res2 = float(b["r_max"]) + 1.5
            except:
                print("problem", b)
    else:
        if res1_name == "GLY" or res2_name == "GLY":
            r_min_res1_res2 = 6.5
            r_max_res1_res2 = 9.5
        else:
            b = a.query(f"ResName1=='{res1_name}' and ResName2=='{res2_name}'")
            if len(b) == 0:
                b = a.query(f"ResName1=='{res2_name}' and ResName2=='{res1_name}'")
            try:
                r_min_res1_res2 = float(b["r_max"]) + 1.5
                r_max_res1_res2 = float(b["r_max"]) + 4.5
            except:
                print(b)
    return r_min_res1_res2, r_max_res1_res2

def get_interaction_data_with_cbd_info(structure, cbd_info):
    # get all the pair of interaction, direct and mediated. as a dataFrame.
    res_list = get_res_list(structure)
    neighbor_list = get_neighbor_list(structure)
    sequence = get_sequence_from_structure(structure)
    # cb_density = calculate_cb_density(res_list, neighbor_list)
    cb_density = calculate_cb_density_com_wellCenter(res_list, neighbor_list, cbd_info)
    r_min_direct = 2.5
    r_max_direct = 6.5
    r_min = 6.5
    r_max = 9.5
    kappa = 5.0
    min_seq_sep = 10
    density_threshold = 2.6
    density_kappa = 7.0
    # phi_mediated_contact_well = np.zeros((2, 20,20))
    v_mediated = 0
    data_ = []
    for res1globalindex, res1 in enumerate(res_list):
        res1index = get_local_index(res1)
        res1chain = get_chain(res1)
        rho_i = cb_density[res1globalindex]
        for res2 in get_neighbors_within_radius(neighbor_list, res1, r_max+4.0):
            res2index = get_local_index(res2)
            res2chain = get_chain(res2)
            res2globalindex = get_global_index(res_list, res2)
            rho_j = cb_density[res2globalindex]

            # if res2globalindex - res1globalindex >= min_seq_sep or (res1chain != res2chain and res2globalindex > res1globalindex):
            if abs(res2globalindex - res1globalindex) >= min_seq_sep or (res1chain != res2chain):
                if res1.resname == res2.resname:
                    if not (res2globalindex - res1globalindex >= min_seq_sep or (res1chain != res2chain and res2globalindex > res1globalindex)):
                        continue
                res1type = get_res_type(res_list, res1)
                res2type = get_res_type(res_list, res2)
                rij = get_interaction_distance(res1, res2)
                # theta = interaction_well(rij, r_min, r_max, kappa)
                r_min_res1_res2, r_max_res1_res2 = get_r_min_max(cbd_info, res1, res2, type="Mediated")
                theta = interaction_well(rij, r_min_res1_res2, r_max_res1_res2, kappa)
                
                water_theta = prot_water_switchFunc_sigmaWater(rho_i, rho_j, density_threshold, density_kappa) * theta
                protein_theta = prot_water_switchFunc_sigmaProt(rho_i, rho_j, density_threshold, density_kappa) * theta
                data_.append([res1.resname, res2.resname, "Protein", round(protein_theta, 3), res1globalindex, res2globalindex, rij, res1index, res2index])
                data_.append([res1.resname, res2.resname, "Water", round(water_theta, 3), res1globalindex, res2globalindex, rij, res1index, res2index])
                r_min_res1_res2, r_max_res1_res2 = get_r_min_max(cbd_info, res1, res2, type="Direct")
                direct_theta = interaction_well(rij, r_min_res1_res2, r_max_res1_res2, kappa)
                data_.append([res1.resname, res2.resname, "Direct", round(direct_theta, 3), res1globalindex, res2globalindex, rij, res1index, res2index])
                # protein_gamma = protein_gamma_ijm[0][res1type][res2type]*k_hypercharge
                # water_gamma = water_gamma_ijm[0][res1type][res2type]*k_hypercharge
    data = pd.DataFrame(data_, columns=["Res1", "Res2", "Type", "Theta", "Index1", "Index2", "r", "ResId1", "ResId2"])

    # contact_gammas["Res1"] = contact_gammas.apply(lambda x: one_to_three(x["Res1"]), axis=1)
    # contact_gammas["Res2"] = contact_gammas.apply(lambda x: one_to_three(x["Res2"]), axis=1)
    # contact_gammas["Type"] = contact_gammas["Interaction"]
    # a = data.merge(contact_gammas, on=["Res1", "Res2", "Type"])
    # a["theta_gamma"] = a["Theta"] * a["Gamma"]
    return data


def calculate_property_density_with_cbd_info(res_list, neighbor_list, propertyTable, cbd_info, min_seq_sep=2, rmin=2.5):
    num_residues = len(res_list)
    density = np.zeros(num_residues)
    for res1globalindex, res1 in enumerate(res_list):
        res1index = get_local_index(res1)
        res1chain = get_chain(res1)
        for res2 in get_neighbors_within_radius(neighbor_list, res1, 9.0):
            res2index = get_local_index(res2)
            res2chain = get_chain(res2)
            res2globalindex = get_global_index(res_list, res2)
            if abs(res2index - res1index) >= min_seq_sep or (res1chain != res2chain):
                rij = get_interaction_distance(res1, res2)
                hasProperty = propertyTable[three_to_one(res2.resname)]
                r_min_res1_res2, r_max_res1_res2 = get_r_min_max(cbd_info, res1, res2, type="Direct")
                density[res1globalindex] += hasProperty * interaction_well(rij, r_min_res1_res2, r_max_res1_res2, 5)
    return density

def get_environment_with_cbd_info(structure, cbd_info):
    res_type_map_HP = {
        'C': 0,
        'M': 0,
        'F': 0,
        'I': 0,
        'L': 0,
        'V': 0,
        'W': 0,
        'Y': 0,
        'A': 1,
        'H': 1,
        'T': 1,
        'G': 1,
        'P': 1,
        'D': 1,
        'E': 1,
        'N': 1,
        'Q': 1,
        'R': 1,
        'K': 1,
        'S': 1
    }
    isH = {}
    isP = {}
    for i in range(20):
        isH[dindex_to_1[i]] = res_type_map_HP[dindex_to_1[i]]
        isP[dindex_to_1[i]] = 1 - res_type_map_HP[dindex_to_1[i]]
    res_list = get_res_list(structure)
    neighbor_list = get_neighbor_list(structure)
    sequence = get_sequence_from_structure(structure)
    density_H = calculate_property_density_with_cbd_info(res_list, neighbor_list, isH, cbd_info).round(3)
    density_P = calculate_property_density_with_cbd_info(res_list, neighbor_list, isP, cbd_info).round(3)
    environment_info = pd.DataFrame([density_H, density_P], index=["Density_H", "Density_P"]).T.reset_index()
    return environment_info



In [ ]:



In [6]:

    
def get_interaction_data(structure):
    # get all the pair of interaction, direct and mediated. as a dataFrame.
    res_list = get_res_list(structure)
    neighbor_list = get_neighbor_list(structure)
    sequence = get_sequence_from_structure(structure)
    cb_density = calculate_cb_density(res_list, neighbor_list)
    r_min_direct = 2.5
    r_max_direct = 6.5
    r_min = 6.5
    r_max = 9.5
    kappa = 5.0
    min_seq_sep = 10
    density_threshold = 2.6
    density_kappa = 7.0
    # phi_mediated_contact_well = np.zeros((2, 20,20))
    v_mediated = 0
    data_ = []
    for res1globalindex, res1 in enumerate(res_list):
        res1index = get_local_index(res1)
        res1chain = get_chain(res1)
        rho_i = cb_density[res1globalindex]
        for res2 in get_neighbors_within_radius(neighbor_list, res1, r_max+4.0):
            res2index = get_local_index(res2)
            res2chain = get_chain(res2)
            res2globalindex = get_global_index(res_list, res2)
            rho_j = cb_density[res2globalindex]

            # if res2globalindex - res1globalindex >= min_seq_sep or (res1chain != res2chain and res2globalindex > res1globalindex):
            if abs(res2globalindex - res1globalindex) >= min_seq_sep or (res1chain != res2chain):
                if res1.resname == res2.resname:
                    if not (res2globalindex - res1globalindex >= min_seq_sep or (res1chain != res2chain and res2globalindex > res1globalindex)):
                        continue
                res1type = get_res_type(res_list, res1)
                res2type = get_res_type(res_list, res2)
                rij = get_interaction_distance(res1, res2)
                theta = interaction_well(rij, r_min, r_max, kappa)
                water_theta = prot_water_switchFunc_sigmaWater(rho_i, rho_j, density_threshold, density_kappa) * theta
                protein_theta = prot_water_switchFunc_sigmaProt(rho_i, rho_j, density_threshold, density_kappa) * theta
                data_.append([res1.resname, res2.resname, "Protein", round(protein_theta, 3), res1globalindex, res2globalindex, rij, res1index, res2index])
                data_.append([res1.resname, res2.resname, "Water", round(water_theta, 3), res1globalindex, res2globalindex, rij, res1index, res2index])
                direct_theta = interaction_well(rij, r_min_direct, r_max_direct, kappa)
                data_.append([res1.resname, res2.resname, "Direct", round(direct_theta, 3), res1globalindex, res2globalindex, rij, res1index, res2index])
                # protein_gamma = protein_gamma_ijm[0][res1type][res2type]*k_hypercharge
                # water_gamma = water_gamma_ijm[0][res1type][res2type]*k_hypercharge
    data = pd.DataFrame(data_, columns=["Res1", "Res2", "Type", "Theta", "Index1", "Index2", "r", "ResId1", "ResId2"])

    # contact_gammas["Res1"] = contact_gammas.apply(lambda x: one_to_three(x["Res1"]), axis=1)
    # contact_gammas["Res2"] = contact_gammas.apply(lambda x: one_to_three(x["Res2"]), axis=1)
    # contact_gammas["Type"] = contact_gammas["Interaction"]
    # a = data.merge(contact_gammas, on=["Res1", "Res2", "Type"])
    # a["theta_gamma"] = a["Theta"] * a["Gamma"]
    return data


def get_contact_gamma_info(gammaFile):
    # check the gamma.
    # read in gamma, and sort by size.
    # gammaFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_optimization/optimization_new_4_withoutBurial/saved_gammas/new_4_cutoff600_impose_Aprime_constraint"
    gamma = np.loadtxt(gammaFile)

    res_type_map_letters = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G',
                            'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']

    inverse_res_type_map = dict(list(zip(list(range(20)), res_type_map_letters)))
    c = 0
    info_ = []
    for i in range(20):
        for j in range(i, 20):
            info_.append(["Direct", res_type_map_letters[i], res_type_map_letters[j], c, round(gamma[c],3)])
            if i != j:
                info_.append(["Direct", res_type_map_letters[j], res_type_map_letters[i], c, round(gamma[c],3)])
            c += 1
    for i in range(20):
        for j in range(i, 20):
            info_.append(["Protein", res_type_map_letters[i], res_type_map_letters[j], c, round(gamma[c],3)])
            if i != j:
                info_.append(["Protein", res_type_map_letters[j], res_type_map_letters[i], c, round(gamma[c],3)])
            info_.append(["Water", res_type_map_letters[i], res_type_map_letters[j], c+210, round(gamma[c+210],3)])
            if i != j:
                info_.append(["Water", res_type_map_letters[j], res_type_map_letters[i], c+210, round(gamma[c+210],3)])
            c += 1
    contact_gammas = pd.DataFrame(info_, columns=["Interaction", "Res1", "Res2", "Index", "Gamma"])
    return contact_gammas



In [6]:

    
pdbFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_new_4_without_burial/1poa/1/lastFrame.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
data = get_interaction_data(structure)



In [19]:



In [72]:

    
pdbFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/1fna/cbd_1fna.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
data = get_interaction_data(structure)



In [21]:



In [99]:

    
def calculate_property_density(res_list, neighbor_list, propertyTable, min_seq_sep=2, rmin=2.5):
    num_residues = len(res_list)
    density = np.zeros(num_residues)
    for res1globalindex, res1 in enumerate(res_list):
        res1index = get_local_index(res1)
        res1chain = get_chain(res1)
        for res2 in get_neighbors_within_radius(neighbor_list, res1, 9.0):
            res2index = get_local_index(res2)
            res2chain = get_chain(res2)
            res2globalindex = get_global_index(res_list, res2)
            if abs(res2index - res1index) >= min_seq_sep or (res1chain != res2chain):
                rij = get_interaction_distance(res1, res2)
                hasProperty = propertyTable[three_to_one(res2.resname)]
                density[res1globalindex] += hasProperty * interaction_well(rij, rmin, 6.5, 5)
    return density

def get_environment(structure):
    res_type_map_HP = {
        'C': 0,
        'M': 0,
        'F': 0,
        'I': 0,
        'L': 0,
        'V': 0,
        'W': 0,
        'Y': 0,
        'A': 1,
        'H': 1,
        'T': 1,
        'G': 1,
        'P': 1,
        'D': 1,
        'E': 1,
        'N': 1,
        'Q': 1,
        'R': 1,
        'K': 1,
        'S': 1
    }
    isH = {}
    isP = {}
    for i in range(20):
        isH[dindex_to_1[i]] = res_type_map_HP[dindex_to_1[i]]
        isP[dindex_to_1[i]] = 1 - res_type_map_HP[dindex_to_1[i]]
    res_list = get_res_list(structure)
    neighbor_list = get_neighbor_list(structure)
    sequence = get_sequence_from_structure(structure)
    density_H = calculate_property_density(res_list, neighbor_list, isH).round(3)
    density_P = calculate_property_density(res_list, neighbor_list, isP).round(3)
    environment_info = pd.DataFrame([density_H, density_P], index=["Density_H", "Density_P"]).T.reset_index()
    return environment_info



In [118]:

    
data_list = []
data = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/1fna.csv", index_col=0)
data_envr = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/1fna_environment.csv", index_col=0)

data_envr["Density_H"] = data_envr["Density_H"].round()
data_envr["Density_P"] = data_envr["Density_P"].round()

data_with_info = data.merge(data_envr, left_on="Index1", right_on="index").merge(data_envr, left_on="Index2", right_on="index")
data_ = data_with_info.query("Theta > 1e-1 and Type == 'Direct'").reset_index(drop=True)
data_list.append(data_.assign(Protein=pdb))



In [122]:

    
data = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/all_data.csv", index_col=0)



In [209]:



In [210]:

    
data2 = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/all_data_no_round_cath.csv", index_col=0)
data2.groupby(["Res1", "Res2"])["Theta"].count().reset_index().sort_values("Theta")









    Out[210]:







  
    
      
      Res1
      Res2
      Theta
    
  
  
    
      357
      TRP
      TRP
      35
    
    
      168
      HIS
      HIS
      49
    
    
      44
      ASN
      CYS
      54
    
    
      82
      CYS
      ASN
      54
    
    
      64
      ASP
      CYS
      55
    
    
      ...
      ...
      ...
      ...
    
    
      390
      VAL
      LEU
      3135
    
    
      219
      LEU
      VAL
      3135
    
    
      190
      ILE
      LEU
      3583
    
    
      209
      LEU
      ILE
      3583
    
    
      210
      LEU
      LEU
      3753
    
  

400 rows × 3 columns



In [203]:

    
data2.groupby(["Res1", "Res2"])["Theta"].count().reset_index().sort_values("Theta")









    Out[203]:







  
    
      
      Res1
      Res2
      Theta
    
  
  
    
      357
      TRP
      TRP
      2
    
    
      83
      CYS
      ASP
      5
    
    
      64
      ASP
      CYS
      5
    
    
      348
      TRP
      HIS
      7
    
    
      177
      HIS
      TRP
      7
    
    
      ...
      ...
      ...
      ...
    
    
      199
      ILE
      VAL
      291
    
    
      209
      LEU
      ILE
      368
    
    
      190
      ILE
      LEU
      368
    
    
      219
      LEU
      VAL
      442
    
    
      390
      VAL
      LEU
      442
    
  

400 rows × 3 columns



In [212]:



In [158]:

    
pdbFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/1pne/cbd_1pne.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
data = get_interaction_data(structure)



In [159]:

    
res_type_map_HP = {
    'C': 0,
    'M': 0,
    'F': 0,
    'I': 0,
    'L': 0,
    'V': 0,
    'W': 0,
    'Y': 0,
    'A': 1,
    'H': 1,
    'T': 1,
    'G': 1,
    'P': 1,
    'D': 1,
    'E': 1,
    'N': 1,
    'Q': 1,
    'R': 1,
    'K': 1,
    'S': 1
}
isH = {}
isP = {}
for i in range(20):
    isH[dindex_to_1[i]] = res_type_map_HP[dindex_to_1[i]]
    isP[dindex_to_1[i]] = 1 - res_type_map_HP[dindex_to_1[i]]
res_list = get_res_list(structure)
neighbor_list = get_neighbor_list(structure)
sequence = get_sequence_from_structure(structure)
density_H = calculate_property_density(res_list, neighbor_list, isH).round(3)
density_P = calculate_property_density(res_list, neighbor_list, isP).round(3)
environment_info = pd.DataFrame([density_H, density_P], index=["Density_H", "Density_P"]).T.reset_index()



In [197]:

    
def calculate_property_density_debug(res_list, neighbor_list, propertyTable, min_seq_sep=2, rmin=2.5):
    num_residues = len(res_list)
    density = np.zeros(num_residues)
    for res1globalindex, res1 in enumerate(res_list):
        res1index = get_local_index(res1)
        res1chain = get_chain(res1)
        for res2 in get_neighbors_within_radius(neighbor_list, res1, 9.0):
            res2index = get_local_index(res2)
            res2chain = get_chain(res2)
            res2globalindex = get_global_index(res_list, res2)
            if abs(res2index - res1index) >= min_seq_sep or (res1chain != res2chain):

                rij = get_interaction_distance(res1, res2)
                hasProperty = propertyTable[three_to_one(res2.resname)]
                theta = interaction_well(rij, rmin, 6.5, 5)
                if res1globalindex == 64:
                    if hasProperty * theta > 0.1:
                        print(res1index, res1.resname, res2.resname, res2index, rij, hasProperty, theta)
                density[res1globalindex] += hasProperty * interaction_well(rij, rmin, 6.5, 5)
    return density



In [198]:

    
calculate_property_density_debug(res_list, neighbor_list, isP)[64]









    



65 LEU CYS 70 4.830256 1 0.9999999438976255
65 LEU LEU 50 5.5888577 1 0.9998896147329259
65 LEU PHE 39 5.879114 1 0.9979925058011887
65 LEU LEU 63 5.7178817 1 0.9995990140910096
65 LEU LEU 87 6.0440707 1 0.989639013538697
65 LEU ILE 42 5.4401717 1 0.9999750417908473
65 LEU VAL 102 6.6332526 1 0.20874180797125436
65 LEU ILE 100 5.9916315 1 0.9938411353589731
65 LEU LEU 111 5.7475533 1 0.9994605746142904






    Out[198]:





8.189138664358062



In [164]:

    
environment_info.query("index==108")









    Out[164]:







  
    
      
      index
      Density_H
      Density_P
    
  
  
    
      108
      108
      0.039
      8.954



In [195]:

    
data2.query("ResId2=='65' and Protein == '1pne'")









    Out[195]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      ResId1
      ResId2
      index_x
      Density_H_x
      Density_P_x
      index_y
      Density_H_y
      Density_P_y
      Protein
    
  
  
    
      14367
      PHE
      LEU
      Direct
      0.998
      38
      64
      5.879114
      39
      65
      38
      2.776
      4.976
      64
      0.999
      8.189
      1pne
    
    
      14368
      ILE
      LEU
      Direct
      1.000
      41
      64
      5.440172
      42
      65
      41
      2.991
      5.830
      64
      0.999
      8.189
      1pne
    
    
      14369
      GLU
      LEU
      Direct
      0.999
      45
      64
      5.846354
      46
      65
      45
      4.000
      1.999
      64
      0.999
      8.189
      1pne
    
    
      14370
      LEU
      LEU
      Direct
      1.000
      49
      64
      5.588858
      50
      65
      49
      0.004
      6.959
      64
      0.999
      8.189
      1pne
    
    
      14371
      ILE
      LEU
      Direct
      0.994
      99
      64
      5.991632
      100
      65
      99
      0.886
      5.010
      64
      0.999
      8.189
      1pne
    
    
      14372
      VAL
      LEU
      Direct
      0.209
      101
      64
      6.633253
      102
      65
      101
      0.000
      6.422
      64
      0.999
      8.189
      1pne



In [221]:

    
data2 = pd.read_csv("/Users/weilu/Research/server/mar_2020/environmental_information/all_data_no_round_cath.csv", index_col=0)
data2.groupby(["Res1", "Res2"])["Theta"].count().reset_index().sort_values("Theta")
b = data2.query("Res1=='GLU' and Res2=='ALA'").reset_index(drop=True)
b["D_H"] = b["Density_H_x"] + b["Density_H_y"]
b["D_P"] = b["Density_P_x"] + b["Density_P_y"]
sns.jointplot("D_H", "D_P", data=b, kind="kde")









    Out[221]:





<seaborn.axisgrid.JointGrid at 0x130776080>



In [13]:

    
a = data2.query("Res1=='LEU' and Res2=='LEU'").reset_index(drop=True)
a["D_H"] = a["Density_H_x"] + a["Density_H_y"]
a["D_P"] = a["Density_P_x"] + a["Density_P_y"]









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-13-8f515669a5ec> in <module>()
----> 1 a = data2.query("Res1=='LEU' and Res2=='LEU'").reset_index(drop=True)
      2 a["D_H"] = a["Density_H_x"] + a["Density_H_y"]
      3 a["D_P"] = a["Density_P_x"] + a["Density_P_y"]

NameError: name 'data2' is not defined



In [154]:

    
a.sort_values("D_P")









    Out[154]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      index_x
      Density_H_x
      Density_P_x
      index_y
      Density_H_y
      Density_P_y
      Protein
      D_H
      D_P
    
  
  
    
      135
      LEU
      LEU
      Direct
      1.000
      6
      63
      5.423016
      6
      5.211
      1.000
      63
      3.988
      1.000
      1cpq
      9.199
      2.000
    
    
      175
      LEU
      LEU
      Direct
      1.000
      36
      88
      4.225886
      36
      3.000
      2.000
      88
      1.698
      1.000
      2sns
      4.698
      3.000
    
    
      79
      LEU
      LEU
      Direct
      1.000
      2
      70
      4.998909
      2
      3.856
      2.049
      70
      1.023
      1.000
      1a6f
      4.879
      3.049
    
    
      154
      LEU
      LEU
      Direct
      0.114
      90
      127
      6.704611
      90
      3.000
      2.121
      127
      1.305
      1.108
      1kuh
      4.305
      3.229
    
    
      70
      LEU
      LEU
      Direct
      0.935
      82
      102
      6.232599
      82
      1.000
      2.935
      102
      2.125
      0.935
      1a1x
      3.125
      3.870
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      123
      LEU
      LEU
      Direct
      1.000
      32
      71
      5.142135
      32
      0.009
      7.098
      71
      0.000
      7.966
      1rlw
      0.009
      15.064
    
    
      168
      LEU
      LEU
      Direct
      1.000
      49
      64
      5.588858
      49
      0.004
      6.959
      64
      0.999
      8.189
      1pne
      1.003
      15.148
    
    
      172
      LEU
      LEU
      Direct
      0.990
      64
      86
      6.044071
      64
      0.999
      8.189
      86
      0.008
      6.990
      1pne
      1.007
      15.179
    
    
      165
      LEU
      LEU
      Direct
      1.000
      49
      108
      5.543379
      49
      0.004
      6.959
      108
      0.039
      8.954
      1pne
      0.043
      15.913
    
    
      4
      LEU
      LEU
      Direct
      1.000
      6
      50
      5.629869
      6
      0.000
      8.373
      50
      0.001
      7.982
      1by9
      0.001
      16.355
    
  

258 rows × 16 columns



In [214]:

    
sns.jointplot("D_H", "D_P", data=a, kind="kde")









    Out[214]:





<seaborn.axisgrid.JointGrid at 0x1305430b8>



In [151]:

    
sns.jointplot("D_H", "D_P", data=a, kind="kde")









    Out[151]:





<seaborn.axisgrid.JointGrid at 0x12fb76630>



In [ ]:



In [140]:

    
sns.jointplot("D_H", "D_P", data=a, kind="kde")









    Out[140]:





<seaborn.axisgrid.JointGrid at 0x127ec8e48>



In [152]:

    
sns.jointplot("D_H", "D_P", data=a, kind="hex")









    Out[152]:





<seaborn.axisgrid.JointGrid at 0x12fb679e8>



In [139]:

    
sns.jointplot("D_H", "D_P", data=a, kind="hex")









    Out[139]:





<seaborn.axisgrid.JointGrid at 0x127edfb38>



In [ ]:



In [109]:

    
data_with_info = data.merge(data_envr, left_on="Index1", right_on="index").merge(data_envr, left_on="Index2", right_on="index")
data_with_info.query("Theta > 1e-1 and Type == 'Direct'").reset_index(drop=True)









    Out[109]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      index_x
      Density_H_x
      Density_P_x
      index_y
      Density_H_y
      Density_P_y
    
  
  
    
      0
      ALA
      SER
      Direct
      0.985
      68
      78
      6.081271
      68
      3.233
      2.011
      78
      3.792
      1.000
    
    
      1
      ARG
      TYR
      Direct
      1.000
      27
      67
      5.312370
      27
      1.678
      2.179
      67
      4.000
      0.000
    
    
      2
      THR
      TYR
      Direct
      1.000
      29
      67
      5.425073
      29
      2.678
      2.000
      67
      4.000
      0.000
    
    
      3
      LEU
      ILE
      Direct
      0.915
      2
      64
      6.262689
      2
      1.001
      5.733
      64
      0.000
      4.962
    
    
      4
      LEU
      ILE
      Direct
      0.949
      12
      64
      6.207278
      12
      1.001
      5.948
      64
      0.000
      4.962
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      102
      VAL
      THR
      Direct
      0.587
      23
      70
      6.464955
      23
      4.024
      1.060
      70
      3.616
      1.587
    
    
      103
      ARG
      GLY
      Direct
      1.000
      24
      71
      4.846040
      24
      1.041
      0.000
      71
      1.996
      0.001
    
    
      104
      ARG
      GLU
      Direct
      1.000
      27
      41
      4.193047
      27
      1.678
      2.179
      41
      2.003
      0.000
    
    
      105
      THR
      GLU
      Direct
      1.000
      29
      41
      5.378955
      29
      2.678
      2.000
      41
      2.003
      0.000
    
    
      106
      THR
      VAL
      Direct
      1.000
      29
      39
      4.840663
      29
      2.678
      2.000
      39
      2.000
      0.000
    
  

107 rows × 13 columns



In [111]:

    
data_with_info.shape









    Out[111]:





(2349, 13)



In [39]:

    
isH = {}
isP = {}
for i in range(20):
    isH[dindex_to_1[i]] = res_type_map_HP[dindex_to_1[i]]
    isP[dindex_to_1[i]] = 1 - res_type_map_HP[dindex_to_1[i]]



In [75]:

    
res_list = get_res_list(structure)
neighbor_list = get_neighbor_list(structure)
sequence = get_sequence_from_structure(structure)
density_H = calculate_property_density(res_list, neighbor_list, isH).round()
density_P = calculate_property_density(res_list, neighbor_list, isP).round()



In [76]:

    
environment_info = pd.DataFrame([density_H, density_P], index=["Density_H", "Density_P"]).T.reset_index()



In [77]:

    
data_with_info = data.merge(environment_info, left_on="Index1", right_on="index").merge(environment_info, left_on="Index2", right_on="index")



In [ ]:



In [80]:

    
a = data_with_info.query("Type=='Direct' and Theta > 1e-1")



In [87]:

    
a.groupby(["Res1", "Res2"])["Theta"].count().reset_index().sort_values("Theta")









    Out[87]:







  
    
      
      Res1
      Res2
      Theta
    
  
  
    
      0
      ALA
      GLU
      1
    
    
      31
      PHE
      ILE
      1
    
    
      32
      PRO
      ALA
      1
    
    
      33
      PRO
      GLU
      1
    
    
      34
      PRO
      ILE
      1
    
    
      ...
      ...
      ...
      ...
    
    
      1
      ALA
      SER
      3
    
    
      68
      VAL
      ILE
      3
    
    
      27
      LEU
      THR
      3
    
    
      22
      ILE
      ILE
      6
    
    
      25
      LEU
      ILE
      6
    
  

72 rows × 3 columns



In [112]:

    
a.shape









    Out[112]:





(107, 13)



In [88]:

    
a.query("Res1 =='LEU' and Res2 == 'ILE'")









    Out[88]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      index_x
      Density_H_x
      Density_P_x
      index_y
      Density_H_y
      Density_P_y
    
  
  
    
      131
      LEU
      ILE
      Direct
      1.000
      2
      84
      4.197815
      2
      1.0
      6.0
      84
      0.0
      2.0
    
    
      791
      LEU
      ILE
      Direct
      1.000
      2
      14
      5.333030
      2
      1.0
      6.0
      14
      1.0
      7.0
    
    
      929
      LEU
      ILE
      Direct
      0.915
      2
      64
      6.262689
      2
      1.0
      6.0
      64
      0.0
      5.0
    
    
      938
      LEU
      ILE
      Direct
      0.949
      12
      64
      6.207278
      12
      1.0
      6.0
      64
      0.0
      5.0
    
    
      1142
      LEU
      ILE
      Direct
      1.000
      2
      82
      5.090292
      2
      1.0
      6.0
      82
      2.0
      2.0
    
    
      1535
      LEU
      ILE
      Direct
      1.000
      12
      53
      4.717994
      12
      1.0
      6.0
      53
      0.0
      7.0



In [70]:

    
data.merge(environment_info, left_on="Index2", right_on="index")









    Out[70]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      index_x
      Density_H_x
      Density_P_x
      index_y
      Density_H_y
      Density_P_y
    
  
  
    
      0
      ARG
      VAL
      Protein
      0.0
      0
      69
      13.319039
      0
      1.892
      3.972
      69
      2.902
      1.0
    
    
      1
      ARG
      VAL
      Water
      0.0
      0
      69
      13.319039
      0
      1.892
      3.972
      69
      2.902
      1.0
    
    
      2
      ARG
      VAL
      Direct
      0.0
      0
      69
      13.319039
      0
      1.892
      3.972
      69
      2.902
      1.0
    
    
      3
      TRP
      VAL
      Protein
      0.0
      16
      69
      12.675022
      16
      2.649
      4.679
      69
      2.902
      1.0
    
    
      4
      TRP
      VAL
      Water
      0.0
      16
      69
      12.675022
      16
      2.649
      4.679
      69
      2.902
      1.0
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      2344
      ILE
      VAL
      Water
      0.0
      28
      39
      10.884260
      28
      1.000
      5.199
      39
      2.000
      0.0
    
    
      2345
      ILE
      VAL
      Direct
      0.0
      28
      39
      10.884260
      28
      1.000
      5.199
      39
      2.000
      0.0
    
    
      2346
      THR
      VAL
      Protein
      0.0
      29
      39
      4.840663
      29
      2.678
      2.000
      39
      2.000
      0.0
    
    
      2347
      THR
      VAL
      Water
      0.0
      29
      39
      4.840663
      29
      2.678
      2.000
      39
      2.000
      0.0
    
    
      2348
      THR
      VAL
      Direct
      1.0
      29
      39
      4.840663
      29
      2.678
      2.000
      39
      2.000
      0.0
    
  

2349 rows × 13 columns



In [71]:

    
data









    Out[71]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      index
      Density_H
      Density_P
    
  
  
    
      0
      ARG
      VAL
      Protein
      0.0
      0
      69
      13.319039
      0
      1.892
      3.972
    
    
      1
      ARG
      VAL
      Water
      0.0
      0
      69
      13.319039
      0
      1.892
      3.972
    
    
      2
      ARG
      VAL
      Direct
      0.0
      0
      69
      13.319039
      0
      1.892
      3.972
    
    
      3
      ARG
      TYR
      Protein
      0.0
      0
      67
      10.726437
      0
      1.892
      3.972
    
    
      4
      ARG
      TYR
      Water
      0.0
      0
      67
      10.726437
      0
      1.892
      3.972
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      2344
      VAL
      SER
      Water
      0.0
      69
      79
      8.581514
      69
      2.902
      1.000
    
    
      2345
      VAL
      SER
      Direct
      0.0
      69
      79
      8.581514
      69
      2.902
      1.000
    
    
      2346
      THR
      LYS
      Protein
      0.0
      70
      80
      13.351789
      70
      3.616
      1.587
    
    
      2347
      THR
      LYS
      Water
      0.0
      70
      80
      13.351789
      70
      3.616
      1.587
    
    
      2348
      THR
      LYS
      Direct
      0.0
      70
      80
      13.351789
      70
      3.616
      1.587
    
  

2349 rows × 10 columns



In [66]:

    
environment_info









    Out[66]:







  
    
      
      index
      Density_H
      Density_P
    
  
  
    
      0
      0
      1.892
      3.972
    
    
      1
      1
      1.443
      0.000
    
    
      2
      2
      1.001
      5.733
    
    
      3
      3
      1.443
      1.000
    
    
      4
      4
      1.000
      3.294
    
    
      ...
      ...
      ...
      ...
    
    
      86
      86
      1.959
      2.000
    
    
      87
      87
      1.000
      0.686
    
    
      88
      88
      2.552
      3.960
    
    
      89
      89
      2.956
      0.000
    
    
      90
      90
      2.007
      0.000
    
  

91 rows × 3 columns



In [ ]:



In [57]:

    
density_H









    Out[57]:





array([1.892, 1.443, 1.001, 1.443, 1.   , 2.001, 0.   , 1.021, 2.956,
       2.553, 2.954, 2.992, 1.001, 3.002, 1.   , 3.   , 2.649, 2.   ,
       1.001, 2.891, 0.   , 3.988, 0.893, 4.024, 1.041, 1.278, 3.994,
       1.678, 1.   , 2.678, 1.   , 0.993, 2.983, 3.003, 0.   , 1.995,
       0.   , 1.978, 0.029, 2.   , 0.035, 2.003, 1.   , 0.012, 1.034,
       2.009, 0.038, 1.   , 1.162, 2.   , 1.162, 0.   , 1.855, 0.436,
       1.884, 0.955, 1.203, 1.   , 2.998, 2.   , 3.004, 2.12 , 3.044,
       4.01 , 0.   , 3.009, 1.001, 4.   , 3.233, 2.902, 3.616, 1.996,
       1.889, 3.493, 1.001, 1.173, 0.   , 1.552, 3.792, 2.4  , 0.   ,
       1.263, 1.779, 2.159, 0.   , 3.   , 1.959, 1.   , 2.552, 2.956,
       2.007])



In [50]:

    
density_P.round(1)









    Out[50]:





array([4., 0., 6., 1., 3., 1., 1., 3., 0., 0., 0., 0., 6., 1., 7., 1., 5.,
       0., 1., 3., 0., 0., 0., 1., 0., 1., 4., 2., 5., 2., 3., 2., 2., 0.,
       0., 0., 0., 0., 0., 0., 1., 0., 4., 1., 4., 0., 2., 0., 0., 1., 1.,
       5., 1., 7., 0., 0., 4., 1., 0., 1., 1., 0., 2., 0., 5., 1., 5., 0.,
       2., 1., 2., 0., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1., 2., 0., 2.,
       0., 2., 1., 4., 0., 0.])



In [51]:

    
cb_density.round(1)









    Out[51]:





array([6., 1., 7., 2., 4., 3., 1., 4., 3., 3., 3., 3., 7., 4., 8., 4., 7.,
       2., 2., 6., 0., 4., 1., 5., 1., 2., 8., 4., 6., 5., 4., 3., 5., 3.,
       0., 2., 0., 2., 0., 2., 1., 2., 5., 1., 5., 2., 2., 1., 1., 3., 2.,
       5., 3., 7., 2., 1., 5., 2., 3., 3., 4., 2., 5., 4., 5., 4., 6., 4.,
       5., 4., 5., 2., 3., 3., 2., 1., 0., 2., 5., 3., 1., 2., 4., 2., 2.,
       3., 4., 2., 7., 3., 2.])



In [31]:

    
# get environmental information
res_list = get_res_list(structure)
neighbor_list = get_neighbor_list(structure)
sequence = get_sequence_from_structure(structure)
cb_density = calculate_cb_density(res_list, neighbor_list)
r_min_direct = 2.5
r_max_direct = 6.5
r_min = 6.5
r_max = 9.5
kappa = 5.0
min_seq_sep = 10
density_threshold = 2.6
density_kappa = 7.0
# phi_mediated_contact_well = np.zeros((2, 20,20))
v_mediated = 0
data_ = []
for res1globalindex, res1 in enumerate(res_list):
    res1index = get_local_index(res1)
    res1chain = get_chain(res1)
    rho_i = cb_density[res1globalindex]
    for res2 in get_neighbors_within_radius(neighbor_list, res1, r_max+2.0):
        res2index = get_local_index(res2)
        res2chain = get_chain(res2)
        res2globalindex = get_global_index(res_list, res2)
        rho_j = cb_density[res2globalindex]

        if res2globalindex - res1globalindex >= min_seq_sep or (res1chain != res2chain and res2globalindex > res1globalindex):
            res1type = get_res_type(res_list, res1)
            res2type = get_res_type(res_list, res2)
            rij = get_interaction_distance(res1, res2)
            theta = interaction_well(rij, r_min, r_max, kappa)
            water_theta = prot_water_switchFunc_sigmaWater(rho_i, rho_j, density_threshold, density_kappa) * theta
            protein_theta = prot_water_switchFunc_sigmaProt(rho_i, rho_j, density_threshold, density_kappa) * theta
            data_.append([res1.resname, res2.resname, "Protein", round(protein_theta, 3), res1globalindex, res2globalindex, rij])
            data_.append([res1.resname, res2.resname, "Water", round(water_theta, 3), res1globalindex, res2globalindex, rij])
            direct_theta = interaction_well(rij, r_min_direct, r_max_direct, kappa)
            data_.append([res1.resname, res2.resname, "Direct", round(direct_theta, 3), res1globalindex, res2globalindex, rij])
            # protein_gamma = protein_gamma_ijm[0][res1type][res2type]*k_hypercharge
            # water_gamma = water_gamma_ijm[0][res1type][res2type]*k_hypercharge
data = pd.DataFrame(data_, columns=["Res1", "Res2", "Type", "Theta", "Index1", "Index2", "r"])



In [47]:

    
structure = s
kappa=5.0
res_list = get_res_list(structure)
neighbor_list = get_neighbor_list(structure)
sequence = get_sequence_from_structure(structure)
cb_density = calculate_cb_density(res_list, neighbor_list)
r_min_direct = 2.5
r_max_direct = 6.5
r_min = 6.5
r_max = 9.5
# kappa = 5.0
min_seq_sep = 10
density_threshold = 2.6
density_kappa = 7.0
# phi_mediated_contact_well = np.zeros((2, 20,20))
v_mediated = 0
data_ = []
for res1globalindex, res1 in enumerate(res_list):
    res1index = get_local_index(res1)
    res1chain = get_chain(res1)
    rho_i = cb_density[res1globalindex]
    for res2 in get_neighbors_within_radius(neighbor_list, res1, r_max+2.0):
        res2index = get_local_index(res2)
        res2chain = get_chain(res2)
        res2globalindex = get_global_index(res_list, res2)
        rho_j = cb_density[res2globalindex]

        if res2globalindex - res1globalindex >= min_seq_sep or (res1chain != res2chain and res2globalindex > res1globalindex):
            res1type = get_res_type(res_list, res1)
            res2type = get_res_type(res_list, res2)
            rij = get_interaction_distance(res1, res2)
            theta = interaction_well(rij, r_min, r_max, kappa)
            water_theta = prot_water_switchFunc_sigmaWater(rho_i, rho_j, density_threshold, density_kappa) * theta
            protein_theta = prot_water_switchFunc_sigmaProt(rho_i, rho_j, density_threshold, density_kappa) * theta
            data_.append([res1.resname, res2.resname, "Protein", round(protein_theta, 3), res1globalindex, res2globalindex])
            data_.append([res1.resname, res2.resname, "Water", round(water_theta, 3), res1globalindex, res2globalindex])
            direct_theta = interaction_well(rij, r_min_direct, r_max_direct, kappa)
            data_.append([res1.resname, res2.resname, "Direct", round(direct_theta, 3), res1globalindex, res2globalindex])
            # protein_gamma = protein_gamma_ijm[0][res1type][res2type]*k_hypercharge
            # water_gamma = water_gamma_ijm[0][res1type][res2type]*k_hypercharge
data = pd.DataFrame(data_, columns=["Res1", "Res2", "Type", "Theta", "Index1", "Index2"])



In [74]:



In [4]:



In [30]:

    
gammaFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_optimization/optimization_larger_excl_withoutBurial/iter_larger_excl_withoutBurial_30"
contact_gammas = get_contact_gamma_info(gammaFile)

contact_gammas["Res1"] = contact_gammas.apply(lambda x: one_to_three(x["Res1"]), axis=1)
contact_gammas["Res2"] = contact_gammas.apply(lambda x: one_to_three(x["Res2"]), axis=1)
contact_gammas["Type"] = contact_gammas["Interaction"]

# pdb = "1sfp"
# pdb = "1who"
pdb = "1jon"
pdb = "1neu"
pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_1_stronger_exclude_withoutBurial/{pdb}/0/lastFrame.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
data = get_interaction_data(structure)

a = data.merge(contact_gammas, on=["Res1", "Res2", "Type"])
a["theta_gamma"] = a["Theta"] * a["Gamma"]

pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_1_stronger_exclude_withoutBurial/{pdb}/0/crystal_structure.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
data_native = get_interaction_data(structure)

a_native = data_native.merge(contact_gammas, on=["Res1", "Res2", "Type"])
a_native["theta_gamma"] = a_native["Theta"] * a_native["Gamma"]



In [31]:

    
a[["Type", "theta_gamma"]].groupby("Type").sum()









    Out[31]:







  
    
      
      theta_gamma
    
    
      Type
      
    
  
  
    
      Direct
      -101.276373
    
    
      Protein
      -105.200971
    
    
      Water
      -1.311072



In [32]:

    
a_native[["Type", "theta_gamma"]].groupby("Type").sum()









    Out[32]:







  
    
      
      theta_gamma
    
    
      Type
      
    
  
  
    
      Direct
      -59.127062
    
    
      Protein
      -66.563254
    
    
      Water
      -4.710177



In [33]:

    
a[["Type", "theta_gamma"]].groupby("Type").sum().sum()









    Out[33]:





theta_gamma   -207.788416
dtype: float64



In [34]:

    
b2 = a.query("Theta > 0.1").groupby(["Res1", "Res2", "Type"])["theta_gamma"].count().reset_index()
b2.sort_values("theta_gamma").tail(5)









    Out[34]:







  
    
      
      Res1
      Res2
      Type
      theta_gamma
    
  
  
    
      276
      SER
      VAL
      Protein
      7
    
    
      264
      SER
      LYS
      Protein
      7
    
    
      140
      GLY
      VAL
      Protein
      7
    
    
      165
      ILE
      ILE
      Direct
      7
    
    
      364
      VAL
      SER
      Protein
      8



In [35]:

    
b3 = a.query("Theta > 0.1").groupby(["Res1", "Res2", "Type"])["theta_gamma"].sum().reset_index()
b3.sort_values("theta_gamma").head(5)









    Out[35]:







  
    
      
      Res1
      Res2
      Type
      theta_gamma
    
  
  
    
      165
      ILE
      ILE
      Direct
      -5.859840
    
    
      54
      ASP
      LYS
      Direct
      -4.141928
    
    
      181
      ILE
      VAL
      Direct
      -3.560194
    
    
      171
      ILE
      PHE
      Direct
      -3.555981
    
    
      29
      ARG
      PHE
      Protein
      -2.875323



In [36]:

    
b2_native = a_native.query("Theta > 0.1").groupby(["Res1", "Res2", "Type"])["theta_gamma"].count().reset_index()
b2_native.sort_values("theta_gamma").tail(5)









    Out[36]:







  
    
      
      Res1
      Res2
      Type
      theta_gamma
    
  
  
    
      299
      VAL
      SER
      Protein
      5
    
    
      142
      ILE
      PHE
      Protein
      5
    
    
      137
      ILE
      ILE
      Protein
      5
    
    
      307
      VAL
      VAL
      Protein
      5
    
    
      172
      PHE
      ASP
      Protein
      6



In [37]:

    
b3_native = a_native.query("Theta > 0.1").groupby(["Res1", "Res2", "Type"])["theta_gamma"].sum().reset_index()
b3_native.sort_values("theta_gamma").head(5)









    Out[37]:







  
    
      
      Res1
      Res2
      Type
      theta_gamma
    
  
  
    
      250
      TRP
      ILE
      Direct
      -4.199720
    
    
      306
      VAL
      VAL
      Direct
      -3.221582
    
    
      180
      PHE
      ILE
      Direct
      -2.645379
    
    
      136
      ILE
      ILE
      Direct
      -2.453640
    
    
      292
      VAL
      LEU
      Direct
      -2.448768



In [60]:

    
gammaFile = "/Users/weilu/Research/server/mar_2020/cath_dataset_shuffle_optimization/optimization_iter0_com_density_rmin_2p5/saved_gammas/iter0_cutoff600_impose_Aprime_constraint"
contact_gammas = get_contact_gamma_info(gammaFile)

contact_gammas["Res1"] = contact_gammas.apply(lambda x: one_to_three(x["Res1"]), axis=1)
contact_gammas["Res2"] = contact_gammas.apply(lambda x: one_to_three(x["Res2"]), axis=1)
contact_gammas["Type"] = contact_gammas["Interaction"]

pdb = "1sfp"
pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_0_stronger_exclude_volume/{pdb}/0/lastFrame.pdb"
parser = PDBParser()
structure = parser.get_structure("X", pdbFile)
data = get_interaction_data(structure)

a = data.merge(contact_gammas, on=["Res1", "Res2", "Type"])
a["theta_gamma"] = a["Theta"] * a["Gamma"]



In [61]:

    
a[["Type", "theta_gamma"]].groupby("Type").sum()









    Out[61]:







  
    
      
      theta_gamma
    
    
      Type
      
    
  
  
    
      Direct
      -71.788934
    
    
      Protein
      -11.780707
    
    
      Water
      -2.330682



In [62]:

    
b2 = a.query("Theta > 0.1").groupby(["Res1", "Res2", "Type"])["theta_gamma"].count().reset_index()
b2.sort_values("theta_gamma").tail(5)



In [63]:









    Out[63]:







  
    
      
      Res1
      Res2
      Type
      theta_gamma
    
  
  
    
      111
      GLY
      TYR
      Protein
      5
    
    
      134
      ILE
      LEU
      Protein
      6
    
    
      132
      ILE
      ILE
      Protein
      6
    
    
      130
      ILE
      GLY
      Protein
      6
    
    
      146
      ILE
      VAL
      Protein
      6



In [20]:

    
b3 = a.query("Theta > 0.1").groupby(["Res1", "Res2", "Type"])["theta_gamma"].sum().reset_index()
b3.sort_values("theta_gamma").head(5)



In [23]:









    Out[23]:







  
    
      
      Res1
      Res2
      Type
      theta_gamma
    
  
  
    
      19
      CYS
      CYS
      Direct
      -25.324650
    
    
      149
      VAL
      LEU
      Direct
      -11.089641
    
    
      20
      CYS
      CYS
      Protein
      -6.585576
    
    
      26
      CYS
      LEU
      Protein
      -3.843926
    
    
      104
      PHE
      CYS
      Direct
      -3.533796



In [25]:

    
fastaFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_0_stronger_exclude_volume/3cyr/0/crystal_structure.fasta"
seq = read_fasta(fastaFile)



In [26]:

    
seq.count("C")









    Out[26]:





8



In [27]:

    
data = pd.read_csv("/Users/weilu/Research/server/mar_2020/mass_iterative_run/training_set.csv")
specific_decoys = data.query("Length < 150 and Length > 70").reset_index(drop=True)
pdb_list = specific_decoys["Protein"].to_list()
pdb_list = [a.lower() for a in pdb_list]
skip_pdb_list = ["1puc", "1skz"]
skip_pdb_list += ["1msc", "1fmb", "1gvp", "2tgi", "1whi", "1baj", "1rmd", "1div"]  #dimer.
skip_pdb_list += ["1aqe"]  # lots of ligand
filtered_pdb_list = [x for x in pdb_list if x not in skip_pdb_list]
pdb_list = filtered_pdb_list



In [65]:

    
info_ = []
for pdb in pdb_list:
    seq = np.loadtxt(f"/Users/weilu/Research/server/mar_2020/mass_iterative_optimization/database/S20_seq/{pdb}.seq", dtype=str)
    seq = str(seq)
    info_.append([pdb, seq, seq.count("C"), len(seq)])



In [66]:

    
data = pd.DataFrame(info_, columns=["Protein", "Seq", "Count", "Length"])



In [67]:

    
data.to_csv("/Users/weilu/Research/server/mar_2020/mass_iterative_optimization/cys_count_data.csv")



In [47]:

    
data["Length"] = data["Seq"].apply(lambda x:len(x))



In [70]:

    
print(data.sort_values("Count").query("Count >= 6")["Protein"].to_list())









    



['1by2', '1rcb', '1hyp', '3lzt', '3cyr', '7rsa', '1rzl', '1b6e', '1poc', '1bea', '1poa']



In [64]:

    
data









    Out[64]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
    
  
  
    
      0
      LEU
      ALA
      Protein
      0.000
      0
      18
      11.045428
    
    
      1
      LEU
      ALA
      Water
      0.000
      0
      18
      11.045428
    
    
      2
      LEU
      ALA
      Direct
      0.000
      0
      18
      11.045428
    
    
      3
      LEU
      ARG
      Protein
      0.033
      0
      40
      9.838098
    
    
      4
      LEU
      ARG
      Water
      0.000
      0
      40
      9.838098
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      3097
      TYR
      GLU
      Water
      0.000
      89
      101
      15.198022
    
    
      3098
      TYR
      GLU
      Direct
      0.000
      89
      101
      15.198022
    
    
      3099
      ILE
      TYR
      Protein
      0.000
      90
      100
      10.454025
    
    
      3100
      ILE
      TYR
      Water
      0.000
      90
      100
      10.454025
    
    
      3101
      ILE
      TYR
      Direct
      0.000
      90
      100
      10.454025
    
  

3102 rows × 7 columns



In [20]:

    
gammaFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_optimization/optimization_new_4_withoutBurial/saved_gammas/new_4_cutoff600_impose_Aprime_constraint"
contact_gammas = get_contact_gamma_info(gammaFile)



In [75]:

    
# contact_gammas["Res1"] = contact_gammas.apply(lambda x: one_to_three(x["Res1"]), axis=1)
# contact_gammas["Res2"] = contact_gammas.apply(lambda x: one_to_three(x["Res2"]), axis=1)
contact_gammas["Type"] = contact_gammas["Interaction"]
a = data.merge(contact_gammas, on=["Res1", "Res2", "Type"])
a["theta_gamma"] = a["Theta"] * a["Gamma"]



In [77]:

    
a["theta_gamma"] = a["Theta"] * a["Gamma"]



In [78]:

    
a[["Type", "theta_gamma"]].groupby("Type").sum()









    Out[78]:







  
    
      
      theta_gamma
    
    
      Type
      
    
  
  
    
      Direct
      -949.732540
    
    
      Protein
      -479.435681
    
    
      Water
      -8.402658



In [79]:

    
a["theta_gamma"].sum()









    Out[79]:





-1437.5708789999999



In [80]:

    
b2 = a.query("Theta > 0.1").groupby(["Res1", "Res2", "Type"])["theta_gamma"].count().reset_index()



In [83]:

    
b3 = a.query("Theta > 0.1").groupby(["Res1", "Res2", "Type"])["theta_gamma"].sum().reset_index()



In [91]:

    
b3.sort_values("theta_gamma").head(200)["theta_gamma"].sum()









    Out[91]:





-1483.202181



In [96]:

    
a.query("Res1 == 'TYR' and Res2 == 'CYS' and Theta > 0.1 and Type =='Direct'").sort_values("r")









    Out[96]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      Interaction
      Index
      Gamma
      theta_gamma
    
  
  
    
      890
      TYR
      CYS
      Direct
      0.956
      23
      77
      2.808838
      Direct
      88
      -4.012
      -3.835472
    
    
      921
      TYR
      CYS
      Direct
      0.999
      66
      84
      3.204737
      Direct
      88
      -4.012
      -4.007988
    
    
      876
      TYR
      CYS
      Direct
      1.000
      2
      89
      3.921630
      Direct
      88
      -4.012
      -4.012000
    
    
      877
      TYR
      CYS
      Direct
      1.000
      2
      84
      3.969086
      Direct
      88
      -4.012
      -4.012000
    
    
      926
      TYR
      CYS
      Direct
      1.000
      68
      91
      4.097748
      Direct
      88
      -4.012
      -4.012000
    
    
      897
      TYR
      CYS
      Direct
      1.000
      26
      91
      4.154008
      Direct
      88
      -4.012
      -4.012000
    
    
      912
      TYR
      CYS
      Direct
      1.000
      62
      117
      4.176590
      Direct
      88
      -4.012
      -4.012000
    
    
      898
      TYR
      CYS
      Direct
      1.000
      26
      70
      4.354343
      Direct
      88
      -4.012
      -4.012000
    
    
      906
      TYR
      CYS
      Direct
      1.000
      50
      84
      4.464048
      Direct
      88
      -4.012
      -4.012000
    
    
      905
      TYR
      CYS
      Direct
      1.000
      50
      117
      4.704973
      Direct
      88
      -4.012
      -4.012000
    
    
      873
      TYR
      CYS
      Direct
      1.000
      2
      117
      4.796232
      Direct
      88
      -4.012
      -4.012000
    
    
      909
      TYR
      CYS
      Direct
      1.000
      50
      89
      4.796544
      Direct
      88
      -4.012
      -4.012000
    
    
      879
      TYR
      CYS
      Direct
      1.000
      2
      49
      4.818510
      Direct
      88
      -4.012
      -4.012000
    
    
      901
      TYR
      CYS
      Direct
      1.000
      26
      77
      4.863667
      Direct
      88
      -4.012
      -4.012000
    
    
      923
      TYR
      CYS
      Direct
      1.000
      66
      91
      4.958446
      Direct
      88
      -4.012
      -4.012000
    
    
      895
      TYR
      CYS
      Direct
      1.000
      26
      84
      4.969316
      Direct
      88
      -4.012
      -4.012000
    
    
      929
      TYR
      CYS
      Direct
      1.000
      104
      117
      4.990378
      Direct
      88
      -4.012
      -4.012000
    
    
      925
      TYR
      CYS
      Direct
      1.000
      68
      84
      5.031310
      Direct
      88
      -4.012
      -4.012000
    
    
      870
      TYR
      CYS
      Direct
      1.000
      2
      25
      5.094150
      Direct
      88
      -4.012
      -4.012000
    
    
      900
      TYR
      CYS
      Direct
      1.000
      26
      89
      5.218085
      Direct
      88
      -4.012
      -4.012000
    
    
      913
      TYR
      CYS
      Direct
      1.000
      62
      84
      5.235210
      Direct
      88
      -4.012
      -4.012000
    
    
      927
      TYR
      CYS
      Direct
      1.000
      68
      89
      5.246518
      Direct
      88
      -4.012
      -4.012000
    
    
      882
      TYR
      CYS
      Direct
      1.000
      2
      91
      5.403767
      Direct
      88
      -4.012
      -4.012000
    
    
      915
      TYR
      CYS
      Direct
      1.000
      62
      89
      5.464846
      Direct
      88
      -4.012
      -4.012000
    
    
      920
      TYR
      CYS
      Direct
      1.000
      66
      89
      5.552884
      Direct
      88
      -4.012
      -4.012000
    
    
      928
      TYR
      CYS
      Direct
      1.000
      68
      98
      5.720343
      Direct
      88
      -4.012
      -4.012000
    
    
      903
      TYR
      CYS
      Direct
      1.000
      26
      98
      5.720448
      Direct
      88
      -4.012
      -4.012000
    
    
      884
      TYR
      CYS
      Direct
      0.999
      23
      84
      5.763156
      Direct
      88
      -4.012
      -4.007988
    
    
      907
      TYR
      CYS
      Direct
      0.997
      50
      91
      5.911228
      Direct
      88
      -4.012
      -3.999964
    
    
      886
      TYR
      CYS
      Direct
      0.996
      23
      91
      5.938394
      Direct
      88
      -4.012
      -3.995952
    
    
      893
      TYR
      CYS
      Direct
      0.995
      23
      43
      5.965650
      Direct
      88
      -4.012
      -3.991940
    
    
      888
      TYR
      CYS
      Direct
      0.760
      23
      59
      6.384588
      Direct
      88
      -4.012
      -3.049120
    
    
      922
      TYR
      CYS
      Direct
      0.603
      66
      77
      6.458406
      Direct
      88
      -4.012
      -2.419236
    
    
      914
      TYR
      CYS
      Direct
      0.318
      62
      91
      6.576455
      Direct
      88
      -4.012
      -1.275816



In [86]:

    
b3.sort_values("theta_gamma").head(20)









    Out[86]:







  
    
      
      Res1
      Res2
      Type
      theta_gamma
    
  
  
    
      342
      TYR
      CYS
      Direct
      -130.883476
    
    
      321
      TRP
      CYS
      Direct
      -106.422800
    
    
      113
      CYS
      TYR
      Protein
      -90.723716
    
    
      343
      TYR
      CYS
      Protein
      -87.706208
    
    
      112
      CYS
      TYR
      Direct
      -73.202952
    
    
      320
      TRP
      ASN
      Protein
      -58.802645
    
    
      226
      MET
      CYS
      Direct
      -50.142196
    
    
      246
      PHE
      CYS
      Direct
      -32.165329
    
    
      279
      SER
      CYS
      Direct
      -30.213630
    
    
      188
      ILE
      TYR
      Direct
      -28.798753
    
    
      333
      TRP
      TYR
      Direct
      -24.746634
    
    
      17
      ARG
      ASP
      Direct
      -24.667500
    
    
      59
      ASN
      TRP
      Protein
      -22.552750
    
    
      319
      TRP
      ARG
      Direct
      -22.380120
    
    
      346
      TYR
      ILE
      Direct
      -19.988483
    
    
      94
      CYS
      CYS
      Protein
      -19.848447
    
    
      324
      TRP
      ILE
      Direct
      -18.720945
    
    
      110
      CYS
      TRP
      Direct
      -17.923650
    
    
      19
      ARG
      CYS
      Direct
      -17.749700
    
    
      88
      CYS
      ARG
      Direct
      -17.403716



In [69]:

    
b2.sort_values("theta_gamma").tail(10)









    Out[69]:







  
    
      
      Res1
      Res2
      Type
      theta_gamma
    
  
  
    
      87
      CYS
      ALA
      Protein
      18
    
    
      112
      CYS
      TYR
      Direct
      19
    
    
      320
      TRP
      ASN
      Protein
      19
    
    
      336
      TYR
      ALA
      Protein
      20
    
    
      357
      TYR
      TYR
      Direct
      22
    
    
      343
      TYR
      CYS
      Protein
      28
    
    
      113
      CYS
      TYR
      Protein
      29
    
    
      321
      TRP
      CYS
      Direct
      30
    
    
      342
      TYR
      CYS
      Direct
      34
    
    
      94
      CYS
      CYS
      Protein
      45



In [82]:

    
a.query("Res1 == 'CYS' and Res2 == 'CYS' and Theta > 0.1")









    Out[82]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      r
      Interaction
      Index
      Gamma
      theta_gamma
    
  
  
    
      3153
      CYS
      CYS
      Protein
      1.000
      10
      25
      7.411365
      Protein
      284
      -0.477
      -0.477000
    
    
      3154
      CYS
      CYS
      Protein
      0.987
      10
      59
      9.068994
      Protein
      284
      -0.477
      -0.470799
    
    
      3155
      CYS
      CYS
      Protein
      1.000
      10
      27
      7.557992
      Protein
      284
      -0.477
      -0.477000
    
    
      3157
      CYS
      CYS
      Protein
      1.000
      10
      84
      8.094460
      Protein
      284
      -0.477
      -0.477000
    
    
      3158
      CYS
      CYS
      Protein
      0.938
      10
      77
      6.771117
      Protein
      284
      -0.477
      -0.447426
    
    
      3159
      CYS
      CYS
      Protein
      0.998
      10
      43
      7.104249
      Protein
      284
      -0.477
      -0.476046
    
    
      3160
      CYS
      CYS
      Protein
      0.999
      10
      42
      8.782217
      Protein
      284
      -0.477
      -0.476523
    
    
      3161
      CYS
      CYS
      Protein
      0.678
      10
      91
      9.425323
      Protein
      284
      -0.477
      -0.323406
    
    
      3162
      CYS
      CYS
      Protein
      0.288
      25
      59
      9.590501
      Protein
      284
      -0.477
      -0.137376
    
    
      3163
      CYS
      CYS
      Protein
      0.976
      25
      98
      9.128296
      Protein
      284
      -0.477
      -0.465552
    
    
      3164
      CYS
      CYS
      Protein
      1.000
      25
      117
      8.697128
      Protein
      284
      -0.477
      -0.477000
    
    
      3165
      CYS
      CYS
      Protein
      1.000
      25
      70
      7.770286
      Protein
      284
      -0.477
      -0.477000
    
    
      3169
      CYS
      CYS
      Protein
      0.859
      25
      49
      9.318975
      Protein
      284
      -0.477
      -0.409743
    
    
      3170
      CYS
      CYS
      Protein
      0.903
      25
      43
      9.277326
      Protein
      284
      -0.477
      -0.430731
    
    
      3174
      CYS
      CYS
      Protein
      1.000
      27
      84
      8.294059
      Protein
      284
      -0.477
      -0.477000
    
    
      3175
      CYS
      CYS
      Protein
      1.000
      27
      42
      8.483358
      Protein
      284
      -0.477
      -0.477000
    
    
      3176
      CYS
      CYS
      Protein
      1.000
      27
      91
      7.991209
      Protein
      284
      -0.477
      -0.477000
    
    
      3177
      CYS
      CYS
      Protein
      0.932
      27
      70
      6.761860
      Protein
      284
      -0.477
      -0.444564
    
    
      3178
      CYS
      CYS
      Protein
      0.994
      27
      59
      7.015271
      Protein
      284
      -0.477
      -0.474138
    
    
      3179
      CYS
      CYS
      Protein
      0.244
      27
      89
      9.613122
      Protein
      284
      -0.477
      -0.116388
    
    
      3182
      CYS
      CYS
      Protein
      1.000
      27
      98
      8.172283
      Protein
      284
      -0.477
      -0.477000
    
    
      3183
      CYS
      CYS
      Protein
      0.898
      27
      43
      6.717986
      Protein
      284
      -0.477
      -0.428346
    
    
      3186
      CYS
      CYS
      Protein
      0.999
      42
      117
      7.180018
      Protein
      284
      -0.477
      -0.476523
    
    
      3190
      CYS
      CYS
      Protein
      0.135
      42
      77
      9.685992
      Protein
      284
      -0.477
      -0.064395
    
    
      3194
      CYS
      CYS
      Protein
      0.999
      43
      117
      8.810677
      Protein
      284
      -0.477
      -0.476523
    
    
      3198
      CYS
      CYS
      Protein
      1.000
      43
      77
      7.933357
      Protein
      284
      -0.477
      -0.477000
    
    
      3200
      CYS
      CYS
      Protein
      0.990
      49
      59
      6.957804
      Protein
      284
      -0.477
      -0.472230
    
    
      3201
      CYS
      CYS
      Protein
      0.969
      49
      98
      9.154268
      Protein
      284
      -0.477
      -0.462213
    
    
      3204
      CYS
      CYS
      Protein
      1.000
      49
      89
      8.122187
      Protein
      284
      -0.477
      -0.477000
    
    
      3205
      CYS
      CYS
      Protein
      1.000
      49
      84
      8.692106
      Protein
      284
      -0.477
      -0.477000
    
    
      3207
      CYS
      CYS
      Protein
      1.000
      59
      117
      7.468670
      Protein
      284
      -0.477
      -0.477000
    
    
      3210
      CYS
      CYS
      Protein
      0.995
      59
      70
      8.971240
      Protein
      284
      -0.477
      -0.474615
    
    
      3212
      CYS
      CYS
      Protein
      1.000
      59
      77
      8.543577
      Protein
      284
      -0.477
      -0.477000
    
    
      3213
      CYS
      CYS
      Protein
      1.000
      59
      98
      8.630322
      Protein
      284
      -0.477
      -0.477000
    
    
      3215
      CYS
      CYS
      Protein
      1.000
      70
      117
      8.256154
      Protein
      284
      -0.477
      -0.477000
    
    
      3216
      CYS
      CYS
      Protein
      1.000
      70
      89
      8.483834
      Protein
      284
      -0.477
      -0.477000
    
    
      3217
      CYS
      CYS
      Protein
      0.986
      70
      84
      9.071579
      Protein
      284
      -0.477
      -0.470322
    
    
      3218
      CYS
      CYS
      Protein
      1.000
      70
      91
      7.877710
      Protein
      284
      -0.477
      -0.477000
    
    
      3219
      CYS
      CYS
      Protein
      0.883
      77
      98
      9.298271
      Protein
      284
      -0.477
      -0.421191
    
    
      3221
      CYS
      CYS
      Protein
      0.999
      77
      89
      8.816828
      Protein
      284
      -0.477
      -0.476523
    
    
      3222
      CYS
      CYS
      Protein
      0.964
      77
      91
      6.828478
      Protein
      284
      -0.477
      -0.459828
    
    
      3224
      CYS
      CYS
      Protein
      1.000
      84
      117
      8.211732
      Protein
      284
      -0.477
      -0.477000
    
    
      3225
      CYS
      CYS
      Protein
      0.999
      89
      117
      7.158068
      Protein
      284
      -0.477
      -0.476523
    
    
      3226
      CYS
      CYS
      Protein
      0.999
      91
      117
      8.794488
      Protein
      284
      -0.477
      -0.476523
    
    
      3227
      CYS
      CYS
      Protein
      1.000
      98
      117
      7.307405
      Protein
      284
      -0.477
      -0.477000
    
    
      3316
      CYS
      CYS
      Direct
      1.000
      25
      89
      3.975121
      Direct
      74
      2.484
      2.484000
    
    
      3318
      CYS
      CYS
      Direct
      1.000
      25
      77
      5.051316
      Direct
      74
      2.484
      2.484000
    
    
      3333
      CYS
      CYS
      Direct
      0.102
      27
      43
      6.717986
      Direct
      74
      2.484
      0.253368
    
    
      3342
      CYS
      CYS
      Direct
      0.167
      43
      59
      2.339643
      Direct
      74
      2.484
      0.414828



In [56]:

    
a.sort_values("theta_gamma")









    Out[56]:







  
    
      
      Res1
      Res2
      Type
      Theta
      Index1
      Index2
      Interaction
      Index
      Gamma
      theta_gamma
    
  
  
    
      1904
      GLN
      HIS
      Direct
      1.000
      9
      46
      Direct
      93
      -4.963
      -4.963000
    
    
      2490
      MET
      CYS
      Direct
      1.000
      7
      84
      Direct
      82
      -4.654
      -4.654000
    
    
      2501
      MET
      CYS
      Direct
      1.000
      7
      43
      Direct
      82
      -4.654
      -4.654000
    
    
      2499
      MET
      CYS
      Direct
      1.000
      7
      25
      Direct
      82
      -4.654
      -4.654000
    
    
      2491
      MET
      CYS
      Direct
      0.999
      7
      42
      Direct
      82
      -4.654
      -4.649346
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      149
      ASN
      ASN
      Direct
      0.960
      0
      112
      Direct
      39
      1.119
      1.074240
    
    
      5023
      SER
      THR
      Direct
      1.000
      32
      74
      Direct
      196
      1.075
      1.075000
    
    
      6450
      ASP
      TRP
      Protein
      0.998
      19
      60
      Protein
      281
      1.097
      1.094806
    
    
      3316
      CYS
      CYS
      Direct
      1.000
      25
      89
      Direct
      74
      2.484
      2.484000
    
    
      3318
      CYS
      CYS
      Direct
      1.000
      25
      77
      Direct
      74
      2.484
      2.484000
    
  

7521 rows × 10 columns



In [4]:

    
from Bio.PDB.Polypeptide import d1_to_index
from Bio.PDB.Polypeptide import dindex_to_1

pdb = "2a0b"

pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/cbd-openmmawsem.pdb"
seq = read_fasta(f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/crystal_structure.fasta")
def get_contact_info(pdbFile, seq, frame_index=0):
    # frame_index = 0
    cutoff = 9.5
    MAX_OFFSET = 9
    parser = PDBParser()
    structure = parser.get_structure("x", pdbFile)
    models = list(structure.get_models())
    model = models[frame_index]
    all_residues = list(model.get_residues())
    info_ = []
    for i, res1 in enumerate(all_residues):
        if seq[i] == "G":
            a1 = res1["CA"]
        else:
            a1 = res1["CB"]
        for j, res2 in enumerate(all_residues):
            if seq[j] == "G":
                a2 = res2["CA"]
            else:
                a2 = res2["CB"]
            dis = a1 - a2
            if dis < cutoff and j - i > MAX_OFFSET:
                info_.append([pdb, i, j, seq[i], seq[j], dis])
    info = pd.DataFrame(info_, columns=["Protein", "i", "j", "Resi", "Resj", "Dis"])
    return info



In [82]:

    
data = pd.read_csv("training_set.csv")
specific_decoys = data.query("Length < 150 and Length > 70").reset_index(drop=True)
pdb_list = specific_decoys["Protein"].to_list()
pdb_list = [a.lower() for a in pdb_list]
skip_pdb_list = ["1puc", "1skz"]
skip_pdb_list += ["1msc", "1fmb", "1gvp", "2tgi", "1whi", "1baj", "1rmd", "1div"]  #dimer.
skip_pdb_list += ["1aqe"]  # lots of ligand
filtered_pdb_list = [x for x in pdb_list if x not in skip_pdb_list]
pdb_list = filtered_pdb_list



In [9]:

    
info_ = []
for pdb in pdb_list:
    pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/cbd-openmmawsem.pdb"
    seq = read_fasta(f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/crystal_structure.fasta")
    info = get_contact_info(pdbFile, seq)
    info_.append(info)
info = pd.concat(info_)



In [10]:



In [14]:

    
direct_contact_table = np.zeros((20,20))
for i, line in info.iterrows():
    resi = line["Resi"]
    resj = line["Resj"]
    dis = line["Dis"]
    if dis < 6.5:
        direct_contact_table[d1_to_index[resi]][d1_to_index[resj]] += 1
        if resi != resj:
            direct_contact_table[d1_to_index[resj]][d1_to_index[resi]] += 1



In [16]:

    
direct_contact_table.min()









    Out[16]:





3.0



In [15]:

    
plt.imshow(direct_contact_table, origin=0)
plt.colorbar()
_ = plt.xticks(np.arange(20), aa3)
_ = plt.yticks(np.arange(20), aa3)



In [11]:

    
contact_table = np.zeros((20,20))
for i, line in info.iterrows():
    resi = line["Resi"]
    resj = line["Resj"]
    dis = line["Dis"]
    contact_table[d1_to_index[resi]][d1_to_index[resj]] += 1
    if resi != resj:
        contact_table[d1_to_index[resj]][d1_to_index[resi]] += 1



In [12]:

    
len(info)









    Out[12]:





32267



In [13]:

    
contact_table.min()









    Out[13]:





12.0



In [115]:

    
plt.imshow(contact_table, origin=0)
plt.colorbar()
_ = plt.xticks(np.arange(20), aa3)
_ = plt.yticks(np.arange(20), aa3)



In [109]:

    
plt.imshow(contact_table, origin=0)
plt.colorbar()
_ = plt.xticks(np.arange(20), aa3)
_ = plt.yticks(np.arange(20), aa3)



In [88]:

    
info.groupby(["Resi", "Resj"])["Dis"].count()









    Out[88]:





Resi  Resj
A     A       297
      C        60
      D        87
      E        88
      F       105
             ... 
Y     S        55
      T        61
      V       122
      W        24
      Y        46
Name: Dis, Length: 400, dtype: int64



In [81]:

    
pdb = "2a0b"

pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/cbd-openmmawsem.pdb"
seq = read_fasta(f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/crystal_structure.fasta")
get_contact_info(pdbFile, seq)









    Out[81]:







  
    
      
      Protein
      i
      j
      Resi
      Resj
      Dis
    
  
  
    
      0
      2a0b
      1
      104
      K
      H
      5.800971
    
    
      1
      2a0b
      1
      105
      K
      D
      8.981970
    
    
      2
      2a0b
      1
      108
      K
      V
      4.504978
    
    
      3
      2a0b
      1
      111
      K
      A
      8.126446
    
    
      4
      2a0b
      2
      108
      S
      V
      5.637322
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      267
      2a0b
      79
      94
      I
      W
      6.115420
    
    
      268
      2a0b
      79
      95
      I
      I
      5.385581
    
    
      269
      2a0b
      79
      98
      I
      M
      4.980760
    
    
      270
      2a0b
      81
      94
      S
      W
      8.732571
    
    
      271
      2a0b
      84
      94
      L
      W
      7.445224
    
  

272 rows × 6 columns



In [78]:

    
from Bio.PDB.Polypeptide import d1_to_index
from Bio.PDB.Polypeptide import dindex_to_1

pdb = "2a0b"

pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/cbd-openmmawsem.pdb"
seq = read_fasta(f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/crystal_structure.fasta")

frame_index = 0
cutoff = 9.5
MAX_OFFSET = 9
parser = PDBParser()
structure = parser.get_structure("x", pdbFile)
models = list(structure.get_models())
model = models[frame_index]
all_residues = list(model.get_residues())
info_ = []
for i, res1 in enumerate(all_residues):
    if seq[i] == "G":
        a1 = res1["CA"]
    else:
        a1 = res1["CB"]
    for j, res2 in enumerate(all_residues):
        if seq[j] == "G":
            a2 = res2["CA"]
        else:
            a2 = res2["CB"]
        dis = a1 - a2
        if dis < cutoff and j - i > MAX_OFFSET:
            info_.append([pdb, i, j, seq[i], seq[j], dis])



In [74]:



In [6]:

    
def getContactMapFromPDB(pdbFile, seq, frame_index=0):
    cutoff = 9.5
    MAX_OFFSET = 9
    parser = PDBParser()
    structure = parser.get_structure("x", pdbFile)
    models = list(structure.get_models())
    model = models[frame_index]
    all_residues = list(model.get_residues())
    n = len(all_residues)
    contact_table = np.zeros((n,n))
    # print(pdb, n)#
    for i, res1 in enumerate(all_residues):
        if seq[i] == "G":
            a1 = res1["CA"]
        else:
            a1 = res1["CB"]
        for j, res2 in enumerate(all_residues):
            if seq[j] == "G":
                a2 = res2["CA"]
            else:
                a2 = res2["CB"]
            contact_table[i][j] = a1 - a2

    data = (contact_table < cutoff)
    remove_band = np.eye(n)
    for i in range(1, MAX_OFFSET):
        remove_band += np.eye(n, k=i)
        remove_band += np.eye(n, k=-i)
    data[remove_band==1] = 0
    return data



In [81]:

    
pdb = "2a0b"

pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/cbd-openmmawsem.pdb"
# pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_start_native_iter5/{pdb}/0/movie.pdb"
seq = read_fasta(f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/crystal_structure.fasta")
data_native = getContactMapFromPDB(pdbFile, seq, frame_index=-1)

pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_start_native_iter5/{pdb}/0/lastFrame.pdb"
# pdbFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/test/native.pdb"
data_predicted = getContactMapFromPDB(pdbFile, seq, frame_index=-1)

combined = data_native + data_predicted * 2
from matplotlib import colors
# white means not present in both
# red means present in the first but not the second
# blue means present in the second but not the first
# black means present in both
cmap = colors.ListedColormap(['white', 'red', 'blue', 'black'])
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)









    Out[81]:





<matplotlib.image.AxesImage at 0x11f8780b8>



In [7]:

    
pdb = "3lzt"

pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/cbd-openmmawsem.pdb"
seq = read_fasta(f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/crystal_structure.fasta")
data_native = getContactMapFromPDB(pdbFile, seq)

pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_start_native_iter5/{pdb}/0/lastFrame.pdb"
# pdbFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/test/native.pdb"
data_predicted = getContactMapFromPDB(pdbFile, seq, frame_index=-1)

combined = data_native + data_predicted * 2
from matplotlib import colors
# white means not present in both
# red means present in the first but not the second
# blue means present in the second but not the first
# black means present in both
cmap = colors.ListedColormap(['white', 'red', 'blue', 'black'])
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)









    Out[7]:





<matplotlib.image.AxesImage at 0x11cffc208>



In [66]:

    
pdb = "2a0b"

pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/cbd-openmmawsem.pdb"
seq = read_fasta(f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/{pdb}/crystal_structure.fasta")
data_native = getContactMapFromPDB(pdbFile, seq)

pdbFile = f"/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_start_native_iter5/{pdb}/0/lastFrame.pdb"
# pdbFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/test/native.pdb"
data_predicted = getContactMapFromPDB(pdbFile, seq, frame_index=-1)

combined = data_native + data_predicted * 2
from matplotlib import colors
# white means not present in both
# red means present in the first but not the second
# blue means present in the second but not the first
# black means present in both
cmap = colors.ListedColormap(['white', 'red', 'blue', 'black'])
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)









    Out[66]:





<matplotlib.image.AxesImage at 0x120a5f240>



In [58]:

    
pdbFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/1baj/cbd-openmmawsem.pdb"
seq = read_fasta("/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/1baj/crystal_structure.fasta")
data_native = getContactMapFromPDB(pdbFile, seq)



In [63]:

    
pdbFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_start_native_iter5/1baj/0/lastFrame.pdb"
# pdbFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/test/native.pdb"
data_predicted = getContactMapFromPDB(pdbFile, seq, frame_index=-1)



In [64]:

    
combined = data_native + data_predicted * 2
from matplotlib import colors
# white means not present in both
# red means present in the first but not the second
# blue means present in the second but not the first
# black means present in both
cmap = colors.ListedColormap(['white', 'red', 'blue', 'black'])
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)









    Out[64]:





<matplotlib.image.AxesImage at 0x122e16278>



In [56]:

    
pdbFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/iteration_start_native_iter5/1baj/0/native.pdb"
seq = read_fasta("/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/1baj/crystal_structure.fasta")
frame_index = 0
cutoff = 9.5
MAX_OFFSET = 9
parser = PDBParser()
structure = parser.get_structure("x", pdbFile)
models = list(structure.get_models())
model = models[frame_index]
all_residues = list(model.get_residues())
n = len(all_residues)
contact_table = np.zeros((n,n))
# print(pdb, n)#
for i, res1 in enumerate(all_residues):
    if seq[i] == "G":
        a1 = res1["CA"]
    else:
        a1 = res1["CB"]
    for j, res2 in enumerate(all_residues):
        if seq[j] == "G":
            a2 = res2["CA"]
        else:
            a2 = res2["CB"]
        contact_table[i][j] = a1 - a2

data = (contact_table < cutoff)
remove_band = np.eye(n)
for i in range(1, MAX_OFFSET):
    remove_band += np.eye(n, k=i)
    remove_band += np.eye(n, k=-i)
data[remove_band==1] = 0



In [55]:









    Out[55]:





[<Model id=0>, <Model id=1>]



In [23]:

    
three_to_one("TRP")









    Out[23]:





'W'



In [25]:

    
seq.count("W")









    Out[25]:





1



In [ ]:

    
pdbFile = "/Users/weilu/Research/server/mar_2020/mass_iterative_run/setups/1baj/cbd-openmmawsem.pdb"

cutoff = 9.5
MAX_OFFSET = 9
parser = PDBParser()
structure = parser.get_structure("x", pdbFile)
all_residues = list(structure.get_residues())
n = len(all_residues)
contact_table = np.zeros((n,n))
# print(pdb, n)#
for i, res1 in enumerate(all_residues):
    for j, res2 in enumerate(all_residues):
        contact_table[i][j] = res1["CA"]-res2["CA"]

data = (contact_table < cutoff)
remove_band = np.eye(n)
for i in range(1, MAX_OFFSET):
    remove_band += np.eye(n, k=i)
    remove_band += np.eye(n, k=-i)
data[remove_band==1] = 0
data_ca = data



In [11]:

    
fileLocation = "/Users/weilu/Research/server/feb_2020/SPOT-Contact-Helical-New/outputs/tmp.spotcon"
data = pd.read_csv(fileLocation, skiprows=5, sep="\s+", names=["i","j","p"]).dropna().reset_index(drop=True)

data["i"] = data["i"].astype(int)
data["j"] = data["j"].astype(int)
data_A = data



In [12]:

    
fileLocation = "/Users/weilu/Research/server/feb_2020/SPOT-Contact-Helical-New/outputs/original.spotcon"
data = pd.read_csv(fileLocation, skiprows=5, sep="\s+", names=["i","j","p"]).dropna().reset_index(drop=True)

data["i"] = data["i"].astype(int)
data["j"] = data["j"].astype(int)
data_original = data









    



/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py:3058: DtypeWarning: Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)



In [14]:

    
data_original.dtypes









    Out[14]:





i      int64
j      int64
p    float64
dtype: object



In [15]:

    
data_A.dtypes









    Out[15]:





i      int64
j      int64
p    float64
dtype: object



In [17]:

    
seq = "MSKLTTGSFSIEDLESVQITINNIVGAAKEAAEEKEKELVNAGPTLFPGLEGYRDDWNFKLLDRYEPVITPMCDQCCYCTYGPCDLSGNKRGACGIDMKGHNGREFFLRVITGTACHAAHGRHLLDHLIEKYGEDLPLTLGQSNVLTPNITISTGLSPKTLGEVKPAMEYVEEQLTQLLATVHAGQESAEIDYDSKALFSGSLDHVGMEISDIVQVAAYDFPKADPEAPLVEIGMGTIDKSKPFLCVIGHNVAGVTYMMDYMEDNNLTDKMEIAGLCCTAIDLTRYKEADRRPPYAKVIGSMSKELKVIRSGMPDVIVVDEQCVRGDIVPEAQKLKIPVIASNPKIMYGLPNRTDADVDETMEELKSGKIPGCVMLDYDKLGELCVRLTMEMAPIRDAAGITALPTDEELVNMVAKCADCGACLLACPEEIDIPEAMGFAKKGDFSYFEEIHDTCIGCRRCEQVCKKEIPILNVIEKIAQKQIAEEKGLMRAGRGQVSDAEIRAEGLNLVMGTTPGIIAIIGCPNYAGGTKDVYYIAEEFLKRNFIVVTTGCGAMDIGMFKDADGKTLYERFPGGFQCGGLANIGSCVSNAHITGAAEKVAAIFAQRTLEGNLAEIGDYILNRVGACGLAWGAFSQKASSIGTGCNIFGIPAVLGPHSSKYRRALIAKTYEEDKWKVYDARNGQEMPIPPAPEFLLTTAETWQEAIPMMAKACIRPSDNSMGRAIKLTHWMELHKKYLGGKEPEDWWKFVRTEADLPLATREALLKELEKEHGWEIDWKRKKIISGPKIKFDVSAQPTNLKRLCKEA"



In [18]:

    
len(seq)









    Out[18]:





807



In [33]:

    
seq









    Out[33]:





'MSKLTTGSFSIEDLESVQITINNIVGAAKEAAEEKEKELVNAGPTLFPGLEGYRDDWNFKLLDRYEPVITPMCDQCCYCTYGPCDLSGNKRGACGIDMKGHNGREFFLRVITGTACHAAHGRHLLDHLIEKYGEDLPLTLGQSNVLTPNITISTGLSPKTLGEVKPAMEYVEEQLTQLLATVHAGQESAEIDYDSKALFSGSLDHVGMEISDIVQVAAYDFPKADPEAPLVEIGMGTIDKSKPFLCVIGHNVAGVTYMMDYMEDNNLTDKMEIAGLCCTAIDLTRYKEADRRPPYAKVIGSMSKELKVIRSGMPDVIVVDEQCVRGDIVPEAQKLKIPVIASNPKIMYGLPNRTDADVDETMEELKSGKIPGCVMLDYDKLGELCVRLTMEMAPIRDAAGITALPTDEELVNMVAKCADCGACLLACPEEIDIPEAMGFAKKGDFSYFEEIHDTCIGCRRCEQVCKKEIPILNVIEKIAQKQIAEEKGLMRAGRGQVSDAEIRAEGLNLVMGTTPGIIAIIGCPNYAGGTKDVYYIAEEFLKRNFIVVTTGCGAMDIGMFKDADGKTLYERFPGGFQCGGLANIGSCVSNAHITGAAEKVAAIFAQRTLEGNLAEIGDYILNRVGACGLAWGAFSQKASSIGTGCNIFGIPAVLGPHSSKYRRALIAKTYEEDKWKVYDARNGQEMPIPPAPEFLLTTAETWQEAIPMMAKACIRPSDNSMGRAIKLTHWMELHKKYLGGKEPEDWWKFVRTEADLPLATREALLKELEKEHGWEIDWKRKKIISGPKIKFDVSAQPTNLKRLCKEA'



In [61]:

    
n = len(seq)
t = np.zeros((n,n))
for index, d in data_A.iterrows():
#     print(index)
    i = int(d["i"]) - 1
    j = int(d["j"]) - 1 
    p = d["p"]
#     print(i,j,p)
    t[i,j] = p
    t[j,i] = p



In [72]:

    
data_s = data.astype(int)
t_s = (t>0.08).astype(float)
combined = data_s + t_s * 2
from matplotlib import colors
cmap = colors.ListedColormap(['white', 'red', 'blue', 'black'])
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-72-7d542ff5db06> in <module>
      1 data_s = data.astype(int)
      2 t_s = (t>0.08).astype(float)
----> 3 combined = data_s + t_s * 2
      4 from matplotlib import colors
      5 cmap = colors.ListedColormap(['white', 'red', 'blue', 'black'])

ValueError: operands could not be broadcast together with shapes (766,766) (807,807)



In [71]:

    
plt.imshow((t>0.08).astype(float), origin="bottom")
plt.colorbar()









    Out[71]:





<matplotlib.colorbar.Colorbar at 0x1a198c42b0>



In [70]:

    
plt.imshow(data, origin="bottom")









    Out[70]:





<matplotlib.image.AxesImage at 0x1a195e3fd0>



In [20]:

    
plt.imshow((t>0.5).astype(float), origin="bottom")
plt.colorbar()









    Out[20]:





<matplotlib.colorbar.Colorbar at 0x1a176c9fd0>



In [21]:

    
n = len(seq)
t = np.zeros((n,n))
for index, d in data_original.iterrows():
#     print(index)
    i = int(d["i"]) - 1
    j = int(d["j"]) - 1 
    p = d["p"]
#     print(i,j,p)
    t[i,j] = p
    t[j,i] = p



In [26]:

    
plt.imshow((t>0.1).astype(float), origin="bottom")
plt.colorbar()









    Out[26]:





<matplotlib.colorbar.Colorbar at 0x1a184b3cc0>



In [ ]:



In [ ]:



In [7]:

    
len(a[5])









    Out[7]:





51



In [22]:

    
pdb = "4rws"
fileLocation = f"/Users/weilu/Research/raptorX/{pdb}/A.txt"
data = pd.read_csv(fileLocation, skiprows=6, sep="\s+", names=["i","j","s", "ss","p"]).dropna().reset_index(drop=True)
data["i"] = data["i"].astype(int)
data["j"] = data["j"].astype(int)
data_A = data
seq_A = getSeqFromRaptorXContact(fileLocation)

fileLocation = f"/Users/weilu/Research/raptorX/{pdb}/B.txt"
data = pd.read_csv(fileLocation, skiprows=6, sep="\s+", names=["i","j","s", "ss","p"]).dropna().reset_index(drop=True)
data["i"] = data["i"].astype(int)
data["j"] = data["j"].astype(int)
data_B = data
seq_B = getSeqFromRaptorXContact(fileLocation)



In [86]:

    
import textwrap
header = '''\
PFRMAT RR
TARGET {}
AUTHOR RaptorX-Contact
METHOD deep dilated residual networks (one variant of deep CNN). Consult jinboxu@gmail.com for details.
MODEL 1
'''
fileLocation = f"/Users/weilu/Research/raptorX/{pdb}/{pdb}.txt"
with open(fileLocation, "w") as out:
    out.write(header.format(pdb))
    out.write("\n".join(textwrap.wrap(seq, width=50))+"\n")
    for index, d in data_A.iterrows():
    #     print(index)
        i = int(d["i"])
        j = int(d["j"])
        p = round(d["p"], 8)
        s = int(d["s"])
        ss = int(d["ss"])
        out.write(f"{i} {j} {s} {ss} {p}\n")
    for index, d in data_B.iterrows():
    #     print(index)
        i = int(d["i"]) + len(seq_A)
        j = int(d["j"]) + len(seq_A)
        p = round(d["p"], 8)
        s = int(d["s"])
        ss = int(d["ss"])
        out.write(f"{i} {j} {s} {ss} {p}\n")
    out.write("END\n")



In [80]:

    
for index, d in data_B.iterrows():
#     print(index)
    i = int(d["i"]) - 1 + len(seq_A)
    j = int(d["j"]) - 1 + len(seq_A)
    p = round(d["p"], 8)
    s = int(d["s"])
    ss = int(d["ss"])



In [84]:

    
data_A









    Out[84]:







  
    
      
      i
      j
      s
      ss
      p
    
  
  
    
      0
      34
      59
      0.0
      8.0
      0.997731
    
    
      1
      38
      56
      0.0
      8.0
      0.980999
    
    
      2
      41
      52
      0.0
      8.0
      0.973823
    
    
      3
      38
      59
      0.0
      8.0
      0.966630
    
    
      4
      27
      66
      0.0
      8.0
      0.962626
    
    
      5
      34
      62
      0.0
      8.0
      0.961573
    
    
      6
      35
      59
      0.0
      8.0
      0.916884
    
    
      7
      41
      55
      0.0
      8.0
      0.895539
    
    
      8
      31
      63
      0.0
      8.0
      0.874988
    
    
      9
      30
      62
      0.0
      8.0
      0.842116
    
    
      10
      20
      73
      0.0
      8.0
      0.814816
    
    
      11
      27
      69
      0.0
      8.0
      0.767721
    
    
      12
      37
      55
      0.0
      8.0
      0.758386
    
    
      13
      83
      160
      0.0
      8.0
      0.728676
    
    
      14
      42
      52
      0.0
      8.0
      0.727045
    
    
      15
      23
      69
      0.0
      8.0
      0.709704
    
    
      16
      83
      161
      0.0
      8.0
      0.701026
    
    
      17
      37
      276
      0.0
      8.0
      0.693589
    
    
      18
      31
      62
      0.0
      8.0
      0.680381
    
    
      19
      31
      66
      0.0
      8.0
      0.678132
    
    
      20
      37
      275
      0.0
      8.0
      0.675751
    
    
      21
      41
      48
      0.0
      8.0
      0.668435
    
    
      22
      109
      195
      0.0
      8.0
      0.657830
    
    
      23
      217
      274
      0.0
      8.0
      0.654746
    
    
      24
      113
      199
      0.0
      8.0
      0.654502
    
    
      25
      105
      192
      0.0
      8.0
      0.653025
    
    
      26
      109
      196
      0.0
      8.0
      0.652981
    
    
      27
      102
      188
      0.0
      8.0
      0.652657
    
    
      28
      112
      196
      0.0
      8.0
      0.650273
    
    
      29
      106
      192
      0.0
      8.0
      0.650126
    
    
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      36555
      32
      53
      0.0
      8.0
      0.000235
    
    
      36556
      40
      63
      0.0
      8.0
      0.000228
    
    
      36557
      35
      53
      0.0
      8.0
      0.000224
    
    
      36558
      30
      56
      0.0
      8.0
      0.000219
    
    
      36559
      39
      54
      0.0
      8.0
      0.000218
    
    
      36560
      42
      62
      0.0
      8.0
      0.000209
    
    
      36561
      39
      53
      0.0
      8.0
      0.000201
    
    
      36562
      36
      65
      0.0
      8.0
      0.000194
    
    
      36563
      40
      58
      0.0
      8.0
      0.000191
    
    
      36564
      36
      56
      0.0
      8.0
      0.000183
    
    
      36565
      41
      61
      0.0
      8.0
      0.000179
    
    
      36566
      36
      64
      0.0
      8.0
      0.000146
    
    
      36567
      28
      60
      0.0
      8.0
      0.000140
    
    
      36568
      42
      61
      0.0
      8.0
      0.000136
    
    
      36569
      40
      60
      0.0
      8.0
      0.000136
    
    
      36570
      40
      62
      0.0
      8.0
      0.000133
    
    
      36571
      36
      53
      0.0
      8.0
      0.000128
    
    
      36572
      32
      61
      0.0
      8.0
      0.000121
    
    
      36573
      40
      57
      0.0
      8.0
      0.000088
    
    
      36574
      36
      60
      0.0
      8.0
      0.000080
    
    
      36575
      32
      57
      0.0
      8.0
      0.000078
    
    
      36576
      39
      62
      0.0
      8.0
      0.000075
    
    
      36577
      36
      57
      0.0
      8.0
      0.000066
    
    
      36578
      40
      61
      0.0
      8.0
      0.000064
    
    
      36579
      39
      57
      0.0
      8.0
      0.000064
    
    
      36580
      32
      60
      0.0
      8.0
      0.000058
    
    
      36581
      32
      56
      0.0
      8.0
      0.000049
    
    
      36582
      39
      58
      0.0
      8.0
      0.000049
    
    
      36583
      39
      61
      0.0
      8.0
      0.000045
    
    
      36584
      36
      61
      0.0
      8.0
      0.000044
    
  

36585 rows × 5 columns



In [79]:

    
round(p, 8)









    Out[79]:





0.0023355



In [23]:

    
seq = seq_A + seq_B



In [26]:

    
seq









    Out[26]:





'SMKEPCFREENANFNKIFLPTIYSIIFLTGIVGNGLVILVMGYQSMTDKYRLHLSVADLLFVITLPFWAVDAVANWYFGNFLCKAVHVIYTVNLYSSVWILAFISLDRYLAIVHATNSQRPRKLLAEKVVYVGVWIPALLLTIPDFIFANVSEADDRYICCRFYPNDLWVVVFQFQHIMVGLILPGIVILSCYCIIISKLSHSGHQKRKALKPTVILILAFFACWLPYYIGISIDSFILLEIIKQGCEFENTVHKWISITEALAFFHCCLNPILYALGASCHRPDKCCLGYQKRPLPQVLLSSWYPTSQLCSKPGVIFLTKRGRQVCADKSKDWVKKLMQQLPVTA'



In [27]:

    
n = len(seq)
t = np.zeros((n,n))
for index, d in data_A.iterrows():
#     print(index)
    i = int(d["i"]) - 1
    j = int(d["j"]) - 1 
    p = d["p"]
#     print(i,j,p)
    t[i,j] = p
    t[j,i] = p
for index, d in data_B.iterrows():
#     print(index)
    i = int(d["i"]) - 1 + len(seq_A)
    j = int(d["j"]) - 1 + len(seq_A)
    p = d["p"]
#     print(i,j,p)
    t[i,j] = p
    t[j,i] = p



In [28]:

    
plt.imshow((t>0.5).astype(float), origin="bottom")
plt.colorbar()









    Out[28]:





<matplotlib.colorbar.Colorbar at 0x1a184bfb38>



In [29]:

    
def getSeqFromRaptorXContact(fileLocation):
    with open(fileLocation) as f:
        a = f.readlines()
    i = 4
    seq = ""
    assert a[i] == "MODEL 1\n"
    i += 1
    while True:
        line = a[i]
        if line[0].isdigit():
            break
        i += 1
        seq += line.strip()
    # print(i)
    return seq
def getContactMapFromPDB(pdbFile):
    cutoff = 9.5
    MAX_OFFSET = 6
    parser = PDBParser()
    structure = parser.get_structure('target', pdbFile)
    all_residues = list(structure.get_residues())
    n = len(all_residues)
    contact_table = np.zeros((n,n))
    # print(pdb, n)#
    for i, res1 in enumerate(all_residues):
        for j, res2 in enumerate(all_residues):
            contact_table[i][j] = res1["CA"]-res2["CA"]

    data = (contact_table < cutoff)
    remove_band = np.eye(n)
    for i in range(1, MAX_OFFSET):
        remove_band += np.eye(n, k=i)
        remove_band += np.eye(n, k=-i)
    data[remove_band==1] = 0
    return data



In [53]:

    
pdbFile = "/Users/weilu/Research/server/feb_2020/SPOT-Contact-Helical-New/outputs/3cf4_A.pdb"
cutoff = 9.5
MAX_OFFSET = 6
parser = PDBParser()
structure = parser.get_structure('target', pdbFile)
# chainA = structure[0]["A"]
# all_residues = list(chainA.get_residues())
all_residues = []
for res in structure.get_residues():
    if res.get_id()[0] == " ":
        all_residues.append(res)
n = len(all_residues)
contact_table = np.zeros((n,n))
# print(pdb, n)#
for i, res1 in enumerate(all_residues):
    for j, res2 in enumerate(all_residues):
        contact_table[i][j] = res1["CA"]-res2["CA"]

data = (contact_table < cutoff)
remove_band = np.eye(n)
for i in range(1, MAX_OFFSET):
    remove_band += np.eye(n, k=i)
    remove_band += np.eye(n, k=-i)
data[remove_band==1] = 0



In [60]:

    
plt.imshow(data, origin="bottom")









    Out[60]:





<matplotlib.image.AxesImage at 0x1a18819128>



In [58]:

    
data_s = data.astype(int)
t_s = (t>0.55).astype(float)



In [30]:

    
data = getContactMapFromPDB("/Users/weilu/Research/server/feb_2020/SPOT-Contact-Helical-New/outputs/3cf4.pdb")









    



/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:92: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 8106.
  PDBConstructionWarning,
/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:92: PDBConstructionWarning: WARNING: Chain G is discontinuous at line 8197.
  PDBConstructionWarning,
/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:92: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 8214.
  PDBConstructionWarning,
/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/Bio/PDB/StructureBuilder.py:92: PDBConstructionWarning: WARNING: Chain G is discontinuous at line 8653.
  PDBConstructionWarning,






    



---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-30-304a6c71b370> in <module>
----> 1 data = getContactMapFromPDB("/Users/weilu/Research/server/feb_2020/SPOT-Contact-Helical-New/outputs/3cf4.pdb")

<ipython-input-29-efa1942577c9> in getContactMapFromPDB(pdbFile)
     25     for i, res1 in enumerate(all_residues):
     26         for j, res2 in enumerate(all_residues):
---> 27             contact_table[i][j] = res1["CA"]-res2["CA"]
     28 
     29     data = (contact_table < cutoff)

~/anaconda3/envs/py36/lib/python3.6/site-packages/Bio/PDB/Entity.py in __getitem__(self, id)
     40     def __getitem__(self, id):
     41         """Return the child with given id."""
---> 42         return self.child_dict[id]
     43 
     44     def __delitem__(self, id):

KeyError: 'CA'



In [ ]:



In [30]:

    
data = getContactMapFromPDB("/Users/weilu/Research/server/jul_2019/two_chains/cleaned_pdbs/4rws.pdb")



In [70]:

    
data_s = data.astype(int)
t_s = (t>0.55).astype(float)



In [72]:

    
fig, ax = plt.subplots()
# ax.imshow(data_s+1, origin="bottom")
ax.imshow(t, origin="bottom")









    Out[72]:





<matplotlib.image.AxesImage at 0x1a24ae82b0>



In [60]:

    
combined = data_s + t_s * 2



In [63]:

    
combined = data_s + t_s * 2
from matplotlib import colors
cmap = colors.ListedColormap(['white', 'red', 'blue', 'black'])
bounds=[-1,0.1, 1.1, 2.1, 3.1]
norm = colors.BoundaryNorm(bounds, cmap.N)
plt.imshow(combined, origin="bottom", cmap=cmap, norm=norm)









    Out[63]:





<matplotlib.image.AxesImage at 0x1a235698d0>



In [87]:

    
pdbFile = "/Users/weilu/Research/server/jul_2019/two_chains/cleaned_pdbs/4rws.pdb"
parser = PDBParser()
structure = parser.get_structure('target', pdbFile)



In [88]:

    
model = structure[0]



In [90]:

    
a = list(model.get_chains())



In [92]:

    
c = a[0]



In [94]:

    
c.id









    Out[94]:





'A'



In [95]:

    
c = "ALL"



In [99]:

    
"ABC" is not "ALL"









    Out[99]:





True



In [93]:

    
c.get_id()









    Out[93]:





'A'



In [55]:

    
fig, ax = plt.subplots()
ax.imshow(data_s+1, origin="bottom")
ax.imshow(t_s, origin="bottom")









    Out[55]:





<matplotlib.image.AxesImage at 0x1a21a9a9b0>



In [43]:

    
plt.imshow(data_s, origin="bottom")









    Out[43]:





<matplotlib.image.AxesImage at 0x1a1cd67828>



In [44]:

    
# plt.imshow(-data_s, origin="bottom")
plt.imshow(t_s, origin="bottom")
# plt.colorbar()









    Out[44]:





<matplotlib.image.AxesImage at 0x1a1d144e48>



In [16]:

    
346









    Out[16]:





70



In [17]:

    
fileLocation = f"/Users/weilu/Research/raptorX/{pdb}/B.txt"



In [20]:

    
len(seq)









    Out[20]:





276



In [18]:

    
pdb = "4rws"
fileLocation = f"/Users/weilu/Research/raptorX/{pdb}/A.txt"
data = pd.read_csv(fileLocation, skiprows=6, sep="\s+", names=["i","j","s", "ss","p"]).dropna().reset_index(drop=True)
data["i"] = data["i"].astype(int)
data["j"] = data["j"].astype(int)
n = int(info.query(f"Protein =='{pdb}'")["Length"])
t = np.zeros((n,n))
for index, d in data.iterrows():
#     print(index)
    i = int(d["i"]) - 1
    j = int(d["j"]) - 1 
    p = d["p"]
#     print(i,j,p)
    t[i,j] = p
    t[j,i] = p









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-18-f0b1e6216f4f> in <module>
      4 data["i"] = data["i"].astype(int)
      5 data["j"] = data["j"].astype(int)
----> 6 n = int(info.query(f"Protein =='{pdb}'")["Length"])
      7 t = np.zeros((n,n))
      8 for index, d in data.iterrows():

NameError: name 'info' is not defined



In [ ]:

    
def getContactMap(pdb):
    fileLocation = f"/Users/weilu/Research/raptorX/{pdb}/contactmap.txt"
    data = pd.read_csv(fileLocation, skiprows=6, sep="\s+", names=["i","j","s", "ss","p"]).dropna().reset_index(drop=True)
    data["i"] = data["i"].astype(int)
    data["j"] = data["j"].astype(int)
    n = int(info.query(f"Protein =='{pdb}'")["Length"])
    t = np.zeros((n,n))
    for index, d in data.iterrows():
    #     print(index)
        i = int(d["i"]) - 1
        j = int(d["j"]) - 1 
        p = d["p"]
    #     print(i,j,p)
        t[i,j] = p
        t[j,i] = p

	Res1	Res2	Theta
83	CYS	ASP	4
64	ASP	CYS	4
124	GLU	CYS	7
85	CYS	GLN	7
86	CYS	GLU	7
...	...	...	...
199	ILE	VAL	370
209	LEU	ILE	424
190	ILE	LEU	424
219	LEU	VAL	490
390	VAL	LEU	490

	Res1	Res2	Theta
84	CYS	CYS	1
242	MET	ASN	5
52	ASN	MET	5
105	GLN	GLN	7
357	TRP	TRP	7
...	...	...	...
209	LEU	ILE	440
389	VAL	ILE	512
199	ILE	VAL	512
219	LEU	VAL	614
390	VAL	LEU	614

	Res1	Res2	Theta
357	TRP	TRP	35
168	HIS	HIS	49
44	ASN	CYS	54
82	CYS	ASN	54
64	ASP	CYS	55
...	...	...	...
390	VAL	LEU	3135
219	LEU	VAL	3135
190	ILE	LEU	3583
209	LEU	ILE	3583
210	LEU	LEU	3753

	Res1	Res2	Theta
83	CYS	ASP	31
64	ASP	CYS	31
104	GLN	CYS	42
44	ASN	CYS	42
82	CYS	ASN	42
...	...	...	...
390	VAL	LEU	3564
219	LEU	VAL	3564
210	LEU	LEU	4051
190	ILE	LEU	4107
209	LEU	ILE	4107

	Q	Run	Protein	Folder
0	0.36	0	1hoe	iteration_3
1	0.42	1	1hoe	iteration_3
2	0.43	0	1tif	iteration_3
3	0.45	1	1tif	iteration_3
4	0.80	0	1vcc	iteration_3
...	...	...	...	...
149	0.47	1	2hbg	iteration_3
150	0.81	0	1akr	iteration_3
151	0.76	1	1akr	iteration_3
152	0.51	0	1osa	iteration_3
153	0.44	1	1osa	iteration_3

	Res1	Res2	Type	Theta	Index1	Index2	r	ResId1	ResId2	index_x	Density_H_x	Density_P_x	index_y	Density_H_y	Density_P_y
16	LEU	LEU	Direct	0.475	3	44	6.877109	4	45	3	2.000	4.751	44	2.011	1.475
83	LEU	LEU	Direct	1.000	24	52	6.056805	25	53	24	1.131	5.574	52	2.000	5.403
86	LEU	LEU	Direct	0.409	24	50	6.903615	25	51	24	1.131	5.574	50	2.465	3.409
151	LEU	LEU	Direct	0.207	53	76	7.001105	54	77	53	0.000	6.172	76	2.000	4.037
205	LEU	LEU	Direct	1.000	76	110	5.694576	77	111	76	2.000	4.037	110	1.996	4.020

	Res1	Res2	Type	Theta	Index1	Index2	r	ResId1	ResId2	index_x	...	Density_P_x	index_y	Density_H_y	Density_P_y	Protein	Run	D_H	D_P	Contact	isTP
0	THR	PRO	Direct	1.000	28	2	4.986226	29	3	28	...	0.000	2	1.038	1.000	1by9	0	5.038	1.000	28_2	FP
1	THR	PRO	Direct	0.994	65	81	6.079741	66	82	65	...	2.000	81	0.994	1.000	1fna	0	3.435	3.000	65_81	TP
2	THR	PRO	Direct	0.908	88	58	6.357295	89	59	88	...	1.010	58	0.908	1.129	1fna	0	2.815	2.139	88_58	TP
3	THR	PRO	Direct	0.693	58	75	6.504425	59	76	58	...	1.998	75	1.693	1.000	1bm8	0	3.386	2.998	58_75	TP
4	THR	PRO	Direct	1.000	64	51	5.319895	65	52	64	...	2.000	51	1.246	1.001	3vub	0	3.206	3.001	64_51	TP
5	THR	PRO	Direct	1.000	47	19	5.083985	48	20	47	...	0.000	19	2.000	0.177	1kte	0	5.980	0.177	47_19	TP
6	THR	PRO	Direct	1.000	95	77	5.553338	96	78	95	...	1.028	77	4.153	1.000	1bkf	0	9.006	2.028	95_77	TP
7	THR	PRO	Direct	1.000	19	93	4.892044	20	94	19	...	1.050	93	2.000	0.912	1sfp	0	4.000	1.962	19_93	FP
8	THR	PRO	Direct	1.000	30	62	5.411886	31	63	30	...	2.020	62	1.001	1.000	1sfp	0	3.811	3.020	30_62	TP
9	THR	PRO	Direct	1.000	22	1	5.513818	23	2	22	...	0.598	1	1.022	0.999	2mcm	0	4.372	1.597	22_1	FP
10	THR	PRO	Direct	0.953	117	20	6.284565	118	21	117	...	0.913	20	1.198	2.071	2a0b	0	3.151	2.984	117_20	TP
11	THR	PRO	Direct	1.000	14	26	4.916763	15	27	14	...	1.986	26	1.294	0.856	2sak	0	2.769	2.842	14_26	TP
12	THR	PRO	Direct	0.999	74	32	5.849476	75	33	74	...	0.000	32	4.984	0.999	2sak	0	9.983	0.999	74_32	FP
13	THR	PRO	Direct	0.941	54	98	6.308799	55	99	54	...	0.752	98	3.224	1.024	1bgf	0	6.262	1.776	54_98	TP
14	THR	PRO	Direct	0.992	35	2	6.100786	36	3	35	...	0.000	2	2.973	0.759	1opy	0	5.966	0.759	35_2	FP
15	THR	PRO	Direct	0.430	69	83	6.613797	70	84	69	...	1.000	83	3.475	2.000	1opy	0	4.835	3.000	69_83	TP
16	THR	PRO	Direct	1.000	12	28	4.872084	13	29	12	...	1.102	28	2.048	1.007	1rlw	0	5.040	2.109	12_28	FP
17	THR	PRO	Direct	0.997	12	53	5.991587	13	54	12	...	1.102	53	2.999	2.451	1rlw	0	5.991	3.553	12_53	FP
18	THR	PRO	Direct	0.956	15	26	6.278195	16	27	15	...	0.995	26	4.104	1.000	1rlw	0	7.954	1.995	15_26	TP
19	THR	PRO	Direct	0.725	52	37	6.488734	53	38	52	...	0.997	37	3.735	0.000	1hmt	0	7.908	0.997	52_37	TP
20	THR	PRO	Direct	0.998	52	37	5.943890	53	38	52	...	1.001	37	2.819	1.001	1crb	0	4.817	2.002	52_37	TP
21	THR	PRO	Direct	1.000	54	44	5.338830	55	45	54	...	2.156	44	3.000	0.999	1ax8	0	6.004	3.155	54_44	TP
22	THR	PRO	Direct	0.493	12	2	6.588423	13	3	12	...	2.823	2	1.493	1.316	1rss	0	4.068	4.139	12_2	TP
23	THR	PRO	Direct	1.000	140	27	5.474146	141	28	140	...	4.120	27	2.733	4.004	1jon	0	4.212	8.124	140_27	FP
24	THR	PRO	Direct	1.000	95	72	5.552165	96	73	95	...	0.072	72	3.002	0.278	1aly	0	5.005	0.350	95_72	TP
25	THR	PRO	Direct	1.000	28	2	5.575712	29	3	28	...	0.000	2	2.952	0.000	1by9	1	5.953	0.000	28_2	FP
26	THR	PRO	Direct	0.364	65	81	6.641636	66	82	65	...	1.138	81	0.364	0.139	1fna	1	3.515	1.277	65_81	TP
27	THR	PRO	Direct	0.671	88	58	6.514388	89	59	88	...	2.025	58	0.672	1.011	1fna	1	2.030	3.036	88_58	TP
28	THR	PRO	Direct	0.998	68	35	5.970904	69	36	68	...	1.106	35	3.930	0.000	1plc	1	7.994	1.106	68_35	FP
29	THR	PRO	Direct	1.000	58	75	5.666871	59	76	58	...	0.156	75	2.162	2.000	1bm8	1	3.162	2.156	58_75	TP
30	THR	PRO	Direct	0.782	64	51	6.458181	65	52	64	...	1.054	51	1.782	0.237	3vub	1	2.564	1.291	64_51	TP
31	THR	PRO	Direct	1.000	47	19	4.833291	48	20	47	...	0.000	19	2.000	0.157	1kte	1	5.998	0.157	47_19	TP
32	THR	PRO	Direct	1.000	95	77	5.206377	96	78	95	...	0.986	77	3.997	2.000	1bkf	1	6.993	2.986	95_77	TP
33	THR	PRO	Direct	1.000	30	65	5.047096	31	66	30	...	0.990	65	3.001	1.153	1sfp	1	6.001	2.143	30_65	FP
34	THR	PRO	Direct	0.998	86	62	5.947662	87	63	86	...	1.076	62	1.000	1.000	1sfp	1	2.996	2.076	86_62	TP
35	THR	PRO	Direct	0.691	94	40	6.505275	95	41	94	...	1.008	40	1.724	1.002	1neu	1	7.285	2.010	94_40	FP
36	THR	PRO	Direct	1.000	14	26	5.060741	15	27	14	...	1.989	26	2.056	0.739	2sak	1	4.052	2.728	14_26	TP
37	THR	PRO	Direct	1.000	54	98	5.010488	55	99	54	...	0.954	98	1.998	2.005	1bgf	1	5.871	2.959	54_98	TP
38	THR	PRO	Direct	0.957	69	83	6.274398	70	84	69	...	1.000	83	2.776	2.332	1opy	1	3.764	3.332	69_83	TP
39	THR	PRO	Direct	0.154	52	37	6.756447	53	38	52	...	0.914	37	2.203	0.000	1hmt	1	6.351	0.914	52_37	TP
40	THR	PRO	Direct	1.000	27	10	5.427485	28	11	27	...	0.999	10	2.290	0.000	1htp	1	6.219	0.999	27_10	FP
41	THR	PRO	Direct	1.000	52	37	5.246543	53	38	52	...	1.001	37	3.900	1.906	1crb	1	5.900	2.907	52_37	TP
42	THR	PRO	Direct	0.982	54	44	6.184997	55	45	54	...	2.082	44	3.622	2.991	1ax8	1	6.604	5.073	54_44	TP
43	THR	PRO	Direct	1.000	95	72	4.828091	96	73	95	...	0.000	72	2.984	0.007	1aly	1	5.984	0.007	95_72	TP
44	THR	PRO	Direct	0.996	16	128	6.042614	17	129	16	...	0.610	128	2.494	1.011	1akr	1	6.476	1.621	16_128	TP

	Interaction	Res1	Res2	Index	Gamma
372	Direct	Y	P	193	-1.124
371	Direct	P	Y	193	-1.124
379	Direct	W	S	197	-1.121
378	Direct	S	W	197	-1.121
214	Direct	K	E	110	-1.080
...	...	...	...	...	...
351	Direct	F	F	182	1.058
243	Direct	F	G	125	1.091
242	Direct	G	F	125	1.091
277	Direct	H	V	143	1.524
278	Direct	V	H	143	1.524

	Interaction	Res1	Res2	Index	Gamma
20	Direct	L	A	10	-0.388
57	Direct	L	R	29	0.280
92	Direct	L	N	47	0.045
125	Direct	L	D	64	0.576
156	Direct	L	C	80	0.056
185	Direct	L	Q	95	-0.039
212	Direct	L	E	109	-0.480
237	Direct	L	G	122	-0.340
260	Direct	L	H	134	-0.417
281	Direct	L	I	145	-0.322
300	Direct	L	L	155	-0.074
301	Direct	L	K	156	-0.287
303	Direct	L	M	157	-0.639
305	Direct	L	F	158	-0.365
307	Direct	L	P	159	-0.455
309	Direct	L	S	160	0.464
311	Direct	L	T	161	-0.118
313	Direct	L	W	162	-0.176
315	Direct	L	Y	163	-0.610
317	Direct	L	V	164	-0.835

	Res1	Res2	Type	Theta	Index1	Index2	r	ResId1	ResId2	index_x	Density_H_x	Density_P_x	index_y	Density_H_y	Density_P_y	Protein	D_H	D_P
0	ASP	ASP	Direct	0.615	0	39	6.387458	1	40	0	2.615	0.000	39	2.612	0.001	1hoe	5.227	0.001
1	THR	ASP	Direct	1.000	1	39	3.803764	2	40	1	1.026	1.000	39	2.612	0.001	1hoe	3.638	1.001
2	ARG	ASP	Direct	0.997	67	39	5.408635	68	40	67	3.023	1.000	39	2.612	0.001	1hoe	5.635	1.001
3	ASP	ARG	Direct	1.000	0	67	4.196109	1	68	0	2.615	0.000	67	3.023	1.000	1hoe	5.638	1.000
4	VAL	ARG	Direct	1.000	35	67	5.400923	36	68	35	4.864	1.000	67	3.023	1.000	1hoe	7.887	2.000
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
20225	LEU	PHE	Direct	0.111	111	91	7.342279	112	92	111	0.000	2.111	91	0.986	4.953	1osa	0.986	7.064
20226	GLU	ASP	Direct	1.000	103	92	5.176369	104	93	103	3.997	1.000	92	5.000	2.004	1osa	8.997	3.004
20227	GLU	ARG	Direct	0.874	103	93	6.003034	104	94	103	3.997	1.000	93	1.874	0.076	1osa	5.871	1.076
20228	HIS	ARG	Direct	1.000	106	93	4.551530	107	94	106	2.970	0.000	93	1.874	0.076	1osa	4.844	0.076
20229	VAL	ARG	Direct	0.076	107	93	6.888728	108	94	107	0.687	3.946	93	1.874	0.076	1osa	2.561	4.022

	Res1	Res2	Theta
83	CYS	ASP	4
64	ASP	CYS	4
124	GLU	CYS	7
85	CYS	GLN	7
86	CYS	GLU	7
...	...	...	...
199	ILE	VAL	370
209	LEU	ILE	424
190	ILE	LEU	424
219	LEU	VAL	490
390	VAL	LEU	490

	Res1	Res2	Theta
84	CYS	CYS	1
242	MET	ASN	5
52	ASN	MET	5
105	GLN	GLN	7
357	TRP	TRP	7
...	...	...	...
209	LEU	ILE	440
389	VAL	ILE	512
199	ILE	VAL	512
219	LEU	VAL	614
390	VAL	LEU	614

	Res1	Res2	Type	Theta	Index1	Index2	r	ResId1	ResId2	index_x	Density_H_x	Density_P_x	index_y	Density_H_y	Density_P_y	Protein	D_H	D_P
16	LEU	LEU	Direct	1.0	13	69	4.486654	14	70	13	2.793	2.000	69	1.999	5.000	1hoe	4.792	7.000
212	LEU	LEU	Direct	1.0	15	42	6.106652	16	43	15	2.019	5.527	42	2.988	5.999	1tif	5.007	11.526
214	LEU	LEU	Direct	1.0	32	42	5.137943	33	43	32	3.892	1.886	42	2.988	5.999	1tif	6.880	7.885
366	LEU	LEU	Direct	1.0	10	30	5.106055	11	31	10	2.003	5.997	30	1.932	5.996	1vcc	3.935	11.993
492	LEU	LEU	Direct	1.0	6	50	5.629869	7	51	6	0.000	9.000	50	0.000	8.000	1by9	0.000	17.000
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
19877	LEU	LEU	Direct	1.0	53	110	5.940358	54	111	53	0.000	9.958	110	1.999	7.008	1akr	1.999	16.966
19878	LEU	LEU	Direct	1.0	76	110	5.347788	77	111	76	2.279	6.739	110	1.999	7.008	1akr	4.278	13.747
20052	LEU	LEU	Direct	1.0	76	113	5.790491	77	114	76	2.279	6.739	113	2.935	1.959	1akr	5.214	8.698
20083	LEU	LEU	Direct	1.0	3	68	5.984374	4	69	3	1.000	3.000	68	0.998	3.996	1osa	1.998	6.996
20125	LEU	LEU	Direct	1.0	31	47	5.299009	32	48	31	0.784	6.989	47	5.163	2.328	1osa	5.947	9.317

	Res1	Res2	Type	Theta	Index1	Index2	r	ResId1	ResId2	index_x	Density_H_x	Density_P_x	index_y	Density_H_y	Density_P_y	Protein	D_H	D_P
7390	LEU	LEU	Direct	0.999	4	44	6.195842	5	45	4	5.270	3.561	44	4.985	0.999	1tmy	10.255	4.560
11901	LEU	LEU	Direct	1.000	6	63	5.423016	7	64	6	5.892	1.000	63	4.004	1.000	1cpq	9.896	2.000
14426	LEU	LEU	Direct	0.998	10	116	6.235893	11	117	10	3.262	3.131	116	5.969	1.133	2end	9.231	4.264

	ResName1	ResName2	r_max	r_min
19	SER	SER	4.359305	3.198264
20	SER	PRO	4.323022	3.109163
21	SER	VAL	4.271867	3.411367
22	SER	THR	4.547018	3.236274
23	SER	CYS	4.243967	3.176515
24	SER	ILE	4.792703	2.905342
25	SER	LEU	4.725601	3.041130
26	SER	ASN	4.646894	3.344635
27	SER	ASP	3.718539	3.013373
28	SER	GLN	4.553369	3.024015
29	SER	LYS	4.350099	3.242161
30	SER	GLU	4.048059	3.258734
31	SER	MET	5.140508	3.441655
32	SER	HIS	4.257428	3.561157
33	SER	PHE	4.850533	3.393127
34	SER	ARG	4.990777	2.941009
35	SER	TYR	4.985624	3.275489
36	SER	TRP	5.014418	3.435178

	Res1	Res2	Theta
357	TRP	TRP	2
83	CYS	ASP	5
64	ASP	CYS	5
348	TRP	HIS	7
177	HIS	TRP	7
...	...	...	...
199	ILE	VAL	291
209	LEU	ILE	368
190	ILE	LEU	368
219	LEU	VAL	442
390	VAL	LEU	442

	Res1	Res2	Type	Theta	Index1	Index2	r	ResId1	ResId2	index_x	Density_H_x	Density_P_x	index_y	Density_H_y	Density_P_y	Protein
14367	PHE	LEU	Direct	0.998	38	64	5.879114	39	65	38	2.776	4.976	64	0.999	8.189	1pne
14368	ILE	LEU	Direct	1.000	41	64	5.440172	42	65	41	2.991	5.830	64	0.999	8.189	1pne
14369	GLU	LEU	Direct	0.999	45	64	5.846354	46	65	45	4.000	1.999	64	0.999	8.189	1pne
14370	LEU	LEU	Direct	1.000	49	64	5.588858	50	65	49	0.004	6.959	64	0.999	8.189	1pne
14371	ILE	LEU	Direct	0.994	99	64	5.991632	100	65	99	0.886	5.010	64	0.999	8.189	1pne
14372	VAL	LEU	Direct	0.209	101	64	6.633253	102	65	101	0.000	6.422	64	0.999	8.189	1pne

	Res1	Res2	Type	Theta	Index1	Index2	r	index_x	Density_H_x	Density_P_x	index_y	Density_H_y	Density_P_y
0	ALA	SER	Direct	0.985	68	78	6.081271	68	3.233	2.011	78	3.792	1.000
1	ARG	TYR	Direct	1.000	27	67	5.312370	27	1.678	2.179	67	4.000	0.000
2	THR	TYR	Direct	1.000	29	67	5.425073	29	2.678	2.000	67	4.000	0.000
3	LEU	ILE	Direct	0.915	2	64	6.262689	2	1.001	5.733	64	0.000	4.962
4	LEU	ILE	Direct	0.949	12	64	6.207278	12	1.001	5.948	64	0.000	4.962
...	...	...	...	...	...	...	...	...	...	...	...	...	...
102	VAL	THR	Direct	0.587	23	70	6.464955	23	4.024	1.060	70	3.616	1.587
103	ARG	GLY	Direct	1.000	24	71	4.846040	24	1.041	0.000	71	1.996	0.001
104	ARG	GLU	Direct	1.000	27	41	4.193047	27	1.678	2.179	41	2.003	0.000
105	THR	GLU	Direct	1.000	29	41	5.378955	29	2.678	2.000	41	2.003	0.000
106	THR	VAL	Direct	1.000	29	39	4.840663	29	2.678	2.000	39	2.000	0.000

	Res1	Res2	Theta
0	ALA	GLU	1
31	PHE	ILE	1
32	PRO	ALA	1
33	PRO	GLU	1
34	PRO	ILE	1
...	...	...	...
1	ALA	SER	3
68	VAL	ILE	3
27	LEU	THR	3
22	ILE	ILE	6
25	LEU	ILE	6

	Res1	Res2	Type	Theta	Index1	Index2	r	index_x	Density_H_x	Density_P_x	index_y	Density_H_y	Density_P_y
131	LEU	ILE	Direct	1.000	2	84	4.197815	2	1.0	6.0	84	0.0	2.0
791	LEU	ILE	Direct	1.000	2	14	5.333030	2	1.0	6.0	14	1.0	7.0
929	LEU	ILE	Direct	0.915	2	64	6.262689	2	1.0	6.0	64	0.0	5.0
938	LEU	ILE	Direct	0.949	12	64	6.207278	12	1.0	6.0	64	0.0	5.0
1142	LEU	ILE	Direct	1.000	2	82	5.090292	2	1.0	6.0	82	2.0	2.0
1535	LEU	ILE	Direct	1.000	12	53	4.717994	12	1.0	6.0	53	0.0	7.0

	Res1	Res2	Theta
83	CYS	ASP	4
64	ASP	CYS	4
124	GLU	CYS	7
85	CYS	GLN	7
86	CYS	GLU	7
...	...	...	...
199	ILE	VAL	370
209	LEU	ILE	424
190	ILE	LEU	424
219	LEU	VAL	490
390	VAL	LEU	490

	Res1	Res2	Theta
84	CYS	CYS	1
242	MET	ASN	5
52	ASN	MET	5
105	GLN	GLN	7
357	TRP	TRP	7
...	...	...	...
209	LEU	ILE	440
389	VAL	ILE	512
199	ILE	VAL	512
219	LEU	VAL	614
390	VAL	LEU	614

	Res1	Res2	Theta
357	TRP	TRP	2
83	CYS	ASP	5
64	ASP	CYS	5
348	TRP	HIS	7
177	HIS	TRP	7
...	...	...	...
199	ILE	VAL	291
209	LEU	ILE	368
190	ILE	LEU	368
219	LEU	VAL	442
390	VAL	LEU	442

	Res1	Res2	Theta
0	ALA	GLU	1
31	PHE	ILE	1
32	PRO	ALA	1
33	PRO	GLU	1
34	PRO	ILE	1
...	...	...	...
1	ALA	SER	3
68	VAL	ILE	3
27	LEU	THR	3
22	ILE	ILE	6
25	LEU	ILE	6

	Res1	Res2	Type	Theta	Index1	Index2	r	index_x	Density_H_x	Density_P_x	index_y	Density_H_y	Density_P_y
0	ARG	VAL	Protein	0.0	0	69	13.319039	0	1.892	3.972	69	2.902	1.0
1	ARG	VAL	Water	0.0	0	69	13.319039	0	1.892	3.972	69	2.902	1.0
2	ARG	VAL	Direct	0.0	0	69	13.319039	0	1.892	3.972	69	2.902	1.0
3	TRP	VAL	Protein	0.0	16	69	12.675022	16	2.649	4.679	69	2.902	1.0
4	TRP	VAL	Water	0.0	16	69	12.675022	16	2.649	4.679	69	2.902	1.0
...	...	...	...	...	...	...	...	...	...	...	...	...	...
2344	ILE	VAL	Water	0.0	28	39	10.884260	28	1.000	5.199	39	2.000	0.0
2345	ILE	VAL	Direct	0.0	28	39	10.884260	28	1.000	5.199	39	2.000	0.0
2346	THR	VAL	Protein	0.0	29	39	4.840663	29	2.678	2.000	39	2.000	0.0
2347	THR	VAL	Water	0.0	29	39	4.840663	29	2.678	2.000	39	2.000	0.0
2348	THR	VAL	Direct	1.0	29	39	4.840663	29	2.678	2.000	39	2.000	0.0

	index	Density_H	Density_P
0	0	1.892	3.972
1	1	1.443	0.000
2	2	1.001	5.733
3	3	1.443	1.000
4	4	1.000	3.294
...	...	...	...
86	86	1.959	2.000
87	87	1.000	0.686
88	88	2.552	3.960
89	89	2.956	0.000
90	90	2.007	0.000

	Res1	Res2	Type	theta_gamma
165	ILE	ILE	Direct	-5.859840
54	ASP	LYS	Direct	-4.141928
181	ILE	VAL	Direct	-3.560194
171	ILE	PHE	Direct	-3.555981
29	ARG	PHE	Protein	-2.875323

	Res1	Res2	Type	theta_gamma
250	TRP	ILE	Direct	-4.199720
306	VAL	VAL	Direct	-3.221582
180	PHE	ILE	Direct	-2.645379
136	ILE	ILE	Direct	-2.453640
292	VAL	LEU	Direct	-2.448768

	Res1	Res2	Type	theta_gamma
19	CYS	CYS	Direct	-25.324650
149	VAL	LEU	Direct	-11.089641
20	CYS	CYS	Protein	-6.585576
26	CYS	LEU	Protein	-3.843926
104	PHE	CYS	Direct	-3.533796

	Res1	Res2	Type	Theta	Index1	Index2	r
0	LEU	ALA	Protein	0.000	0	18	11.045428
1	LEU	ALA	Water	0.000	0	18	11.045428
2	LEU	ALA	Direct	0.000	0	18	11.045428
3	LEU	ARG	Protein	0.033	0	40	9.838098
4	LEU	ARG	Water	0.000	0	40	9.838098
...	...	...	...	...	...	...	...
3097	TYR	GLU	Water	0.000	89	101	15.198022
3098	TYR	GLU	Direct	0.000	89	101	15.198022
3099	ILE	TYR	Protein	0.000	90	100	10.454025
3100	ILE	TYR	Water	0.000	90	100	10.454025
3101	ILE	TYR	Direct	0.000	90	100	10.454025