In [1]:
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions

from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2

In [2]:
plt.rcParams['figure.figsize'] = [16.18033, 10]    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100

In [4]:
data = pd.read_csv("/Users/weilu/Research/data/survey_center_of_mass_distance_complete.csv", index_col=0)

In [5]:
y = "r_com_com"
d = data

t = d.groupby(["ResName1", "ResName2"])[y].idxmin().reset_index()
selected = d.iloc[t[y].to_list()].reset_index(drop=True)

In [7]:


In [26]:
weight_list = ["ALA", "SER", "PRO", "VAL", "THR", "CYS", "ILE", "LEU", "ASN", "ASP", "GLN", "LYS", "GLU", "MET", "HIS", "PHE", "ARG", "TYR", "TRP"]
res_to_index = {}
for i, res in enumerate(weight_list):
    res_to_index[res] = i

min_r_com_com_matrix = np.zeros((19, 19))
for i, line in selected.iterrows():
    res1 = line["ResName1"]
    res2 = line["ResName2"]
    min_r_com_com_matrix[res_to_index[res1]][res_to_index[res2]] = line["r_com_com"] - 3.5 + 0.3
    
plt.imshow(min_r_com_com_matrix, origin=0, cmap="seismic", vmin=-2, vmax=2)
plt.colorbar()
plt.xticks(ticks=np.arange(19), labels=weight_list)

In [27]:



Out[27]:
([<matplotlib.axis.XTick at 0x1a27095940>,
  <matplotlib.axis.XTick at 0x1a2708bda0>,
  <matplotlib.axis.XTick at 0x1a212cbac8>,
  <matplotlib.axis.XTick at 0x1a26ac1518>,
  <matplotlib.axis.XTick at 0x1a26ac19e8>,
  <matplotlib.axis.XTick at 0x1a26ac1ef0>,
  <matplotlib.axis.XTick at 0x1a26ac34a8>,
  <matplotlib.axis.XTick at 0x1a26ac3a20>,
  <matplotlib.axis.XTick at 0x1a26ac1fd0>,
  <matplotlib.axis.XTick at 0x1a26ac3828>,
  <matplotlib.axis.XTick at 0x1a26ac7390>,
  <matplotlib.axis.XTick at 0x1a26ac7908>,
  <matplotlib.axis.XTick at 0x1a26ac7e80>,
  <matplotlib.axis.XTick at 0x1a26ac9438>,
  <matplotlib.axis.XTick at 0x1a26ac99b0>,
  <matplotlib.axis.XTick at 0x1a26ac9f28>,
  <matplotlib.axis.XTick at 0x1a26acc4e0>,
  <matplotlib.axis.XTick at 0x1a26ac94e0>,
  <matplotlib.axis.XTick at 0x1a26ac78d0>],
 <a list of 19 Text xticklabel objects>)

In [6]:
selected


Out[6]:
Protein id1 ResName1 id2 ResName2 r_ca_ca r_com_com r_cb_cb r_ca_com r_com_ca
0 1cqzB02 22 ALA 95 ALA 5.908709 3.138504 3.138505 4.545833 4.482826
1 1ev7B02 299 ALA 276 ARG 5.682232 3.165103 4.353362 4.676932 4.942834
2 1rrsA01 185 ALA 182 ASN 5.601128 3.270920 4.290506 4.184345 5.161467
3 2efwG00 95 ALA 92 ASP 5.505649 3.154983 4.075313 4.186296 4.884866
4 1c07A00 77 ALA 61 CYS 4.441674 3.015272 2.951008 3.794416 4.444079
5 2ib1A00 182 ALA 146 GLN 5.747602 3.246770 3.775722 4.106207 4.257691
6 3geuA00 164 ALA 167 GLU 5.388519 3.120584 3.915556 4.358684 4.813024
7 1qrjA01 120 ALA 71 HIS 5.473842 3.163042 4.608379 4.049760 4.580517
8 4ne3A00 86 ALA 96 ILE 6.374214 3.398528 3.494172 4.808078 4.939825
9 1irzA00 21 ALA 38 LEU 6.210785 3.239366 3.551487 4.081262 5.013743
10 4fcyB02 199 ALA 202 LYS 5.366684 3.518351 4.340115 4.193958 4.893482
11 2of5H00 807 ALA 835 MET 7.241058 3.408897 5.124332 4.429695 6.049232
12 3gn5B02 88 ALA 99 PHE 6.692594 3.360567 4.275672 4.502249 5.712069
13 1wgxA00 38 ALA 46 PRO 4.604546 3.409870 3.201724 4.895352 3.432163
14 5cviA01 37 ALA 34 SER 4.958821 3.140926 3.451564 4.338090 4.023389
15 2bidA00 158 ALA 175 THR 5.975650 3.211792 3.227825 4.419328 4.551087
16 1if1B00 41 ALA 46 TRP 5.556347 3.331457 4.588389 3.919903 6.026068
17 1or5A00 16 ALA 37 TYR 6.945055 3.350794 5.559887 4.864548 5.805660
18 1d2dA00 29 ALA 13 VAL 5.783528 3.069021 3.032873 4.415103 4.304394
19 1ev7B02 276 ARG 299 ALA 5.682232 3.165103 4.353362 4.942834 4.676932
20 2of7A02 144 ARG 164 ARG 7.144023 3.770143 7.056175 6.805733 6.046997
21 3bhqA00 135 ARG 71 ASN 7.013448 3.546138 4.845044 4.677932 4.698705
22 1z77A02 126 ARG 130 ASP 5.759355 3.348837 5.647260 4.922713 5.811543
23 1oywA03 382 ARG 397 CYS 6.522312 3.699298 3.984618 5.083659 5.280008
24 2efwG00 31 ARG 29 GLN 4.641343 3.700493 3.510537 5.356221 5.675107
25 4jd9G00 17 ARG 16 GLU 3.805390 3.564350 5.197448 3.837766 5.872784
26 1bdhA01 26 ARG 20 HIS 5.955606 3.643981 4.879739 4.965923 6.194645
27 3pqkA00 82 ARG 87 ILE 4.850553 3.558249 5.024288 6.100633 4.557544
28 2w9mB01 10 ARG 47 LEU 7.756642 3.629136 4.861151 5.582074 6.324805
29 1vx4K02 114 ARG 130 LYS 6.068630 3.310505 5.542534 6.591987 3.959741
... ... ... ... ... ... ... ... ... ... ...
331 1u5tB01 444 TYR 415 ILE 6.694387 3.723682 5.559739 6.315435 4.472366
332 1n3kA00 82 TYR 7 LEU 6.947247 3.560403 4.470984 5.060046 5.062538
333 1f1eA00 32 TYR 7 LYS 6.281407 3.755418 5.346178 5.338842 4.597226
334 1v1gA00 120 TYR 136 MET 6.711337 3.408188 5.148762 5.579890 5.628433
335 1lmzA00 56 TYR 60 PHE 5.732690 3.268141 5.324258 5.184902 4.927429
336 1wylA01 46 TYR 55 PRO 7.753644 3.606297 5.877881 6.568705 4.202286
337 4i5jA02 374 TYR 372 SER 5.428262 3.275489 5.322394 4.675667 5.028648
338 3mwnA00 1158 TYR 1102 THR 8.337726 3.625804 5.527067 6.658520 5.500113
339 3lmoA00 74 TYR 60 TRP 7.709921 3.837961 5.145976 5.386977 4.383227
340 3geuA00 58 TYR 92 TYR 7.787091 3.593895 5.178475 5.562479 6.155689
341 4u7nA01 403 TYR 424 VAL 7.626807 3.558603 5.149593 6.620856 5.227092
342 1d2dA00 13 VAL 29 ALA 5.783528 3.069021 3.032873 4.304394 4.415103
343 1cscA02 374 VAL 329 ARG 5.264984 3.673704 3.922351 4.713512 5.390074
344 4a2qA01 49 VAL 21 ASN 5.769338 3.776138 4.700932 5.156895 4.693386
345 1vx4K02 143 VAL 107 ASP 5.016043 3.054983 3.879137 4.034955 3.732843
346 2jrfA01 103 VAL 42 CYS 5.179540 3.479707 4.282054 3.480748 5.526419
347 3o60A00 70 VAL 21 GLN 7.457473 3.676127 4.993977 4.884728 5.950055
348 3kkdA00 53 VAL 158 GLU 7.372310 3.489796 5.104510 5.221215 5.847657
349 1oywA03 384 VAL 478 HIS 4.554333 3.696919 4.582411 4.661388 4.947785
350 2a6hF02 304 VAL 291 ILE 7.158327 3.709094 4.543865 5.577966 5.277258
351 2l9fA00 67 VAL 64 LEU 5.898939 3.756116 3.657911 5.197858 5.107406
352 4du6E01 70 VAL 35 LYS 6.845805 3.977987 5.282276 5.398007 5.290564
353 4bjaA00 75 VAL 83 MET 6.052062 3.810124 4.409585 5.046900 4.838860
354 1guxB00 725 VAL 739 PHE 7.047835 3.684758 4.817834 4.702976 5.879418
355 1kx5A00 35 VAL 38 PRO 6.332792 3.584730 4.850818 5.388102 4.738336
356 4q45A03 203 VAL 226 SER 6.211264 3.411367 3.631042 4.601442 5.287065
357 1xjtA00 70 VAL 67 THR 5.597509 3.697824 4.269946 5.046158 4.837270
358 3lmmB05 528 VAL 524 TRP 4.886695 3.639475 4.623049 5.190443 4.489318
359 4u7nA01 424 VAL 403 TYR 7.626807 3.558603 5.149593 5.227092 6.620856
360 3a8rA01 176 VAL 212 VAL 5.894917 3.718177 4.191580 5.044070 5.024618

361 rows × 10 columns


In [19]:
res_list = ['ARG', 'ASP', 'PRO', 'TRP', 'THR', 'HIS', 'GLU', 'LEU', 'SER',
       'LYS', 'VAL', 'PHE', 'ILE', 'TYR', 'GLN', 'ALA', 'ASN', 'MET',
       'CYS']

In [ ]:
for res1 in res_list:
    for res2 in res_list:
        data.query(f"ResName1 == '{res1}' and ResName2 == '{res2}'").hist("r_com_com", bins=50)
        plt.xlim([0,20])
        plt.title(f"{res1}_{res2}")
        plt.savefig(f"/Users/weilu/Research/database/survey_center_of_mass_distance/{res1}_{res2}.png", dpi=300)
        plt.clf()

In [63]:
data["r_com_com"].min()


Out[63]:
2.062640965053271

In [248]:



Out[248]:
2.062640965053271

In [249]:
short_list = ['ARG', 'ASP', 'PRO', 'TRP', 'THR', 'HIS', 'GLU', 'LEU', 'SER',
       'LYS', 'VAL', 'PHE', 'ILE', 'TYR', 'GLN', 'ALA', 'ASN', 'MET',
       'CYS']

from sklearn.mixture import GaussianMixture

df_ = []
for res1 in short_list:
    for res2 in short_list:
        data_one = data.query(f"ResName1 == '{res1}' and ResName2 == '{res2}'").reset_index(drop=True)
        lower_data = data_one[data_one.r_com_com <= 7.5].reset_index(drop=True)
        data_res = lower_data
        X = data_res[["r_com_com"]].values
        gmm = GaussianMixture(n_components=3).fit(X)
        plt.hist(X, bins=20)
        # x_hat = np.array(bin_centers).reshape(-1, 1)
        x_min = data_res["r_com_com"].min()
        x_max = data_res["r_com_com"].max()
        x_hat = np.linspace(x_min, x_max).reshape(-1, 1)
        y_hat = len(X)/6*np.exp(gmm.score_samples(x_hat))
        # e_hat = gmm.score_samples(x_hat)
        plt.plot(x_hat, y_hat)
        #short_list = ['ARG','ASP']
#         df_temp = pd.DataFrame(np.array([bin_centers, y]).T, columns=["r_cbd_cbd","energy"])
#         df_temp["ResName1"] = res1
#         df_temp["ResName2"] = res2
#         #print(df_temp)
#         df_.append(df_temp)
#         plt.plot(bin_centers, y)
        plt.title(f"gmm_fit_{res1}_{res2}")
        plt.savefig(f"/Users/weilu/Research/server/mar_2020/cmd_cmd_exclude_volume/figures/gmm_fit_{res1}_{res2}.png", dpi=300)
        plt.clf()
# print(df)
# df = pd.concat(df_).reset_index(drop=True)
# df.to_csv('/Users/weilu/Research/server/mar_2020/cmd_cmd_exclude_volume/cbd_cbd_energy.csv', index=False)


<Figure size 1618.03x1000 with 0 Axes>

In [157]:
short_list = ['ARG', 'ASP', 'PRO', 'TRP', 'THR', 'HIS', 'GLU', 'LEU', 'SER',
       'LYS', 'VAL', 'PHE', 'ILE', 'TYR', 'GLN', 'ALA', 'ASN', 'MET',
       'CYS']


#short_list = ['ARG','ASP']
df_ = []
for res1 in short_list:
    for res2 in short_list:
        data_one = data.query(f"ResName1 == '{res1}' and ResName2 == '{res2}'").reset_index(drop=True)
        lower_data = data_one[data_one.r_com_com <= 7.5].reset_index(drop=True)
        hist,bins = np.histogram(lower_data["r_com_com"], bins=10)
        bin_centers = (bins[1:] + bins[:-1])/2
        y = -np.log((hist+10)/np.sum(hist+10))
        df_temp = pd.DataFrame(np.array([bin_centers, y]).T, columns=["r_cbd_cbd","energy"])
        df_temp["ResName1"] = res1
        df_temp["ResName2"] = res2
        #print(df_temp)
        df_.append(df_temp)
        plt.plot(bin_centers, y)
        plt.title(f"energy_{res1}_{res2}")
        plt.savefig(f"/Users/weilu/Research/server/mar_2020/cmd_cmd_exclude_volume/figures/energy_{res1}_{res2}.png", dpi=300)
        plt.clf()
# print(df)
df = pd.concat(df_).reset_index(drop=True)
df.to_csv('/Users/weilu/Research/server/mar_2020/cmd_cmd_exclude_volume/cbd_cbd_energy.csv', index=False)


<Figure size 1618.03x1000 with 0 Axes>

In [147]:
df_temp = pd.DataFrame(np.array([bin_centers, y]).T, columns=["r_cbd_cbd","energy"])
df_temp["ResName1"] = res1
df_temp["ResName2"] = res2

In [194]:
[10, 20]*2


Out[194]:
[10, 20, 10, 20]

In [201]:
lower_data.hist("r_com_com")


Out[201]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a539c44a8>]],
      dtype=object)

In [ ]:


In [270]:
data.query("3c18A03")


Out[270]:
Protein id1 ResName1 id2 ResName2 r_ca_ca r_com_com r_cb_cb r_ca_com r_com_ca
0 3c18A03 233 ARG 285 TYR 11.549665 12.578935 11.438617 14.603118 10.346378
1 3c18A03 233 ARG 238 THR 12.113347 8.895204 11.572654 12.652252 8.730217
2 3c18A03 233 ARG 234 ASP 3.733604 9.481483 5.515192 5.804656 7.033244
3 3c18A03 233 ARG 245 GLU 11.869354 6.874781 9.813950 9.490125 8.704953
4 3c18A03 233 ARG 241 GLU 11.584703 5.853921 9.063191 10.052837 7.410306
5 3c18A03 233 ARG 237 TRP 8.558047 3.946257 7.460506 6.392295 5.876545
6 3c18A03 233 ARG 286 GLU 9.464896 9.889798 9.457272 8.719884 9.756637
7 3c18A03 233 ARG 235 ARG 5.714665 4.654038 5.757178 5.374342 6.539949
8 3c18A03 233 ARG 236 PRO 7.766732 8.435995 8.679585 8.943483 6.935363
9 3c18A03 233 ARG 242 LEU 11.528663 8.488094 10.936115 11.071333 8.223381
10 3c18A03 233 ARG 287 PRO 10.366532 10.151411 10.315418 8.694661 11.712348
11 3c18A03 234 ASP 237 TRP 9.385686 11.093018 11.125618 8.626148 11.775607
12 3c18A03 234 ASP 286 GLU 8.758088 7.766050 9.401233 6.702904 10.310185
13 3c18A03 234 ASP 235 ARG 3.791946 7.336827 5.247543 5.337638 5.670565
14 3c18A03 234 ASP 233 ARG 3.733604 9.481483 5.515192 7.033244 5.804656
15 3c18A03 234 ASP 236 PRO 7.046739 9.160526 9.420709 7.487685 9.074649
16 3c18A03 234 ASP 287 PRO 9.832555 9.874736 10.802205 8.608006 10.894620
17 3c18A03 235 ARG 285 TYR 8.594310 13.998463 10.477855 12.079645 11.153152
18 3c18A03 235 ARG 238 THR 9.945628 10.016453 10.461168 10.592024 10.072400
19 3c18A03 235 ARG 234 ASP 3.791946 7.336827 5.247543 5.670565 5.337638
20 3c18A03 235 ARG 284 THR 9.100389 9.881903 8.159506 8.156570 10.860741
21 3c18A03 235 ARG 241 GLU 11.817742 7.652822 10.212736 9.307006 10.535587
22 3c18A03 235 ARG 273 GLN 11.839126 9.392544 9.983719 8.493341 12.841967
23 3c18A03 235 ARG 271 SER 11.302978 12.342756 10.611438 9.482736 14.069852
24 3c18A03 235 ARG 237 TRP 6.672196 7.595253 7.877475 7.536017 7.333605
25 3c18A03 235 ARG 286 GLU 6.891255 8.860679 7.342536 4.817196 10.257261
26 3c18A03 235 ARG 269 ARG 11.137118 13.909918 11.530207 9.863058 14.784901
27 3c18A03 235 ARG 233 ARG 5.714665 4.654038 5.757178 6.539949 5.374342
28 3c18A03 235 ARG 236 PRO 3.741907 6.833489 5.186390 3.872977 6.309835
29 3c18A03 235 ARG 287 PRO 9.341687 11.510424 11.077155 8.474854 12.728924
... ... ... ... ... ... ... ... ... ... ...
1688312 1lnwA01 139 LEU 135 LEU 6.597429 6.917246 6.785338 8.372935 5.798475
1688313 1lnwA01 139 LEU 133 HIS 10.045782 11.634865 11.047232 11.198787 9.819054
1688314 1lnwA01 139 LEU 131 LEU 12.706198 12.000909 13.050916 13.317375 11.757093
1688315 1lnwA01 139 LEU 140 ALA 3.810307 6.660487 5.268385 4.864487 5.851340
1688316 1lnwA01 140 ALA 136 ASP 6.060953 7.011885 5.911810 7.714963 5.601464
1688317 1lnwA01 140 ALA 142 GLN 5.607827 8.077087 7.291825 6.698670 7.019127
1688318 1lnwA01 140 ALA 132 VAL 12.052047 11.991816 11.921991 12.680241 11.444326
1688319 1lnwA01 140 ALA 138 CYS 5.767436 8.480668 7.540234 8.107310 6.225615
1688320 1lnwA01 140 ALA 134 LEU 9.930770 11.678266 10.815098 12.238484 9.387843
1688321 1lnwA01 140 ALA 141 ALA 3.807496 5.311946 5.311946 4.793664 4.584375
1688322 1lnwA01 140 ALA 139 LEU 3.810307 6.660487 5.268385 5.851340 4.864487
1688323 1lnwA01 140 ALA 137 GLN 5.148353 7.125798 5.796195 8.080124 4.569858
1688324 1lnwA01 140 ALA 135 LEU 8.810117 11.030222 9.682953 11.013837 8.721411
1688325 1lnwA01 140 ALA 133 HIS 10.299937 9.311728 10.226795 10.510094 9.373099
1688326 1lnwA01 141 ALA 136 ASP 8.666059 10.978633 9.589719 10.788760 8.728532
1688327 1lnwA01 141 ALA 142 GLN 3.798247 5.601065 5.724882 5.263065 4.692795
1688328 1lnwA01 141 ALA 138 CYS 5.680610 7.187967 6.582744 7.895816 5.175738
1688329 1lnwA01 141 ALA 134 LEU 10.940968 11.856834 11.216235 12.772847 10.240140
1688330 1lnwA01 141 ALA 139 LEU 5.441364 8.471638 7.376191 7.990101 5.966338
1688331 1lnwA01 141 ALA 137 GLN 6.196007 7.712250 6.286438 8.506445 5.763910
1688332 1lnwA01 141 ALA 135 LEU 10.411728 12.451764 11.448430 12.717684 10.095949
1688333 1lnwA01 141 ALA 133 HIS 12.332558 11.541509 12.663423 12.052639 11.969016
1688334 1lnwA01 141 ALA 140 ALA 3.807496 5.311946 5.311946 4.584375 4.793664
1688335 1lnwA01 142 GLN 136 ASP 10.313915 12.200622 11.139579 12.466821 9.875877
1688336 1lnwA01 142 GLN 138 CYS 6.817777 7.027064 7.299867 8.708416 5.669314
1688337 1lnwA01 142 GLN 141 ALA 3.798247 5.601065 5.724882 4.692795 5.263065
1688338 1lnwA01 142 GLN 139 LEU 5.408051 6.543003 5.833147 7.550437 4.847952
1688339 1lnwA01 142 GLN 137 GLN 8.820095 11.626661 10.127757 11.685051 8.654228
1688340 1lnwA01 142 GLN 135 LEU 11.512459 11.760776 11.826377 13.358436 10.201123
1688341 1lnwA01 142 GLN 140 ALA 5.607827 8.077087 7.291825 7.019127 6.698670

1688342 rows × 10 columns


In [254]:
a = gmm.sample(1000)[0]

In [259]:
min(X)[0]


Out[259]:
2.062640965053271

In [268]:
fig, ax1 = plt.subplots()

from sklearn.mixture import GaussianMixture
data_res = lower_data
X = data_res[["r_com_com"]].values
gmm = GaussianMixture(n_components=3).fit(X)

x_hat = np.linspace(min(X)[0], max(X)[0], 200).reshape(-1, 1)
y_hat = len(X)/6*np.exp(gmm.score_samples(x_hat))
e_hat = -gmm.score_samples(x_hat)
color = 'tab:blue'
ax1.tick_params(axis='y', labelcolor=color)
ax1.hist(X, bins=20)
ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:red'
ax2.set_ylabel('sin', color=color)  # we already handled the x-label with ax1
ax2.plot(x_hat, e_hat, color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()



In [269]:
plt.plot(x_hat, y_hat)


Out[269]:
[<matplotlib.lines.Line2D at 0x1a80d046a0>]

In [251]:
# Create some mock data
t = np.arange(0.01, 10.0, 0.01)
data1 = np.exp(t)
data2 = np.sin(2 * np.pi * t)

fig, ax1 = plt.subplots()

color = 'tab:red'
ax1.set_xlabel('time (s)')
ax1.set_ylabel('exp', color=color)

ax1.tick_params(axis='y', labelcolor=color)
ax1.hist(X, bins=20)
ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:blue'
ax2.set_ylabel('sin', color=color)  # we already handled the x-label with ax1
ax2.plot(x_hat, -e_hat, color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()



In [243]:
plt.hist(X, bins=20)
# x_hat = np.array(bin_centers).reshape(-1, 1)
x_hat = np.linspace(bins[0], bins[-1]).reshape(-1, 1)
y_hat = len(X)/6*np.exp(gmm.score_samples(x_hat))
# e_hat = gmm.score_samples(x_hat)
plt.plot(x_hat, y_hat)
# plt.plot(x_hat, e_hat)


Out[243]:
[<matplotlib.lines.Line2D at 0x1a626f8710>]

In [224]:
plt.hist(list(gmm.sample(1000)[0].flatten()))


Out[224]:
(array([ 15.,  49., 116., 138., 160., 137.,  58., 123., 162.,  42.]),
 array([3.40783336, 3.8572258 , 4.30661824, 4.75601069, 5.20540313,
        5.65479557, 6.10418802, 6.55358046, 7.0029729 , 7.45236535,
        7.90175779]),
 <a list of 10 Patch objects>)

In [226]:
gmm.score_samples(np.array(bin_centers).reshape(-1, 1))


Out[226]:
array([-2.3501565 , -1.5211332 , -1.22514241, -1.08947293, -0.90516632,
       -1.14392634, -2.01902036, -1.97690737, -1.01154463, -1.102287  ])

In [228]:
y_hat = -gmm.score_samples(np.array(bin_centers).reshape(-1, 1))
plt.plot(bin_centers, y_hat)


Out[228]:
[<matplotlib.lines.Line2D at 0x1a5ea4f240>]

In [200]:
from scipy.signal import savgol_filter
res1 = "CYS"
res2 = "MET"
data_one = data.query(f"ResName1 == '{res1}' and ResName2 == '{res2}'").reset_index(drop=True)
lower_data = data_one[data_one.r_com_com <= 7.5].reset_index(drop=True)
n_bins = 10 * 1
data_selected = lower_data["r_com_com"].to_list()
hist,bins = np.histogram(data_selected, bins=n_bins)
bin_centers = (bins[1:] + bins[:-1])/2
y = -np.log((hist+10)/np.sum(hist+10))
# yhat = savgol_filter(y, 21, 3) # window size 51, polynomial order 3
df_temp = pd.DataFrame(np.array([bin_centers, y]).T, columns=["r_cbd_cbd","energy"])
df_temp["ResName1"] = res1
df_temp["ResName2"] = res2
plt.plot(bin_centers, y)
# plt.plot(bin_centers, yhat)
plt.title(f"energy_{res1}_{res2}")


Out[200]:
Text(0.5, 1.0, 'energy_CYS_MET')

In [ ]:
scatter()

In [174]:
from scipy.signal import savgol_filter
x = np.linspace(0,2*np.pi,100)
y = np.sin(x) + np.random.random(100) * 0.2
yhat = savgol_filter(y, 5, 3) # window size 51, polynomial order 3

plt.plot(x,y)
plt.plot(x,yhat, color='red')
plt.show()



In [106]:
df = pd.DataFrame(columns=["ResName1","ResName2","Max","Lowest"])


short_list = ['ARG', 'ASP', 'PRO', 'TRP', 'THR', 'HIS', 'GLU', 'LEU', 'SER',
       'LYS', 'VAL', 'PHE', 'ILE', 'TYR', 'GLN', 'ALA', 'ASN', 'MET',
       'CYS']


#short_list = ['ARG','ASP']

for res1 in short_list:
    for res2 in short_list:
        data_one = data.query(f"ResName1 == '{res1}' and ResName2 == '{res2}'").reset_index(drop=True)
        lower_data = data_one[data_one.r_com_com <= 7.5].reset_index(drop=True)
        hist,bins = np.histogram(lower_data["r_com_com"], bins=10)
        max_index = list(hist).index(max(hist))
        max_value = (bins[max_index + 1] + bins[max_index]) / 2
        lowest_value = bins[0]
        min_value = lower_data["r_com_com"].min()
        max_hist = max(hist)
        #print(lowest_value)
        #print(max_value)
        df_temp = pd.DataFrame([(res1,res2,max_value,lowest_value, min_value, max_hist)], columns=["ResName1","ResName2","Max","Lowest", "Min", "max_hist"])
        #print(df_temp)
        df = df.append(df_temp, ignore_index=True) # append return a new dataframe, so must use df = df.append()
        #
# print(df)
df.to_csv('/Users/weilu/Research/server/mar_2020/cmd_cmd_exclude_volume/cbd_cbd_max_min_bins10.csv', index=False)


/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/frame.py:6692: FutureWarning:

Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.



In [64]:
df = pd.DataFrame(columns=["ResName1","ResName2","Max","Lowest"])


short_list = ['ARG', 'ASP', 'PRO', 'TRP', 'THR', 'HIS', 'GLU', 'LEU', 'SER',
       'LYS', 'VAL', 'PHE', 'ILE', 'TYR', 'GLN', 'ALA', 'ASN', 'MET',
       'CYS']


#short_list = ['ARG','ASP']

for res1 in short_list:
    for res2 in short_list:
        data_one = data.query(f"ResName1 == '{res1}' and ResName2 == '{res2}'")
        lower_data = data_one[data_one.r_com_com <= 7.5].reset_index(drop=True)
        hist,bins = np.histogram(lower_data["r_com_com"], bins=30)
        max_index = list(hist).index(max(hist))
        max_value = (bins[max_index + 1] + bins[max_index]) / 2
        lowest_value = bins[0]
        min_value = lower_data["r_com_com"].min()
        #print(lowest_value)
        #print(max_value)
        df_temp = pd.DataFrame([(res1,res2,max_value,lowest_value, min_value)], columns=["ResName1","ResName2","Max","Lowest", "Min"])
        #print(df_temp)
        df = df.append(df_temp, ignore_index=True) # append return a new dataframe, so must use df = df.append()
        #
        #print(bins[max_index])
        plt.hist(lower_data["r_com_com"],bins=bins)
        plt.axvline(max_value, color='black')
        plt.axvline(lowest_value, color='red')
        #sns.distplot(lower_data["r_com_com"],bins=30)
        plt.savefig(f"/Users/weilu/Research/server/mar_2020/cmd_cmd_exclude_volume/figures/{res1}_{res2}.png", dpi=300)
        plt.clf()
print(df)
df.to_csv('/Users/weilu/Research/server/mar_2020/cmd_cmd_exclude_volume/cbd_cbd_max_min.csv', index=False)


/Users/weilu/anaconda3/envs/py36/lib/python3.6/site-packages/pandas/core/frame.py:6692: FutureWarning:

Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.


       Lowest       Max       Min ResName1 ResName2
0    3.770143  7.311417  3.770143      ARG      ARG
1    3.348837  5.493157  3.348837      ARG      ASP
2    3.921745  7.197585  3.921745      ARG      PRO
3    3.540129  7.163957  3.540129      ARG      TRP
4    3.529463  7.431207  3.529463      ARG      THR
5    3.643981  7.425067  3.643981      ARG      HIS
6    3.564350  4.546942  3.564350      ARG      GLU
7    3.629136  7.434434  3.629136      ARG      LEU
8    2.941009  7.116918  2.941009      ARG      SER
9    3.310505  7.427975  3.310505      ARG      LYS
10   3.673704  7.308532  3.673704      ARG      VAL
11   3.405437  7.430602  3.405437      ARG      PHE
12   3.558249  7.434250  3.558249      ARG      ILE
13   3.493992  5.962074  3.493992      ARG      TYR
14   3.700493  7.308298  3.700493      ARG      GLN
15   3.165103  6.848204  3.165103      ARG      ALA
16   3.546138  7.298869  3.546138      ARG      ASN
17   3.748634  6.558111  3.748634      ARG      MET
18   3.699298  7.169115  3.699298      ARG      CYS
19   3.348837  5.493157  3.348837      ASP      ARG
20   3.122743  6.834642  3.122743      ASP      ASP
21   3.635543  5.628313  3.635543      ASP      PRO
22   3.544580  7.425667  3.544580      ASP      TRP
23   3.243500  6.288349  3.243500      ASP      THR
24   3.335746  6.451011  3.335746      ASP      HIS
25   3.083951  7.425333  3.083951      ASP      GLU
26   3.594743  6.913284  3.594743      ASP      LEU
27   3.013373  6.078628  3.013373      ASP      SER
28   2.898653  7.423066  2.898653      ASP      LYS
29   3.054983  7.275440  3.054983      ASP      VAL
..        ...       ...       ...      ...      ...
331  3.441655  7.156381  3.441655      MET      SER
332  3.848597  6.221802  3.848597      MET      LYS
333  3.810124  5.470169  3.810124      MET      VAL
334  3.926980  5.533516  3.926980      MET      PHE
335  3.864055  5.498161  3.864055      MET      ILE
336  3.408188  5.930209  3.408188      MET      TYR
337  3.832618  6.329078  3.832618      MET      GLN
338  3.408897  7.428593  3.408897      MET      ALA
339  3.695005  7.422068  3.695005      MET      ASN
340  3.918445  5.888201  3.918445      MET      MET
341  3.854717  7.421054  3.854717      MET      CYS
342  3.699298  7.169115  3.699298      CYS      ARG
343  3.671520  7.048404  3.671520      CYS      ASP
344  2.910863  4.509563  2.910863      CYS      PRO
345  3.540219  5.980270  3.540219      CYS      TRP
346  3.187759  6.996172  3.187759      CYS      THR
347  3.413017  5.779388  3.413017      CYS      HIS
348  3.406721  7.158691  3.406721      CYS      GLU
349  3.874834  5.020659  3.874834      CYS      LEU
350  3.176515  6.543989  3.176515      CYS      SER
351  3.869748  7.439368  3.869748      CYS      LYS
352  3.479707  5.420955  3.479707      CYS      VAL
353  3.524323  5.445771  3.524323      CYS      PHE
354  3.561465  5.990087  3.561465      CYS      ILE
355  3.679104  5.141784  3.679104      CYS      TYR
356  3.575497  7.421625  3.575497      CYS      GLN
357  3.015272  6.673431  3.015272      CYS      ALA
358  3.531541  6.768976  3.531541      CYS      ASN
359  3.854717  7.421054  3.854717      CYS      MET
360  2.062641  2.330473  2.062641      CYS      CYS

[361 rows x 5 columns]
<Figure size 1618.03x1000 with 0 Axes>

In [66]:
df["diff"] = df["Lowest"] - df["Min"]

In [74]:
lower_data["r_com_com"].min()


Out[74]:
2.062640965053271

In [93]:
df_bin30 = pd.read_csv("/Users/weilu/Research/server/mar_2020/cmd_cmd_exclude_volume/cbd_cbd_max_min.csv")

In [96]:
plt.plot(df_bin30["Lowest"]-df["Lowest"])


Out[96]:
[<matplotlib.lines.Line2D at 0x1a347bb080>]

In [107]:
plt.plot(df_bin30["Max"]-df["Max"])


Out[107]:
[<matplotlib.lines.Line2D at 0x1a31457ba8>]

In [97]:
plt.plot(df_bin30["Max"]-df["Max"])


Out[97]:
[<matplotlib.lines.Line2D at 0x1a312c4ba8>]

In [77]:
weight_list = ["ALA", "SER", "PRO", "VAL", "THR", "CYS", "ILE", "LEU", "ASN", "ASP", "GLN", "LYS", "GLU", "MET", "HIS", "PHE", "ARG", "TYR", "TRP"]
res_to_index = {}
for i, res in enumerate(weight_list):
    res_to_index[res] = i

min_r_com_com_matrix = np.zeros((19, 19))
for i, line in df.iterrows():
    res1 = line["ResName1"]
    res2 = line["ResName2"]
    min_r_com_com_matrix[res_to_index[res1]][res_to_index[res2]] = line["Min"]
    
# plt.imshow(min_r_com_com_matrix, origin=0, cmap="seismic", vmin=-2, vmax=2)
plt.imshow(min_r_com_com_matrix, origin=0, cmap="seismic")
plt.colorbar()
plt.xticks(ticks=np.arange(19), labels=weight_list)


Out[77]:
([<matplotlib.axis.XTick at 0x1a345bcc18>,
  <matplotlib.axis.XTick at 0x1a3271df98>,
  <matplotlib.axis.XTick at 0x1a3271def0>,
  <matplotlib.axis.XTick at 0x1a3439d390>,
  <matplotlib.axis.XTick at 0x1a3439d160>,
  <matplotlib.axis.XTick at 0x1a346e26d8>,
  <matplotlib.axis.XTick at 0x1a346e2438>,
  <matplotlib.axis.XTick at 0x1a34714da0>,
  <matplotlib.axis.XTick at 0x1a346e0b38>,
  <matplotlib.axis.XTick at 0x1a346e0278>,
  <matplotlib.axis.XTick at 0x1a3439d4a8>,
  <matplotlib.axis.XTick at 0x1a346f8908>,
  <matplotlib.axis.XTick at 0x1a41fbe4e0>,
  <matplotlib.axis.XTick at 0x1a41fbea58>,
  <matplotlib.axis.XTick at 0x1a41fbb080>,
  <matplotlib.axis.XTick at 0x1a41fbb588>,
  <matplotlib.axis.XTick at 0x1a41fbbb00>,
  <matplotlib.axis.XTick at 0x1a270ae0f0>,
  <matplotlib.axis.XTick at 0x1a41fbb390>],
 <a list of 19 Text xticklabel objects>)

In [78]:
weight_list = ["ALA", "SER", "PRO", "VAL", "THR", "CYS", "ILE", "LEU", "ASN", "ASP", "GLN", "LYS", "GLU", "MET", "HIS", "PHE", "ARG", "TYR", "TRP"]
res_to_index = {}
for i, res in enumerate(weight_list):
    res_to_index[res] = i

min_r_com_com_matrix = np.zeros((19, 19))
for i, line in df.iterrows():
    res1 = line["ResName1"]
    res2 = line["ResName2"]
    min_r_com_com_matrix[res_to_index[res1]][res_to_index[res2]] = line["Max"]
    
# plt.imshow(min_r_com_com_matrix, origin=0, cmap="seismic", vmin=-2, vmax=2)
plt.imshow(min_r_com_com_matrix, origin=0, cmap="seismic")
plt.colorbar()
plt.xticks(ticks=np.arange(19), labels=weight_list)


Out[78]:
([<matplotlib.axis.XTick at 0x1a3483d240>,
  <matplotlib.axis.XTick at 0x1a3480bb38>,
  <matplotlib.axis.XTick at 0x1a3480b860>,
  <matplotlib.axis.XTick at 0x1a315f9240>,
  <matplotlib.axis.XTick at 0x1a315f9710>,
  <matplotlib.axis.XTick at 0x1a315f9be0>,
  <matplotlib.axis.XTick at 0x1a315e3160>,
  <matplotlib.axis.XTick at 0x1a315e36a0>,
  <matplotlib.axis.XTick at 0x1a315e3c18>,
  <matplotlib.axis.XTick at 0x1a315de1d0>,
  <matplotlib.axis.XTick at 0x1a315de748>,
  <matplotlib.axis.XTick at 0x1a315decc0>,
  <matplotlib.axis.XTick at 0x1a315de278>,
  <matplotlib.axis.XTick at 0x1a315e3668>,
  <matplotlib.axis.XTick at 0x1a315f9588>,
  <matplotlib.axis.XTick at 0x1a335b6710>,
  <matplotlib.axis.XTick at 0x1a335b6c88>,
  <matplotlib.axis.XTick at 0x1a335a2240>,
  <matplotlib.axis.XTick at 0x1a335a27b8>],
 <a list of 19 Text xticklabel objects>)

In [32]:
data.query("ResName1 == 'TRP' and ResName2 == 'TRP' and Protein == '4ft3A02'").sort_values("r_com_com")


Out[32]:
Protein id1 ResName1 id2 ResName2 r_ca_ca r_com_com r_cb_cb r_ca_com r_com_ca
198099 4ft3A02 221 TRP 231 TRP 8.761823 4.716319 7.419003 5.885756 7.896278
198286 4ft3A02 231 TRP 221 TRP 8.761823 4.716319 7.419003 7.896278 5.885756
197378 4ft3A02 192 TRP 208 TRP 13.079970 6.778209 11.958128 9.951345 10.262243
197848 4ft3A02 208 TRP 192 TRP 13.079970 6.778209 11.958128 10.262243 9.951345
197842 4ft3A02 208 TRP 221 TRP 9.581996 6.883076 7.461747 10.071727 7.285372
198100 4ft3A02 221 TRP 208 TRP 9.581996 6.883076 7.461747 7.285372 10.071727
197852 4ft3A02 208 TRP 231 TRP 11.865668 7.512713 11.706540 9.129191 10.943330
198296 4ft3A02 231 TRP 208 TRP 11.865668 7.512713 11.706540 10.943330 9.129191
197363 4ft3A02 192 TRP 221 TRP 14.083471 8.712547 12.435682 11.124576 11.012262
198095 4ft3A02 221 TRP 192 TRP 14.083471 8.712547 12.435682 11.012262 11.124576

In [133]:
bins


Out[133]:
array([3.54012895, 3.73779227, 3.93545559, 4.13311891, 4.33078223,
       4.52844556, 4.72610888, 4.9237722 , 5.12143552, 5.31909884,
       5.51676217, 5.71442549, 5.91208881, 6.10975213, 6.30741545,
       6.50507878, 6.7027421 , 6.90040542, 7.09806874, 7.29573206,
       7.49339539])

In [132]:
bin_centers


Out[132]:
array([ 7.27792121,  7.67324786,  8.0685745 ,  8.46390114,  8.85922779,
        9.25455443,  9.64988108, 10.04520772, 10.44053436, 10.83586101,
       11.23118765, 11.6265143 , 12.02184094, 12.41716758, 12.81249423,
       13.20782087, 13.60314752, 13.99847416, 14.3938008 , 14.78912745])

In [114]:
hist,bins


Out[114]:
(array([164,   4,  12,  42,  42,  28,  22,  56,  80,  58]),
 array([2.06264097, 2.5983059 , 3.13397083, 3.66963576, 4.20530069,
        4.74096562, 5.27663055, 5.81229548, 6.34796041, 6.88362534,
        7.41929028]))

In [134]:
selected_data = data.query("ResName1 == 'ARG' and ResName2 == 'TRP' and r_com_com < 7.5")
hist,bins = np.histogram(selected_data["r_com_com"], bins=10)
bin_centers = (bins[1:] + bins[:-1])/2
y = -np.log((hist+10)/np.sum(hist+10))
plt.plot(bin_centers, y)


Out[134]:
[<matplotlib.lines.Line2D at 0x1a4139ecf8>]

In [120]:
np.sum(((hist+10)/np.sum(hist+10)))


Out[120]:
0.9999999999999999

In [112]:
data.query("ResName1 == 'ARG' and ResName2 == 'TRP' and r_com_com < 7.5").hist("r_com_com", bins=30)


Out[112]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a30dafe80>]],
      dtype=object)

In [111]:
data.query("ResName1 == 'ARG' and ResName2 == 'TRP' and r_com_com < 7.5").hist("r_com_com", bins=10)


Out[111]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a31476908>]],
      dtype=object)

In [113]:
data.query("ResName1 == 'ARG' and ResName2 == 'TRP' and r_com_com < 7.5").hist("r_com_com", bins=20)


Out[113]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a37505b70>]],
      dtype=object)

In [105]:
data.query("ResName1 == 'TRP' and ResName2 == 'TRP' and r_com_com < 7.5").hist("r_com_com", bins=10)


Out[105]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a33509240>]],
      dtype=object)

In [13]:
data.query("ResName1 == 'TRP' and ResName2 == 'TRP'").hist("r_com_com", bins=50)


Out[13]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a1b62a630>]],
      dtype=object)

In [20]:
data.query("ResName1 == 'ALA' and ResName2 == 'ALA'").hist("r_com_com", bins=50)


Out[20]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a2c6b2048>]],
      dtype=object)

In [ ]:


In [30]:
data.query("ResName1 == 'ASN' and ResName2 == 'ARG'").hist("r_com_com", bins=50)


Out[30]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a2a5ed7b8>]],
      dtype=object)

In [32]:
data.query("ResName1 == 'ASN' and ResName2 == 'ARG'").sort_values("r_com_com").head(2)


Out[32]:
Protein id1 ResName1 id2 ResName2 r_ca_ca r_com_com r_cb_cb r_ca_com r_com_ca
1314561 3bhqA00 71 ASN 135 ARG 7.013448 3.546138 4.845044 4.698705 4.677932
221056 1y0uA00 28 ASN 31 ARG 5.293830 3.603272 4.065062 4.884607 4.692377

In [33]:
data.query("ResName1 == 'ARG' and ResName2 == 'ASN'").sort_values("r_com_com").head(2)


Out[33]:
Protein id1 ResName1 id2 ResName2 r_ca_ca r_com_com r_cb_cb r_ca_com r_com_ca
1315840 3bhqA00 135 ARG 71 ASN 7.013448 3.546138 4.845044 4.677932 4.698705
221106 1y0uA00 31 ARG 28 ASN 5.293830 3.603272 4.065062 4.692377 4.884607

In [29]:
data.query("ResName1 == 'ARG' and ResName2 == 'ASN'").hist("r_com_com", bins=50)


Out[29]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a26f7de48>]],
      dtype=object)

In [39]:
import plotly.express as px
res = "PHE"
data_res = data.query(f"ResName1 == '{res}' and ResName2 == '{res}' and r_com_com < 7.5").reset_index(drop=True)
print(res, data_res.shape)
fig = px.scatter_3d(data_res, x='r_ca_ca', y='r_com_com', z='r_ca_com', opacity=0.1)
fig.show()


PHE (2182, 10)

In [40]:
import plotly.express as px
res = "PHE"
data_res = data.query(f"ResName1 == '{res}' and ResName2 == '{res}'").reset_index(drop=True)
print(res, data_res.shape)
fig = px.scatter_3d(data_res, x='r_ca_ca', y='r_com_com', z='r_ca_com', opacity=0.1)
fig.show()


PHE (5160, 10)

In [62]:
data_res = data.query("ResName1 == 'ARG' and ResName2 == 'MET' and r_com_com < 7.5")
data_res_filtered = data_res.query("abs(id1-id2)<12")
data_res_filtered.hist("r_com_com", bins=20)


Out[62]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a2c982048>]],
      dtype=object)

In [61]:
data_res = data.query("ResName1 == 'ARG' and ResName2 == 'MET' and r_com_com < 7.5")
data_res_filtered = data_res.query("abs(id1-id2)>12")
data_res_filtered.hist("r_com_com", bins=20)


Out[61]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1a3789b2e8>]],
      dtype=object)

In [53]:
res = "ARG"
data_res = data.query(f"ResName1 == '{res}' and ResName2 == '{res}'").reset_index(drop=True)

In [51]:
data_res.dtypes


Out[51]:
Protein       object
id1            int64
ResName1      object
id2            int64
ResName2      object
r_ca_ca      float64
r_com_com    float64
r_cb_cb      float64
r_ca_com     float64
r_com_ca     float64
dtype: object

In [55]:
data_res.query("r_com_com > 7 and r_com_com < 8")


Out[55]:
Protein id1 ResName1 id2 ResName2 r_ca_ca r_com_com r_cb_cb r_ca_com r_com_ca
61 1rr7A02 82 ARG 115 ARG 11.401725 7.965920 10.116141 8.786590 10.752554
71 1rr7A02 115 ARG 82 ARG 11.401725 7.965920 10.116141 10.752554 8.786590
91 1i36A02 218 ARG 219 ARG 3.823408 7.726450 5.429818 6.303528 5.852432
95 1i36A02 219 ARG 218 ARG 3.823408 7.726450 5.429818 5.852432 6.303528
120 2g7lA00 43 ARG 44 ARG 3.859255 7.186208 5.491639 7.871461 4.423999
121 2g7lA00 44 ARG 43 ARG 3.859255 7.186208 5.491639 4.423999 7.871461
143 4uasA02 22 ARG 81 ARG 9.012647 7.772874 8.299223 7.919721 8.976596
155 4uasA02 54 ARG 57 ARG 5.103925 7.707703 5.757870 6.161800 8.659234
156 4uasA02 57 ARG 60 ARG 5.325678 7.061013 5.920855 3.750400 8.826361
157 4uasA02 57 ARG 54 ARG 5.103925 7.707703 5.757870 8.659234 6.161800
158 4uasA02 60 ARG 57 ARG 5.325678 7.061013 5.920855 8.826361 3.750400
163 4uasA02 81 ARG 22 ARG 9.012647 7.772874 8.299223 8.976596 7.919721
196 2du9A00 77 ARG 80 ARG 5.293693 7.501127 6.131831 6.372154 7.536913
198 2du9A00 79 ARG 80 ARG 3.804334 7.587120 5.311568 7.165077 6.892877
199 2du9A00 80 ARG 79 ARG 3.804334 7.587120 5.311568 6.892877 7.165077
200 2du9A00 80 ARG 77 ARG 5.293693 7.501127 6.131831 7.536913 6.372154
264 2z99A01 80 ARG 91 ARG 9.628271 7.761508 9.519560 9.309143 6.892079
266 2z99A01 91 ARG 93 ARG 5.920463 7.340798 6.096433 7.392959 7.900196
268 2z99A01 91 ARG 80 ARG 9.628271 7.761508 9.519560 6.892079 9.309143
269 2z99A01 93 ARG 91 ARG 5.920463 7.340798 6.096433 7.900196 7.392959
281 3bjbA00 103 ARG 106 ARG 5.568596 7.182470 6.319476 5.564152 8.729867
284 3bjbA00 106 ARG 103 ARG 5.568596 7.182470 6.319476 8.729867 5.564152
292 3bjbA00 141 ARG 162 ARG 6.977150 7.792590 6.915888 9.114334 7.761200
294 3bjbA00 162 ARG 141 ARG 6.977150 7.792590 6.915888 7.761200 9.114334
325 2xzmO02 123 ARG 126 ARG 4.864711 7.390258 5.222585 5.128117 8.910962
327 2xzmO02 126 ARG 123 ARG 4.864711 7.390258 5.222585 8.910962 5.128117
331 1t4hB02 433 ARG 434 ARG 3.801716 7.735683 5.348572 6.860982 7.057160
332 1t4hB02 434 ARG 433 ARG 3.801716 7.735683 5.348572 7.057160 6.860982
333 1t4hB02 461 ARG 467 ARG 5.779865 7.646923 4.961262 4.933169 5.370296
334 1t4hB02 467 ARG 461 ARG 5.779865 7.646923 4.961262 5.370296 4.933169
... ... ... ... ... ... ... ... ... ... ...
6067 3nqoA00 180 ARG 183 ARG 5.205732 7.532491 7.484649 5.733380 8.633727
6072 3nqoA00 183 ARG 180 ARG 5.205732 7.532491 7.484649 8.633727 5.733380
6174 1am7A00 50 ARG 78 ARG 13.713146 7.648844 11.953283 11.673792 10.396275
6177 1am7A00 78 ARG 50 ARG 13.713146 7.648844 11.953283 10.396275 11.673792
6206 2i10A02 129 ARG 160 ARG 10.938452 7.666889 11.368801 9.796782 9.429411
6213 2i10A02 160 ARG 129 ARG 10.938452 7.666889 11.368801 9.429411 9.796782
6224 4nk2A01 121 ARG 168 ARG 5.929658 7.713017 4.786485 8.092199 5.008049
6232 4nk2A01 134 ARG 180 ARG 9.231066 7.957708 8.153050 9.704117 6.045285
6233 4nk2A01 134 ARG 181 ARG 10.823272 7.789191 11.207920 12.235657 6.799349
6236 4nk2A01 168 ARG 121 ARG 5.929658 7.713017 4.786485 5.008049 8.092199
6242 4nk2A01 180 ARG 134 ARG 9.231066 7.957708 8.153050 6.045285 9.704117
6245 4nk2A01 181 ARG 134 ARG 10.823272 7.789191 11.207920 6.799349 12.235657
6273 2vxzA01 12 ARG 40 ARG 9.364072 7.755714 7.212648 10.141613 8.229472
6280 2vxzA01 40 ARG 12 ARG 9.364072 7.755714 7.212648 8.229472 10.141613
6286 3keoA01 20 ARG 24 ARG 6.344525 7.766562 6.501618 7.393022 8.786520
6287 3keoA01 24 ARG 20 ARG 6.344525 7.766562 6.501618 8.786520 7.393022
6308 2xv4S04 364 ARG 367 ARG 5.190940 7.216355 5.590591 4.731094 9.554598
6309 2xv4S04 367 ARG 364 ARG 5.190940 7.216355 5.590591 9.554598 4.731094
6328 1dw0A00 74 ARG 80 ARG 9.569914 7.260593 9.543694 6.539560 9.991794
6329 1dw0A00 80 ARG 74 ARG 9.569914 7.260593 9.543694 9.991794 6.539560
6375 2pgsA01 13 ARG 174 ARG 5.831460 7.758321 5.045290 8.110736 5.387355
6377 2pgsA01 35 ARG 174 ARG 8.768698 7.606865 7.542578 11.540242 5.585772
6393 2pgsA01 174 ARG 13 ARG 5.831460 7.758321 5.045290 5.387355 8.110736
6394 2pgsA01 174 ARG 35 ARG 8.768698 7.606865 7.542578 5.585772 11.540242
6445 3p5mA02 238 ARG 241 ARG 5.327576 7.438775 6.182621 4.434978 8.753421
6447 3p5mA02 241 ARG 238 ARG 5.327576 7.438775 6.182621 8.753421 4.434978
6453 4em2A00 77 ARG 94 ARG 10.680479 7.092186 9.841823 8.147002 9.983551
6455 4em2A00 93 ARG 94 ARG 3.830490 7.355301 5.240642 5.748303 6.802641
6456 4em2A00 94 ARG 93 ARG 3.830490 7.355301 5.240642 6.802641 5.748303
6458 4em2A00 94 ARG 77 ARG 10.680479 7.092186 9.841823 9.983551 8.147002

487 rows × 10 columns


In [46]:
import plotly.express as px
res = "TRP"
data_res = data.query(f"ResName1 == '{res}' and ResName2 == '{res}' and r_com_com < 7.5").reset_index(drop=True)
data_res["r_com_com_plus_r_com_ca"] = data_res["r_com_com"] + data_res["r_com_ca"]
data_res["r_ca_com_plus_r_com_com"] = data_res["r_ca_com"] + data_res["r_com_com"]
print(res, data_res.shape)
# fig = px.scatter_3d(data_res, x='r_com_com_plus_r_com_ca', y='r_ca_com_plus_r_com_com', z='r_ca_com', opacity=0.1)
# fig.show()
sns.scatterplot("r_com_com_plus_r_com_ca", "r_ca_com_plus_r_com_com", size="r_com_com", data=data_res, )


TRP (220, 12)
Out[46]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a37cd7be0>

In [41]:
import plotly.express as px
res = "TRP"
data_res = data.query(f"ResName1 == '{res}' and ResName2 == '{res}'").reset_index(drop=True)
print(res, data_res.shape)
fig = px.scatter_3d(data_res, x='r_ca_ca', y='r_com_com', z='r_ca_com', opacity=0.1)
fig.show()


TRP (703, 10)

In [34]:
data_res = data.query("ResName1 == 'PHE' and ResName2 == 'PHE'").reset_index(drop=True)
sns.scatterplot("r_ca_ca", "r_com_com", data=data_res, )


Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2ac45f28>

In [22]:
data_res = data.query("ResName1 == 'ALA' and ResName2 == 'ALA'").reset_index(drop=True)
sns.scatterplot("r_ca_ca", "r_com_com", data=data_res, )


Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2d574fd0>

In [23]:
data_res = data.query("ResName1 == 'TRP' and ResName2 == 'TRP'").reset_index(drop=True)
sns.scatterplot("r_ca_ca", "r_com_com", data=data_res, )


Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2c529630>

In [24]:
data_res = data.query("ResName1 == 'GLU' and ResName2 == 'GLU'").reset_index(drop=True)
sns.scatterplot("r_ca_ca", "r_com_com", data=data_res, )


Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a25890470>

In [10]:
selected.sort_values("r_com_com")


Out[10]:
Protein id1 ResName1 id2 ResName2 r_ca_ca r_com_com r_cb_cb r_ca_com r_com_ca
80 5ghkA01 90 CYS 101 CYS 5.401658 2.062641 3.647413 3.221539 3.713083
129 1vx4K02 124 GLU 128 THR 4.376815 2.544600 2.075428 2.581292 4.145525
291 1vx4K02 128 THR 124 GLU 4.376815 2.544600 2.075428 4.145525 2.581292
200 1vx4K02 94 LYS 96 LYS 3.893709 2.620520 3.417238 3.268701 4.924048
67 2efwG00 120 ASP 116 LYS 5.544206 2.898653 5.155882 5.334033 4.437231
193 2efwG00 116 LYS 120 ASP 5.544206 2.898653 5.155882 4.437231 5.334033
251 3dv9A02 102 PRO 101 CYS 3.810050 2.910863 4.765211 4.034386 3.855889
89 3dv9A02 101 CYS 102 PRO 3.810050 2.910863 4.765211 3.855889 4.034386
33 1guxB00 775 ARG 773 SER 5.505802 2.941009 4.772800 4.355688 4.886553
267 1guxB00 773 SER 775 ARG 5.505802 2.941009 4.772800 4.886553 4.355688
71 4d1eA04 880 ASP 870 SER 5.716259 3.013373 3.769198 4.403587 4.597980
269 4d1eA04 870 SER 880 ASP 5.716259 3.013373 3.769198 4.597980 4.403587
4 1c07A00 77 ALA 61 CYS 4.441674 3.015272 2.951008 3.794416 4.444079
76 1c07A00 61 CYS 77 ALA 4.441674 3.015272 2.951008 4.444079 3.794416
109 4dckA01 1807 GLN 1888 SER 6.404585 3.024015 3.879412 5.296675 4.778621
271 4dckA01 1888 SER 1807 GLN 6.404585 3.024015 3.879412 4.778621 5.296675
179 1vx4K02 121 LEU 129 ILE 6.739280 3.041119 4.308884 4.989625 4.875346
161 1vx4K02 129 ILE 121 LEU 6.739280 3.041119 4.308884 4.875346 4.989625
345 1vx4K02 143 VAL 107 ASP 5.016043 3.054983 3.879137 4.034955 3.732843
75 1vx4K02 107 ASP 143 VAL 5.016043 3.054983 3.879137 3.732843 4.034955
342 1d2dA00 13 VAL 29 ALA 5.783528 3.069021 3.032873 4.304394 4.415103
18 1d2dA00 29 ALA 13 VAL 5.783528 3.069021 3.032873 4.415103 4.304394
41 2bidA00 117 ASN 124 ASP 6.284988 3.071778 3.390138 5.108782 4.714438
59 2bidA00 124 ASP 117 ASN 6.284988 3.071778 3.390138 4.714438 5.108782
117 3f0cA01 56 GLU 54 ASP 5.302188 3.083951 4.860867 4.813696 4.790518
63 3f0cA01 54 ASP 56 GLU 5.302188 3.083951 4.860867 4.790518 4.813696
279 3otvA02 233 SER 221 PRO 5.053548 3.109163 3.531911 3.911895 3.726473
261 3otvA02 221 PRO 233 SER 5.053548 3.109163 3.531911 3.726473 3.911895
6 3geuA00 164 ALA 167 GLU 5.388519 3.120584 3.915556 4.358684 4.813024
114 3geuA00 167 GLU 164 ALA 5.388519 3.120584 3.915556 4.813024 4.358684
... ... ... ... ... ... ... ... ... ... ...
86 4a2qA02 158 CYS 107 LYS 7.993738 3.869748 6.252692 5.851406 5.666014
298 3lmoA00 22 THR 19 PRO 5.651631 3.870250 4.636888 5.439568 4.594197
262 3lmoA00 19 PRO 22 THR 5.651631 3.870250 4.636888 4.594197 5.439568
175 2jrfA01 10 LEU 100 CYS 5.972121 3.874834 4.365780 5.861402 4.655688
85 2jrfA01 100 CYS 10 LEU 5.972121 3.874834 4.365780 4.655688 5.861402
168 2qkwB02 277 ILE 138 TRP 6.647983 3.878796 5.175632 5.124591 6.156495
312 2qkwB02 138 TRP 277 ILE 6.647983 3.878796 5.175632 6.156495 5.124591
178 2w96B02 271 LEU 292 HIS 7.057615 3.884124 4.721678 4.258191 5.690298
142 2w96B02 292 HIS 271 LEU 7.057615 3.884124 4.721678 5.690298 4.258191
120 1bekA01 267 GLU 271 GLU 5.929452 3.899164 5.696073 4.987894 6.604933
220 2do7A01 840 MET 881 MET 7.771861 3.918445 5.505347 5.043615 6.344501
248 1v97A02 155 PRO 154 ARG 3.852176 3.921745 5.147350 3.638854 4.007482
32 1v97A02 154 ARG 155 PRO 3.852176 3.921745 5.147350 4.007482 3.638854
122 1b22A00 43 GLU 63 ILE 6.792290 3.926044 3.878689 4.754094 6.118848
158 1b22A00 63 ILE 43 GLU 6.792290 3.926044 3.878689 6.118848 4.754094
239 1v97A02 160 PHE 124 MET 8.006330 3.926980 5.640724 5.386110 5.663910
221 1v97A02 124 MET 160 PHE 8.006330 3.926980 5.640724 5.663910 5.386110
216 4n73A00 477 MET 381 HIS 6.098707 3.946083 3.842706 4.199876 5.913135
144 4n73A00 381 HIS 477 MET 6.098707 3.946083 3.842706 5.913135 4.199876
74 3m7gA03 153 ASP 238 TYR 6.537810 3.951164 5.504674 5.285026 4.268128
326 3m7gA03 238 TYR 153 ASP 6.537810 3.951164 5.504674 4.268128 5.285026
208 4du6E01 35 LYS 70 VAL 6.845805 3.977987 5.282276 5.290564 5.398007
352 4du6E01 70 VAL 35 LYS 6.845805 3.977987 5.282276 5.398007 5.290564
181 1xo5B00 59 LEU 70 LYS 6.810952 4.087539 5.389232 5.492944 5.501393
199 1xo5B00 70 LYS 59 LEU 6.810952 4.087539 5.389232 5.501393 5.492944
160 3otvA02 128 ILE 232 ILE 6.876956 4.133772 4.867549 5.527323 5.211950
260 1vx4K02 88 PRO 89 PRO 2.979281 4.159009 3.183561 3.466689 4.368296
165 1w98B01 283 ILE 280 PRO 5.198867 4.178803 4.228227 5.256006 4.933873
255 1w98B01 280 PRO 283 ILE 5.198867 4.178803 4.228227 4.933873 5.256006
320 4ft3A02 221 TRP 231 TRP 8.761823 4.716319 7.419003 5.885756 7.896278

361 rows × 10 columns


In [ ]: