notebook.community

Edit and run



In [1]:

    
import os
import sys
import random
import time
from random import seed, randint
import argparse
import platform
from datetime import datetime
import imp
import numpy as np
import fileinput
from itertools import product
import pandas as pd
from scipy.interpolate import griddata
from scipy.interpolate import interp2d
import seaborn as sns
from os import listdir

import matplotlib.pyplot as plt
import seaborn as sns
from scipy.interpolate import griddata
import matplotlib as mpl
# sys.path.insert(0,'..')
# from notebookFunctions import *
# from .. import notebookFunctions

from Bio.PDB.PDBParser import PDBParser
from pyCodeLib import *
%matplotlib inline
# plt.rcParams['figure.figsize'] = (10,6.180)    #golden ratio
# %matplotlib notebook
%load_ext autoreload
%autoreload 2



In [2]:

    
plt.rcParams['figure.figsize'] = [16.18033, 10]    #golden ratio
plt.rcParams['figure.facecolor'] = 'w'
plt.rcParams['figure.dpi'] = 100



In [3]:

    
a = "/Users/weilu/Research/server/may_2019/multi_iter0/database/dompdb/1EG4A_89-207.pdb"



In [3]:

    
import glob



In [4]:

    
a_list = glob.glob("/Users/weilu/Research/server/may_2019/multi_iter0/database/dompdb/*.pdb")



In [16]:

    
len(a_list)









    Out[16]:





1825



In [71]:

    
parser = PDBParser()
all_freq = np.zeros(100)
for i, a in enumerate(a_list):
    structure = parser.get_structure("x", a)
    res_list = get_res_list(structure)
    neighbor_list = get_neighbor_list(structure)
    # sequence = get_sequence_from_structure(structure)
    cb_density = calculate_cb_density(res_list, neighbor_list)
    cb_density = np.clip(cb_density, 0, 10)
    freq, bins = np.histogram(cb_density, bins=100, range=(0,10))
    all_freq += freq
#     if i == 100:
#         break



In [72]:

    
cb_all_freq = all_freq
cb_bins = bins



In [80]:

    
plt.plot(cb_bins[1:], cb_all_freq)
plt.xlim(0,5)









    Out[80]:





(0, 5)



In [73]:

    
parser = PDBParser()
largest = 20
all_freq = np.zeros(largest*10)
for i, a in enumerate(a_list):
    structure = parser.get_structure("x", a)
    res_list = get_res_list(structure)
    neighbor_list = get_neighbor_list(structure)
    # sequence = get_sequence_from_structure(structure)
    cb_density = calculate_cb_weight_density(res_list, neighbor_list)
    cb_density = np.clip(cb_density, 0, largest)
    freq, bins = np.histogram(cb_density, bins=largest*10, range=(0,largest))
    all_freq += freq
#     if i == 10:
#         break



In [74]:

    
weight_all_freq = all_freq
weight_bins = bins



In [81]:

    
plt.plot(weight_bins[2:], weight_all_freq[1:])









    Out[81]:





[<matplotlib.lines.Line2D at 0x1a19658048>]



In [83]:

    
plt.plot(weight_bins[2:], weight_all_freq[1:])
plt.xlim(2,4)









    Out[83]:





(2, 4)



In [10]:

    
parser = PDBParser()
largest = 600
all_freq = np.zeros(200)
for i, a in enumerate(a_list):
    structure = parser.get_structure("x", a)
    res_list = get_res_list(structure)
    neighbor_list = get_neighbor_list(structure)
    # sequence = get_sequence_from_structure(structure)
    cb_density = calculate_cb_weight_density_2(res_list, neighbor_list)
    cb_density = np.clip(cb_density, 0, largest)
    freq, bins = np.histogram(cb_density, bins=200, range=(0,largest))
    all_freq += freq
    if i == 100:
        break



In [11]:

    
weight_2_all_freq = all_freq
weight_2_bins = bins



In [12]:

    
plt.plot(weight_2_bins[2:], weight_2_all_freq[1:])









    Out[12]:





[<matplotlib.lines.Line2D at 0x1a20a14c88>]



In [5]:

    
def calculate_cb_weight_density_2(res_list, neighbor_list, min_seq_sep=2):
    weight_info = pd.read_csv("~/opt/parameters/amino_acid_side_chain_weight", comment="#", sep="\s+")
    weight_info["normalized_weight"] = weight_info["weight"] /(weight_info["weight"].min())
    weight_info["sideChainWeight"] = weight_info["weight"] - 56
    weight_info["normalized_weight_2"] =  (weight_info["sideChainWeight"]  /(weight_info["sideChainWeight"].min()))

    num_residues = len(res_list)
    density = np.zeros(num_residues)
    for res1globalindex, res1 in enumerate(res_list):
        res1index = get_local_index(res1)
        res1chain = get_chain(res1)
        for res2 in get_neighbors_within_radius(neighbor_list, res1, 9.0):
            res2index = get_local_index(res2)
            res2chain = get_chain(res2)
            res2globalindex = get_global_index(res_list, res2)
            if abs(res2index - res1index) >= min_seq_sep or (res1chain != res2chain):
                rij = get_interaction_distance(res1, res2)
                res2type = three_to_one(res2.get_resname())
                try:
                    # weight = float(weight_info.query(f"oneLetterCode == '{res2type}'")["normalized_weight"])
                    weight = float(weight_info.query(f"oneLetterCode == '{res2type}'")["normalized_weight_2"])
                except:
                    print(res2)
                    print(res2type)
                    print(weight_info.query(f"oneLetterCode == '{res2type}'")["normalized_weight"])
                density[res1globalindex] += weight*interaction_well(rij, 4.5, 6.5, 5)
    return density



In [100]:

    
parser = PDBParser()
all_freq = np.zeros(100)
largest = 20
all_ = []
for i, a in enumerate(a_list):
    structure = parser.get_structure("x", a)
    res_list = get_res_list(structure)
    neighbor_list = get_neighbor_list(structure)
    # sequence = get_sequence_from_structure(structure)
    cb_density = calculate_cb_density(res_list, neighbor_list)
    cb_density = np.clip(cb_density, 0, 10)
    cb_density_weight = calculate_cb_weight_density(res_list, neighbor_list)
    cb_density_weight_2 = calculate_cb_weight_density_2(res_list, neighbor_list)
    cb_density_weight_2 = np.clip(cb_density_weight, 0, 500)
    tmp =pd.DataFrame([cb_density, cb_density_weight, cb_density_weight_2]).T
    all_.append(tmp)
    freq, bins = np.histogram(cb_density, bins=100, range=(0,10))
    all_freq += freq
#     break
    if i == 10:
        break
data = pd.concat(all_)



In [102]:

    
data.columns = ["density", "weight_density", "weight_density_2"]



In [103]:

    
data.plot.scatter("density", "weight_density")









    Out[103]:





<matplotlib.axes._subplots.AxesSubplot at 0x1a1ac19390>



In [104]:

    
data.plot.scatter("density", "weight_density_2")









    Out[104]:





<matplotlib.axes._subplots.AxesSubplot at 0x1a1c1b0710>



In [106]:

    
plt.hist(data["weight_density_2"],bins=100)









    Out[106]:





(array([66.,  8., 10.,  4.,  6.,  8., 10.,  9., 18.,  8., 26., 22., 10.,
        32., 12., 13., 14., 17., 20., 25., 20., 24., 22., 18., 23., 26.,
        19., 19., 33., 20., 25., 17., 21., 22., 17., 21., 19., 13., 21.,
        22., 12., 18., 21., 19., 25., 15., 14., 12., 17., 15., 15., 10.,
         7.,  7., 15., 14.,  9., 10., 11.,  8.,  8.,  8.,  9.,  9.,  9.,
         4.,  8.,  1.,  4.,  3.,  3.,  4.,  2.,  4.,  3.,  2.,  1.,  0.,
         3.,  0.,  1.,  2.,  1.,  0.,  0.,  3.,  1.,  2.,  0.,  0.,  0.,
         0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  1.]),
 array([3.74212048e-09, 5.10384723e+00, 1.02076945e+01, 1.53115417e+01,
        2.04153889e+01, 2.55192361e+01, 3.06230834e+01, 3.57269306e+01,
        4.08307778e+01, 4.59346250e+01, 5.10384723e+01, 5.61423195e+01,
        6.12461667e+01, 6.63500139e+01, 7.14538612e+01, 7.65577084e+01,
        8.16615556e+01, 8.67654028e+01, 9.18692501e+01, 9.69730973e+01,
        1.02076945e+02, 1.07180792e+02, 1.12284639e+02, 1.17388486e+02,
        1.22492333e+02, 1.27596181e+02, 1.32700028e+02, 1.37803875e+02,
        1.42907722e+02, 1.48011570e+02, 1.53115417e+02, 1.58219264e+02,
        1.63323111e+02, 1.68426958e+02, 1.73530806e+02, 1.78634653e+02,
        1.83738500e+02, 1.88842347e+02, 1.93946195e+02, 1.99050042e+02,
        2.04153889e+02, 2.09257736e+02, 2.14361583e+02, 2.19465431e+02,
        2.24569278e+02, 2.29673125e+02, 2.34776972e+02, 2.39880820e+02,
        2.44984667e+02, 2.50088514e+02, 2.55192361e+02, 2.60296209e+02,
        2.65400056e+02, 2.70503903e+02, 2.75607750e+02, 2.80711597e+02,
        2.85815445e+02, 2.90919292e+02, 2.96023139e+02, 3.01126986e+02,
        3.06230834e+02, 3.11334681e+02, 3.16438528e+02, 3.21542375e+02,
        3.26646222e+02, 3.31750070e+02, 3.36853917e+02, 3.41957764e+02,
        3.47061611e+02, 3.52165459e+02, 3.57269306e+02, 3.62373153e+02,
        3.67477000e+02, 3.72580847e+02, 3.77684695e+02, 3.82788542e+02,
        3.87892389e+02, 3.92996236e+02, 3.98100084e+02, 4.03203931e+02,
        4.08307778e+02, 4.13411625e+02, 4.18515473e+02, 4.23619320e+02,
        4.28723167e+02, 4.33827014e+02, 4.38930861e+02, 4.44034709e+02,
        4.49138556e+02, 4.54242403e+02, 4.59346250e+02, 4.64450098e+02,
        4.69553945e+02, 4.74657792e+02, 4.79761639e+02, 4.84865486e+02,
        4.89969334e+02, 4.95073181e+02, 5.00177028e+02, 5.05280875e+02,
        5.10384723e+02]),
 <a list of 100 Patch objects>)



In [ ]:



In [ ]:



In [98]:

    
data.plot.scatter("density", "weight_density")









    Out[98]:





<matplotlib.axes._subplots.AxesSubplot at 0x1a1a9687b8>



In [22]:

    
k_bin = 0.1
dz = np.linspace(-50,50)
y = 0.5*(1+np.tanh(k_bin*(dz-15)))



In [23]:

    
plt.plot(dz, y)









    Out[23]:





[<matplotlib.lines.Line2D at 0x10d213550>]



In [24]:

    
memb_b = 15
y = 0.5*((np.tanh(k_bin*(dz+memb_b)))+(np.tanh(k_bin*(memb_b-dz))))
plt.plot(dz, y)









    Out[24]:





[<matplotlib.lines.Line2D at 0x1a211e2630>]



In [25]:

    
y = 0.5*(1+np.tanh(k_bin*(-memb_b-dz)))
plt.plot(dz, y)









    Out[25]:





[<matplotlib.lines.Line2D at 0x1a20f67278>]



In [88]:

    
tmp =pd.DataFrame([cb_density, cb_density_weight]).T



In [89]:

    
pd.concat([tmp,tmp])









    Out[89]:







  
    
      
      0
      1
    
  
  
    
      0
      2.993022
      4.727698
    
    
      1
      1.806508
      3.814755
    
    
      2
      2.309954
      4.959676
    
    
      3
      3.308095
      6.645009
    
    
      4
      2.786939
      5.509010
    
    
      5
      0.556119
      1.127559
    
    
      6
      4.274126
      8.589912
    
    
      7
      3.618283
      7.625045
    
    
      8
      2.179394
      5.045787
    
    
      9
      2.978000
      5.743504
    
    
      10
      6.036262
      10.049988
    
    
      11
      3.014058
      4.255542
    
    
      12
      2.063547
      3.078392
    
    
      13
      5.444406
      10.074953
    
    
      14
      3.895277
      9.152198
    
    
      15
      2.996748
      6.976362
    
    
      16
      3.273656
      7.040075
    
    
      17
      4.272850
      7.956552
    
    
      18
      1.021981
      2.306800
    
    
      19
      2.000452
      2.502336
    
    
      20
      5.718506
      11.978154
    
    
      21
      2.038231
      5.680214
    
    
      22
      0.000002
      0.000004
    
    
      23
      2.704234
      5.974667
    
    
      24
      3.892721
      8.555460
    
    
      25
      1.043569
      1.140616
    
    
      26
      3.874254
      7.605833
    
    
      27
      3.974217
      8.459827
    
    
      28
      2.002529
      4.268617
    
    
      29
      2.146544
      3.843615
    
    
      ...
      ...
      ...
    
    
      97
      2.132946
      4.298472
    
    
      98
      1.146695
      1.354785
    
    
      99
      5.620678
      12.370013
    
    
      100
      4.708714
      10.103140
    
    
      101
      2.754802
      5.546452
    
    
      102
      4.245172
      8.164826
    
    
      103
      4.116744
      8.051728
    
    
      104
      2.088871
      4.727190
    
    
      105
      1.958469
      4.572726
    
    
      106
      2.231003
      4.851934
    
    
      107
      1.899259
      4.469852
    
    
      108
      2.135395
      3.593609
    
    
      109
      1.749161
      3.515641
    
    
      110
      2.933344
      6.163593
    
    
      111
      3.960450
      8.535304
    
    
      112
      3.197874
      6.142908
    
    
      113
      2.968733
      5.632736
    
    
      114
      2.978796
      5.922940
    
    
      115
      3.027892
      5.310409
    
    
      116
      4.293823
      8.862601
    
    
      117
      4.051008
      7.655233
    
    
      118
      2.017220
      3.268619
    
    
      119
      2.800006
      6.121992
    
    
      120
      4.012869
      8.511885
    
    
      121
      3.117258
      5.572198
    
    
      122
      2.163315
      4.207571
    
    
      123
      2.047200
      4.296116
    
    
      124
      2.529712
      6.138667
    
    
      125
      3.155336
      6.764125
    
    
      126
      0.182430
      0.470642
    
  

254 rows × 2 columns



In [ ]:

    
values = numpy.arange(10, dtype=int)
bins = numpy.arange(-1, 11)
freq, bins = numpy.histogram(values, bins)



In [38]:

    
freq, bins = np.histogram([-1,1,11], bins=100, range=(0,10))



In [33]:

    
freq.shape









    Out[33]:





(100,)



In [41]:

    
all_freq









    Out[41]:





array([669.,  78.,  56.,  49.,  52.,  50.,  48.,  68., 101., 503., 523.,
       200., 125., 147., 140., 168., 168., 187., 263., 697., 567., 248.,
       217., 222., 213., 226., 216., 272., 345., 656., 459., 264., 274.,
       244., 240., 287., 263., 279., 366., 486., 341., 280., 210., 212.,
       196., 207., 209., 235., 266., 298., 240., 200., 170., 155., 119.,
       173., 151., 170., 150., 175., 130.,  92.,  84.,  76.,  82.,  59.,
        73.,  68.,  67.,  71.,  43.,  31.,  29.,  25.,  31.,  27.,  25.,
        22.,  24.,  14.,  11.,  11.,   7.,  12.,   2.,   3.,   7.,   6.,
         5.,   4.,   0.,   0.,   1.,   1.,   1.,   0.,   1.,   0.,   1.,
         2.])



In [37]:

    
bins









    Out[37]:





array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ,
        1.1,  1.2,  1.3,  1.4,  1.5,  1.6,  1.7,  1.8,  1.9,  2. ,  2.1,
        2.2,  2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,  3.1,  3.2,
        3.3,  3.4,  3.5,  3.6,  3.7,  3.8,  3.9,  4. ,  4.1,  4.2,  4.3,
        4.4,  4.5,  4.6,  4.7,  4.8,  4.9,  5. ,  5.1,  5.2,  5.3,  5.4,
        5.5,  5.6,  5.7,  5.8,  5.9,  6. ,  6.1,  6.2,  6.3,  6.4,  6.5,
        6.6,  6.7,  6.8,  6.9,  7. ,  7.1,  7.2,  7.3,  7.4,  7.5,  7.6,
        7.7,  7.8,  7.9,  8. ,  8.1,  8.2,  8.3,  8.4,  8.5,  8.6,  8.7,
        8.8,  8.9,  9. ,  9.1,  9.2,  9.3,  9.4,  9.5,  9.6,  9.7,  9.8,
        9.9, 10. ])



In [45]:

    
plt.plot(bins[1:], all_freq)









    Out[45]:





[<matplotlib.lines.Line2D at 0x1a17a35d68>]



In [46]:

    
plt.plot(bins[1:], all_freq)
plt.xlim(2,3)









    Out[46]:





(2, 3)



In [36]:

    
all_freq









    Out[36]:





array([669.,  78.,  56.,  49.,  52.,  50.,  48.,  68., 101., 503., 523.,
       200., 125., 147., 140., 168., 168., 187., 263., 697., 567., 248.,
       217., 222., 213., 226., 216., 272., 345., 656., 459., 264., 274.,
       244., 240., 287., 263., 279., 366., 486., 341., 280., 210., 212.,
       196., 207., 209., 235., 266., 298., 240., 200., 170., 155., 119.,
       173., 151., 170., 150., 175., 130.,  92.,  84.,  76.,  82.,  59.,
        73.,  68.,  67.,  71.,  43.,  31.,  29.,  25.,  31.,  27.,  25.,
        22.,  24.,  14.,  11.,  11.,   7.,  12.,   2.,   3.,   7.,   6.,
         5.,   4.,   0.,   0.,   1.,   1.,   1.,   0.,   1.,   0.,   1.,
         1.])



In [35]:

    
plt.plot(bins[1:], all_freq)









    Out[35]:





[<matplotlib.lines.Line2D at 0x1a18723b00>]



In [29]:

    
plt.plot(bins[1:], all_freq)









    Out[29]:





[<matplotlib.lines.Line2D at 0x1a177c69e8>]



In [19]:

    
len(cb_density)









    Out[19]:





119



In [12]:

    
_ = plt.hist(cb_density, bins=100)



In [ ]:

	0	1
0	2.993022	4.727698
1	1.806508	3.814755
2	2.309954	4.959676
3	3.308095	6.645009
4	2.786939	5.509010
5	0.556119	1.127559
6	4.274126	8.589912
7	3.618283	7.625045
8	2.179394	5.045787
9	2.978000	5.743504
10	6.036262	10.049988
11	3.014058	4.255542
12	2.063547	3.078392
13	5.444406	10.074953
14	3.895277	9.152198
15	2.996748	6.976362
16	3.273656	7.040075
17	4.272850	7.956552
18	1.021981	2.306800
19	2.000452	2.502336
20	5.718506	11.978154
21	2.038231	5.680214
22	0.000002	0.000004
23	2.704234	5.974667
24	3.892721	8.555460
25	1.043569	1.140616
26	3.874254	7.605833
27	3.974217	8.459827
28	2.002529	4.268617
29	2.146544	3.843615
...	...	...
97	2.132946	4.298472
98	1.146695	1.354785
99	5.620678	12.370013
100	4.708714	10.103140
101	2.754802	5.546452
102	4.245172	8.164826
103	4.116744	8.051728
104	2.088871	4.727190
105	1.958469	4.572726
106	2.231003	4.851934
107	1.899259	4.469852
108	2.135395	3.593609
109	1.749161	3.515641
110	2.933344	6.163593
111	3.960450	8.535304
112	3.197874	6.142908
113	2.968733	5.632736
114	2.978796	5.922940
115	3.027892	5.310409
116	4.293823	8.862601
117	4.051008	7.655233
118	2.017220	3.268619
119	2.800006	6.121992
120	4.012869	8.511885
121	3.117258	5.572198
122	2.163315	4.207571
123	2.047200	4.296116
124	2.529712	6.138667
125	3.155336	6.764125
126	0.182430	0.470642