notebook.community

Edit and run



In [89]:

    
from ain import AtomicInteractionNetwork
from itertools import combinations
from scipy.spatial import Delaunay
from scipy.spatial.distance import euclidean
from Bio.Data import IUPACData

import networkx as nx
import numpy as np
import pandas as pd
%load_ext autoreload

%autoreload 2









    



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload



In [25]:

    
# Load the model's AIN
ain = AtomicInteractionNetwork('hiv-protease.pdb')

# Grab out only the c_alpha carbons
c_alpha = ain.dataframe[ain.dataframe['atom'] == 'CA'].reset_index(drop=True)
c_alpha.head()









    Out[25]:






  
    
      
      Record name
      atom
      chain_id
      resi_name
      resi_num
      serial_number
      x
      y
      z
    
  
  
    
      0
      ATOM
      CA
      A
      PRO
      1
      2
      27.62
      21.97
      34.55
    
    
      1
      ATOM
      CA
      A
      GLN
      2
      9
      23.85
      22.42
      34.31
    
    
      2
      ATOM
      CA
      A
      ILE
      3
      18
      22.80
      25.57
      32.42
    
    
      3
      ATOM
      CA
      A
      THR
      4
      26
      19.32
      27.08
      32.75
    
    
      4
      ATOM
      CA
      A
      LEU
      5
      33
      17.86
      29.18
      29.91



In [40]:

    
# Perform delaunay triangulation on the c_alpha
points = c_alpha[['x', 'y', 'z']]
tri = Delaunay(points)
tri.simplices
len(tri.simplices)









    Out[40]:





1108



In [96]:

    
# Let's get the pairwise distances between each of these atoms, and store them as a graph.

# Initialize a dense graph where distances are all initialized to an empty list.
G = nx.Graph()
aa_letters = 'CHIMSVAGLPTRFYWDNEQK'
assert len(aa_letters) == 20

for l1, l2 in combinations(aa_letters, 2):
    aa1 = IUPACData.protein_letters_1to3[l1].upper()
    aa2 = IUPACData.protein_letters_1to3[l2].upper()
    G.add_edge(aa1, aa2, distances=[])
for aa in G.nodes():
    G.add_edge(aa, aa, distances=[])
    
len(G.edges())









    Out[96]:





210



In [97]:

    
# Note: simplices are essentially an array of indexes. Since we have a 3-D structure,
# simplices are therefore tetrahedrons, with 4 vertices. Hence, they form a graph!
for idxs in tri.simplices:
    for i1, i2 in combinations(idxs, 2):
        resi1 = c_alpha.ix[i1]['resi_name']
        resi2 = c_alpha.ix[i2]['resi_name']

        d = euclidean(c_alpha.ix[i1][['x', 'y', 'z']], c_alpha.ix[i2][['x', 'y', 'z']])

        if not G.has_edge(resi1, resi2):
            G.add_edge(resi1, resi2, distances=[d])
        else:
            G.edge[resi1][resi2]['distances'].append(d)



In [98]:

    
len(G.edges())









    Out[98]:





210



In [99]:

    
meanG = nx.Graph()
stdG = nx.Graph()

for n1, n2, d in G.edges(data=True):
    if bool(d['distances']) == False:
        meanG.add_edge(n1, n2, mean=0)
        stdG.add_edge(n1, n2, std=0)
    else:
        meanG.add_edge(n1, n2, mean=np.mean(d['distances']))
        stdG.add_edge(n1, n2, std=np.std(d['distances']))



In [100]:

    
len(meanG.edges(data=True))
df = pd.DataFrame(nx.to_numpy_matrix(meanG, weight='mean'))
df.columns = meanG.nodes()
df.index = df.columns
df



In [83]:

    
len(stdG.edges(data=True))









    Out[83]:





210



In [ ]:



In [ ]:

	HIS	GLN	LYS	ASP	PHE	VAL	MET	GLY	ALA	GLU	CYS	ASN	THR	TYR	TRP	LEU	ILE	PRO	ARG
HIS	0.000000	0.000000	3.811338	0.000000	9.261974	0.000000	0.000000	6.079234	6.631765	5.345816	5.599712	0.000000	0.000000	0.000000	0.000000	0.000000	5.424772	11.069530	0.000000
GLN	0.000000	12.765991	11.606799	8.781458	11.635088	8.666492	15.309219	7.715497	10.605674	13.483515	0.000000	7.806816	7.539741	3.801178	8.091691	6.275253	7.563058	10.561918	5.210226
LYS	3.811338	11.606799	12.399570	11.447616	6.959064	5.928550	8.399095	9.563136	5.262083	8.845259	8.444490	11.092276	11.750786	7.006069	9.098067	8.992764	6.968282	8.093376	6.517231
ASP	0.000000	8.781458	11.447616	4.383204	0.000000	9.023685	13.101622	6.979768	4.738406	0.000000	0.000000	6.834222	5.656951	3.801371	13.591974	5.954400	7.613657	13.738326	6.673331
PHE	9.261974	11.635088	6.959064	0.000000	0.000000	12.403790	6.410256	4.756710	0.000000	19.945152	7.003189	3.820359	6.499954	0.000000	0.000000	9.324912	6.724265	9.505342	0.000000
VAL	0.000000	8.666492	5.928550	9.023685	12.403790	6.457422	6.534558	7.239469	6.468121	6.865097	6.365086	5.566076	5.252848	6.392949	0.000000	5.996907	7.985957	6.356880	6.542992
MET	0.000000	15.309219	8.399095	13.101622	6.410256	6.534558	0.000000	8.609107	0.000000	11.384647	0.000000	3.799947	0.000000	0.000000	0.000000	7.115740	9.559697	12.968967	10.159321
GLY	6.079234	7.715497	9.563136	6.979768	4.756710	7.239469	8.609107	6.663330	7.244108	7.193537	3.835992	8.110219	5.717841	7.476415	10.451395	6.789369	6.093597	8.639834	5.818967
ALA	6.631765	10.605674	5.262083	4.738406	0.000000	6.468121	0.000000	7.244108	6.381862	4.529139	5.863928	4.562466	6.194166	0.000000	0.000000	5.699519	6.581771	10.594236	6.238392
SER	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
GLU	5.345816	13.483515	8.845259	0.000000	19.945152	6.865097	11.384647	7.193537	4.529139	3.824331	6.403360	6.510107	5.460588	0.000000	0.000000	5.158965	5.771524	8.740025	10.054005
CYS	5.599712	0.000000	8.444490	0.000000	7.003189	6.365086	0.000000	3.835992	5.863928	6.403360	11.382605	5.356943	6.232267	0.000000	0.000000	7.518198	6.768733	9.346044	0.000000
ASN	0.000000	7.806816	11.092276	6.834222	3.820359	5.566076	3.799947	8.110219	4.562466	6.510107	5.356943	7.153978	6.582483	0.000000	9.069261	4.482502	5.724880	8.891612	7.499645
THR	0.000000	7.539741	11.750786	5.656951	6.499954	5.252848	0.000000	5.717841	6.194166	5.460588	6.232267	6.582483	6.647234	5.595462	8.054223	6.248171	6.458719	5.932046	6.514236
TYR	0.000000	3.801178	7.006069	3.801371	0.000000	6.392949	0.000000	7.476415	0.000000	0.000000	0.000000	0.000000	5.595462	0.000000	4.818950	7.404663	6.817765	8.654916	6.581441
TRP	0.000000	8.091691	9.098067	13.591974	0.000000	0.000000	0.000000	10.451395	0.000000	0.000000	0.000000	9.069261	8.054223	4.818950	0.000000	6.737333	8.978569	9.627502	6.426062
LEU	0.000000	6.275253	8.992764	5.954400	9.324912	5.996907	7.115740	6.789369	5.699519	5.158965	7.518198	4.482502	6.248171	7.404663	6.737333	6.229065	7.811934	5.427495	6.125040
ILE	5.424772	7.563058	6.968282	7.613657	6.724265	7.985957	9.559697	6.093597	6.581771	5.771524	6.768733	5.724880	6.458719	6.817765	8.978569	7.811934	6.786494	7.236777	15.847435
PRO	11.069530	10.561918	8.093376	13.738326	9.505342	6.356880	12.968967	8.639834	10.594236	8.740025	9.346044	8.891612	5.932046	8.654916	9.627502	5.427495	7.236777	9.663940	7.172030
ARG	0.000000	5.210226	6.517231	6.673331	0.000000	6.542992	10.159321	5.818967	6.238392	10.054005	0.000000	7.499645	6.514236	6.581441	6.426062	6.125040	15.847435	7.172030	8.013873

	Record name	atom	chain_id	resi_name	resi_num	serial_number	x	y	z
0	ATOM	CA	A	PRO	1	2	27.62	21.97	34.55
1	ATOM	CA	A	GLN	2	9	23.85	22.42	34.31
2	ATOM	CA	A	ILE	3	18	22.80	25.57	32.42
3	ATOM	CA	A	THR	4	26	19.32	27.08	32.75
4	ATOM	CA	A	LEU	5	33	17.86	29.18	29.91