In [89]:
from ain import AtomicInteractionNetwork
from itertools import combinations
from scipy.spatial import Delaunay
from scipy.spatial.distance import euclidean
from Bio.Data import IUPACData

import networkx as nx
import numpy as np
import pandas as pd
%load_ext autoreload

%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [25]:
# Load the model's AIN
ain = AtomicInteractionNetwork('hiv-protease.pdb')

# Grab out only the c_alpha carbons
c_alpha = ain.dataframe[ain.dataframe['atom'] == 'CA'].reset_index(drop=True)
c_alpha.head()


Out[25]:
Record name atom chain_id resi_name resi_num serial_number x y z
0 ATOM CA A PRO 1 2 27.62 21.97 34.55
1 ATOM CA A GLN 2 9 23.85 22.42 34.31
2 ATOM CA A ILE 3 18 22.80 25.57 32.42
3 ATOM CA A THR 4 26 19.32 27.08 32.75
4 ATOM CA A LEU 5 33 17.86 29.18 29.91

In [40]:
# Perform delaunay triangulation on the c_alpha
points = c_alpha[['x', 'y', 'z']]
tri = Delaunay(points)
tri.simplices
len(tri.simplices)


Out[40]:
1108

In [96]:
# Let's get the pairwise distances between each of these atoms, and store them as a graph.

# Initialize a dense graph where distances are all initialized to an empty list.
G = nx.Graph()
aa_letters = 'CHIMSVAGLPTRFYWDNEQK'
assert len(aa_letters) == 20

for l1, l2 in combinations(aa_letters, 2):
    aa1 = IUPACData.protein_letters_1to3[l1].upper()
    aa2 = IUPACData.protein_letters_1to3[l2].upper()
    G.add_edge(aa1, aa2, distances=[])
for aa in G.nodes():
    G.add_edge(aa, aa, distances=[])
    
len(G.edges())


Out[96]:
210

In [97]:
# Note: simplices are essentially an array of indexes. Since we have a 3-D structure,
# simplices are therefore tetrahedrons, with 4 vertices. Hence, they form a graph!
for idxs in tri.simplices:
    for i1, i2 in combinations(idxs, 2):
        resi1 = c_alpha.ix[i1]['resi_name']
        resi2 = c_alpha.ix[i2]['resi_name']

        d = euclidean(c_alpha.ix[i1][['x', 'y', 'z']], c_alpha.ix[i2][['x', 'y', 'z']])

        if not G.has_edge(resi1, resi2):
            G.add_edge(resi1, resi2, distances=[d])
        else:
            G.edge[resi1][resi2]['distances'].append(d)

In [98]:
len(G.edges())


Out[98]:
210

In [99]:
meanG = nx.Graph()
stdG = nx.Graph()

for n1, n2, d in G.edges(data=True):
    if bool(d['distances']) == False:
        meanG.add_edge(n1, n2, mean=0)
        stdG.add_edge(n1, n2, std=0)
    else:
        meanG.add_edge(n1, n2, mean=np.mean(d['distances']))
        stdG.add_edge(n1, n2, std=np.std(d['distances']))

In [100]:
len(meanG.edges(data=True))
df = pd.DataFrame(nx.to_numpy_matrix(meanG, weight='mean'))
df.columns = meanG.nodes()
df.index = df.columns
df


Out[100]:
HIS GLN LYS ASP PHE VAL MET GLY ALA SER GLU CYS ASN THR TYR TRP LEU ILE PRO ARG
HIS 0.000000 0.000000 3.811338 0.000000 9.261974 0.000000 0.000000 6.079234 6.631765 0 5.345816 5.599712 0.000000 0.000000 0.000000 0.000000 0.000000 5.424772 11.069530 0.000000
GLN 0.000000 12.765991 11.606799 8.781458 11.635088 8.666492 15.309219 7.715497 10.605674 0 13.483515 0.000000 7.806816 7.539741 3.801178 8.091691 6.275253 7.563058 10.561918 5.210226
LYS 3.811338 11.606799 12.399570 11.447616 6.959064 5.928550 8.399095 9.563136 5.262083 0 8.845259 8.444490 11.092276 11.750786 7.006069 9.098067 8.992764 6.968282 8.093376 6.517231
ASP 0.000000 8.781458 11.447616 4.383204 0.000000 9.023685 13.101622 6.979768 4.738406 0 0.000000 0.000000 6.834222 5.656951 3.801371 13.591974 5.954400 7.613657 13.738326 6.673331
PHE 9.261974 11.635088 6.959064 0.000000 0.000000 12.403790 6.410256 4.756710 0.000000 0 19.945152 7.003189 3.820359 6.499954 0.000000 0.000000 9.324912 6.724265 9.505342 0.000000
VAL 0.000000 8.666492 5.928550 9.023685 12.403790 6.457422 6.534558 7.239469 6.468121 0 6.865097 6.365086 5.566076 5.252848 6.392949 0.000000 5.996907 7.985957 6.356880 6.542992
MET 0.000000 15.309219 8.399095 13.101622 6.410256 6.534558 0.000000 8.609107 0.000000 0 11.384647 0.000000 3.799947 0.000000 0.000000 0.000000 7.115740 9.559697 12.968967 10.159321
GLY 6.079234 7.715497 9.563136 6.979768 4.756710 7.239469 8.609107 6.663330 7.244108 0 7.193537 3.835992 8.110219 5.717841 7.476415 10.451395 6.789369 6.093597 8.639834 5.818967
ALA 6.631765 10.605674 5.262083 4.738406 0.000000 6.468121 0.000000 7.244108 6.381862 0 4.529139 5.863928 4.562466 6.194166 0.000000 0.000000 5.699519 6.581771 10.594236 6.238392
SER 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
GLU 5.345816 13.483515 8.845259 0.000000 19.945152 6.865097 11.384647 7.193537 4.529139 0 3.824331 6.403360 6.510107 5.460588 0.000000 0.000000 5.158965 5.771524 8.740025 10.054005
CYS 5.599712 0.000000 8.444490 0.000000 7.003189 6.365086 0.000000 3.835992 5.863928 0 6.403360 11.382605 5.356943 6.232267 0.000000 0.000000 7.518198 6.768733 9.346044 0.000000
ASN 0.000000 7.806816 11.092276 6.834222 3.820359 5.566076 3.799947 8.110219 4.562466 0 6.510107 5.356943 7.153978 6.582483 0.000000 9.069261 4.482502 5.724880 8.891612 7.499645
THR 0.000000 7.539741 11.750786 5.656951 6.499954 5.252848 0.000000 5.717841 6.194166 0 5.460588 6.232267 6.582483 6.647234 5.595462 8.054223 6.248171 6.458719 5.932046 6.514236
TYR 0.000000 3.801178 7.006069 3.801371 0.000000 6.392949 0.000000 7.476415 0.000000 0 0.000000 0.000000 0.000000 5.595462 0.000000 4.818950 7.404663 6.817765 8.654916 6.581441
TRP 0.000000 8.091691 9.098067 13.591974 0.000000 0.000000 0.000000 10.451395 0.000000 0 0.000000 0.000000 9.069261 8.054223 4.818950 0.000000 6.737333 8.978569 9.627502 6.426062
LEU 0.000000 6.275253 8.992764 5.954400 9.324912 5.996907 7.115740 6.789369 5.699519 0 5.158965 7.518198 4.482502 6.248171 7.404663 6.737333 6.229065 7.811934 5.427495 6.125040
ILE 5.424772 7.563058 6.968282 7.613657 6.724265 7.985957 9.559697 6.093597 6.581771 0 5.771524 6.768733 5.724880 6.458719 6.817765 8.978569 7.811934 6.786494 7.236777 15.847435
PRO 11.069530 10.561918 8.093376 13.738326 9.505342 6.356880 12.968967 8.639834 10.594236 0 8.740025 9.346044 8.891612 5.932046 8.654916 9.627502 5.427495 7.236777 9.663940 7.172030
ARG 0.000000 5.210226 6.517231 6.673331 0.000000 6.542992 10.159321 5.818967 6.238392 0 10.054005 0.000000 7.499645 6.514236 6.581441 6.426062 6.125040 15.847435 7.172030 8.013873

In [83]:
len(stdG.edges(data=True))


Out[83]:
210

In [ ]:


In [ ]: