In [89]:
from ain import AtomicInteractionNetwork
from itertools import combinations
from scipy.spatial import Delaunay
from scipy.spatial.distance import euclidean
from Bio.Data import IUPACData
import networkx as nx
import numpy as np
import pandas as pd
%load_ext autoreload
%autoreload 2
In [25]:
# Load the model's AIN
ain = AtomicInteractionNetwork('hiv-protease.pdb')
# Grab out only the c_alpha carbons
c_alpha = ain.dataframe[ain.dataframe['atom'] == 'CA'].reset_index(drop=True)
c_alpha.head()
Out[25]:
In [40]:
# Perform delaunay triangulation on the c_alpha
points = c_alpha[['x', 'y', 'z']]
tri = Delaunay(points)
tri.simplices
len(tri.simplices)
Out[40]:
In [96]:
# Let's get the pairwise distances between each of these atoms, and store them as a graph.
# Initialize a dense graph where distances are all initialized to an empty list.
G = nx.Graph()
aa_letters = 'CHIMSVAGLPTRFYWDNEQK'
assert len(aa_letters) == 20
for l1, l2 in combinations(aa_letters, 2):
aa1 = IUPACData.protein_letters_1to3[l1].upper()
aa2 = IUPACData.protein_letters_1to3[l2].upper()
G.add_edge(aa1, aa2, distances=[])
for aa in G.nodes():
G.add_edge(aa, aa, distances=[])
len(G.edges())
Out[96]:
In [97]:
# Note: simplices are essentially an array of indexes. Since we have a 3-D structure,
# simplices are therefore tetrahedrons, with 4 vertices. Hence, they form a graph!
for idxs in tri.simplices:
for i1, i2 in combinations(idxs, 2):
resi1 = c_alpha.ix[i1]['resi_name']
resi2 = c_alpha.ix[i2]['resi_name']
d = euclidean(c_alpha.ix[i1][['x', 'y', 'z']], c_alpha.ix[i2][['x', 'y', 'z']])
if not G.has_edge(resi1, resi2):
G.add_edge(resi1, resi2, distances=[d])
else:
G.edge[resi1][resi2]['distances'].append(d)
In [98]:
len(G.edges())
Out[98]:
In [99]:
meanG = nx.Graph()
stdG = nx.Graph()
for n1, n2, d in G.edges(data=True):
if bool(d['distances']) == False:
meanG.add_edge(n1, n2, mean=0)
stdG.add_edge(n1, n2, std=0)
else:
meanG.add_edge(n1, n2, mean=np.mean(d['distances']))
stdG.add_edge(n1, n2, std=np.std(d['distances']))
In [100]:
len(meanG.edges(data=True))
df = pd.DataFrame(nx.to_numpy_matrix(meanG, weight='mean'))
df.columns = meanG.nodes()
df.index = df.columns
df
Out[100]:
In [83]:
len(stdG.edges(data=True))
Out[83]:
In [ ]:
In [ ]: