In [1]:
from __future__ import print_function

import math
import timeit

from Bio import PDB

In [2]:
repository = PDB.PDBList()
parser = PDB.PDBParser()
repository.retrieve_pdb_file('1TUP', pdir='.')
p53_1tup = parser.get_structure('P 53', 'pdb1tup.ent')


Structure exists: './pdb1tup.ent' 
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6146.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6147.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6148.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain E is discontinuous at line 6149.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain F is discontinuous at line 6171.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6185.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6383.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6453.
  PDBConstructionWarning)

In [3]:
zns = []
for atom in p53_1tup.get_atoms():
    if atom.element == 'ZN':
        #print(atom, dir(atom), atom.mass, atom.element, atom.coord[0])
        zns.append(atom)
for zn in zns:
        print(zn, zn.coord)


<Atom ZN> [ 58.10800171  23.24200058  57.42399979]
<Atom ZN> [ 60.10800171  17.9810009   75.93099976]
<Atom ZN> [ 33.65299988   0.403       74.11499786]

In [4]:
#Suggest a pymol viewing

In [5]:
#Try this in numba?
def get_closest_atoms(pdb_struct, ref_atom, distance):
    atoms = {}
    rx, ry, rz = ref_atom.coord
    for atom in pdb_struct.get_atoms():
        if atom == ref_atom:
            continue
        x, y, z = atom.coord
        my_dist = math.sqrt((x - rx)**2 + (y - ry)**2 + (z - rz)**2) 
        if my_dist < distance:
            atoms[atom] = my_dist
    return atoms

In [6]:
for zn in zns:
    print()
    print(zn.coord)
    atoms = get_closest_atoms(p53_1tup, zn, 4)
    for atom, distance in atoms.items():
        print(atom.element, distance, atom.coord)


[ 58.10800171  23.24200058  57.42399979]
S 2.27892096249 [ 60.31700134  23.31800079  57.97900009]
C 2.92437196013 [ 56.99300003  23.94300079  54.81299973]
C 3.40801176963 [ 57.77000046  21.2140007   60.14199829]
S 2.32622437996 [ 57.06499863  21.45199966  58.48199844]
C 3.62725094648 [ 61.61000061  24.08699989  57.00099945]
C 3.85772919812 [ 61.14799881  25.06100082  55.89699936]
C 3.45665374923 [ 58.88600159  20.86700058  55.0359993 ]
C 3.08721447067 [ 57.20500183  25.09900093  59.71900177]
C 3.06412055976 [ 58.04700089  22.03800011  54.60699844]
S 2.22531584465 [ 56.91400146  25.05400085  57.91699982]
N 1.99182735373 [ 57.75500107  23.07299995  55.47100067]

[ 60.10800171  17.9810009   75.93099976]
C 2.98523321742 [ 61.33200073  20.38199997  74.64700317]
C 3.80512639027 [ 62.57300186  18.26300049  78.81600189]
S 2.24232090692 [ 58.97800064  19.40200043  77.24700165]
C 3.41769274437 [ 57.5929985   15.78299999  75.20700073]
C 3.18032005125 [ 61.52099991  17.13599968  78.65200043]
S 2.32547215821 [ 58.58599854  17.08200073  74.41999817]
C 3.46720709671 [ 62.27199936  17.17399979  73.34500122]
C 3.2038921042 [ 57.62400055  18.41699982  77.90699768]
C 3.11391347252 [ 62.06100082  18.61499977  73.58999634]
N 2.05645999722 [ 61.36600113  19.05599976  74.70999908]
S 2.20704048852 [ 61.28699875  16.4470005   76.99299622]

[ 33.65299988   0.403       74.11499786]
C 3.40762621232 [ 34.44200134  -0.639       77.26200104]
C 3.203789888 [ 35.56900024   2.89400005  73.49199677]
C 3.42690033588 [ 31.43499947  -1.55700004  72.38800049]
N 3.8418381161 [ 32.61999893  -3.26699996  73.64199829]
S 2.37880142799 [ 32.94200134  -0.60699999  72.08200073]
S 2.13150441609 [ 32.39099884   1.93900001  74.88400269]
C 3.15756814671 [ 36.18299866  -0.46900001  72.43900299]
C 3.07685585317 [ 30.81999969   1.08200002  75.10500336]
C 2.98511466119 [ 36.22499847   0.98000002  72.71399689]
S 2.32042002279 [ 34.45299911  -1.23300004  75.5530014 ]
N 2.09591300499 [ 35.24000168   1.60300004  73.45600128]
C 3.94050478004 [ 35.47399902   0.46200001  77.60900116]

In [7]:
for distance in [1, 2, 4, 8, 16, 32, 64, 128]:
    my_atoms = []
    for zn in zns:
        atoms = get_closest_atoms(p53_1tup, zn, distance)
        my_atoms.append(len(atoms))
    print(distance, my_atoms)


1 [0, 0, 0]
2 [1, 0, 0]
4 [11, 11, 12]
8 [109, 113, 106]
16 [523, 721, 487]
32 [2381, 3493, 2053]
64 [5800, 5827, 5501]
128 [5827, 5827, 5827]

In [8]:
nexecs = 10
print(timeit.timeit('get_closest_atoms(p53_1tup, zns[0], 4.0)',
                    'from __main__ import get_closest_atoms, p53_1tup, zns',
                    number=nexecs) / nexecs * 1000)


88.2678031921

In [9]:
def get_closest_alternative(pdb_struct, ref_atom, distance):
    atoms = {}
    rx, ry, rz = ref_atom.coord
    for atom in pdb_struct.get_atoms():
        if atom == ref_atom:
            continue
        x, y, z = atom.coord
        if abs(x - rx) > distance or abs(y - ry) > distance or abs(z - rz) > distance:
            continue
        my_dist = math.sqrt((x - rx)**2 + (y - ry)**2 + (z - rz)**2) 
        if my_dist < distance:
            atoms[atom] = my_dist
    return atoms

In [10]:
print(timeit.timeit('get_closest_alternative(p53_1tup, zns[0], 4.0)',
                    'from __main__ import get_closest_alternative, p53_1tup, zns',
                    number=nexecs) / nexecs * 1000)


38.8225793839

In [11]:
print('Standard')
for distance in [1, 4, 16, 64, 128]:
    print(timeit.timeit('get_closest_atoms(p53_1tup, zns[0], distance)',
                        'from __main__ import get_closest_atoms, p53_1tup, zns, distance',
                        number=nexecs) / nexecs * 1000)
print('Optimized')
for distance in [1, 4, 16, 64, 128]:
    print(timeit.timeit('get_closest_alternative(p53_1tup, zns[0], distance)',
                        'from __main__ import get_closest_alternative, p53_1tup, zns, distance',
                        number=nexecs) / nexecs * 1000)


Standard
85.9097957611
85.0714921951
84.3131065369
85.9227895737
85.7550859451
Optimized
35.1483106613
37.5775098801
58.0827951431
135.914206505
133.298301697

In [ ]:


In [12]:
#for interesting distances