In [1]:
from __future__ import print_function


import numpy as np
import pandas as pd

from Bio import PDB

In [2]:
#!rm -f 1tup.cif 2>/dev/null
#!wget "http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=cif&compression=NO&structureId=1TUP" -O 1tup.cif
#parser = PDB.MMCIFParser()
#p53_1tup = parser.get_structure('P53', '1tup.cif')

In [3]:
repository = PDB.PDBList()
parser = PDB.PDBParser()
repository.retrieve_pdb_file('1TUP', pdir='.')
p53_1tup = parser.get_structure('P 53', 'pdb1tup.ent')


Structure exists: './pdb1tup.ent' 
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6146.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6147.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6148.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain E is discontinuous at line 6149.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain F is discontinuous at line 6171.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6185.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6383.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6453.
  PDBConstructionWarning)

In [4]:
my_residues = set()
for residue in p53_1tup.get_residues():
    my_residues.add(residue.id[0])
print(my_residues)


set([' ', 'H_ ZN', 'W'])

In [10]:
def get_mass(atoms, accept_fun=lambda atom: atom.parent.id[0] != 'W'):
    return sum([atom.mass for atom in atoms if accept_fun(atom)])

chain_names = [chain.id for chain in p53_1tup.get_chains()]
my_mass = np.ndarray((len(chain_names), 3))
for i, chain in enumerate(p53_1tup.get_chains()):
    my_mass[i, 0] = get_mass(chain.get_atoms())
    my_mass[i, 1] = get_mass(chain.get_atoms(), accept_fun=lambda atom: atom.parent.id[0] not in [' ', 'W'])
    my_mass[i, 2] = get_mass(chain.get_atoms(), accept_fun=lambda atom: atom.parent.id[0] == 'W')
masses = pd.DataFrame(my_mass, index=chain_names, columns=['No Water', 'Zincs', 'Water'])
masses


Out[10]:
No Water Zincs Water
E 6068.04412 0.00 351.9868
F 6258.20442 0.00 223.9916
A 20548.26300 65.39 3167.8812
B 20368.18840 65.39 1119.9580
C 20466.22540 65.39 1279.9520

In [6]:
def get_center(atoms, weight_fun=lambda atom: 1 if atom.parent.id[0] != 'W' else 0):
    xsum = ysum = zsum = 0.0
    acum = 0.0
    for atom in atoms:
        x, y, z = atom.coord
        weight = weight_fun(atom)
        acum += weight
        xsum += weight * x
        ysum += weight * y
        zsum += weight * z
    return xsum / acum, ysum / acum, zsum / acum

In [7]:
print(get_center(p53_1tup.get_atoms()))
print(get_center(p53_1tup.get_atoms(),
                 weight_fun=lambda atom: atom.mass if atom.parent.id[0] != 'W' else 0))


(58.296333984624454, 15.48705585839803, 78.405295027957777)
(58.122611380926472, 15.751286500376127, 78.375370368859649)

In [8]:
my_center = np.ndarray((len(chain_names), 6))
for i, chain in enumerate(p53_1tup.get_chains()):
    x, y, z = get_center(chain.get_atoms())
    my_center[i, 0] = x
    my_center[i, 1] = y
    my_center[i, 2] = z
    x, y, z = get_center(chain.get_atoms(), weight_fun=lambda atom: atom.mass if atom.parent.id[0] != 'W' else 0)
    my_center[i, 3] = x
    my_center[i, 4] = y
    my_center[i, 5] = z
weights = pd.DataFrame(my_center, index=chain_names, columns=['X', 'Y', 'Z', 'X (Mass)', 'Y (Mass)', 'Z (Mass)'])
weights


Out[8]:
X Y Z X (Mass) Y (Mass) Z (Mass)
E 49.727231 32.744879 81.253417 49.708513 32.759725 81.207395
F 51.982368 33.843370 81.578795 52.002223 33.820064 81.624394
A 72.990763 28.825429 56.714012 72.822668 28.810327 56.716117
B 67.810026 12.624435 88.656590 67.729100 12.724130 88.545659
C 38.221565 -5.010494 88.293141 38.169364 -4.915395 88.166711

In [9]:
#Pymol viz