In [1]:
from __future__ import print_function
from Bio import PDB

In [2]:
# Fetch the three p53-related entries into the current directory.
# The legacy PDB format is requested explicitly: recent Biopython releases
# download mmCIF by default, which would not produce the 'pdb<id>.ent' files
# that the parsing cell reads.
repository = PDB.PDBList()
repository.retrieve_pdb_file('1TUP', pdir='.', file_format='pdb')
repository.retrieve_pdb_file('1OLG', pdir='.', file_format='pdb')
repository.retrieve_pdb_file('1YCQ', pdir='.', file_format='pdb')


Downloading PDB structure '1TUP'...
Downloading PDB structure '1OLG'...
Downloading PDB structure '1YCQ'...
Out[2]:
'./pdb1ycq.ent'

In [3]:
# Parse the three downloaded entry files with one shared parser instance.
parser = PDB.PDBParser()
p53_1tup, p53_1olg, p53_1ycq = (
    parser.get_structure(label, filename)
    for label, filename in (
        ('P 53 - DNA Binding', 'pdb1tup.ent'),
        ('P 53 - Tetramerization', 'pdb1olg.ent'),
        ('P 53 - Transactivation', 'pdb1ycq.ent'),
    )
)


/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6146.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6147.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6148.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain E is discontinuous at line 6149.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain F is discontinuous at line 6171.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6185.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6383.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6453.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 1125.
  PDBConstructionWarning)
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 1160.
  PDBConstructionWarning)

In [4]:
def print_pdb_headers(headers, indent=0):
    """Pretty-print a (possibly nested) header mapping.

    Parameters
    ----------
    headers : mapping
        Header names mapped to their content. Content may be a mapping
        (printed recursively, indented by four more spaces and surrounded
        by blank lines), a list (one '->' line per element) or any other
        value (printed on the same line as its name).
    indent : int, optional
        Number of spaces prepended to every line at this nesting level.

    Returns
    -------
    None
        The function only writes to standard output.
    """
    ind_text = ' ' * indent
    for header, content in headers.items():
        # isinstance (rather than an exact type comparison) so that dict and
        # list subclasses such as OrderedDict are expanded too instead of
        # being dumped as a single repr.
        if isinstance(content, dict):
            print('\n%s%20s:' % (ind_text, header))
            print_pdb_headers(content, indent + 4)
            print()
        elif isinstance(content, list):
            print('%s%20s:' % (ind_text, header))
            for elem in content:
                print('%s%21s %s' % (ind_text, '->', elem))
        else:
            print('%s%20s: %s' % (ind_text, header, content))

# Dump every header field of the structure parsed from 'pdb1tup.ent'.
print_pdb_headers(p53_1tup.header)


    structure_method: x-ray diffraction
                head: antitumor protein/dna
             journal: AUTH   Y.CHO,S.GORINA,P.D.JEFFREY,N.P.PAVLETICHTITL   CRYSTAL STRUCTURE OF A P53 TUMOR SUPPRESSOR-DNATITL 2 COMPLEX: UNDERSTANDING TUMORIGENIC MUTATIONS.REF    SCIENCE                       V. 265   346 1994REFN                   ISSN 0036-8075PMID   8023157
   journal_reference: y.cho,s.gorina,p.d.jeffrey,n.p.pavletich crystal structure of a p53 tumor suppressor-dna complex: understanding tumorigenic mutations. science v. 265 346 1994 issn 0036-8075 8023157 

            compound:

                       1:
                    molecule: dna (5'-d(*tp*tp*tp*cp*cp*tp*ap*gp*ap*cp*tp*tp*gp*cp*cp*cp*a p*ap*tp*tp*a)-3') 
                        misc: 
                  engineered: yes
                       chain: e


                       3:
                    molecule: protein (p53 tumor suppressor )
                        misc: 
                  engineered: yes
                       chain: a, b, c


                       2:
                    molecule: dna (5'-d(*ap*tp*ap*ap*tp*tp*gp*gp*gp*cp*ap*ap*gp*tp*cp*tp*a p*gp*gp*ap*a)-3') 
                        misc: 
                  engineered: yes
                       chain: f


            keywords: antigen p53, antitumor protein/dna complex
                name:  tumor suppressor p53 complexed with dna
              author: Y.Cho,S.Gorina,P.D.Jeffrey,N.P.Pavletich
     deposition_date: 1995-07-11
        release_date: 1995-07-11

              source:

                       1:
                   synthetic: yes
                        misc: 


                       3:
           expression_system: escherichia coli
        expression_system_taxid: 562
         organism_scientific: homo sapiens
                        misc: 
                        cell: human vulva carcinoma
        expression_system_plasmid: pet3d
                   cell_line: a431
              organism_taxid: 9606
             organism_common: human


                       2:
                   synthetic: yes
                        misc: 


          resolution: 2.2
 structure_reference:
                   -> n.p.pavletich,k.a.chambers,c.o.pabo the dna-binding domain of p53 contains the four conserved regions and the major mutation hot spots genes dev. v. 7 2556 1993 issn 0890-9369 
                   -> b.vogelstein,k.w.kinzler p53 function and dysfunction cell(cambridge,mass.) v. 70 523 1992 issn 0092-8674 

In [5]:
# Show the raw 'compound' header entry of each structure, one per line.
for structure in (p53_1tup, p53_1olg, p53_1ycq):
    print(structure.header['compound'])


{'1': {'molecule': "dna (5'-d(*tp*tp*tp*cp*cp*tp*ap*gp*ap*cp*tp*tp*gp*cp*cp*cp*a p*ap*tp*tp*a)-3') ", 'misc': '', 'engineered': 'yes', 'chain': 'e'}, '3': {'molecule': 'protein (p53 tumor suppressor )', 'misc': '', 'engineered': 'yes', 'chain': 'a, b, c'}, '2': {'molecule': "dna (5'-d(*ap*tp*ap*ap*tp*tp*gp*gp*gp*cp*ap*ap*gp*tp*cp*tp*a p*gp*gp*ap*a)-3') ", 'misc': '', 'engineered': 'yes', 'chain': 'f'}}
{'1': {'molecule': 'tumor suppressor p53 (oligomerization domain)', 'misc': '', 'engineered': 'yes', 'chain': 'a, b, c, d'}}
{'1': {'molecule': 'mdm2', 'engineered': 'yes', 'misc': '', 'synonym': 'mdm2', 'chain': 'a'}, '2': {'fragment': 'residues 13 - 29', 'molecule': 'p53', 'misc': '', 'engineered': 'yes', 'chain': 'b'}}

In [6]:
def describe_model(name, pdb):
    """Print one summary line per chain of every model in ``pdb``.

    A blank line is printed first. Each following line reports ``name``,
    the chain id, the chain length (``len(chain)``) and the number of items
    yielded by ``chain.get_atoms()``.
    """
    line_format = '%s - Chain: %s. Number of residues: %d. Number of atoms: %d.'
    print()
    every_chain = (chain for model in pdb for chain in model)
    for chain in every_chain:
        residue_total = len(chain)
        atom_total = len(list(chain.get_atoms()))
        print(line_format % (name, chain.id, residue_total, atom_total))
# Per-chain summary of each structure; a later recipe goes deeper into
# the hierarchy (bottom-up).
for label, structure in (('1TUP', p53_1tup), ('1OLG', p53_1olg), ('1YCQ', p53_1ycq)):
    describe_model(label, structure)


1TUP - Chain: E. Number of residues: 43. Number of atoms: 442.
1TUP - Chain: F. Number of residues: 35. Number of atoms: 449.
1TUP - Chain: A. Number of residues: 395. Number of atoms: 1734.
1TUP - Chain: B. Number of residues: 265. Number of atoms: 1593.
1TUP - Chain: C. Number of residues: 276. Number of atoms: 1610.

1OLG - Chain: A. Number of residues: 42. Number of atoms: 698.
1OLG - Chain: B. Number of residues: 42. Number of atoms: 698.
1OLG - Chain: C. Number of residues: 42. Number of atoms: 698.
1OLG - Chain: D. Number of residues: 42. Number of atoms: 698.

1YCQ - Chain: A. Number of residues: 123. Number of atoms: 741.
1YCQ - Chain: B. Number of residues: 16. Number of atoms: 100.

In [13]:
# Print the id of every residue whose first id field is neither ' ' nor 'W'
# (presumably standard residues and waters in Bio.PDB -- confirm in its docs).
for residue in p53_1tup.get_residues():
    residue_flag = residue.id[0]
    if residue_flag not in [' ', 'W']:
        print(residue.id)


('H_ ZN', 951, ' ')
('H_ ZN', 952, ' ')
('H_ ZN', 953, ' ')

In [10]:
# Look at the first residue of chain A in model 0, atom by atom, then
# fetch a single atom directly by chain / residue number / atom name.
chain_a = p53_1tup[0]['A']
res = next(chain_a.get_residues())
print(res)
for atom in res:
    print(atom, atom.serial_number, atom.element)
print(chain_a[94]['CA'])


<Residue SER het=  resseq=94 icode= >
<Atom N> 858 N
<Atom CA> 859 C
<Atom C> 860 C
<Atom O> 861 O
<Atom CB> 862 C
<Atom OG> 863 O
<Atom CA>

In [11]:
from Bio.SeqIO import PdbIO, FastaIO

def get_fasta(pdb_file, fasta_file, transfer_ids=None):
    """Copy sequence records from a PDB handle to a FASTA handle.

    Records come from ``PdbIO.PdbSeqresIterator(pdb_file)`` and are written
    through a ``FastaIO.FastaWriter`` bound to ``fasta_file``. A record is
    skipped when its sequence is empty or, if ``transfer_ids`` is given,
    when its id is not contained in ``transfer_ids``. The id, sequence and
    sequence length of every record that is written are also printed.
    """
    writer = FastaIO.FastaWriter(fasta_file)
    writer.write_header()
    for record in PdbIO.PdbSeqresIterator(pdb_file):
        # Short-circuit order matches the guards: emptiness first, then the
        # optional id filter.
        if len(record.seq) != 0 and (transfer_ids is None or record.id in transfer_ids):
            print(record.id, record.seq, len(record.seq))
            writer.write_record(record)
        
# Export chain B of each entry to its own FASTA file. The handles are
# opened in `with` blocks so that both files are closed (and the FASTA
# output flushed to disk) as soon as each export finishes.
with open('pdb1tup.ent') as pdb_handle, open('1tup.fasta', 'w') as fasta_handle:
    get_fasta(pdb_handle, fasta_handle, transfer_ids=['1TUP:B'])
with open('pdb1olg.ent') as pdb_handle, open('1olg.fasta', 'w') as fasta_handle:
    get_fasta(pdb_handle, fasta_handle, transfer_ids=['1OLG:B'])
with open('pdb1ycq.ent') as pdb_handle, open('1ycq.fasta', 'w') as fasta_handle:
    get_fasta(pdb_handle, fasta_handle, transfer_ids=['1YCQ:B'])


1TUP:B SSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNT 219
1OLG:B KKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPG 42
1YCQ:B PLSQETFSDLWKLLPEN 17

In [11]: