In [1]:
%pylab inline
import dfi 
import glob


Populating the interactive namespace from numpy and matplotlib

In [2]:
import pandas as pd

In [3]:
# Read one PDB ID per line. strip() removes the whole line ending ('\n' or
# '\r\n') plus stray surrounding whitespace, and blank lines are skipped so no
# empty ID is passed on to the fetch step.
with open('pdblist.txt', 'r') as infile:
    pdbids = [line.strip() for line in infile if line.strip()]

In [4]:
# Remove any carriage returns left in the IDs (presumably from CRLF line
# endings in pdblist.txt). A list comprehension is used instead of map() so the
# result is a real list that can be iterated more than once on Python 2 and 3.
pdbids = [pdb_id.replace('\r', '') for pdb_id in pdbids]

In [5]:
# Fetch every listed structure through dfi.fetch_pdb, echoing each ID first as a
# progress marker (presumably this saves <ID>.pdb in the working directory,
# since the next cells glob for '*.pdb' — confirm against dfi).
for pdb_id in pdbids:
    print(pdb_id)
    dfi.fetch_pdb(pdb_id)


4DUR
2Q7Z
4I0K
1GAX
1QU2
2Z7X
5DO7
4KX7
3SE6
5AB0
5AB2
1DJG
3OHM
2FJU
1QAS
4QJ3
4GNK
3S84
3R2P
1AV1
2A01
2LEM
2MSC
3NQ5
3NM8
4P6R
4J6V
4HD4
4J6T
4HD7

In [6]:
#data = pd.read_csv('missingresidues.csv')

In [7]:
#for pdb in data['pdb'].values:
#    dfi.fetch_pdb(pdb)

In [8]:
# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# every later loop (and its printed output) runs in the same order everywhere.
pdbfiles = sorted(glob.glob('*.pdb'))

In [9]:
# Display the structure files found in the working directory.
pdbfiles


Out[9]:
['1AV1.pdb',
 '1DJG.pdb',
 '1GAX.pdb',
 '1QAS.pdb',
 '1QU2.pdb',
 '2A01.pdb',
 '2FJU.pdb',
 '2LEM.pdb',
 '2MSC.pdb',
 '2Q7Z.pdb',
 '2Z7X.pdb',
 '3NM8.pdb',
 '3NQ5.pdb',
 '3OHM.pdb',
 '3R2P.pdb',
 '3S84.pdb',
 '3SE6.pdb',
 '4DUR.pdb',
 '4GNK.pdb',
 '4HD4.pdb',
 '4HD7.pdb',
 '4I0K.pdb',
 '4J6T.pdb',
 '4J6V.pdb',
 '4KX7.pdb',
 '4P6R.pdb',
 '4QJ3.pdb',
 '5AB0.pdb',
 '5AB2.pdb',
 '5DO7.pdb']

In [10]:
# Strip carriage returns from the file names. Built as a list (not map()) because
# pdbfiles is iterated several times below; on Python 3 a map object would be
# empty after its first use.
pdbfiles = [name.replace('\r', '') for name in pdbfiles]

In [11]:
def findmissing(pdbfile):
    """
    Report the missing residues that lie inside the chains of a PDB file.

    Parameters
    ----------
    pdbfile : str
        Path of the PDB file passed to ``pdbfixer.PDBFixer``.

    Returns
    -------
    dict
        ``fixer.missingResidues`` after filtering. Keys are
        ``(chain index, residue index)`` tuples and values are lists of residue
        names. Entries located at the beginning of a chain (residue index 0) or
        at its end (residue index equal to the number of residues in the chain)
        are removed, so only internal gaps remain. Empty when there are none.
    """
    import pdbfixer
    fixer = pdbfixer.PDBFixer(filename=pdbfile)
    fixer.findMissingResidues()
    chains = list(fixer.topology.chains())

    # Iterate over a snapshot of the keys: entries are deleted inside the loop,
    # and deleting while iterating the live key view raises RuntimeError on
    # Python 3.
    for key in list(fixer.missingResidues.keys()):
        chain_length = len(list(chains[key[0]].residues()))
        if key[1] == 0 or key[1] == chain_length:
            del fixer.missingResidues[key]

    return fixer.missingResidues

In [12]:
# Display the file list again after the '\r' clean-up cell.
pdbfiles


Out[12]:
['1AV1.pdb',
 '1DJG.pdb',
 '1GAX.pdb',
 '1QAS.pdb',
 '1QU2.pdb',
 '2A01.pdb',
 '2FJU.pdb',
 '2LEM.pdb',
 '2MSC.pdb',
 '2Q7Z.pdb',
 '2Z7X.pdb',
 '3NM8.pdb',
 '3NQ5.pdb',
 '3OHM.pdb',
 '3R2P.pdb',
 '3S84.pdb',
 '3SE6.pdb',
 '4DUR.pdb',
 '4GNK.pdb',
 '4HD4.pdb',
 '4HD7.pdb',
 '4I0K.pdb',
 '4J6T.pdb',
 '4J6V.pdb',
 '4KX7.pdb',
 '4P6R.pdb',
 '4QJ3.pdb',
 '5AB0.pdb',
 '5AB2.pdb',
 '5DO7.pdb']

In [13]:
for pdbfil in pdbfiles:
    # Parse each structure once and reuse the result; calling findmissing()
    # both in the test and in the print would run PDBFixer on the file twice.
    missing = findmissing(pdbfil)
    # Only report structures that have gaps inside a chain.
    if missing:
        print("%s %s" % (pdbfil, missing))


1DJG.pdb {(0, 243): ['GLY', 'GLY', 'LEU', 'LEU', 'PRO', 'ALA', 'GLY', 'GLY', 'GLU', 'ASN', 'GLY', 'SER', 'GLU', 'ALA', 'THR', 'ASP', 'VAL', 'SER', 'ASP', 'GLU', 'VAL', 'GLU', 'ALA', 'ALA', 'GLU', 'MET', 'GLU', 'ASP', 'GLU', 'ALA', 'VAL', 'ARG', 'SER', 'GLN', 'VAL', 'GLN', 'HIS', 'LYS', 'PRO', 'LYS', 'GLU', 'ASP', 'LYS', 'LEU'], (1, 288): ['LEU', 'PRO', 'ALA', 'GLY', 'GLY', 'GLU', 'ASN', 'GLY', 'SER', 'GLU', 'ALA', 'THR', 'ASP', 'VAL', 'SER', 'ASP', 'GLU', 'VAL', 'GLU', 'ALA', 'ALA', 'GLU', 'MET', 'GLU', 'ASP', 'GLU', 'ALA', 'VAL', 'ARG', 'SER', 'GLN', 'VAL', 'GLN', 'HIS', 'LYS', 'PRO', 'LYS', 'GLU']}
1QAS.pdb {(0, 399): ['LYS', 'ASN', 'LYS', 'ASN', 'SER'], (1, 240): ['LEU', 'PRO', 'ALA', 'GLY', 'GLY', 'GLU', 'ASN', 'GLY', 'SER', 'GLU', 'ALA', 'THR', 'ASP', 'VAL', 'SER', 'ASP', 'GLU', 'VAL', 'GLU', 'ALA', 'ALA', 'GLU', 'MET', 'GLU', 'ASP', 'GLU', 'ALA', 'VAL', 'ARG', 'SER', 'GLN', 'VAL', 'GLN', 'HIS', 'LYS', 'PRO', 'LYS', 'GLU', 'ASP'], (1, 398): ['LYS', 'ASN', 'LYS', 'ASN', 'SER'], (0, 240): ['LEU', 'LEU', 'PRO', 'ALA', 'GLY', 'GLY', 'GLU', 'ASN', 'GLY', 'SER', 'GLU', 'ALA', 'THR', 'ASP', 'VAL', 'SER', 'ASP', 'GLU', 'VAL', 'GLU', 'ALA', 'ALA', 'GLU', 'MET', 'GLU', 'ASP', 'GLU', 'ALA', 'VAL', 'ARG', 'SER', 'GLN', 'VAL', 'GLN', 'HIS', 'LYS', 'PRO', 'LYS', 'GLU', 'ASP', 'LYS', 'LEU'], (1, 266): ['GLY', 'THR', 'SER']}
2FJU.pdb {(1, 217): ['TYR', 'HIS', 'ALA', 'LYS', 'ALA', 'LYS', 'PRO'], (1, 425): ['ASN', 'GLN', 'PHE', 'SER', 'GLY', 'PRO', 'THR', 'SER', 'SER', 'SER', 'LYS', 'ASP', 'THR', 'GLY', 'GLY', 'GLU', 'ALA', 'GLU', 'GLY', 'SER', 'SER', 'PRO', 'PRO', 'SER', 'ALA', 'PRO', 'ALA', 'VAL', 'TRP', 'ALA', 'GLY', 'GLU', 'GLU', 'GLY', 'THR', 'GLU', 'LEU', 'GLU', 'GLU', 'GLU', 'GLU', 'VAL', 'GLU', 'GLU', 'GLU', 'GLU', 'GLU'], (1, 234): ['SER', 'ARG', 'LEU', 'ASN', 'SER', 'LEU', 'LEU', 'PHE', 'PRO', 'PRO', 'ALA', 'ARG', 'PRO', 'ASP', 'GLN', 'VAL'], (1, 574): ['ASP', 'LYS', 'GLN', 'PHE', 'ASN', 'PRO', 'PHE', 'SER', 'VAL', 'ASP', 'ARG', 'ILE', 'ASP', 'VAL', 'VAL', 'VAL', 'ALA'], (1, 244): ['GLY', 'ILE', 'ASN', 'ALA', 'GLN', 'ARG']}
3NQ5.pdb {(1, 243): ['ASN', 'GLN', 'ASN']}
3OHM.pdb {(1, 78): ['LEU', 'GLY', 'PHE', 'GLY', 'GLY', 'PRO', 'ASP', 'ALA', 'ARG'], (1, 451): ['ARG', 'PRO', 'SER', 'ALA', 'GLY', 'GLY', 'PRO', 'ASP', 'SER', 'ALA', 'GLY', 'ARG', 'LYS', 'ARG', 'PRO', 'LEU', 'GLU', 'GLN', 'SER', 'ASN', 'SER', 'ALA', 'LEU', 'SER', 'GLU', 'SER', 'SER', 'ALA', 'ALA', 'THR', 'GLU', 'PRO', 'SER', 'SER', 'PRO', 'GLN', 'LEU', 'GLY', 'SER', 'PRO', 'SER', 'SER', 'ASP', 'SER', 'CYS', 'PRO', 'GLY', 'LEU', 'SER', 'ASN', 'GLY', 'GLU', 'GLU', 'VAL', 'GLY', 'LEU', 'GLU', 'LYS', 'PRO', 'SER', 'LEU', 'GLU', 'PRO', 'GLN', 'LYS', 'SER', 'LEU', 'GLY', 'ASP', 'GLU', 'GLY', 'LEU', 'ASN', 'ARG', 'GLY', 'PRO', 'TYR', 'VAL', 'LEU', 'GLY', 'PRO', 'ALA', 'ASP', 'ARG', 'GLU', 'ASP', 'GLU', 'GLU', 'GLU', 'ASP', 'GLU', 'GLU', 'GLU', 'GLU', 'GLU', 'GLN', 'THR', 'ASP', 'PRO', 'LYS', 'LYS', 'PRO', 'THR']}
3SE6.pdb {(1, 441): ['CYS', 'LEU', 'GLU', 'SER', 'ASP', 'PHE', 'THR', 'SER', 'GLY', 'GLY', 'VAL', 'CYS', 'HIS', 'SER', 'ASP', 'PRO', 'LYS', 'MET', 'THR', 'SER', 'ASN', 'MET', 'LEU', 'ALA', 'PHE', 'LEU', 'GLY', 'GLU', 'ASN'], (1, 71): ['GLU', 'ASP', 'SER', 'ARG', 'TYR', 'MET', 'LYS'], (0, 73): ['ASP', 'SER', 'ARG'], (0, 446): ['CYS', 'LEU', 'GLU', 'SER', 'ASP', 'PHE', 'THR', 'SER', 'GLY', 'GLY', 'VAL', 'CYS', 'HIS', 'SER', 'ASP', 'PRO', 'LYS', 'MET', 'THR', 'SER', 'ASN', 'MET', 'LEU', 'ALA', 'PHE'], (0, 488): ['VAL', 'PHE', 'GLN', 'GLU', 'ASP', 'PRO', 'GLU', 'TRP', 'ARG', 'ALA', 'LEU'], (1, 480): ['PHE', 'GLN', 'GLU', 'ASP', 'PRO', 'GLU', 'TRP', 'ARG', 'ALA', 'LEU', 'GLN', 'GLU']}
4DUR.pdb {(1, 442): ['GLY', 'LYS', 'GLY'], (1, 333): ['SER', 'SER', 'PRO', 'VAL', 'SER', 'THR', 'GLU', 'GLN', 'LEU', 'ALA'], (0, 332): ['SER', 'SER', 'PRO', 'VAL', 'SER', 'THR', 'GLU', 'GLN', 'LEU', 'ALA'], (1, 474): ['ARG', 'ALA', 'GLY'], (0, 656): ['GLN', 'GLY', 'THR'], (1, 653): ['THR', 'GLN', 'GLY'], (0, 425): ['THR', 'GLU', 'ALA', 'SER', 'VAL', 'VAL', 'ALA', 'PRO', 'PRO', 'PRO', 'VAL', 'VAL', 'LEU', 'LEU', 'PRO', 'ASP', 'VAL', 'GLU', 'THR', 'PRO'], (1, 509): ['ALA', 'ALA'], (1, 446): ['ARG', 'ALA'], (1, 436): ['VAL', 'VAL', 'LEU', 'LEU', 'PRO', 'ASP', 'VAL', 'GLU', 'THR', 'PRO', 'SER', 'GLU', 'GLU']}
4GNK.pdb {(4, 157): ['GLU', 'ALA', 'LYS', 'MET', 'ARG', 'ASP', 'LYS'], (1, 460): ['HIS', 'ARG', 'PRO', 'SER', 'ALA', 'GLY', 'GLY', 'PRO', 'ASP', 'SER', 'ALA', 'GLY', 'ARG', 'LYS', 'ARG', 'PRO', 'LEU', 'GLU', 'GLN', 'SER', 'ASN', 'SER', 'ALA', 'LEU', 'SER', 'GLU', 'SER', 'SER', 'ALA', 'ALA', 'THR', 'GLU', 'PRO', 'SER', 'SER', 'PRO', 'GLN', 'LEU', 'GLY', 'SER', 'PRO', 'SER', 'SER', 'ASP', 'SER', 'CYS', 'PRO', 'GLY', 'LEU', 'SER', 'ASN', 'GLY', 'GLU', 'GLU', 'VAL', 'GLY', 'LEU', 'GLU', 'LYS', 'PRO', 'SER', 'LEU', 'GLU', 'PRO', 'GLN', 'LYS', 'SER', 'LEU', 'GLY', 'ASP', 'GLU', 'GLY', 'LEU', 'ASN', 'ARG', 'GLY', 'PRO', 'TYR', 'VAL', 'LEU', 'GLY', 'PRO', 'ALA', 'ASP', 'ARG', 'GLU', 'ASP', 'GLU', 'GLU', 'GLU', 'ASP', 'GLU', 'GLU', 'GLU', 'GLU', 'GLU', 'GLN', 'THR', 'ASP', 'PRO', 'LYS', 'LYS'], (3, 459): ['HIS', 'ARG', 'PRO', 'SER', 'ALA', 'GLY', 'GLY', 'PRO', 'ASP', 'SER', 'ALA', 'GLY', 'ARG', 'LYS', 'ARG', 'PRO', 'LEU', 'GLU', 'GLN', 'SER', 'ASN', 'SER', 'ALA', 'LEU', 'SER', 'GLU', 'SER', 'SER', 'ALA', 'ALA', 'THR', 'GLU', 'PRO', 'SER', 'SER', 'PRO', 'GLN', 'LEU', 'GLY', 'SER', 'PRO', 'SER', 'SER', 'ASP', 'SER', 'CYS', 'PRO', 'GLY', 'LEU', 'SER', 'ASN', 'GLY', 'GLU', 'GLU', 'VAL', 'GLY', 'LEU', 'GLU', 'LYS', 'PRO', 'SER', 'LEU', 'GLU', 'PRO', 'GLN', 'LYS', 'SER', 'LEU', 'GLY', 'ASP', 'GLU', 'GLY', 'LEU', 'ASN', 'ARG', 'GLY', 'PRO', 'TYR', 'VAL', 'LEU', 'GLY', 'PRO', 'ALA', 'ASP', 'ARG', 'GLU', 'ASP', 'GLU', 'GLU', 'GLU', 'ASP', 'GLU', 'GLU', 'GLU', 'GLU', 'GLU', 'GLN', 'THR', 'ASP', 'PRO', 'LYS', 'LYS', 'PRO', 'THR'], (3, 187): ['PHE', 'ASN'], (4, 75): ['PRO', 'GLY', 'ALA', 'LEU', 'GLY', 'GLY', 'ALA', 'ALA', 'ASP', 'VAL', 'GLU', 'ASP', 'THR', 'LYS', 'GLU', 'GLY', 'GLU']}
4I0K.pdb {(0, 117): ['ASN', 'LYS', 'ASP', 'LEU', 'ARG']}
4KX7.pdb {(0, 532): ['SER', 'SER', 'ASN', 'PRO']}
5AB0.pdb {(1, 448): ['SER', 'CYS', 'LEU', 'GLU', 'SER', 'ASP', 'PHE', 'THR', 'SER', 'GLY', 'GLY', 'VAL', 'CYS', 'HIS', 'SER', 'ASP', 'PRO', 'LYS', 'MET', 'THR', 'SER', 'ASN', 'MET', 'LEU', 'ALA', 'PHE', 'LEU']}
5AB2.pdb {(0, 449): ['CYS', 'LEU', 'GLU', 'SER', 'ASP', 'PHE', 'THR', 'SER', 'GLY', 'GLY', 'VAL', 'CYS'], (1, 449): ['CYS', 'LEU', 'GLU', 'SER', 'ASP', 'PHE', 'THR', 'SER', 'GLY', 'GLY', 'VAL', 'CYS', 'HIS', 'SER', 'ASP', 'PRO', 'LYS', 'MET', 'THR', 'SER', 'ASN', 'MET', 'LEU', 'ALA', 'PHE', 'LEU', 'GLY', 'GLU', 'ASN']}
5DO7.pdb {(1, 22): ['ASN', 'THR', 'LEU', 'GLU', 'VAL', 'ARG', 'ASP', 'LEU', 'ASN', 'TYR', 'GLN', 'VAL', 'ASP', 'LEU', 'ALA', 'SER', 'GLN', 'VAL', 'PRO', 'TRP', 'PHE', 'GLU', 'GLN', 'LEU', 'ALA', 'GLN', 'PHE', 'LYS', 'MET', 'PRO', 'TRP', 'THR', 'SER', 'PRO', 'SER', 'CYS', 'GLN', 'ASN', 'SER', 'CYS', 'GLU', 'LEU'], (1, 282): ['ASP', 'LEU', 'ASP', 'GLU', 'ASP', 'THR', 'CYS', 'VAL', 'GLU', 'SER', 'SER', 'VAL', 'THR', 'PRO', 'LEU', 'ASP', 'THR', 'ASN', 'CYS', 'LEU', 'PRO', 'SER', 'PRO', 'THR', 'LYS', 'MET', 'PRO'], (2, 21): ['ASN', 'THR', 'LEU', 'GLU', 'VAL', 'ARG', 'ASP', 'LEU', 'ASN', 'TYR', 'GLN', 'VAL', 'ASP', 'LEU', 'ALA', 'SER', 'GLN', 'VAL', 'PRO', 'TRP', 'PHE', 'GLU', 'GLN', 'LEU', 'ALA', 'GLN', 'PHE', 'LYS', 'MET', 'PRO', 'TRP', 'THR', 'SER', 'PRO', 'SER', 'CYS', 'GLN', 'ASN', 'SER', 'CYS', 'GLU'], (0, 13): ['VAL', 'SER', 'HIS', 'ARG', 'VAL', 'ARG', 'PRO', 'TRP', 'TRP', 'ASP', 'ILE', 'THR', 'SER', 'CYS', 'ARG', 'GLN', 'GLN', 'TRP', 'THR'], (3, 11): ['VAL', 'SER', 'HIS', 'ARG', 'VAL', 'ARG', 'PRO', 'TRP', 'TRP', 'ASP', 'ILE', 'THR', 'SER', 'CYS', 'ARG', 'GLN', 'GLN', 'TRP', 'THR', 'ARG'], (3, 528): ['SER', 'ASN', 'VAL', 'SER', 'VAL', 'THR', 'THR', 'ASN', 'PRO'], (2, 291): ['LEU', 'ASP', 'ASP', 'PHE', 'LEU', 'TRP', 'LYS', 'ALA', 'GLU', 'THR', 'LYS', 'ASP', 'LEU', 'ASP', 'GLU', 'ASP', 'THR', 'CYS', 'VAL', 'GLU', 'SER', 'SER', 'VAL', 'THR', 'PRO', 'LEU', 'ASP', 'THR', 'ASN', 'CYS', 'LEU', 'PRO', 'SER', 'PRO'], (1, 257): ['VAL', 'ASP', 'LEU', 'THR', 'SER', 'ILE', 'ASP', 'ARG', 'ARG', 'SER', 'ARG', 'GLU', 'GLN', 'GLU', 'LEU', 'ALA', 'THR', 'ARG', 'GLU', 'LYS'], (0, 535): ['GLY', 'SER', 'SER', 'ASN', 'VAL', 'SER', 'VAL', 'THR', 'THR', 'ASN', 'PRO', 'MET', 'CYS'], (3, 293): ['LYS', 'THR', 'LEU', 'PRO', 'MET', 'VAL']}

In [14]:
import pdbfixer

In [15]:
def fixpdb(pdbfile):
    """
    Fill in the internal gaps of a PDB file and write the modeled structure.

    All chains are kept (the first-chain-only step below is disabled).
    Missing-residue entries at the beginning of a chain (residue index 0) or at
    its end (residue index equal to the chain's residue count) are discarded, so
    only gaps inside a chain are left in ``fixer.missingResidues`` when
    ``addMissingAtoms()`` runs. The remaining entries are printed, and the
    result is written to ``pdbfile + 'mdl'`` (e.g. ``1AV1.pdb`` ->
    ``1AV1.pdbmdl``).

    Parameters
    ----------
    pdbfile : str
        Path of the input PDB file.

    Returns
    -------
    None
    """
    import pdbfixer
    fixer = pdbfixer.PDBFixer(filename=pdbfile)
    # Disabled: restrict the model to the first chain only.
    #numChains = len(list(fixer.topology.chains()))
    #fixer.removeChains(range(1,numChains))
    fixer.findMissingResidues()
    chains = list(fixer.topology.chains())
    # Iterate over a snapshot of the keys: entries are deleted inside the loop,
    # and deleting while iterating the live key view raises RuntimeError on
    # Python 3.
    for key in list(fixer.missingResidues.keys()):
        chain_length = len(list(chains[key[0]].residues()))
        if key[1] == 0 or key[1] == chain_length:
            del fixer.missingResidues[key]
    print(fixer.missingResidues)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    # Use a context manager so the output is flushed and closed even if writing
    # fails; the original left the handle open for the garbage collector.
    with open(pdbfile + 'mdl', 'w') as outfile:
        fixer.pdb.writeFile(fixer.topology, fixer.positions, file=outfile)

In [16]:
# Model the internal gaps of every downloaded structure; fixpdb prints the
# remaining missing-residue entries and writes <name>.pdbmdl for each file.
for pdbfile in pdbfiles:
    fixpdb(pdbfile)


{}
{(0, 243): ['GLY', 'GLY', 'LEU', 'LEU', 'PRO', 'ALA', 'GLY', 'GLY', 'GLU', 'ASN', 'GLY', 'SER', 'GLU', 'ALA', 'THR', 'ASP', 'VAL', 'SER', 'ASP', 'GLU', 'VAL', 'GLU', 'ALA', 'ALA', 'GLU', 'MET', 'GLU', 'ASP', 'GLU', 'ALA', 'VAL', 'ARG', 'SER', 'GLN', 'VAL', 'GLN', 'HIS', 'LYS', 'PRO', 'LYS', 'GLU', 'ASP', 'LYS', 'LEU'], (1, 288): ['LEU', 'PRO', 'ALA', 'GLY', 'GLY', 'GLU', 'ASN', 'GLY', 'SER', 'GLU', 'ALA', 'THR', 'ASP', 'VAL', 'SER', 'ASP', 'GLU', 'VAL', 'GLU', 'ALA', 'ALA', 'GLU', 'MET', 'GLU', 'ASP', 'GLU', 'ALA', 'VAL', 'ARG', 'SER', 'GLN', 'VAL', 'GLN', 'HIS', 'LYS', 'PRO', 'LYS', 'GLU']}
{}
{(0, 399): ['LYS', 'ASN', 'LYS', 'ASN', 'SER'], (1, 240): ['LEU', 'PRO', 'ALA', 'GLY', 'GLY', 'GLU', 'ASN', 'GLY', 'SER', 'GLU', 'ALA', 'THR', 'ASP', 'VAL', 'SER', 'ASP', 'GLU', 'VAL', 'GLU', 'ALA', 'ALA', 'GLU', 'MET', 'GLU', 'ASP', 'GLU', 'ALA', 'VAL', 'ARG', 'SER', 'GLN', 'VAL', 'GLN', 'HIS', 'LYS', 'PRO', 'LYS', 'GLU', 'ASP'], (1, 398): ['LYS', 'ASN', 'LYS', 'ASN', 'SER'], (0, 240): ['LEU', 'LEU', 'PRO', 'ALA', 'GLY', 'GLY', 'GLU', 'ASN', 'GLY', 'SER', 'GLU', 'ALA', 'THR', 'ASP', 'VAL', 'SER', 'ASP', 'GLU', 'VAL', 'GLU', 'ALA', 'ALA', 'GLU', 'MET', 'GLU', 'ASP', 'GLU', 'ALA', 'VAL', 'ARG', 'SER', 'GLN', 'VAL', 'GLN', 'HIS', 'LYS', 'PRO', 'LYS', 'GLU', 'ASP', 'LYS', 'LEU'], (1, 266): ['GLY', 'THR', 'SER']}
{}
{}
{(1, 217): ['TYR', 'HIS', 'ALA', 'LYS', 'ALA', 'LYS', 'PRO'], (1, 425): ['ASN', 'GLN', 'PHE', 'SER', 'GLY', 'PRO', 'THR', 'SER', 'SER', 'SER', 'LYS', 'ASP', 'THR', 'GLY', 'GLY', 'GLU', 'ALA', 'GLU', 'GLY', 'SER', 'SER', 'PRO', 'PRO', 'SER', 'ALA', 'PRO', 'ALA', 'VAL', 'TRP', 'ALA', 'GLY', 'GLU', 'GLU', 'GLY', 'THR', 'GLU', 'LEU', 'GLU', 'GLU', 'GLU', 'GLU', 'VAL', 'GLU', 'GLU', 'GLU', 'GLU', 'GLU'], (1, 234): ['SER', 'ARG', 'LEU', 'ASN', 'SER', 'LEU', 'LEU', 'PHE', 'PRO', 'PRO', 'ALA', 'ARG', 'PRO', 'ASP', 'GLN', 'VAL'], (1, 574): ['ASP', 'LYS', 'GLN', 'PHE', 'ASN', 'PRO', 'PHE', 'SER', 'VAL', 'ASP', 'ARG', 'ILE', 'ASP', 'VAL', 'VAL', 'VAL', 'ALA'], (1, 244): ['GLY', 'ILE', 'ASN', 'ALA', 'GLN', 'ARG']}
{}
{}
{}
{}
{}
{(1, 243): ['ASN', 'GLN', 'ASN']}
{(1, 78): ['LEU', 'GLY', 'PHE', 'GLY', 'GLY', 'PRO', 'ASP', 'ALA', 'ARG'], (1, 451): ['ARG', 'PRO', 'SER', 'ALA', 'GLY', 'GLY', 'PRO', 'ASP', 'SER', 'ALA', 'GLY', 'ARG', 'LYS', 'ARG', 'PRO', 'LEU', 'GLU', 'GLN', 'SER', 'ASN', 'SER', 'ALA', 'LEU', 'SER', 'GLU', 'SER', 'SER', 'ALA', 'ALA', 'THR', 'GLU', 'PRO', 'SER', 'SER', 'PRO', 'GLN', 'LEU', 'GLY', 'SER', 'PRO', 'SER', 'SER', 'ASP', 'SER', 'CYS', 'PRO', 'GLY', 'LEU', 'SER', 'ASN', 'GLY', 'GLU', 'GLU', 'VAL', 'GLY', 'LEU', 'GLU', 'LYS', 'PRO', 'SER', 'LEU', 'GLU', 'PRO', 'GLN', 'LYS', 'SER', 'LEU', 'GLY', 'ASP', 'GLU', 'GLY', 'LEU', 'ASN', 'ARG', 'GLY', 'PRO', 'TYR', 'VAL', 'LEU', 'GLY', 'PRO', 'ALA', 'ASP', 'ARG', 'GLU', 'ASP', 'GLU', 'GLU', 'GLU', 'ASP', 'GLU', 'GLU', 'GLU', 'GLU', 'GLU', 'GLN', 'THR', 'ASP', 'PRO', 'LYS', 'LYS', 'PRO', 'THR']}
{}
{}
{(1, 441): ['CYS', 'LEU', 'GLU', 'SER', 'ASP', 'PHE', 'THR', 'SER', 'GLY', 'GLY', 'VAL', 'CYS', 'HIS', 'SER', 'ASP', 'PRO', 'LYS', 'MET', 'THR', 'SER', 'ASN', 'MET', 'LEU', 'ALA', 'PHE', 'LEU', 'GLY', 'GLU', 'ASN'], (1, 71): ['GLU', 'ASP', 'SER', 'ARG', 'TYR', 'MET', 'LYS'], (0, 73): ['ASP', 'SER', 'ARG'], (0, 446): ['CYS', 'LEU', 'GLU', 'SER', 'ASP', 'PHE', 'THR', 'SER', 'GLY', 'GLY', 'VAL', 'CYS', 'HIS', 'SER', 'ASP', 'PRO', 'LYS', 'MET', 'THR', 'SER', 'ASN', 'MET', 'LEU', 'ALA', 'PHE'], (0, 488): ['VAL', 'PHE', 'GLN', 'GLU', 'ASP', 'PRO', 'GLU', 'TRP', 'ARG', 'ALA', 'LEU'], (1, 480): ['PHE', 'GLN', 'GLU', 'ASP', 'PRO', 'GLU', 'TRP', 'ARG', 'ALA', 'LEU', 'GLN', 'GLU']}
{(1, 442): ['GLY', 'LYS', 'GLY'], (1, 333): ['SER', 'SER', 'PRO', 'VAL', 'SER', 'THR', 'GLU', 'GLN', 'LEU', 'ALA'], (0, 332): ['SER', 'SER', 'PRO', 'VAL', 'SER', 'THR', 'GLU', 'GLN', 'LEU', 'ALA'], (1, 474): ['ARG', 'ALA', 'GLY'], (0, 656): ['GLN', 'GLY', 'THR'], (1, 653): ['THR', 'GLN', 'GLY'], (0, 425): ['THR', 'GLU', 'ALA', 'SER', 'VAL', 'VAL', 'ALA', 'PRO', 'PRO', 'PRO', 'VAL', 'VAL', 'LEU', 'LEU', 'PRO', 'ASP', 'VAL', 'GLU', 'THR', 'PRO'], (1, 509): ['ALA', 'ALA'], (1, 446): ['ARG', 'ALA'], (1, 436): ['VAL', 'VAL', 'LEU', 'LEU', 'PRO', 'ASP', 'VAL', 'GLU', 'THR', 'PRO', 'SER', 'GLU', 'GLU']}
{(4, 157): ['GLU', 'ALA', 'LYS', 'MET', 'ARG', 'ASP', 'LYS'], (1, 460): ['HIS', 'ARG', 'PRO', 'SER', 'ALA', 'GLY', 'GLY', 'PRO', 'ASP', 'SER', 'ALA', 'GLY', 'ARG', 'LYS', 'ARG', 'PRO', 'LEU', 'GLU', 'GLN', 'SER', 'ASN', 'SER', 'ALA', 'LEU', 'SER', 'GLU', 'SER', 'SER', 'ALA', 'ALA', 'THR', 'GLU', 'PRO', 'SER', 'SER', 'PRO', 'GLN', 'LEU', 'GLY', 'SER', 'PRO', 'SER', 'SER', 'ASP', 'SER', 'CYS', 'PRO', 'GLY', 'LEU', 'SER', 'ASN', 'GLY', 'GLU', 'GLU', 'VAL', 'GLY', 'LEU', 'GLU', 'LYS', 'PRO', 'SER', 'LEU', 'GLU', 'PRO', 'GLN', 'LYS', 'SER', 'LEU', 'GLY', 'ASP', 'GLU', 'GLY', 'LEU', 'ASN', 'ARG', 'GLY', 'PRO', 'TYR', 'VAL', 'LEU', 'GLY', 'PRO', 'ALA', 'ASP', 'ARG', 'GLU', 'ASP', 'GLU', 'GLU', 'GLU', 'ASP', 'GLU', 'GLU', 'GLU', 'GLU', 'GLU', 'GLN', 'THR', 'ASP', 'PRO', 'LYS', 'LYS'], (3, 459): ['HIS', 'ARG', 'PRO', 'SER', 'ALA', 'GLY', 'GLY', 'PRO', 'ASP', 'SER', 'ALA', 'GLY', 'ARG', 'LYS', 'ARG', 'PRO', 'LEU', 'GLU', 'GLN', 'SER', 'ASN', 'SER', 'ALA', 'LEU', 'SER', 'GLU', 'SER', 'SER', 'ALA', 'ALA', 'THR', 'GLU', 'PRO', 'SER', 'SER', 'PRO', 'GLN', 'LEU', 'GLY', 'SER', 'PRO', 'SER', 'SER', 'ASP', 'SER', 'CYS', 'PRO', 'GLY', 'LEU', 'SER', 'ASN', 'GLY', 'GLU', 'GLU', 'VAL', 'GLY', 'LEU', 'GLU', 'LYS', 'PRO', 'SER', 'LEU', 'GLU', 'PRO', 'GLN', 'LYS', 'SER', 'LEU', 'GLY', 'ASP', 'GLU', 'GLY', 'LEU', 'ASN', 'ARG', 'GLY', 'PRO', 'TYR', 'VAL', 'LEU', 'GLY', 'PRO', 'ALA', 'ASP', 'ARG', 'GLU', 'ASP', 'GLU', 'GLU', 'GLU', 'ASP', 'GLU', 'GLU', 'GLU', 'GLU', 'GLU', 'GLN', 'THR', 'ASP', 'PRO', 'LYS', 'LYS', 'PRO', 'THR'], (3, 187): ['PHE', 'ASN'], (4, 75): ['PRO', 'GLY', 'ALA', 'LEU', 'GLY', 'GLY', 'ALA', 'ALA', 'ASP', 'VAL', 'GLU', 'ASP', 'THR', 'LYS', 'GLU', 'GLY', 'GLU']}
{}
{}
{(0, 117): ['ASN', 'LYS', 'ASP', 'LEU', 'ARG']}
{}
{}
{(0, 532): ['SER', 'SER', 'ASN', 'PRO']}
{}
{}
{(1, 448): ['SER', 'CYS', 'LEU', 'GLU', 'SER', 'ASP', 'PHE', 'THR', 'SER', 'GLY', 'GLY', 'VAL', 'CYS', 'HIS', 'SER', 'ASP', 'PRO', 'LYS', 'MET', 'THR', 'SER', 'ASN', 'MET', 'LEU', 'ALA', 'PHE', 'LEU']}
{(0, 449): ['CYS', 'LEU', 'GLU', 'SER', 'ASP', 'PHE', 'THR', 'SER', 'GLY', 'GLY', 'VAL', 'CYS'], (1, 449): ['CYS', 'LEU', 'GLU', 'SER', 'ASP', 'PHE', 'THR', 'SER', 'GLY', 'GLY', 'VAL', 'CYS', 'HIS', 'SER', 'ASP', 'PRO', 'LYS', 'MET', 'THR', 'SER', 'ASN', 'MET', 'LEU', 'ALA', 'PHE', 'LEU', 'GLY', 'GLU', 'ASN']}
{(1, 22): ['ASN', 'THR', 'LEU', 'GLU', 'VAL', 'ARG', 'ASP', 'LEU', 'ASN', 'TYR', 'GLN', 'VAL', 'ASP', 'LEU', 'ALA', 'SER', 'GLN', 'VAL', 'PRO', 'TRP', 'PHE', 'GLU', 'GLN', 'LEU', 'ALA', 'GLN', 'PHE', 'LYS', 'MET', 'PRO', 'TRP', 'THR', 'SER', 'PRO', 'SER', 'CYS', 'GLN', 'ASN', 'SER', 'CYS', 'GLU', 'LEU'], (1, 282): ['ASP', 'LEU', 'ASP', 'GLU', 'ASP', 'THR', 'CYS', 'VAL', 'GLU', 'SER', 'SER', 'VAL', 'THR', 'PRO', 'LEU', 'ASP', 'THR', 'ASN', 'CYS', 'LEU', 'PRO', 'SER', 'PRO', 'THR', 'LYS', 'MET', 'PRO'], (2, 21): ['ASN', 'THR', 'LEU', 'GLU', 'VAL', 'ARG', 'ASP', 'LEU', 'ASN', 'TYR', 'GLN', 'VAL', 'ASP', 'LEU', 'ALA', 'SER', 'GLN', 'VAL', 'PRO', 'TRP', 'PHE', 'GLU', 'GLN', 'LEU', 'ALA', 'GLN', 'PHE', 'LYS', 'MET', 'PRO', 'TRP', 'THR', 'SER', 'PRO', 'SER', 'CYS', 'GLN', 'ASN', 'SER', 'CYS', 'GLU'], (0, 13): ['VAL', 'SER', 'HIS', 'ARG', 'VAL', 'ARG', 'PRO', 'TRP', 'TRP', 'ASP', 'ILE', 'THR', 'SER', 'CYS', 'ARG', 'GLN', 'GLN', 'TRP', 'THR'], (3, 11): ['VAL', 'SER', 'HIS', 'ARG', 'VAL', 'ARG', 'PRO', 'TRP', 'TRP', 'ASP', 'ILE', 'THR', 'SER', 'CYS', 'ARG', 'GLN', 'GLN', 'TRP', 'THR', 'ARG'], (3, 528): ['SER', 'ASN', 'VAL', 'SER', 'VAL', 'THR', 'THR', 'ASN', 'PRO'], (2, 291): ['LEU', 'ASP', 'ASP', 'PHE', 'LEU', 'TRP', 'LYS', 'ALA', 'GLU', 'THR', 'LYS', 'ASP', 'LEU', 'ASP', 'GLU', 'ASP', 'THR', 'CYS', 'VAL', 'GLU', 'SER', 'SER', 'VAL', 'THR', 'PRO', 'LEU', 'ASP', 'THR', 'ASN', 'CYS', 'LEU', 'PRO', 'SER', 'PRO'], (1, 257): ['VAL', 'ASP', 'LEU', 'THR', 'SER', 'ILE', 'ASP', 'ARG', 'ARG', 'SER', 'ARG', 'GLU', 'GLN', 'GLU', 'LEU', 'ALA', 'THR', 'ARG', 'GLU', 'LYS'], (0, 535): ['GLY', 'SER', 'SER', 'ASN', 'VAL', 'SER', 'VAL', 'THR', 'THR', 'ASN', 'PRO', 'MET', 'CYS'], (3, 293): ['LYS', 'THR', 'LEU', 'PRO', 'MET', 'VAL']}

In [19]:
# Collect the modeled structures written by fixpdb. glob order is arbitrary, so
# sort to make the DFI run (and its log) reproducible.
modeledpbs = sorted(glob.glob('*.pdbmdl'))

In [21]:
# Run the DFI analysis on every modeled structure, writing results to file.
# A failure on one structure must not stop the batch, but catch Exception rather
# than using a bare except (which also traps KeyboardInterrupt/SystemExit) and
# report why the file failed so it does not have to be re-run to find out.
for pdb in modeledpbs:
    print(pdb)
    try:
        dfi.calc_dfi(pdb, writetofile=True)
    except Exception as err:
        print("Cannot process:  %s (%s: %s)" % (pdb, type(err).__name__, err))


1AV1.pdbmdl
Read 804 atoms from the 1AV1.pdbmdl
Wrote out to 1AV1-dfianalysis.csv
1DJG.pdbmdl
Read 1156 atoms from the 1DJG.pdbmdl
Wrote out to 1DJG-dfianalysis.csv
1GAX.pdbmdl
Read 1726 atoms from the 1GAX.pdbmdl
Wrote out to 1GAX-dfianalysis.csv
1QAS.pdbmdl
Read 1103 atoms from the 1QAS.pdbmdl
Wrote out to 1QAS-dfianalysis.csv
1QU2.pdbmdl
Read 917 atoms from the 1QU2.pdbmdl
Wrote out to 1QU2-dfianalysis.csv
2A01.pdbmdl
Read 729 atoms from the 2A01.pdbmdl
Wrote out to 2A01-dfianalysis.csv
2FJU.pdbmdl
Read 966 atoms from the 2FJU.pdbmdl
Wrote out to 2FJU-dfianalysis.csv
2LEM.pdbmdl
Read 216 atoms from the 2LEM.pdbmdl
Wrote out to 2LEM-dfianalysis.csv
2MSC.pdbmdl
Read 581 atoms from the 2MSC.pdbmdl
Wrote out to 2MSC-dfianalysis.csv
2Q7Z.pdbmdl
Read 1931 atoms from the 2Q7Z.pdbmdl
Cannot process:  2Q7Z.pdbmdl
2Z7X.pdbmdl
Read 1075 atoms from the 2Z7X.pdbmdl
Wrote out to 2Z7X-dfianalysis.csv
3NM8.pdbmdl
Read 570 atoms from the 3NM8.pdbmdl
Wrote out to 3NM8-dfianalysis.csv
3NQ5.pdbmdl
Read 570 atoms from the 3NQ5.pdbmdl
Wrote out to 3NQ5-dfianalysis.csv
3OHM.pdbmdl
Read 1189 atoms from the 3OHM.pdbmdl
Wrote out to 3OHM-dfianalysis.csv
3R2P.pdbmdl
Read 180 atoms from the 3R2P.pdbmdl
Wrote out to 3R2P-dfianalysis.csv
3S84.pdbmdl
Read 480 atoms from the 3S84.pdbmdl
Wrote out to 3S84-dfianalysis.csv
3SE6.pdbmdl
Read 1817 atoms from the 3SE6.pdbmdl
Wrote out to 3SE6-dfianalysis.csv
4DUR.pdbmdl
Read 1579 atoms from the 4DUR.pdbmdl
Wrote out to 4DUR-dfianalysis.csv
4GNK.pdbmdl
Read 2660 atoms from the 4GNK.pdbmdl
Wrote out to 4GNK-dfianalysis.csv
4HD4.pdbmdl
Read 573 atoms from the 4HD4.pdbmdl
Wrote out to 4HD4-dfianalysis.csv
4HD7.pdbmdl
Read 572 atoms from the 4HD7.pdbmdl
Wrote out to 4HD7-dfianalysis.csv
4I0K.pdbmdl
Read 205 atoms from the 4I0K.pdbmdl
Cannot process:  4I0K.pdbmdl
4J6T.pdbmdl
Read 573 atoms from the 4J6T.pdbmdl
Wrote out to 4J6T-dfianalysis.csv
4J6V.pdbmdl
Read 573 atoms from the 4J6V.pdbmdl
Wrote out to 4J6V-dfianalysis.csv
4KX7.pdbmdl
Read 879 atoms from the 4KX7.pdbmdl
Wrote out to 4KX7-dfianalysis.csv
4P6R.pdbmdl
Read 575 atoms from the 4P6R.pdbmdl
Wrote out to 4P6R-dfianalysis.csv
4QJ3.pdbmdl
Read 1072 atoms from the 4QJ3.pdbmdl
Wrote out to 4QJ3-dfianalysis.csv
5AB0.pdbmdl
Read 1838 atoms from the 5AB0.pdbmdl
Wrote out to 5AB0-dfianalysis.csv
5AB2.pdbmdl
Read 1829 atoms from the 5AB2.pdbmdl
Wrote out to 5AB2-dfianalysis.csv
5DO7.pdbmdl
Read 2528 atoms from the 5DO7.pdbmdl
Wrote out to 5DO7-dfianalysis.csv

In [22]:
# Re-run one of the structures reported as "Cannot process" outside the
# try/except so the traceback is visible.
# NOTE(review): this cell ends in AssertionError (9 near-singular eigenvalues
# where calc_covariance asserts 6). 2Q7Z.pdbmdl also failed above and was not
# inspected. A finished notebook should not keep a raising cell.
dfi.calc_dfi('4I0K.pdbmdl')


Read 205 atoms from the 4I0K.pdbmdl
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-22-87c6cbec7173> in <module>()
----> 1 dfi.calc_dfi('4I0K.pdbmdl')

/Users/teachingassistants/dfi/dfi/dfi_calc.pyc in calc_dfi(pdbfile, pdbid, covar, ls_reschain, chain_name, Verbose, writetofile, colorpdb, dfianalfile)
    608     # create covariance matrix or read it in if provided
    609     if not(covar):
--> 610         invHrs = calc_covariance(numres, x, y, z, Verbose=False)
    611     else:  # this is where we load the Hessian if provided
    612         invHrs = np.loadtxt(covar)

/Users/teachingassistants/dfi/dfi/dfi_calc.pyc in calc_covariance(numres, x, y, z, invhessfile, Verbose)
    557         flatandwrite(invHrs, invhessfile)
    558     assert np.sum(
--> 559         singular) == 6., "Number of near-singular eigenvalues: %f" % np.sum(singular)
    560     return invHrs
    561 

AssertionError: Number of near-singular eigenvalues: 9.000000

In [ ]: