In [25]:
import os
from Bio.PDB import PDBList
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile

In [3]:
# Every relative path used below (original_pdbs/, cleaned_pdbs/) resolves against this directory.
# NOTE(review): machine-specific absolute path — change it before running on another machine.
os.chdir("/Users/weilu/Research/server/nov_2018/iterative_optimization")

In [6]:
# Comma-separated structure IDs. The first four characters are used as the PDB code;
# an optional fifth character (e.g. the "A" in "256BA") is read by cleanPdb as the chain ID.
a = "1R69, 1UTG, 3ICB, 256BA, 4CPV, 1CCR, 2MHR, 1MBA, 2FHA"

In [8]:
# Split on ", " (comma + space) so each list entry is one ID string.
pdb_list = a.split(", ")

In [9]:
# Show the parsed ID list.
pdb_list


Out[9]:
['1R69', '1UTG', '3ICB', '256BA', '4CPV', '1CCR', '2MHR', '1MBA', '2FHA']

In [23]:
def download(pdb_list):
    """Download each structure in ``pdb_list`` to ``original_pdbs/<code>.pdb``.

    Args:
        pdb_list: Iterable of ID strings. Only the first four characters of
            each entry, lower-cased, are used as the PDB code, so an entry
            with a trailing chain letter such as "256BA" fetches "256b".

    Structures whose target file already exists are skipped. Paths are
    relative to the current working directory; ``original_pdbs/`` is created
    if it does not exist. A structure that could not be fetched is reported
    with a printed message and skipped; the remaining IDs are still processed.
    """
    # Function-scope import so this definition does not rely on another cell.
    import shutil

    os.makedirs("original_pdbs", exist_ok=True)
    pdbl = None  # created lazily: not needed when every file is already present
    for pdb_id in pdb_list:
        pdb = pdb_id.lower()[:4]
        pdbFile = pdb + ".pdb"
        target = os.path.join("original_pdbs", pdbFile)
        if os.path.isfile(target):
            # Nothing to fetch and nothing to move. Moving unconditionally here
            # used an unbound (or stale, from the previous ID) `name`.
            continue
        if pdbl is None:
            pdbl = PDBList()
        # The return value is used as the path of the downloaded file.
        name = pdbl.retrieve_pdb_file(pdb, pdir='.', file_format='pdb')
        if not os.path.isfile(name):
            # The returned path may not exist if the download failed.
            print(f"Could not download '{pdb}'; skipping.")
            continue
        shutil.move(name, target)


Downloading PDB structure '1r69'...
Downloading PDB structure '1utg'...
Downloading PDB structure '3icb'...
Downloading PDB structure '256b'...
Downloading PDB structure '4cpv'...
Downloading PDB structure '1ccr'...
Downloading PDB structure '2mhr'...
Downloading PDB structure '1mba'...
Downloading PDB structure '2fha'...

In [29]:
# NOTE(review): looks like a leftover scratch variable. No visible cell reads this
# module-level name; download() and cleanPdb() each assign their own local pdbFile.
pdbFile = "original_pdbs/2fha.pdb"

In [34]:
# Re-display the ID list that is passed to cleanPdb below.
pdb_list


Out[34]:
['1R69', '1UTG', '3ICB', '256BA', '4CPV', '1CCR', '2MHR', '1MBA', '2FHA']

In [36]:
def cleanPdb(pdb_list):
    for pdb_id in pdb_list:
        pdb = f"{pdb_id.lower()[:4]}"
        if len(pdb_id) == 5:
            chain = pdb_id[4].upper()
        else:
            assert(len(pdb_id) == 4)
            chain = "A"
        pdbFile = pdb+".pdb"
        # clean pdb
        fixer = PDBFixer(filename="original_pdbs/"+pdbFile)
        # remove unwanted chains
        chains = list(fixer.topology.chains())
        chains_to_remove = [i for i, x in enumerate(chains) if x.id != chain]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # add missing residues in the middle of a chain, not ones at the start or end of the chain.
        chains = list(fixer.topology.chains())
        keys = fixer.missingResidues.keys()
        # print(keys)
        for key in list(keys):
            chain = chains[key[0]]
            if key[1] == 0 or key[1] == len(list(chain.residues())):
                del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.0)
        PDBFile.writeFile(fixer.topology, fixer.positions, open("cleaned_pdbs/"+pdbFile, 'w'))

In [37]:
# Clean every structure in pdb_list; results are written under cleaned_pdbs/.
# NOTE(review): cleanPdb as defined above has no active print statements, so the saved
# output below (file names and missing-residue keys) presumably comes from an earlier version.
cleanPdb(pdb_list)


1r69.pdb
dict_keys([(0, 63)])
1utg.pdb
dict_keys([])
3icb.pdb
dict_keys([])
256b.pdb
dict_keys([])
4cpv.pdb
dict_keys([])
1ccr.pdb
dict_keys([])
2mhr.pdb
dict_keys([])
1mba.pdb
dict_keys([])
2fha.pdb
dict_keys([(0, 0), (0, 172)])

In [ ]: