In [25]:
import os
import shutil

from Bio.PDB import PDBList
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile
In [3]:
# Work from the project directory: the relative "original_pdbs/" and
# "cleaned_pdbs/" paths used by the functions below resolve against it.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# fail at this cell on any other machine.
os.chdir("/Users/weilu/Research/server/nov_2018/iterative_optimization")
In [6]:
# Comma-separated structure identifiers. The functions below use the first four
# characters (lower-cased) as the file stem; cleanPdb reads a fifth character,
# when present (e.g. "256BA"), as the chain id and otherwise uses chain "A".
a = "1R69, 1UTG, 3ICB, 256BA, 4CPV, 1CCR, 2MHR, 1MBA, 2FHA"
In [8]:
# Split on the exact ", " separator used in the string above to get one
# identifier per list element.
pdb_list = a.split(", ")
In [9]:
# Display the parsed identifiers as the cell output.
pdb_list
Out[9]:
In [23]:
def download(pdb_list):
    """Fetch any structures missing from ``original_pdbs/``.

    Each entry of ``pdb_list`` is an identifier whose first four characters
    (case-insensitive) name the structure; any further characters are ignored
    here. A structure is downloaded only when ``original_pdbs/<id>.pdb`` does
    not already exist, so the function is safe to re-run.

    Parameters
    ----------
    pdb_list : iterable of str
        Structure identifiers such as ``"1R69"`` or ``"256BA"``.

    Returns
    -------
    None
        Files are written to ``original_pdbs/`` relative to the current
        working directory as a side effect.
    """
    target_dir = "original_pdbs"
    pdbl = None  # built lazily so nothing is constructed when every file is cached

    for pdb_id in pdb_list:
        pdb = pdb_id.lower()[:4]
        pdbFile = pdb + ".pdb"
        destination = os.path.join(target_dir, pdbFile)
        if os.path.isfile(destination):
            continue

        if pdbl is None:
            pdbl = PDBList()
        name = pdbl.retrieve_pdb_file(pdb, pdir='.', file_format='pdb')

        # The path is checked explicitly so that a failed download is reported
        # instead of surfacing only as a failed move.
        if not name or not os.path.isfile(name):
            print(f"download failed for {pdb}; skipping")
            continue

        # Move without a shell: no quoting issues with unusual paths, and a
        # failure raises instead of being silently ignored.
        os.makedirs(target_dir, exist_ok=True)
        shutil.move(name, destination)
In [29]:
# NOTE(review): module-level scratch value; no visible cell reads it
# (download and cleanPdb each assign their own local pdbFile).
pdbFile = "original_pdbs/2fha.pdb"
In [34]:
# Display the identifier list again before cleaning.
pdb_list
Out[34]:
In [36]:
def cleanPdb(pdb_list):
    """Clean each structure with PDBFixer and write it to ``cleaned_pdbs/``.

    For every identifier, ``original_pdbs/<id>.pdb`` is loaded, reduced to a
    single chain, repaired, and written to ``cleaned_pdbs/<id>.pdb``. The
    repair steps are: fill internal missing residues, replace non-standard
    residues, remove heterogens, add missing atoms, and add hydrogens.

    Parameters
    ----------
    pdb_list : iterable of str
        Identifiers of length 4 (chain ``"A"`` is kept) or length 5 (the fifth
        character, upper-cased, is the chain to keep), e.g. ``"1R69"`` or
        ``"256BA"``.

    Returns
    -------
    None
        Cleaned files are written relative to the current working directory.

    Raises
    ------
    AssertionError
        If an identifier is neither 4 nor 5 characters long.
    """
    for pdb_id in pdb_list:
        pdb = pdb_id.lower()[:4]
        if len(pdb_id) == 5:
            chain_id = pdb_id[4].upper()
        else:
            assert len(pdb_id) == 4, f"expected a 4- or 5-character id, got {pdb_id!r}"
            chain_id = "A"
        pdbFile = pdb + ".pdb"

        fixer = PDBFixer(filename=os.path.join("original_pdbs", pdbFile))

        # Drop every chain except the requested one.
        all_chains = list(fixer.topology.chains())
        chains_to_remove = [i for i, c in enumerate(all_chains) if c.id != chain_id]
        fixer.removeChains(chains_to_remove)

        # Only rebuild residues missing from the middle of a chain; gaps at the
        # start or end of the chain are discarded rather than modelled.
        fixer.findMissingResidues()
        kept_chains = list(fixer.topology.chains())
        for key in list(fixer.missingResidues.keys()):
            chain_index, residue_index = key[0], key[1]
            n_residues = len(list(kept_chains[chain_index].residues()))
            if residue_index == 0 or residue_index == n_residues:
                del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.0)

        # The context manager closes (and therefore flushes) the output file
        # even if writing fails part-way through.
        os.makedirs("cleaned_pdbs", exist_ok=True)
        with open(os.path.join("cleaned_pdbs", pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
In [37]:
# Clean every structure in pdb_list. cleanPdb reads original_pdbs/<id>.pdb, so
# those files must already exist (download is defined above, but no visible
# cell calls it).
cleanPdb(pdb_list)
In [ ]: