In [ ]:
from ssbio.databases.pdb import PDBProp
from ssbio.databases.uniprot import UniProtProp
In [ ]:
import sys
import logging
In [ ]:
# Grab the root logger and pick the minimum severity it will emit
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)  # SET YOUR LOGGING LEVEL HERE #
In [ ]:
# Logging output setup for Jupyter notebooks: timestamped records written to stderr
formatter = logging.Formatter(
    fmt='[%(asctime)s] [%(name)s] %(levelname)s: %(message)s',
    datefmt="%Y-%m-%d %H:%M",
)
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)
# Assign (rather than append) so this handler is the logger's only one,
# even if the cell is executed more than once
logger.handlers = [handler]
In [ ]:
# Create a PDBProp object for PDB entry 5T4Q with a human-readable description
my_structure = PDBProp(
    ident='5T4Q',
    description='E. coli ATP synthase',
)
Downloading will:
In [ ]:
import tempfile
# Request the 'mmtf' file type and save it in the platform's temporary directory
my_structure.download_structure_file(
    outdir=tempfile.gettempdir(),
    file_type='mmtf',
)
In [ ]:
# Display the result of get_dict() for this structure
# (presumably its stored attributes as a dictionary -- confirm against the ssbio docs)
my_structure.get_dict()
The `mapped_chains` attribute allows us to limit sequence analyses to specified chains (see the later section where we align a sequence to this structure). In this example, ATP synthase is a complex made up of a number of protein chains, so if we are interested in a specific gene transcript, we can record the chains that correspond to it as the mapped chains.
In [ ]:
# ATP synthase subunit alpha (gene b3734, UniProt ID P0ABB0) is made up of
# chains A, B, and C, so register those as the mapped chains
my_structure.add_mapped_chain_ids(
    ['A', 'B', 'C'],
)
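Parsing the structure will parse the sequences of each chain and store them in the `chains` attribute. It will also return an object that gives access to the Biopython Structure object (through its `structure` attribute, with the first model available as `first_model`), which opens up all methods available for structures in Biopython.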
In [ ]:
# Parse the structure, then report the types of the parsed structure
# and of its first model, one per line
parsed_structure = my_structure.parse_structure()
print(
    type(parsed_structure.structure),
    type(parsed_structure.first_model),
    sep='\n',
)
Cleaning a structure does the following:
In the example below, we will clean the structure so it only includes our mapped chains.
In [ ]:
import tempfile
# Keep only the mapped chains and write the cleaned file to the platform's
# temporary directory. tempfile.gettempdir() replaces a hard-coded '/tmp',
# which is not available on every platform (e.g. Windows), and matches the
# directory used when downloading the structure file.
# force_rerun=True is passed so the cleaning is presumably redone even if an
# output file already exists -- confirm against the ssbio docs.
cleaned_structure = my_structure.clean_structure(
    outdir=tempfile.gettempdir(),
    keep_chains=my_structure.mapped_chains,
    force_rerun=True,
)
# Trailing expression so the notebook displays the returned value
cleaned_structure
In [ ]:
# The original structure (before cleaning), displayed via view_structure.
# recolor=False is passed; presumably this keeps the viewer's default
# coloring -- confirm against the ssbio docs.
my_structure.view_structure(recolor=False)
In [ ]:
# The cleaned structure, displayed with nglview from the value returned by
# clean_structure (presumably the path of the cleaned file -- confirm)
import nglview
nglview.show_structure_file(cleaned_structure)
In [ ]: