pyPDBeREST

Below you can find some tips and examples on how to use pyPDBeREST.


In [1]:
# to keep reloading the project (during testing)
%load_ext autoreload
%autoreload 2

In [2]:
# loading the module...
from pdbe import pyPDBeREST
p = pyPDBeREST()

In [3]:
# printing out the version
p.version


Out[3]:
'0.1.0'

In [4]:
# having a look at the default base_url
p.base_url


Out[4]:
'https://www.ebi.ac.uk/pdbe/'

In [5]:
# one can also override the base url with the dev branch of the api
# but this is not advisable unless you are testing new endpoints...
p = pyPDBeREST(base_url='https://wwwdev.ebi.ac.uk/pdbe/')
p.base_url


Out[5]:
'https://wwwdev.ebi.ac.uk/pdbe/'

In [6]:
# back to the default
p = pyPDBeREST()

In [7]:
# printing out all the available method endpoints...
print(p.endpoints())


The following endpoints are available:
    EMDB
    SSM
    SEARCH
    SIFTS
    COMPOUNDS
    TOPOLOGY
    VALIDATION
    PDB
    PISA

In [8]:
# the same information can be found in
p.values


Out[8]:
['EMDB',
 'SSM',
 'SEARCH',
 'SIFTS',
 'COMPOUNDS',
 'TOPOLOGY',
 'VALIDATION',
 'PDB',
 'PISA']

In [9]:
# for each of these top level endpoints, print available methods
pdb = p.PDB
print(pdb.endpoints())


The following endpoints are available:
    getReleaseStatus
    getBindingSites
    getObservedRanges
    getRelatedPublications
    getResidueListingChain
    getNmrResources
    getExperiments
    getSecondaryStructure
    getVariousUrls
    getModifiedResidues
    getSummary
    getResidueListing
    getPublications
    getLigands
    getMutatedResidues
    getMolecules

In [10]:
# values...
pdb.values


Out[10]:
['getReleaseStatus',
 'getBindingSites',
 'getObservedRanges',
 'getRelatedPublications',
 'getResidueListingChain',
 'getNmrResources',
 'getExperiments',
 'getSecondaryStructure',
 'getVariousUrls',
 'getModifiedResidues',
 'getSummary',
 'getResidueListing',
 'getPublications',
 'getLigands',
 'getMutatedResidues',
 'getMolecules']

In [11]:
# example of a GET query...
data = pdb.getSummary(pdbid='1cbs')
print(data)


{
    "1cbs": [
        {
            "related_structures": [], 
            "split_entry": [], 
            "title": "CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID AND A SYNTHETIC RETINOID", 
            "release_date": "19950126", 
            "experimental_method": [
                "X-ray diffraction"
            ], 
            "experimental_method_class": [
                "x-ray"
            ], 
            "revision_date": "20090224", 
            "entry_authors": [
                "Kleywegt, G.J.", 
                "Bergfors, T.", 
                "Jones, T.A."
            ], 
            "deposition_site": null, 
            "number_of_entities": {
                "polypeptide": 1, 
                "dna": 0, 
                "ligand": 1, 
                "dna/rna": 0, 
                "rna": 0, 
                "sugar": 0, 
                "water": 1, 
                "other": 0
            }, 
            "processing_site": null, 
            "deposition_date": "19940928", 
            "assemblies": [
                {
                    "assembly_id": "1", 
                    "form": "homo", 
                    "preferred": true, 
                    "name": "monomer"
                }
            ]
        }
    ]
}

In [12]:
# two things to consider now
# one is that the default output json mode is set to be 'pretty'
print(p.pretty_json)


True

In [13]:
# one can override that to compress the json output
p = pyPDBeREST(pretty_json=False)
print(p.pretty_json)


False

In [14]:
# running the same example...
pdb = p.PDB
data = pdb.getSummary(pdbid='1cbs')
print(data)


{u'1cbs': [{u'related_structures': [], u'split_entry': [], u'title': u'CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID AND A SYNTHETIC RETINOID', u'release_date': u'19950126', u'experimental_method': [u'X-ray diffraction'], u'experimental_method_class': [u'x-ray'], u'revision_date': u'20090224', u'entry_authors': [u'Kleywegt, G.J.', u'Bergfors, T.', u'Jones, T.A.'], u'deposition_site': None, u'number_of_entities': {u'polypeptide': 1, u'dna': 0, u'ligand': 1, u'dna/rna': 0, u'rna': 0, u'sugar': 0, u'water': 1, u'other': 0}, u'processing_site': None, u'deposition_date': u'19940928', u'assemblies': [{u'assembly_id': u'1', u'form': u'homo', u'preferred': True, u'name': u'monomer'}]}]}

In [15]:
# the second aspect is that for each endpoint one might want to learn more about
# which params are needed, etc.
p = pyPDBeREST()
pdb = p.PDB

In [16]:
# read the doc provided for each method
print(pdb.getSummary.doc)


Summary.
This call provides a summary of properties of a PDB entry, such as the title of the entry, list of depositors, date of deposition, date of release, date of latest revision, experimental method, list of related entries in case split entries, etc.

In [17]:
# having a look at the url
print(p.base_url + pdb.getSummary.url)


https://www.ebi.ac.uk/pdbe/api/pdb/entry/summary/{{pdbid}}

In [18]:
# getting to know which params are needed for a particular endpoint
# list(...) keeps the printed parameter names a plain list on Python 3 too,
# where .keys() on a dict returns a view object instead of a list
print(list(pdb.getSummary.var.keys()))
print(pdb.getSummary.var)


['pdbid']
{'pdbid': {'doc': '4-character PDB id code. (e.g. 1cbs).\nFor POST requests, data should contain one or more comma-separated ids.', 'type': <type 'str'>}}

In [19]:
# getting to read the doc provided for each param
print(pdb.getSummary.var['pdbid']['doc'])


4-character PDB id code. (e.g. 1cbs).
For POST requests, data should contain one or more comma-separated ids.

In [20]:
# checking whether this method allows for POST requests
print(pdb.getSummary.method)
'POST' in pdb.getSummary.method


['GET', 'POST']
Out[20]:
True

In [21]:
# now that we know POST requests can be made to this endpoint, let's try
# an example POST query...
# up to 1000 pdb ids can be queried with post methods
data = pdb.getSummary(pdbid='1cbs, 2pah', method='POST')
print(data)


{
    "1cbs": [
        {
            "related_structures": [], 
            "split_entry": [], 
            "title": "CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID AND A SYNTHETIC RETINOID", 
            "release_date": "19950126", 
            "experimental_method": [
                "X-ray diffraction"
            ], 
            "experimental_method_class": [
                "x-ray"
            ], 
            "revision_date": "20090224", 
            "entry_authors": [
                "Kleywegt, G.J.", 
                "Bergfors, T.", 
                "Jones, T.A."
            ], 
            "deposition_site": null, 
            "number_of_entities": {
                "polypeptide": 1, 
                "dna": 0, 
                "ligand": 1, 
                "dna/rna": 0, 
                "rna": 0, 
                "sugar": 0, 
                "water": 1, 
                "other": 0
            }, 
            "processing_site": null, 
            "deposition_date": "19940928", 
            "assemblies": [
                {
                    "assembly_id": "1", 
                    "form": "homo", 
                    "preferred": true, 
                    "name": "monomer"
                }
            ]
        }
    ], 
    "2pah": [
        {
            "related_structures": [], 
            "split_entry": [], 
            "title": "TETRAMERIC HUMAN PHENYLALANINE HYDROXYLASE", 
            "release_date": "19991006", 
            "experimental_method": [
                "X-ray diffraction"
            ], 
            "experimental_method_class": [
                "x-ray"
            ], 
            "revision_date": "20110713", 
            "entry_authors": [
                "Stevens, R.C.", 
                "Fusetti, F.", 
                "Erlandsen, H."
            ], 
            "deposition_site": "BNL", 
            "number_of_entities": {
                "polypeptide": 1, 
                "dna": 0, 
                "ligand": 1, 
                "dna/rna": 0, 
                "rna": 0, 
                "sugar": 0, 
                "water": 0, 
                "other": 0
            }, 
            "processing_site": "RCSB", 
            "deposition_date": "19980526", 
            "assemblies": [
                {
                    "assembly_id": "1", 
                    "form": "homo", 
                    "preferred": true, 
                    "name": "tetramer"
                }
            ]
        }
    ]
}

In [22]:
# Parse the JSON text returned by the previous call into plain Python
# structures (dicts, lists, strings, ...).
# NOTE(review): `data` appears to be JSON text only in the default
# pretty_json mode; with pretty_json=False the wrapper seems to return
# already-parsed objects - confirm before reusing this cell in that mode.
import json
pdata = json.loads(data)
print(pdata)


{u'1cbs': [{u'related_structures': [], u'split_entry': [], u'assemblies': [{u'name': u'monomer', u'preferred': True, u'form': u'homo', u'assembly_id': u'1'}], u'title': u'CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID AND A SYNTHETIC RETINOID', u'release_date': u'19950126', u'experimental_method_class': [u'x-ray'], u'revision_date': u'20090224', u'entry_authors': [u'Kleywegt, G.J.', u'Bergfors, T.', u'Jones, T.A.'], u'deposition_site': None, u'number_of_entities': {u'polypeptide': 1, u'dna': 0, u'ligand': 1, u'dna/rna': 0, u'rna': 0, u'sugar': 0, u'water': 1, u'other': 0}, u'processing_site': None, u'deposition_date': u'19940928', u'experimental_method': [u'X-ray diffraction']}], u'2pah': [{u'related_structures': [], u'split_entry': [], u'assemblies': [{u'name': u'tetramer', u'preferred': True, u'form': u'homo', u'assembly_id': u'1'}], u'title': u'TETRAMERIC HUMAN PHENYLALANINE HYDROXYLASE', u'release_date': u'19991006', u'experimental_method_class': [u'x-ray'], u'revision_date': u'20110713', u'entry_authors': [u'Stevens, R.C.', u'Fusetti, F.', u'Erlandsen, H.'], u'deposition_site': u'BNL', u'number_of_entities': {u'polypeptide': 1, u'dna': 0, u'ligand': 1, u'dna/rna': 0, u'rna': 0, u'sugar': 0, u'water': 0, u'other': 0}, u'processing_site': u'RCSB', u'deposition_date': u'19980526', u'experimental_method': [u'X-ray diffraction']}]}

This is pretty much all you can do with the wrapper at the moment.

Below you can find an example usage of every available endpoint.

PDB


In [23]:
# using default parameters and compressed json output
p = pyPDBeREST(pretty_json=False)
print(p.PDB.endpoints())


The following endpoints are available:
    getReleaseStatus
    getBindingSites
    getObservedRanges
    getRelatedPublications
    getResidueListingChain
    getNmrResources
    getExperiments
    getSecondaryStructure
    getVariousUrls
    getModifiedResidues
    getSummary
    getResidueListing
    getPublications
    getLigands
    getMutatedResidues
    getMolecules

In [ ]:
# Not running everything here because the output can be massive...

In [ ]:
print(p.PDB.getReleaseStatus(pdbid='2pah'))

In [ ]:
print(p.PDB.getSummary(pdbid='2pah'))

In [ ]:
print(p.PDB.getBindingSites(pdbid='2pah'))

In [ ]:
print(p.PDB.getObservedRanges(pdbid='2pah'))

In [ ]:
print(p.PDB.getRelatedPublications(pdbid='2pah'))

In [ ]:
print(p.PDB.getResidueListingChain(pdbid='2pah', chainid='A'))

In [ ]:
# needs an NMR structure
print(p.PDB.getNmrResources(pdbid='2k8v'))

In [ ]:
print(p.PDB.getExperiments(pdbid='2pah'))

In [ ]:
print(p.PDB.getVariousUrls(pdbid='2pah'))

In [ ]:
# needs a pdbid with DNA or RNA
print(p.PDB.getModifiedResidues(pdbid='4v5j'))

In [ ]:
print(p.PDB.getResidueListing(pdbid='2pah'))

In [ ]:
# secondary structure elements of the entry
# (this cell used to repeat the getExperiments call shown a few cells earlier)
print(p.PDB.getSecondaryStructure(pdbid='2pah'))

In [ ]:
print(p.PDB.getPublications(pdbid='2pah'))

In [ ]:
# needs a structure with mutated residues
print(p.PDB.getMutatedResidues(pdbid='1bgj'))

In [ ]:
print(p.PDB.getMolecules(pdbid='2pah'))

In [ ]:
# post method
print(p.PDB.getSummary(pdbid='2pah, 1cbs', method='POST'))

COMPOUNDS


In [24]:
print(p.COMPOUNDS.endpoints())


The following endpoints are available:
    getBounds
    getAtoms
    getSummary
    getInPdbs

In [ ]:
print(p.COMPOUNDS.getBounds(compid='ATP'))

In [ ]:
print(p.COMPOUNDS.getAtoms(compid='ATP'))

In [ ]:
print(p.COMPOUNDS.getSummary(compid='ATP'))

In [ ]:
print(p.COMPOUNDS.getInPdbs(compid='ATP'))

In [ ]:
# post method
print(p.COMPOUNDS.getSummary(compid='ATP, HEM', method='POST'))

EMDB


In [25]:
print(p.EMDB.endpoints())


The following endpoints are available:
    getInfo

In [ ]:
print(p.EMDB.getInfo(property='summary', emdbid='EMD-1200'))

SIFTS


In [26]:
print(p.SIFTS.endpoints())


The following endpoints are available:
    getPdbPfam
    getHomologene
    getPdbGo
    getSequenceDomains
    getBestStructures
    getMappings
    getPdbInterpro
    getPdbUniprot
    getPdbCath
    getPdbEc
    getStructuralDomains
    getPdbScop

In [ ]:
print(p.SIFTS.getMappings(accession='1cbs'))

In [ ]:
print(p.SIFTS.getPdbPfam(pdbid='1cbs'))

In [ ]:
print(p.SIFTS.getPdbGo(pdbid='1cbs'))

In [ ]:
print(p.SIFTS.getPdbInterpro(pdbid='1cbs'))

In [ ]:
print(p.SIFTS.getPdbUniprot(pdbid='1cbs'))

In [ ]:
print(p.SIFTS.getPdbCath(pdbid='1cbs'))

In [ ]:
print(p.SIFTS.getPdbEc(pdbid='2pah'))

In [ ]:
print(p.SIFTS.getPdbScop(pdbid='1cbs'))

In [ ]:
print(p.SIFTS.getSequenceDomains(pdbid='1cbs'))

In [ ]:
print(p.SIFTS.getStructuralDomains(pdbid='1cbs'))

In [ ]:
print(p.SIFTS.getBestStructures(uniprotid='P29373'))

In [ ]:
print(p.SIFTS.getHomologene(pdbid='1cbs', entity='1'))

PISA


In [27]:
print(p.PISA.endpoints())


The following endpoints are available:
    getInterfaces
    getAssembly
    getNumberInterfaces
    getInterfacesList
    getPdbsList
    getAnalysis
    getAsisDetails
    getNumberEntries
    getAsisSummary
    getAssembliesList
    getMonomersList
    getAsisComponent
    getInterfaceDetails
    getAssemblyDetails
    getAssemblyComponent
    getVersion
    getMonomerComponent
    getInterfaceComponent
    getMonomerDetails
    getAsisList

In [ ]:
print(p.PISA.getVersion())

In [ ]:
print(p.PISA.getNumberEntries())

In [ ]:
print(p.PISA.getPdbsList())

In [ ]:
print(p.PISA.getAsisList(pdbid='3gcb'))

In [ ]:
print(p.PISA.getAsisDetails(pdbid='3gcb', assemblyid='0'))

In [ ]:
print(p.PISA.getAsisSummary(pdbid='3gcb', assemblyid='0'))

In [ ]:
print(p.PISA.getAsisComponent(pdbid='3gcb', assemblyid='0',
                              assembly_component='energetics'))

In [ ]:
print(p.PISA.getAssembly(pdbid='3gcb', assemblyid='0',
                         set=0, assembly_index=0))

In [ ]:
print(p.PISA.getAnalysis(pdbid='3gcb', assemblyid='0'))

In [ ]:
print(p.PISA.getAssembliesList(pdbid='3gcb', assemblyid='0'))

In [ ]:
print(p.PISA.getAssemblyDetails(pdbid='3gcb', assemblyid='0',
                                assembly_index=0))

In [ ]:
print(p.PISA.getAssemblyComponent(pdbid='3gcb', assemblyid='0',
                                  assembly_index=0,
                                  assembly_component='energetics'))

In [ ]:
print(p.PISA.getMonomersList(pdbid='3gcb', assemblyid='0'))

In [ ]:
print(p.PISA.getMonomerDetails(pdbid='3gcb', assemblyid='0',
                               monomer_index=1))

In [ ]:
print(p.PISA.getMonomerComponent(pdbid='3gcb', assemblyid='0',
                                 monomer_index=1,
                                 monomer_component='energetics'))

In [ ]:
print(p.PISA.getInterfaces(pdbid='3gcb', assemblyid='0'))

In [ ]:
print(p.PISA.getNumberInterfaces(pdbid='3gcb', assemblyid='0'))

In [ ]:
print(p.PISA.getInterfacesList(pdbid='3gcb', assemblyid='0'))

In [ ]:
print(p.PISA.getInterfaceDetails(pdbid='3gcb', assemblyid='0',
                                 interface_index=1))

In [ ]:
print(p.PISA.getInterfaceComponent(pdbid='3gcb', assemblyid='0',
                                   interface_index=1,
                                   interface_component='energetics'))

SSM


In [28]:
print(p.SSM.endpoints())


The following endpoints are available:
    getMatchStandard
    getVersion
    getMatchDetail
    getNumberMatches
    getMatchSummary

In [ ]:
print(p.SSM.getVersion())

In [ ]:
print(p.SSM.getMatchStandard(pdbid='3gcb'))

In [ ]:
print(p.SSM.getNumberMatches(pdbid='3gcb'))

In [ ]:
print(p.SSM.getMatchSummary(pdbid='3gcb'))

In [ ]:
print(p.SSM.getMatchDetail(pdbid='3gcb', ssm_index=1))

VALIDATION


In [29]:
print(p.VALIDATION.endpoints())


The following endpoints are available:
    getGlobalRelativePercentiles
    getRamachandranSidechainOutliers
    getGlobalPercentilesDetails
    getVanDerWaalOverlaps
    getBackboneSidechainQuality
    getSuitePuckerRnaOutliers
    getAllOutliersUnitId
    getOutlierTypesResidues
    getDiffractionRefinementDescriptors
    getGlobalAbsolutePercentilesSummary
    getGeometryOutliers

In [ ]:
print(p.VALIDATION.getGlobalRelativePercentiles(pdbid='1cbs'))

In [ ]:
print(p.VALIDATION.getGlobalAbsolutePercentilesSummary(pdbid='1cbs'))

In [ ]:
print(p.VALIDATION.getGlobalPercentilesDetails(pdbid='1cbs'))

In [ ]:
print(p.VALIDATION.getDiffractionRefinementDescriptors(pdbid='1cbs'))

In [ ]:
print(p.VALIDATION.getRamachandranSidechainOutliers(pdbid='1cbs'))

In [ ]:
print(p.VALIDATION.getBackboneSidechainQuality(pdbid='1cbs'))

In [ ]:
print(p.VALIDATION.getSuitePuckerRnaOutliers(pdbid='1cbs'))

In [ ]:
print(p.VALIDATION.getOutlierTypesResidues(pdbid='1cbs'))

In [ ]:
print(p.VALIDATION.getVanDerWaalOverlaps(pdbid='1cbs'))

In [ ]:
print(p.VALIDATION.getGeometryOutliers(pdbid='1cbs'))

In [ ]:
print(p.VALIDATION.getAllOutliersUnitId(pdbid='1cbs'))

In [ ]:
# post method
print(p.VALIDATION.getAllOutliersUnitId(pdbid='2pah, 1cbs', method='POST'))

TOPOLOGY


In [30]:
print(p.TOPOLOGY.endpoints())


The following endpoints are available:
    getTopology
    getTopologyPerChain

In [ ]:
# same example entry (1cbs) as used throughout the rest of this notebook
print(p.TOPOLOGY.getTopology(pdbid='1cbs'))

In [ ]:
# same example entry (1cbs) as used throughout the rest of this notebook
print(p.TOPOLOGY.getTopologyPerChain(pdbid='1cbs', chainid='A'))

SEARCH


In [31]:
print(p.SEARCH.endpoints())


The following endpoints are available:
    getSearch

In [ ]:
print(p.SEARCH.getSearch(query='q=pfam_name:Lipocalin&wt=json'))