PDBe REST API 101


In [1]:
import sys

# Use the structured version tuple rather than comparing the free-form
# sys.version string: a lexicographic string comparison would misclassify
# any major version >= 10 (e.g. "10.0" sorts before "3").
PY3 = sys.version_info[0] >= 3

if PY3:
    # Python 3 exposes Request/urlopen/HTTPError under urllib.request;
    # alias it so the rest of the notebook can use the single name urllib2.
    import urllib.request as urllib2
else:
    import urllib2

# Root URL that the request helpers prepend to every API call path.
SERVER_URL = "https://www.ebi.ac.uk/pdbe/api"

def make_request(url, data):
    """Send a request to ``url`` and return the response body as text.

    Args:
        url: Full URL to open.
        data: Request body passed to ``urlopen``; ``None`` results in a GET,
            anything else is sent as the body of a POST.

    Returns:
        The decoded response body, or ``None`` when the server answers with
        an HTTP error status (a one-line message is printed in that case).

    Only ``HTTPError`` is handled here; any other exception raised by
    ``urlopen`` propagates to the caller.
    """
    request = urllib2.Request(url)

    try:
        url_file = urllib2.urlopen(request, data)
    except urllib2.HTTPError as e:
        if e.code == 404:
            print("[NOTFOUND %d] %s" % (e.code, url))
        else:
            print("[ERROR %d] %s" % (e.code, url))

        return None

    try:
        # Decode explicitly as UTF-8: a bare .decode() falls back to ASCII on
        # Python 2 and fails on any non-ASCII character in the payload.
        return url_file.read().decode("utf-8")
    finally:
        # Always release the underlying connection, even if read/decode fails.
        url_file.close()

def get_request(url, arg, pretty=False):
    """Issue a GET request for one API call and return the response text.

    Args:
        url: Call path relative to ``SERVER_URL``; leading and trailing
            slashes are optional.
        arg: Value appended to the path as its final segment (the notebook
            passes a PDB id such as ``"1cbs"``).
        pretty: When true, sends ``pretty=true`` in the query string;
            otherwise ``pretty=false``.

    Returns:
        Whatever ``make_request`` returns for the assembled URL.
    """
    # Call paths may be written with a leading "/" (the notebook's endpoint
    # constants are); strip it so the URL has no empty "//" path segment.
    full_url = "%s/%s/%s?pretty=%s" % (
        SERVER_URL, url.strip("/"), arg, str(pretty).lower())

    return make_request(full_url, None)

def post_request(url, data, pretty=False):
    """Issue a POST request for one API call and return the response text.

    Args:
        url: Call path relative to ``SERVER_URL``; leading and trailing
            slashes are optional.
        data: Request body. A list or tuple of strings is joined with
            commas; text is encoded as UTF-8; already-encoded ``bytes`` are
            sent unchanged.
        pretty: When true, sends ``pretty=true`` in the query string;
            otherwise ``pretty=false``.

    Returns:
        Whatever ``make_request`` returns for the assembled URL and body.
    """
    # Call paths may be written with a leading "/" (the notebook's endpoint
    # constants are); strip it so the URL has no empty "//" path segment.
    full_url = "%s/%s/?pretty=%s" % (
        SERVER_URL, url.strip("/"), str(pretty).lower())

    if isinstance(data, (list, tuple)):
        data = ",".join(data)

    # urlopen needs a byte string body; only encode when we still have text
    # (on Python 2, str is already bytes and passes through untouched).
    if not isinstance(data, bytes):
        data = data.encode("utf-8")

    return make_request(full_url, data)

In [2]:
# Call paths, relative to SERVER_URL, passed as the `url` argument of the
# request helpers.
summary, experiment = "/pdb/entry/summary", "/pdb/entry/experiment"
sifts = "/mappings"

We can make GET requests simply by doing:


In [11]:
# pretty=True asks the server for indented, human-readable JSON.
print(get_request(experiment, "1cbs", pretty=True))


{
    "1cbs": [
        {
            "resolution_low": 8,
            "r_factor": 0.2,
            "r_free_selection_details": null,
            "resolution_high": 1.8,
            "r_free_percent_reflections": null,
            "starting_model": null,
            "refinement_software": "X-PLOR                  ",
            "completeness": 90.54,
            "r_free": 0.237,
            "cell": {
                "a": 45.65,
                "c": 77.61,
                "b": 47.56,
                "beta": 90,
                "alpha": 90,
                "gamma": 90
            },
            "percent_reflections_observed": 90.3,
            "diffraction_experiment": [
                {
                    "wavelength_list": null,
                    "detector_details": null,
                    "beam_source_type": null,
                    "detector_type": null,
                    "synchrotron_beamline": null,
                    "source_details": null,
                    "synchrotron_site": null,
                    "diffraction_protocol": null,
                    "ambient_temp": null,
                    "wavelength": null,
                    "detector": null,
                    "beam_source_name": null
                }
            ],
            "expression_host_scientific_name": [
                {
                    "scientific_name": "Escherichia coli BL21(DE3)",
                    "tax_id": 469008
                }
            ],
            "crystal_growth": [
                {
                    "grow_details": null,
                    "grow_ph": null,
                    "grow_method": null,
                    "grow_temperature": null
                }
            ],
            "experimental_method": "X-ray diffraction",
            "num_reflections": 14678,
            "phasing_method": null,
            "experiment_data_available": "Y",
            "experimental_method_class": "x-ray",
            "r_work": 0.2,
            "spacegroup": "P 21 21 21",
            "resolution": 1.8,
            "structure_determination_method": null
        }
    ]
}

POST requests passing a string:


In [4]:
# Several ids in one POST body, given as a single comma-separated string.
print(post_request(summary, "1cbs, 1otz, 2ktn", pretty=True))


{
    "1cbs": [
        {
            "experimental_method": [
                "X-ray diffraction"
            ],
            "assemblies": [
                {
                    "preferred": true,
                    "form": "homo",
                    "name": "monomeric",
                    "assembly_id": "1"
                }
            ],
            "title": "CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID AND A SYNTHETIC RETINOID",
            "release_date": "19950126",
            "split_entry": null,
            "experimental_method_class": [
                "x-ray"
            ],
            "revision_date": "20090224",
            "entry_authors": [
                "Bergfors, T.",
                "Jones, T.A.",
                "Kleywegt, G.J."
            ],
            "deposition_site": null,
            "number_of_entities": {
                "water": 1,
                "polypeptide": 1,
                "other": 0,
                "dna": 0,
                "ligand": 1,
                "dna/rna": 0,
                "rna": 0,
                "sugar": 0
            },
            "deposition_date": "19940928",
            "processing_site": null
        }
    ],
    "2ktn": [
        {
            "experimental_method": [
                "Solution NMR"
            ],
            "assemblies": [
                {
                    "preferred": true,
                    "form": "homo",
                    "name": "monomeric",
                    "assembly_id": "1"
                }
            ],
            "title": "Spatial structure of Lch-alpha peptide from two-component lantibiotic system Lichenicidin VK21",
            "release_date": "20100721",
            "split_entry": null,
            "experimental_method_class": [
                "nmr"
            ],
            "revision_date": "20110810",
            "entry_authors": [
                "Arseniev, A.S.",
                "Mineev, K.S.",
                "Ovchinnikova, T.V.",
                "Shenkarev, Z.O."
            ],
            "deposition_site": "BMRB",
            "number_of_entities": {
                "water": 0,
                "polypeptide": 1,
                "other": 0,
                "dna": 0,
                "ligand": 0,
                "dna/rna": 0,
                "rna": 0,
                "sugar": 0
            },
            "deposition_date": "20100205",
            "processing_site": "RCSB"
        }
    ]
}

A list:


In [ ]:
# post_request joins the list items with commas before sending them.
print(post_request(summary, ["1cbs", "1otz", "2ktn"], pretty=True))

Or a tuple:


In [6]:
# Compact (non-pretty) response; kept in `response` so it can be parsed later.
response = post_request(summary, ("1cbs", "1otz", "2ktn"), pretty=False)
print(response)


{"1cbs":[{"experimental_method":["X-ray diffraction"],"assemblies":[{"preferred":true,"form":"homo","name":"monomeric","assembly_id":"1"}],"title":"CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID AND A SYNTHETIC RETINOID","release_date":"19950126","split_entry":null,"experimental_method_class":["x-ray"],"revision_date":"20090224","entry_authors":["Bergfors, T.","Jones, T.A.","Kleywegt, G.J."],"deposition_site":null,"number_of_entities":{"water":1,"polypeptide":1,"other":0,"dna":0,"ligand":1,"dna/rna":0,"rna":0,"sugar":0},"deposition_date":"19940928","processing_site":null}],"2ktn":[{"experimental_method":["Solution NMR"],"assemblies":[{"preferred":true,"form":"homo","name":"monomeric","assembly_id":"1"}],"title":"Spatial structure of Lch-alpha peptide from two-component lantibiotic system Lichenicidin VK21","release_date":"20100721","split_entry":null,"experimental_method_class":["nmr"],"revision_date":"20110810","entry_authors":["Arseniev, A.S.","Mineev, K.S.","Ovchinnikova, T.V.","Shenkarev, Z.O."],"deposition_site":"BMRB","number_of_entities":{"water":0,"polypeptide":1,"other":0,"dna":0,"ligand":0,"dna/rna":0,"rna":0,"sugar":0},"deposition_date":"20100205","processing_site":"RCSB"}]}

Please note that we have not used the pretty=True argument in the last request. This is the best way to access the REST API programmatically because, by eliminating all the whitespace, you save bandwidth and, therefore, time.

The response is a string that can be parsed into a Python dictionary using the json module:


In [7]:
import json
# Parse the JSON text held in `response` into Python objects.
entries = json.loads(response)

And now we can access the elements individually:


In [ ]:
# Each PDB id key maps to a list of records; [0] selects the first record.
print(entries["1cbs"][0]["experimental_method"])
print(entries["1cbs"][0]["entry_authors"])

In [12]:
# Bare expression: the notebook displays its value as the cell's output.
entries["1cbs"]


Out[12]:
[{u'assemblies': [{u'assembly_id': u'1',
    u'form': u'homo',
    u'name': u'monomeric',
    u'preferred': True}],
  u'deposition_date': u'19940928',
  u'deposition_site': None,
  u'entry_authors': [u'Bergfors, T.', u'Jones, T.A.', u'Kleywegt, G.J.'],
  u'experimental_method': [u'X-ray diffraction'],
  u'experimental_method_class': [u'x-ray'],
  u'number_of_entities': {u'dna': 0,
   u'dna/rna': 0,
   u'ligand': 1,
   u'other': 0,
   u'polypeptide': 1,
   u'rna': 0,
   u'sugar': 0,
   u'water': 1},
  u'processing_site': None,
  u'release_date': u'19950126',
  u'revision_date': u'20090224',
  u'split_entry': None,
  u'title': u'CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID AND A SYNTHETIC RETINOID'}]

In [10]:
# Print the record list stored under each returned PDB id.
# print() with a single argument behaves the same on Python 2 and 3, whereas
# the bare `print x` statement is a SyntaxError on Python 3.
for k in entries:
    print(entries[k])


[{u'split_entry': None, u'title': u'CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID AND A SYNTHETIC RETINOID', u'release_date': u'19950126', u'experimental_method': [u'X-ray diffraction'], u'experimental_method_class': [u'x-ray'], u'revision_date': u'20090224', u'entry_authors': [u'Bergfors, T.', u'Jones, T.A.', u'Kleywegt, G.J.'], u'deposition_site': None, u'number_of_entities': {u'polypeptide': 1, u'dna': 0, u'ligand': 1, u'dna/rna': 0, u'rna': 0, u'sugar': 0, u'water': 1, u'other': 0}, u'processing_site': None, u'deposition_date': u'19940928', u'assemblies': [{u'assembly_id': u'1', u'form': u'homo', u'preferred': True, u'name': u'monomeric'}]}]
[{u'split_entry': None, u'title': u'Spatial structure of Lch-alpha peptide from two-component lantibiotic system Lichenicidin VK21', u'release_date': u'20100721', u'experimental_method': [u'Solution NMR'], u'experimental_method_class': [u'nmr'], u'revision_date': u'20110810', u'entry_authors': [u'Arseniev, A.S.', u'Mineev, K.S.', u'Ovchinnikova, T.V.', u'Shenkarev, Z.O.'], u'deposition_site': u'BMRB', u'number_of_entities': {u'polypeptide': 1, u'dna': 0, u'ligand': 0, u'dna/rna': 0, u'rna': 0, u'sugar': 0, u'water': 0, u'other': 0}, u'processing_site': u'RCSB', u'deposition_date': u'20100205', u'assemblies': [{u'assembly_id': u'1', u'form': u'homo', u'preferred': True, u'name': u'monomeric'}]}]

In [ ]: