In [1]:
import json
import pprint
import functools
import xmltodict
import requests
In [2]:
# Shared pretty-printer for inspecting the nested metadata dicts below:
# narrow (60 columns), no extra indent, and packing short items onto one line.
pp = pprint.PrettyPrinter(
    compact=True,
    width=60,
    indent=0,
)
URL for the community on Zenodo, which in this case is lsst-dm. Note that we're using the oai_datacite3 metadata format.
In [3]:
# OAI-PMH ListRecords request for the lsst-dm community set, asking for
# records in the oai_datacite3 metadata format.
url = (
    'http://zenodo.org/oai2d'
    '?verb=ListRecords'
    '&metadataPrefix=oai_datacite3'
    '&set=user-lsst-dm'
)
Now we get the collection's XML metadata in oai_datacite3 format.
We use xmltodict to parse into a Python dict structure. Note that process_namespaces=False is preferred since the full namespace information is unnecessary for us.
In [4]:
# Fetch the ListRecords response for the collection.
# The timeout keeps this cell from hanging indefinitely if the server stalls.
r = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of handing an error page to the XML
# parser in the next cell (a bare `r.status_code` mid-cell checks nothing).
r.raise_for_status()
# Raw response body (bytes), parsed by xmltodict below.
xml_content = r.content
In [5]:
# Parse the XML payload into nested dicts. Namespace processing is left off,
# so keys keep the tag names as written in the document.
dataset = xmltodict.parse(
    xml_content,
    process_namespaces=False,
)
The XML contains several hierarchical levels of metadata about the metadata spec. Three levels down we find a list of records.
In [6]:
# Drill down to the record entries of the ListRecords response.
records = dataset['OAI-PMH']['ListRecords']['record']
# xmltodict only builds a list for repeated elements; a response holding a
# single <record> yields one dict, so wrap it to keep `records` a list.
if not isinstance(records, list):
    records = [records]
records is a list; each item is an OrderedDict containing metadata about that Zenodo upload. Here we'll look at the first one.
In [7]:
# Keep the first record around as the worked example for the cells below.
record = records[0]
# Dump the whole nested structure with the notebook's shared pretty-printer.
pp.pprint(record)
OrderedDict([('header',
OrderedDict([('identifier',
'oai:zenodo.org:33711'),
('datestamp',
'2015-11-23T23:29:09Z'),
('setSpec', 'user-lsst-dm')])),
('metadata',
OrderedDict([('oai_datacite',
OrderedDict([('@xmlns:xsi',
'http://www.w3.org/2001/XMLSchema-instance'),
('@xsi:schemaLocation',
'http://schema.datacite.org/oai/oai-1.0/ '
'http://schema.datacite.org/oai/oai-1.0/oai.xsd'),
('isReferenceQuality',
'true'),
('schemaVersion',
'3.1'),
('datacentreSymbol',
'CERN.ZENODO'),
('payload',
OrderedDict([('resource',
OrderedDict([('@xmlns',
'http://datacite.org/schema/kernel-3'),
('@xsi:schemaLocation',
'http://datacite.org/schema/kernel-3 '
'http://schema.datacite.org/meta/kernel-3/metadata.xsd'),
('identifier',
OrderedDict([('@identifierType',
'DOI'),
('#text',
'10.5281/zenodo.33711')])),
('creators',
OrderedDict([('creator',
[OrderedDict([('creatorName',
'Frossie '
'Economou'),
('affiliation',
'LSST')]),
OrderedDict([('creatorName',
'JMatt '
'Peterson'),
('affiliation',
'LSST')]),
OrderedDict([('creatorName',
'Joshua '
'Hoblitt'),
('affiliation',
'LSST')]),
OrderedDict([('creatorName',
'Jonathan '
'Sick'),
('affiliation',
'LSST')])])])),
('titles',
OrderedDict([('title',
'SQR-001 '
'v1.0')])),
('publisher',
'Zenodo'),
('publicationYear',
'2015'),
('subjects',
OrderedDict([('subject',
['lsst',
'lsst-sqr'])])),
('contributors',
None),
('dates',
OrderedDict([('date',
OrderedDict([('@dateType',
'Issued'),
('#text',
'2015-11-05')]))])),
('resourceType',
OrderedDict([('@resourceTypeGeneral',
'Text'),
('#text',
'technicalnote')])),
('alternateIdentifiers',
OrderedDict([('alternateIdentifier',
OrderedDict([('@alternateIdentifierType',
'URL'),
('#text',
'http://zenodo.org/record/33711')]))])),
('relatedIdentifiers',
OrderedDict([('relatedIdentifier',
[OrderedDict([('@relationType',
'IsSupplementTo'),
('@relatedIdentifierType',
'URL'),
('#text',
'https://github.com/lsst-sqre/technote-001/tree/v1.0')]),
OrderedDict([('@relationType',
'Compiles'),
('@relatedIdentifierType',
'URL'),
('#text',
'http://sqr-001.lsst.io')]),
OrderedDict([('@relationType',
'HasPart'),
('@relatedIdentifierType',
'URL'),
('#text',
'https://zenodo.org/record/33711/files/technote-001-v1.0.zip')])])])),
('rightsList',
OrderedDict([('rights',
[OrderedDict([('@rightsURI',
'info:eu-repo/semantics/openAccess'),
('#text',
'Open '
'Access')]),
OrderedDict([('@rightsURI',
'http://creativecommons.org/licenses/by/4.0/'),
('#text',
'Creative '
'Commons '
'Attribution '
'4.0 '
'International')])])])),
('descriptions',
OrderedDict([('description',
OrderedDict([('@descriptionType',
'Abstract'),
('#text',
'<p>SQR-001: '
'Git '
'LFS '
'Architecture '
'Note version '
'1.0 '
'release</p>')]))]))]))]))]))]))])
In [8]:
# Walk down from the record to the nested 'resource' entry and display it
# as the cell's output.
record['metadata']['oai_datacite']['payload']['resource']
Out[8]:
OrderedDict([('@xmlns', 'http://datacite.org/schema/kernel-3'),
('@xsi:schemaLocation',
'http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd'),
('identifier',
OrderedDict([('@identifierType', 'DOI'),
('#text', '10.5281/zenodo.33711')])),
('creators',
OrderedDict([('creator',
[OrderedDict([('creatorName', 'Frossie Economou'),
('affiliation', 'LSST')]),
OrderedDict([('creatorName', 'JMatt Peterson'),
('affiliation', 'LSST')]),
OrderedDict([('creatorName', 'Joshua Hoblitt'),
('affiliation', 'LSST')]),
OrderedDict([('creatorName', 'Jonathan Sick'),
('affiliation', 'LSST')])])])),
('titles', OrderedDict([('title', 'SQR-001 v1.0')])),
('publisher', 'Zenodo'),
('publicationYear', '2015'),
('subjects', OrderedDict([('subject', ['lsst', 'lsst-sqr'])])),
('contributors', None),
('dates',
OrderedDict([('date',
OrderedDict([('@dateType', 'Issued'),
('#text', '2015-11-05')]))])),
('resourceType',
OrderedDict([('@resourceTypeGeneral', 'Text'),
('#text', 'technicalnote')])),
('alternateIdentifiers',
OrderedDict([('alternateIdentifier',
OrderedDict([('@alternateIdentifierType', 'URL'),
('#text',
'http://zenodo.org/record/33711')]))])),
('relatedIdentifiers',
OrderedDict([('relatedIdentifier',
[OrderedDict([('@relationType', 'IsSupplementTo'),
('@relatedIdentifierType', 'URL'),
('#text',
'https://github.com/lsst-sqre/technote-001/tree/v1.0')]),
OrderedDict([('@relationType', 'Compiles'),
('@relatedIdentifierType', 'URL'),
('#text',
'http://sqr-001.lsst.io')]),
OrderedDict([('@relationType', 'HasPart'),
('@relatedIdentifierType', 'URL'),
('#text',
'https://zenodo.org/record/33711/files/technote-001-v1.0.zip')])])])),
('rightsList',
OrderedDict([('rights',
[OrderedDict([('@rightsURI',
'info:eu-repo/semantics/openAccess'),
('#text', 'Open Access')]),
OrderedDict([('@rightsURI',
'http://creativecommons.org/licenses/by/4.0/'),
('#text',
'Creative Commons Attribution 4.0 International')])])])),
('descriptions',
OrderedDict([('description',
OrderedDict([('@descriptionType', 'Abstract'),
('#text',
'<p>SQR-001: Git LFS Architecture Note version 1.0 release</p>')]))]))])
In [ ]:
Content source: compaas/compaas
Similar notebooks: