Exploring CSW access in Python


In [4]:
from owslib.csw import CatalogueServiceWeb
from owslib.ows import ExceptionReport

In [10]:
# Connect to a CSW endpoint and explore its properties.
#
# Known CSW endpoints, keyed by a short label. Change ENDPOINT_KEY to switch
# catalogues instead of commenting/uncommenting URL assignments.
CSW_ENDPOINTS = {
    'ngdc': 'http://www.ngdc.noaa.gov/geoportal/csw',                 # NGDC Geoportal
    'nodc_granule': 'http://www.nodc.noaa.gov/geoportal/csw',         # NODC Geoportal: granule level
    'nodc_collection': 'http://data.nodc.noaa.gov/geoportal/csw',     # NODC Geoportal: collection level
    'nrcan': 'http://geodiscover.cgdi.ca/wes/serviceManagerCSW/csw',  # NRCAN CUSTOM
    'whoi_gicat': 'http://geoport.whoi.edu/gi-cat/services/cswiso',   # USGS Woods Hole GI_CAT
    'cida': 'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw',         # USGS CIDA Geonetwork
    'whoi_pycsw': 'http://geoport.whoi.edu/pycsw',
}
ENDPOINT_KEY = 'whoi_pycsw'

endpoint = CSW_ENDPOINTS[ENDPOINT_KEY]
# NOTE(review): the saved output of this cell is an XMLSyntaxError, which
# suggests the response from this endpoint could not be parsed as XML --
# confirm the URL is still a live CSW service before relying on later cells.
csw = CatalogueServiceWeb(endpoint)
csw.version


  File "<string>", line unknown
XMLSyntaxError: Space required after the Public Identifier, line 1, column 52

In [3]:
# List the names of the operations exposed by `csw.operations`.
[op.name for op in csw.operations]


Out[3]:
['GetCapabilities',
 'DescribeRecord',
 'GetDomain',
 'GetRecords',
 'GetRecordById',
 'Transaction']

In [4]:
# Search the catalogue for up to 20 records matching a keyword inside a
# bounding box, then show the match/returned counts.
# NOTE(review): bbox is presumably [minx, miny, maxx, maxy] in degrees --
# confirm against the OWSLib getrecords documentation.
bbox = [-141, 42, -52, 84]
# Alternative, smaller box:
# bbox = [-71.5, 39.5, -63.0, 46]
search_keywords = ['sea_water_temperature']
csw.getrecords(keywords=search_keywords, bbox=bbox, maxrecords=20)
# Same keyword search without the spatial filter:
# csw.getrecords(keywords=['sea_water_temperature'], maxrecords=20)
csw.results


Out[4]:
{'matches': 0, 'nextrecord': 0, 'returned': 0}

In [7]:
# Print the key and abstract of every record held in `csw.records`.
# `.items()` and the parenthesised single-argument `print(...)` give the same
# output on Python 2 and also run on Python 3, unlike `.iteritems()` and the
# bare `print` statement.
for rec, item in csw.records.items():
    print(rec)
    print(item.abstract)

In [58]:
# Pick a single record out of `csw.records` by its identifier.
# NOTE(review): the saved XML for this record references the NGDC Geoportal,
# so this identifier presumably came from a session using that endpoint; it
# may not exist in the currently selected catalogue -- confirm.
record_id = 'data/oceansites/DATA/STATION-M/OS_STATION-M-1_194810_D_CTD.nc'
a = csw.records[record_id]

In [60]:
# Show the raw XML of the selected record.
# print(...) with one argument prints the same text on Python 2 and also
# runs on Python 3, where the bare print statement is a SyntaxError.
print(a.xml)


<csw:SummaryRecord xmlns:csw="http://www.opengis.net/cat/csw/2.0.2" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcmiBox="http://dublincore.org/documents/2000/07/11/dcmi-box/" xmlns:dct="http://purl.org/dc/terms/" xmlns:gml="http://www.opengis.net/gml" xmlns:ows="http://www.opengis.net/ows" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<dc:identifier scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:FileID">data/oceansites/DATA/STATION-M/OS_STATION-M-1_194810_D_CTD.nc</dc:identifier>
<dc:identifier scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:DocID">{1DB52543-50EF-471E-BBFE-A5A87C42EC42}</dc:identifier>
<dc:title>OceanSITES STATION-M in-situ data</dc:title>
<dc:type scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:ContentType">downloadableData</dc:type>
<dc:type scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:ContentType">liveData</dc:type>
<dc:subject>sea_water_temperature</dc:subject>
<dc:subject>sea_water_salinity</dc:subject>
<dc:subject>depth</dc:subject>
<dc:subject>time</dc:subject>
<dc:subject>depth</dc:subject>
<dc:subject>latitude</dc:subject>
<dc:subject>longitude</dc:subject>
<dc:subject>climatologyMeteorologyAtmosphere</dc:subject>
<dct:modified>2013-03-16T02:45:29-06:00</dct:modified>
<dct:abstract>EuroSITES European Ocean Observatory NetworkEU Framework 7 collaborative project contract FP7-ENV-2007-1-202955</dct:abstract>
<dct:abstract>EuroSITES European Ocean Observatory NetworkEU Framework 7 collaborative project contract FP7-ENV-2007-1-202955</dct:abstract>
<dct:references scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink">http://dods.ndbc.noaa.gov/thredds/dodsC/data/oceansites/DATA/STATION-M/OS_STATION-M-1_194810_D_CTD.nc.html</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document">http://www.ngdc.noaa.gov/geoportal/csw?getxml=%7B1DB52543-50EF-471E-BBFE-A5A87C42EC42%7D</dct:references>
<ows:WGS84BoundingBox>
<ows:LowerCorner>-358.2666666507721 66.0</ows:LowerCorner>
<ows:UpperCorner>2.049999952316284 66.16666412353516</ows:UpperCorner>
</ows:WGS84BoundingBox>
<ows:BoundingBox>
<ows:LowerCorner>-358.2666666507721 66.0</ows:LowerCorner>
<ows:UpperCorner>2.049999952316284 66.16666412353516</ows:UpperCorner>
</ows:BoundingBox>
<dc:date>2009-11-01Z</dc:date>
</csw:SummaryRecord>


In [9]:
# get supported result types
csw.getdomain('GetRecords.resultType')
csw.results


---------------------------------------------------------------------------
ExceptionReport                           Traceback (most recent call last)
<ipython-input-9-0d430bef627f> in <module>()
      1 # get supported result types
----> 2 csw.getdomain('GetRecords.resultType')
      3 csw.results

/home/rsignell/epd-7.2-1/lib/python2.7/site-packages/owslib/csw.py in getdomain(self, dname, dtype)
    153         self.request = util.xml2string(etree.tostring(node0))
    154 
--> 155         self._invoke()
    156 
    157         if self.exceptionreport is None:

/home/rsignell/epd-7.2-1/lib/python2.7/site-packages/owslib/csw.py in _invoke(self)
    494         val = self._exml.find(util.nspath_eval('ows:Exception', namespaces))
    495         if val is not None:
--> 496             raise ows.ExceptionReport(self._exml, self.owscommon.namespace)
    497         else:
    498             self.exceptionreport = None

ExceptionReport: 'Not a valid request: GetDomain Valid requests are: GetCapabilities GetRecords GetRecordsSimple DescribeRecord GetRecordById Transaction Harvest GetResource'