Exploring CSW access in Python using OWSLib


In [1]:
from owslib.csw import CatalogueServiceWeb
from owslib.etree import etree

In [2]:
def service_urls(records,service_string=None):
    """Collect one service URL per record for the requested reference scheme.

    Parameters
    ----------
    records : mapping
        Mapping of record identifier -> record object. Each record must expose
        a ``references`` iterable of dicts carrying ``'scheme'`` and ``'url'``
        keys (this notebook passes ``csw.records``).
    service_string : str, optional
        Value compared for equality against each reference's ``'scheme'``.

    Returns
    -------
    list
        The ``'url'`` of the first matching reference of every record that has
        one, in the iteration order of ``records``. Records with no matching
        reference (or whose matching ``'url'`` is None) contribute nothing.
    """
    urls = []
    # .values() exists on both Python 2 and Python 3 mappings, whereas
    # iteritems() is Python 2 only; the record keys are not needed here.
    for rec in records.values():
        # Generator + next(): stop at the first reference with the wanted
        # scheme, falling back to None when the record has no such reference.
        url = next((ref['url'] for ref in rec.references if ref['scheme'] == service_string), None)
        if url is not None:
            urls.append(url)
    return urls

1. Search NODC Geoportal for NCOM


In [3]:
# Connect to the NODC Geoportal catalogue (granule-level records) over CSW.
endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'

csw = CatalogueServiceWeb(
    url=endpoint,
    lang='en-US',
    version='2.0.2',
    timeout=30,
)
# Display the CSW version held by the client object as the cell output.
csw.version


Out[3]:
'2.0.2'

In [4]:
# Bounding box handed to GetRecords as the spatial filter.
# Alternative, larger box kept for reference (disabled): [-141, 42, -52, 84]
bbox = [-71.5, 39.5, -63.0, 46]

# Ask the catalogue for up to 10 full records matching the 'NCOM' keyword
# within bbox.
csw.getrecords(
    keywords=['NCOM'],
    bbox=bbox,
    maxrecords=10,
    esn='full',
)
# Display the result summary of the request as the cell output.
csw.results


Out[4]:
{'matches': 2, 'nextrecord': 0, 'returned': 2}

In [5]:
# Print the title of every record returned by the search.
# .values() and print(...) run on both Python 2 and Python 3; the record keys
# are not used, so only the record objects are iterated.
for item in csw.records.values():
    print(item.title)


NCOM Region 1 Aggregation/Best Time Series
Clean Catalog for NCOM forecast models/NCOM Region 1 Aggregation/NCOM Region 1 Best Time Series

In [6]:
# Inspect the service references attached to the first returned record.
# list(...)[0] works on both Python 2 and Python 3; indexing dict.keys()
# directly fails on Python 3, where keys() returns a non-indexable view.
sample_rec = list(csw.records.values())[0]
sample_rec.references


Out[6]:
[{'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document',
  'url': 'http://www.nodc.noaa.gov/geoportal/csw?getxml=%7B99CB524F-EA88-49F7-8C4C-7BB5077E052A%7D'},
 {'scheme': 'urn:x-esri:specification:ServiceType:OPeNDAP',
  'url': 'http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd'},
 {'scheme': 'urn:x-esri:specification:ServiceType:WCS',
  'url': 'http://ecowatch.ncddc.noaa.gov/thredds/wcs/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd?service=WCS&version=1.0.0&request=GetCapabilities'},
 {'scheme': 'urn:x-esri:specification:ServiceType:WCT',
  'url': 'http://www.ncdc.noaa.gov/oa/wct/wct-jnlp-beta.php?singlefile=http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd'}]

In [7]:
# Collect the URL registered under the ESRI OPeNDAP service-type scheme for
# each returned record.
dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:OPeNDAP')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(dap_urls)


['http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd']

2. Search NGDC Geoportal for NCOM


In [8]:
# Connect to the NGDC Geoportal catalogue over CSW.
endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw'

csw = CatalogueServiceWeb(
    url=endpoint,
    lang='en-US',
    version='2.0.2',
    timeout=30,
)
# Display the CSW version held by the client object as the cell output.
csw.version


Out[8]:
'2.0.2'

In [9]:
# Bounding box handed to GetRecords as the spatial filter.
# Alternative, larger box kept for reference (disabled): [-141, 42, -52, 84]
bbox = [-71.5, 39.5, -63.0, 46]

# Ask the catalogue for up to 10 full records matching the 'NCOM' keyword
# within bbox.
csw.getrecords(
    keywords=['NCOM'],
    bbox=bbox,
    maxrecords=10,
    esn='full',
)
# Display the result summary of the request as the cell output.
csw.results


Out[9]:
{'matches': 1, 'nextrecord': 0, 'returned': 1}

In [10]:
# Print the title of every record returned by the search.
# .values() and print(...) run on both Python 2 and Python 3; the record keys
# are not used, so only the record objects are iterated.
for item in csw.records.values():
    print(item.title)


NCOM Region 1 Aggregation/Best Time Series

In [11]:
# Inspect the service references attached to the first returned record.
# list(...)[0] works on both Python 2 and Python 3; indexing dict.keys()
# directly fails on Python 3, where keys() returns a non-indexable view.
sample_rec = list(csw.records.values())[0]
sample_rec.references


Out[11]:
[{'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink',
  'url': 'http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd.html'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document',
  'url': 'http://www.ngdc.noaa.gov/geoportal/csw?getxml=%7B9F1F1A05-13B5-4777-8470-9D4F77FE14E8%7D'},
 {'scheme': 'urn:x-esri:specification:ServiceType:wms:url',
  'url': 'http://ecowatch.ncddc.noaa.gov/thredds/wms/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd?service=WMS&version=1.3.0&request=GetCapabilities'},
 {'scheme': 'urn:x-esri:specification:ServiceType:wct:url',
  'url': 'http://www.ncdc.noaa.gov/oa/wct/wct-jnlp-beta.php?singlefile=http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd'},
 {'scheme': 'urn:x-esri:specification:ServiceType:odp:url',
  'url': 'http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd'},
 {'scheme': 'urn:x-esri:specification:ServiceType:download:url',
  'url': 'http://www.ncdc.noaa.gov/oa/wct/wct-jnlp-beta.php?singlefile=http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd'}]

In [12]:
# Collect the URL registered under the 'odp:url' service-type scheme for each
# returned record (the scheme under which the sample record above lists its
# THREDDS dodsC URL).
dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:odp:url')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(dap_urls)


['http://ecowatch.ncddc.noaa.gov/thredds/dodsC/ncom/ncom_reg1_agg/NCOM_Region_1_Aggregation_best.ncd']