Compare CSW at NODC and DATA.GOV: search on "gamssa"

Search both the "collection" and "granule" CSW endpoints of each catalog.


In [1]:
import random

from owslib import fes
from owslib.csw import CatalogueServiceWeb

In [2]:
# Text-only query: match the search term anywhere in a record (apiso:AnyText)
val = 'gamssa'
filter_list = [
    fes.PropertyIsLike(
        propertyname='apiso:AnyText',
        literal=('*%s*' % val),  # wrap the term in wildcards for a substring match
        escapeChar='\\',
        wildCard='*',
        singleChar='?',
    )
]

In [3]:
def service_urls(records,service_string='urn:x-esri:specification:ServiceType:odp:url'):
    """
    Collect one URL per record for a specific ServiceType.

    For every record, the references are scanned in order and the URL of the
    first reference whose 'scheme' equals ``service_string`` is kept. Records
    without a matching reference contribute nothing.

    Unfortunately the scheme strings seem to differ between different
    CSW-ISO services. For example, OpenDAP is specified:
    NODC geoportal: 'urn:x-esri:specification:ServiceType:OPeNDAP'
    NGDC geoportal: 'urn:x-esri:specification:ServiceType:odp:url'

    Parameters
    ----------
    records : mapping
        Mapping of record identifier -> record object; each record must
        expose a ``references`` iterable of dicts with 'scheme' and 'url' keys.
    service_string : str
        Scheme value to match exactly.

    Returns
    -------
    list
        Matching URLs, in the iteration order of ``records``.
    """
    urls = []
    # Only the record objects are needed; .values() works on both Python 2
    # and Python 3 mappings (iteritems() exists on Python 2 only).
    for rec in records.values():
        # Lazily scan the references and stop at the first match;
        # next() falls back to None when nothing matches.
        url = next((ref['url'] for ref in rec.references if ref['scheme'] == service_string), None)
        if url is not None:
            urls.append(url)
    return urls

Search NODC CSW (Collections)


In [4]:
# Connect to the NODC geoportal CSW and report the CSW version it speaks
endpoint = 'http://data.nodc.noaa.gov/geoportal/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)
# Parenthesised form prints the same text on Python 2 and Python 3
print(csw.version)


2.0.2

In [5]:
# Show which ISO queryables the GetRecords operation advertises
for oper in csw.operations:
    if oper.name == 'GetRecords':
        # Single concatenated string so the output is identical on Python 2 and 3
        print('\nISO Queryables:\n' + str(oper.constraints['SupportedISOQueryables']['values']))


ISO Queryables:
['apiso:Subject', 'apiso:Title', 'apiso:Abstract', 'apiso:AnyText', 'apiso:Format', 'apiso:Identifier', 'apiso:Modified', 'apiso:Type', 'apiso:BoundingBox', 'apiso:CRS.Authority', 'apiso:CRS.ID', 'apiso:CRS.Version', 'apiso:RevisionDate', 'apiso:AlternateTitle', 'apiso:CreationDate', 'apiso:PublicationDate', 'apiso:OrganizationName', 'apiso:HasSecurityConstraints', 'apiso:Language', 'apiso:ResourceIdentifier', 'apiso:ParentIdentifier', 'apiso:KeywordType', 'apiso:TopicCategory', 'apiso:ResourceLanguage', 'apiso:GeographicDescriptionCode', 'apiso:Denominator', 'apiso:DistanceValue', 'apiso:DistanceUOM', 'apiso:TempExtent_begin', 'apiso:TempExtent_end', 'apiso:ServiceType', 'apiso:ServiceTypeVersion', 'apiso:Operation', 'apiso:OperatesOn', 'apiso:OperatesOnIdentifier', 'apiso:OperatesOnName', 'apiso:CouplingType']

In [6]:
# Run the text filter against this endpoint, then count the records returned
csw.getrecords2(
    constraints=filter_list,
    maxrecords=10000,
    esn='full',
)
len(csw.records)


Out[6]:
2

In [7]:
# Pick one record identifier at random and list that record's references.
# NOTE: no seed is set, so the chosen record differs from run to run.
choice = random.choice(list(csw.records))
# Parenthesised form prints the same text on Python 2 and Python 3
print(choice)
csw.records[choice].references


ABOM-L4HRfnd-AUS-RAMSSA_09km
Out[7]:
[{'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink',
  'url': 'http://data.nodc.noaa.gov/ghrsst/L4/AUS/ABOM/RAMSSA_09km/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Thumbnail',
  'url': 'http://data.nodc.noaa.gov/nodc/archive/metadata/approved/graphics/GHRSST_ISO_all-ABOM-L4HRfnd-AUS-v01-RAMSSA_09km.jpg'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Thumbnail',
  'url': 'http://data.nodc.noaa.gov/nodc/archive/metadata/approved/graphics/GHRSST_ISO_all-ABOM-L4HRfnd-AUS-v01-RAMSSA_09km.jpg'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document',
  'url': 'http://data.nodc.noaa.gov/geoportal/csw?getxml=%7B1A6FBA00-26E2-429D-A015-613C348E47E6%7D'},
 {'scheme': 'urn:x-esri:specification:ServiceType:TDS',
  'url': 'http://data.nodc.noaa.gov/thredds/catalog/ghrsst/L4/AUS/ABOM/RAMSSA_09km/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:FTP',
  'url': 'ftp://ftp.nodc.noaa.gov/pub/data.nodc/ghrsst/L4/GLOB/ABOM/GAMSSA_28km/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:OPeNDAP',
  'url': 'http://data.nodc.noaa.gov/opendap/ghrsst/L4/AUS/ABOM/RAMSSA_09km/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:LAS',
  'url': 'http://data.nodc.noaa.gov/las/getUI.do?dsid=id-b75979a55f&varid=analysed_sst-id-b75979a55f&auto=true'},
 {'scheme': 'urn:x-esri:specification:ServiceType:Download',
  'url': 'http://data.nodc.noaa.gov/ghrsst/L4/AUS/ABOM/RAMSSA_09km/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:GRANULE',
  'url': 'http://www.nodc.noaa.gov/geoportal/rest/find/document?searchText=fileIdentifier%3AABOM-L4HRfnd-AUS-RAMSSA_09km*&start=1&max=100&f=searchPage'},
 {'scheme': 'urn:x-esri:specification:ServiceType:GRANULE',
  'url': 'http://www.nodc.noaa.gov/geoportal/rest/find/document?searchText=fileIdentifier%3AABOM-L4HRfnd-AUS-RAMSSA_09km*&start=1&max=100&f=searchPage'}]

In [8]:
# Gather the first WMS link of every record and count how many were found
wms_urls = service_urls(
    csw.records,
    service_string='urn:x-esri:specification:ServiceType:WMS',
)
len(wms_urls)


Out[8]:
0

Search NODC CSW (Granules)


In [9]:
# Connect to the NODC granule-level CSW and display the CSW version it speaks
endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'
csw = CatalogueServiceWeb(
    endpoint,
    timeout=60,
)
csw.version


Out[9]:
'2.0.2'

In [10]:
# Show which ISO queryables the GetRecords operation advertises
for oper in csw.operations:
    if oper.name == 'GetRecords':
        # Single concatenated string so the output is identical on Python 2 and 3
        print('\nISO Queryables:\n' + str(oper.constraints['SupportedISOQueryables']['values']))


ISO Queryables:
['apiso:Subject', 'apiso:Title', 'apiso:Abstract', 'apiso:AnyText', 'apiso:Format', 'apiso:Identifier', 'apiso:Modified', 'apiso:Type', 'apiso:BoundingBox', 'apiso:CRS.Authority', 'apiso:CRS.ID', 'apiso:CRS.Version', 'apiso:RevisionDate', 'apiso:AlternateTitle', 'apiso:CreationDate', 'apiso:PublicationDate', 'apiso:OrganizationName', 'apiso:HasSecurityConstraints', 'apiso:Language', 'apiso:ResourceIdentifier', 'apiso:ParentIdentifier', 'apiso:KeywordType', 'apiso:TopicCategory', 'apiso:ResourceLanguage', 'apiso:GeographicDescriptionCode', 'apiso:Denominator', 'apiso:DistanceValue', 'apiso:DistanceUOM', 'apiso:TempExtent_begin', 'apiso:TempExtent_end', 'apiso:ServiceType', 'apiso:ServiceTypeVersion', 'apiso:Operation', 'apiso:OperatesOn', 'apiso:OperatesOnIdentifier', 'apiso:OperatesOnName', 'apiso:CouplingType']

In [11]:
# Run the text filter against this endpoint, then count the records returned
csw.getrecords2(
    constraints=filter_list,
    maxrecords=10000,
    esn='full',
)
len(csw.records)


Out[11]:
1786

In [12]:
# Pick one record identifier at random and list that record's references.
# NOTE: no seed is set, so the chosen record differs from run to run.
choice = random.choice(list(csw.records))
# Parenthesised form prints the same text on Python 2 and Python 3
print(choice)
csw.records[choice].references


ABOM-L4LRfnd-GLOB-GAMSSA_28km.20121221-ABOM-L4LRfnd-GLOB-v01-fv01_0-GAMSSA_28km.nc.bz2
Out[12]:
[{'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink',
  'url': 'http://www.nodc.noaa.gov/SatelliteData/ghrsst/'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Thumbnail',
  'url': 'http://data.nodc.noaa.gov/las/ProductServer.do?xml=%3C%3Fxml+version%3D%221.0%22%3F%3E%3ClasRequest+href%3D%22file%3Alas.xml%22%3E%3Clink+match%3D%22%2Flasdata%2Foperations%2Foperation[%40ID%3D%27Plot_2D_XY_zoom%27]%22%3E%3C%2Flink%3E%3Cproperties%3E%3Cferret%3E%3Cview%3Exy%3C%2Fview%3E%3Cland_type%3Edefault%3C%2Fland_type%3E%3Cset_aspect%3Edefault%3C%2Fset_aspect%3E%3Cmark_grid%3Eno%3C%2Fmark_grid%3E%3Ccontour_levels%3E%3C%2Fcontour_levels%3E%3Cfill_levels%3E%3C%2Ffill_levels%3E%3Ccontour_style%3Edefault%3C%2Fcontour_style%3E%3Cpalette%3Edefault%3C%2Fpalette%3E%3Cdeg_min_sec%3Edefault%3C%2Fdeg_min_sec%3E%3Cmargins%3Edefault%3C%2Fmargins%3E%3Cuse_graticules%3Edefault%3C%2Fuse_graticules%3E%3Csize%3E0.5%3C%2Fsize%3E%3Cimage_format%3Edefault%3C%2Fimage_format%3E%3Cinterpolate_data%3Efalse%3C%2Finterpolate_data%3E%3Cexpression%3E%3C%2Fexpression%3E%3C%2Fferret%3E%3C%2Fproperties%3E%3Cargs%3E%3Clink+match%3D%22%2Flasdata%2Fdatasets%2Fid-c80878d11f%2Fvariables%2Fanalysed_sst-id-c80878d11f%22%3E%3C%2Flink%3E%3Cregion%3E%3Cpoint+type%3D%22t%22+v%3D%2221-Dec-2012%22%3E%3C%2Fpoint%3E%3Crange+type%3D%22y%22+low%3D%22-89.875%22+high%3D%2289.875%22%3E%3C%2Frange%3E%3Crange+type%3D%22x%22+low%3D%22-179.875%22+high%3D%22179.875%22%3E%3C%2Frange%3E%3C%2Fregion%3E%3C%2Fargs%3E%3C%2FlasRequest%3E&stream=true&stream_ID=plot_image'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Thumbnail',
  'url': 'http://data.nodc.noaa.gov/las/ProductServer.do?xml=%3C%3Fxml+version%3D%221.0%22%3F%3E%3ClasRequest+href%3D%22file%3Alas.xml%22%3E%3Clink+match%3D%22%2Flasdata%2Foperations%2Foperation[%40ID%3D%27Plot_2D_XY_zoom%27]%22%3E%3C%2Flink%3E%3Cproperties%3E%3Cferret%3E%3Cview%3Exy%3C%2Fview%3E%3Cland_type%3Edefault%3C%2Fland_type%3E%3Cset_aspect%3Edefault%3C%2Fset_aspect%3E%3Cmark_grid%3Eno%3C%2Fmark_grid%3E%3Ccontour_levels%3E%3C%2Fcontour_levels%3E%3Cfill_levels%3E%3C%2Ffill_levels%3E%3Ccontour_style%3Edefault%3C%2Fcontour_style%3E%3Cpalette%3Edefault%3C%2Fpalette%3E%3Cdeg_min_sec%3Edefault%3C%2Fdeg_min_sec%3E%3Cmargins%3Edefault%3C%2Fmargins%3E%3Cuse_graticules%3Edefault%3C%2Fuse_graticules%3E%3Csize%3E0.5%3C%2Fsize%3E%3Cimage_format%3Edefault%3C%2Fimage_format%3E%3Cinterpolate_data%3Efalse%3C%2Finterpolate_data%3E%3Cexpression%3E%3C%2Fexpression%3E%3C%2Fferret%3E%3C%2Fproperties%3E%3Cargs%3E%3Clink+match%3D%22%2Flasdata%2Fdatasets%2Fid-c80878d11f%2Fvariables%2Fanalysed_sst-id-c80878d11f%22%3E%3C%2Flink%3E%3Cregion%3E%3Cpoint+type%3D%22t%22+v%3D%2221-Dec-2012%22%3E%3C%2Fpoint%3E%3Crange+type%3D%22y%22+low%3D%22-89.875%22+high%3D%2289.875%22%3E%3C%2Frange%3E%3Crange+type%3D%22x%22+low%3D%22-179.875%22+high%3D%22179.875%22%3E%3C%2Frange%3E%3C%2Fregion%3E%3C%2Fargs%3E%3C%2FlasRequest%3E&stream=true&stream_ID=plot_image'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document',
  'url': 'http://www.nodc.noaa.gov/geoportal/csw?getxml=%7BFE592D84-40A6-45A0-8D73-F5991F097344%7D'},
 {'scheme': 'urn:x-esri:specification:ServiceType:TDS',
  'url': 'http://data.nodc.noaa.gov/thredds/catalog/ghrsst/L4/GLOB/ABOM/GAMSSA_28km/2012/356/catalog.html?dataset=ghrsst/L4/GLOB/ABOM/GAMSSA_28km/2012/356/20121221-ABOM-L4LRfnd-GLOB-v01-fv01_0-GAMSSA_28km.nc.bz2'},
 {'scheme': 'urn:x-esri:specification:ServiceType:FTP',
  'url': 'ftp://ftp.nodc.noaa.gov/pub/data.nodc/ghrsst/L4/GLOB/ABOM/GAMSSA_28km/2012/356/20121221-ABOM-L4LRfnd-GLOB-v01-fv01_0-GAMSSA_28km.nc.bz2'},
 {'scheme': 'urn:x-esri:specification:ServiceType:OPeNDAP',
  'url': 'http://data.nodc.noaa.gov/opendap/ghrsst/L4/GLOB/ABOM/GAMSSA_28km/2012/356/20121221-ABOM-L4LRfnd-GLOB-v01-fv01_0-GAMSSA_28km.nc.bz2.html'},
 {'scheme': 'urn:x-esri:specification:ServiceType:LAS',
  'url': 'http://data.nodc.noaa.gov/las/ProductServer.do?xml=%3C%3Fxml+version%3D%221.0%22%3F%3E%3ClasRequest+href%3D%22file%3Alas.xml%22%3E%3Clink+match%3D%22%2Flasdata%2Foperations%2Foperation[%40ID%3D%27Plot_2D_XY_zoom%27]%22%3E%3C%2Flink%3E%3Cproperties%3E%3Cferret%3E%3Cview%3Exy%3C%2Fview%3E%3Cland_type%3Edefault%3C%2Fland_type%3E%3Cset_aspect%3Edefault%3C%2Fset_aspect%3E%3Cmark_grid%3Eno%3C%2Fmark_grid%3E%3Ccontour_levels%3E%3C%2Fcontour_levels%3E%3Cfill_levels%3E%3C%2Ffill_levels%3E%3Ccontour_style%3Edefault%3C%2Fcontour_style%3E%3Cpalette%3Edefault%3C%2Fpalette%3E%3Cdeg_min_sec%3Edefault%3C%2Fdeg_min_sec%3E%3Cmargins%3Edefault%3C%2Fmargins%3E%3Cuse_graticules%3Edefault%3C%2Fuse_graticules%3E%3Csize%3E0.5%3C%2Fsize%3E%3Cimage_format%3Edefault%3C%2Fimage_format%3E%3Cinterpolate_data%3Efalse%3C%2Finterpolate_data%3E%3Cexpression%3E%3C%2Fexpression%3E%3C%2Fferret%3E%3C%2Fproperties%3E%3Cargs%3E%3Clink+match%3D%22%2Flasdata%2Fdatasets%2Fid-c80878d11f%2Fvariables%2Fanalysed_sst-id-c80878d11f%22%3E%3C%2Flink%3E%3Cregion%3E%3Cpoint+type%3D%22t%22+v%3D%2221-Dec-2012%22%3E%3C%2Fpoint%3E%3Crange+type%3D%22y%22+low%3D%22-89.875%22+high%3D%2289.875%22%3E%3C%2Frange%3E%3Crange+type%3D%22x%22+low%3D%22-179.875%22+high%3D%22179.875%22%3E%3C%2Frange%3E%3C%2Fregion%3E%3C%2Fargs%3E%3C%2FlasRequest%3E'},
 {'scheme': 'urn:x-esri:specification:ServiceType:Download',
  'url': 'http://data.nodc.noaa.gov/ghrsst/L4/GLOB/ABOM/GAMSSA_28km/2012/356/20121221-ABOM-L4LRfnd-GLOB-v01-fv01_0-GAMSSA_28km.nc.bz2'},
 {'scheme': 'urn:x-esri:specification:ServiceType:WMS',
  'url': 'http://data.nodc.noaa.gov/thredds/wms/ghrsst/L4/GLOB/ABOM/GAMSSA_28km/2012/356/20121221-ABOM-L4LRfnd-GLOB-v01-fv01_0-GAMSSA_28km.nc.bz2?service=WMS&version=1.3.0&request=GetCapabilities'},
 {'scheme': 'urn:x-esri:specification:ServiceType:WCS',
  'url': 'http://data.nodc.noaa.gov/thredds/wcs/ghrsst/L4/GLOB/ABOM/GAMSSA_28km/2012/356/20121221-ABOM-L4LRfnd-GLOB-v01-fv01_0-GAMSSA_28km.nc.bz2?service=WCS&version=1.0.0&request=GetCapabilities'}]

In [13]:
# Gather the first WMS link of every record and count how many were found
wms_urls = service_urls(
    csw.records,
    service_string='urn:x-esri:specification:ServiceType:WMS',
)
len(wms_urls)


Out[13]:
1786

Search DATA.GOV CSW (Collections)


In [14]:
# Connect to the catalog.data.gov CSW and display the CSW version it speaks
endpoint = 'http://catalog.data.gov/csw'
csw = CatalogueServiceWeb(
    endpoint,
    timeout=60,
)
csw.version


Out[14]:
'2.0.2'

In [15]:
# Show which ISO queryables the GetRecords operation advertises
for oper in csw.operations:
    if oper.name == 'GetRecords':
        # Single concatenated string so the output is identical on Python 2 and 3
        print('\nISO Queryables:\n' + str(oper.constraints['SupportedISOQueryables']['values']))


ISO Queryables:
['apiso:DistanceValue', 'apiso:Abstract', 'apiso:RevisionDate', 'apiso:Subject', 'apiso:KeywordType', 'apiso:Title', 'apiso:CRS', 'apiso:PublicationDate', 'apiso:Type', 'apiso:AlternateTitle', 'apiso:BoundingBox', 'apiso:AnyText', 'apiso:ParentIdentifier', 'apiso:Modified', 'apiso:Operation', 'apiso:Format', 'apiso:TempExtent_end', 'apiso:DistanceUOM', 'apiso:OrganisationName', 'apiso:ServiceType', 'apiso:TempExtent_begin', 'apiso:ResourceLanguage', 'apiso:ServiceTypeVersion', 'apiso:OperatesOn', 'apiso:Denominator', 'apiso:HasSecurityConstraints', 'apiso:OperatesOnIdentifier', 'apiso:GeographicDescriptionCode', 'apiso:Language', 'apiso:Identifier', 'apiso:OperatesOnName', 'apiso:TopicCategory', 'apiso:CreationDate', 'apiso:CouplingType']

In [16]:
# Run the text filter against this endpoint, then count the records returned
csw.getrecords2(
    constraints=filter_list,
    maxrecords=10000,
    esn='full',
)
len(csw.records)


Out[16]:
1

In [17]:
# Pick one record identifier at random and list that record's references.
# NOTE: no seed is set, so the chosen record differs from run to run.
choice = random.choice(list(csw.records))
# Parenthesised form prints the same text on Python 2 and Python 3
print(choice)
csw.records[choice].references


ABOM-L4LRfnd-GLOB-GAMSSA_28km
Out[17]:
[{'scheme': 'None',
  'url': 'http://data.nodc.noaa.gov/nodc/archive/metadata/approved/graphics/GHRSST_ISO_all-ABOM-L4LRfnd-GLOB-v01-GAMSSA_28km.jpg'},
 {'scheme': 'None', 'url': 'http://ghrsst.jpl.nasa.gov'},
 {'scheme': 'None', 'url': 'http://www.ghrsst.org'},
 {'scheme': 'None', 'url': 'http://opendap.bom.gov.au:8080/thredds/catalog'}]

From the above, we can see that the 'scheme' is the string 'None' on every reference, so we can't tell the different service types (OPeNDAP, WCS, etc.) apart or extract their URLs.


In [18]:
# Gather the first WMS link of every record and count how many were found
wms_urls = service_urls(
    csw.records,
    service_string='urn:x-esri:specification:ServiceType:WMS',
)
len(wms_urls)


Out[18]:
0

Search DATA.GOV CSW (Granules)


In [19]:
# Connect to the catalog.data.gov "csw-all" endpoint and report its CSW version
endpoint = 'http://catalog.data.gov/csw-all'
csw = CatalogueServiceWeb(endpoint, timeout=60)
# Parenthesised form prints the same text on Python 2 and Python 3
print(csw.version)


2.0.2

In [20]:
# Show which ISO queryables the GetRecords operation advertises
for oper in csw.operations:
    if oper.name == 'GetRecords':
        # Single concatenated string so the output is identical on Python 2 and 3
        print('\nISO Queryables:\n' + str(oper.constraints['SupportedISOQueryables']['values']))


ISO Queryables:
['apiso:DistanceValue', 'apiso:Abstract', 'apiso:RevisionDate', 'apiso:Subject', 'apiso:KeywordType', 'apiso:Title', 'apiso:CRS', 'apiso:PublicationDate', 'apiso:Type', 'apiso:AlternateTitle', 'apiso:BoundingBox', 'apiso:AnyText', 'apiso:ParentIdentifier', 'apiso:Modified', 'apiso:Operation', 'apiso:Format', 'apiso:TempExtent_end', 'apiso:DistanceUOM', 'apiso:OrganisationName', 'apiso:ServiceType', 'apiso:TempExtent_begin', 'apiso:ResourceLanguage', 'apiso:ServiceTypeVersion', 'apiso:OperatesOn', 'apiso:Denominator', 'apiso:HasSecurityConstraints', 'apiso:OperatesOnIdentifier', 'apiso:GeographicDescriptionCode', 'apiso:Language', 'apiso:Identifier', 'apiso:OperatesOnName', 'apiso:TopicCategory', 'apiso:CreationDate', 'apiso:CouplingType']

In [21]:
# Run the text filter against this endpoint, then count the records returned
csw.getrecords2(
    constraints=filter_list,
    maxrecords=10000,
    esn='full',
)
len(csw.records)


Out[21]:
1

In [22]:
# Pick one record identifier at random and list that record's references.
# NOTE: no seed is set, so the chosen record differs from run to run.
choice = random.choice(list(csw.records))
# Parenthesised form prints the same text on Python 2 and Python 3
print(choice)
csw.records[choice].references


ABOM-L4LRfnd-GLOB-GAMSSA_28km
Out[22]:
[{'scheme': 'None',
  'url': 'http://data.nodc.noaa.gov/nodc/archive/metadata/approved/graphics/GHRSST_ISO_all-ABOM-L4LRfnd-GLOB-v01-GAMSSA_28km.jpg'},
 {'scheme': 'None', 'url': 'http://ghrsst.jpl.nasa.gov'},
 {'scheme': 'None', 'url': 'http://www.ghrsst.org'},
 {'scheme': 'None', 'url': 'http://opendap.bom.gov.au:8080/thredds/catalog'}]

In [23]:
# Gather the first WMS link of every record and count how many were found
wms_urls = service_urls(
    csw.records,
    service_string='urn:x-esri:specification:ServiceType:WMS',
)
len(wms_urls)


Out[23]:
0

In [23]: