Use CSW to find model data at NODC, NGDC, and CATALOG.DATA.GOV


In [14]:
from owslib.csw import CatalogueServiceWeb
from owslib import fes
import netCDF4
import numpy as np

In [8]:
def service_urls(records,service_string='urn:x-esri:specification:ServiceType:odp:url'):
    """
    Get all URLs matching a specific ServiceType.

    For every record, the first reference whose 'scheme' equals
    `service_string` contributes its 'url'; records with no matching
    reference are skipped, so at most one URL is returned per record.

    Parameters:
        records: mapping of record id -> record object; each record must
            expose a `references` list of dicts with 'scheme' and 'url' keys.
        service_string: the scheme identifier to match (exact comparison).

    Returns:
        list of matching URLs (empty if nothing matches).

    Unfortunately these seem to differ between different CSW-ISO services.
    For example, OpenDAP is specified:
    NODC geoportal: 'urn:x-esri:specification:ServiceType:OPeNDAP'
    NGDC geoportal: 'urn:x-esri:specification:ServiceType:odp:url'
    """

    urls=[]
    # Only the record objects are needed; .values() exists on both Python 2
    # and Python 3 mappings, whereas .iteritems() is Python 2 only.
    for rec in records.values():
        # Generator + next(): stop at the first matching reference,
        # falling back to None when the record has no match.
        url = next((d['url'] for d in rec.references if d['scheme'] == service_string), None)
        if url is not None:
            urls.append(url)
    return urls

In [9]:
# trying to do this search:
# ('roms' OR 'selfe' OR 'adcirc' OR 'ncom' OR 'hycom' OR 'fvcom') AND 'ocean' NOT 'regridded' NOT 'espresso'
# should return 11 records from NODC geoportal

def any_text_like(text):
    """Return a PropertyIsLike filter on 'apiso:AnyText' for the pattern '*<text>*'."""
    return fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % text),
                              escapeChar='\\',wildCard='*',singleChar='?')

# any one of the model names ...
search_text = ['roms','selfe','adcirc','ncom','hycom','fvcom']
filter1 = fes.Or([any_text_like(term) for term in search_text])

# ... AND 'ocean' ...
filter2 = any_text_like('ocean')

# ... NOT 'regridded' ...
filt = any_text_like('regridded')
filter3 = fes.Not([filt])

# ... NOT 'espresso'
filt = any_text_like('espresso')
filter4 = fes.Not([filt])

# getrecords2 is given a list of constraints; here a single And of the four parts
filter_list = [fes.And([filter1, filter2, filter3, filter4])]

Find model results at NODC


In [10]:
# Open a CSW connection to the NODC geoportal and print the CSW version it reports.
endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'   # NODC/UAF Geoportal: granule level
csw = CatalogueServiceWeb(endpoint,timeout=60)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(csw.version)


2.0.2

In [11]:
# Display the constraints (lists of supported queryables) the server advertises for GetRecords.
csw.get_operation_by_name('GetRecords').constraints


Out[11]:
[Constraint: SupportedCommonQueryables - ['Subject', 'Title', 'Abstract', 'AnyText', 'Format', 'Identifier', 'Modified', 'Type', 'BoundingBox'],
 Constraint: SupportedISOQueryables - ['apiso:Subject', 'apiso:Title', 'apiso:Abstract', 'apiso:AnyText', 'apiso:Format', 'apiso:Identifier', 'apiso:Modified', 'apiso:Type', 'apiso:BoundingBox', 'apiso:CRS.Authority', 'apiso:CRS.ID', 'apiso:CRS.Version', 'apiso:RevisionDate', 'apiso:AlternateTitle', 'apiso:CreationDate', 'apiso:PublicationDate', 'apiso:OrganizationName', 'apiso:HasSecurityConstraints', 'apiso:Language', 'apiso:ResourceIdentifier', 'apiso:ParentIdentifier', 'apiso:KeywordType', 'apiso:TopicCategory', 'apiso:ResourceLanguage', 'apiso:GeographicDescriptionCode', 'apiso:Denominator', 'apiso:DistanceValue', 'apiso:DistanceUOM', 'apiso:TempExtent_begin', 'apiso:TempExtent_end', 'apiso:ServiceType', 'apiso:ServiceTypeVersion', 'apiso:Operation', 'apiso:OperatesOn', 'apiso:OperatesOnIdentifier', 'apiso:OperatesOnName', 'apiso:CouplingType'],
 Constraint: AdditionalQueryables - ['apiso:Degree', 'apiso:AccessConstraints', 'apiso:OtherConstraints', 'apiso:Classification', 'apiso:ConditionApplyingToAccessAndUse', 'apiso:Lineage', 'apiso:ResponsiblePartyRole', 'apiso:SpecificationTitle', 'apiso:SpecificationDate', 'apiso:SpecificationDateType']]

In [12]:
# Run the combined search (esn='full', at most 1000 records) and show how many records came back.
csw.getrecords2(esn='full', maxrecords=1000, constraints=filter_list)
len(csw.records)


Out[12]:
19

In [13]:
# Pick one record id at random and display the references attached to that record.
choice = np.random.choice(list(csw.records.keys()))
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(choice)
csw.records[choice].references


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-13-6361c64444f8> in <module>()
----> 1 choice=random.choice(list(csw.records.keys()))
      2 print choice
      3 csw.records[choice].references

NameError: name 'random' is not defined

In [ ]:
# Collect one URL per record whose reference scheme is the 'OPeNDAP' scheme
# (the form the NODC geoportal uses), then show how many were found.
dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:OPeNDAP')
len(dap_urls)

Find model results at NGDC


In [ ]:
# Rebind `endpoint` and `csw` to the NGDC geoportal and display the CSW version it reports.
# Note: `csw` from the NODC section is replaced from here on.
endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw' #  NGDC/IOOS Geoportal
csw = CatalogueServiceWeb(endpoint,timeout=60)
csw.version

In [ ]:
# Display the constraints the currently connected server advertises for GetRecords.
csw.get_operation_by_name('GetRecords').constraints

In [ ]:
# Same combined search as before, against the currently connected catalog; show the record count.
csw.getrecords2(esn='full', maxrecords=1000, constraints=filter_list)
len(csw.records)

In [15]:
# Pick one record id at random and display the references attached to that record.
choice = np.random.choice(list(csw.records.keys()))
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(choice)
csw.records[choice].references


pmelTaoMonLw
Out[15]:
[{'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Onlink',
  'url': 'http://www.pmel.noaa.gov/tao/proj_over/proj_over.html'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document',
  'url': 'http://www.nodc.noaa.gov/geoportal/csw?getxml=%7B968CD4AC-B6AB-4D78-91B4-0E24BA5889D6%7D'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ERDDAP',
  'url': 'http://coastwatch.pfeg.noaa.gov/erddap/tabledap/pmelTaoMonLw.html'},
 {'scheme': 'urn:x-esri:specification:ServiceType:ERDDAP',
  'url': 'http://coastwatch.pfeg.noaa.gov/erddap/tabledap/pmelTaoMonLw.graph'}]

In [ ]:
# The NGDC geoportal labels OPeNDAP links with the 'odp:url' scheme; the
# 'OPeNDAP' scheme used by the NODC geoportal is kept below for comparison.
#dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:OPeNDAP')
dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:odp:url')
len(dap_urls)

Find model data at CATALOG.DATA.GOV


In [ ]:
# Rebind `endpoint` and `csw` to the catalog.data.gov CSW and display the CSW version it reports.
# NOTE(review): the host name starts with 'uat-', which looks like a test/staging
# front end rather than a production address — confirm this is the intended endpoint.
endpoint = 'http://uat-catalog-fe-data.reisys.com/csw-all' #  catalog.data.gov CSW
csw = CatalogueServiceWeb(endpoint,timeout=60)
csw.version

In [ ]:
# Print the ISO queryables the server advertises for the GetRecords operation.
# The GetRecords constraints shown earlier in this notebook are a *list* of
# Constraint objects (repr "Constraint: <name> - <values>"), so the entry is
# found by its name instead of being indexed like a dict.
for oper in csw.operations:
    if oper.name == 'GetRecords':
        for constraint in oper.constraints:
            if constraint.name == 'SupportedISOQueryables':
                # single-argument print() behaves the same on Python 2 and 3
                print('\nISO Queryables:\n%s' % (constraint.values,))

In [ ]:
# Same combined search, against the currently connected catalog; show the record count.
csw.getrecords2(esn='full', maxrecords=1000, constraints=filter_list)
len(csw.records)

In [ ]:
# Pick one record id at random and display the references attached to that record.
# numpy's generator is used because the stdlib `random` module is not imported
# in this notebook (a bare `random.choice` raises NameError).
choice = np.random.choice(list(csw.records.keys()))
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(choice)
csw.records[choice].references

From the above, we can see that because the 'scheme' is 'None' on all the references, we can't extract the different service types, like OPeNDAP, WCS, etc.


In [ ]:
# Scheme strings that work on the other two catalogs, kept for reference:
#dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:odp:url')  #NGDC
#dap_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:OPeNDAP')  #NODC
# '?????????' is a placeholder: the scheme that would identify OPeNDAP links on
# this catalog is unknown (the note above reports every reference has scheme None).
dap_urls = service_urls(csw.records,service_string='?????????')    #CATALOG.DATA.GOV
len(dap_urls)

Search at geoport.whoi.edu


In [ ]:
# Rebind `endpoint` and `csw` to the WHOI geoportal and print the CSW version it reports.
endpoint = 'http://geoport.whoi.edu/geoportal/csw' #  geoport.whoi.edu geoportal CSW
csw = CatalogueServiceWeb(endpoint,timeout=60)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(csw.version)

In [ ]:
# Print the ISO queryables the server advertises for the GetRecords operation.
# The GetRecords constraints shown earlier in this notebook are a *list* of
# Constraint objects (repr "Constraint: <name> - <values>"), so the entry is
# found by its name instead of being indexed like a dict.
for oper in csw.operations:
    if oper.name == 'GetRecords':
        for constraint in oper.constraints:
            if constraint.name == 'SupportedISOQueryables':
                # single-argument print() behaves the same on Python 2 and 3
                print('\nISO Queryables:\n%s' % (constraint.values,))

In [ ]:
# Run the same combined model search used for the other catalogs and show the record count.
# `constraints` takes the list `filter_list`; a single filter object is not a
# list of constraints and would not express the intended search.
csw.getrecords2(constraints=filter_list,maxrecords=1000,esn='full')
len(csw.records.keys())

In [ ]:
# Pick one record id at random and display the references attached to that record.
# numpy's generator is used because the stdlib `random` module is not imported
# in this notebook (a bare `random.choice` raises NameError).
choice = np.random.choice(list(csw.records.keys()))
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(choice)
csw.records[choice].references

In [ ]:


In [ ]: