In [14]:
from owslib.csw import CatalogueServiceWeb
from owslib import fes
import netCDF4
import numpy as np
In [8]:
def service_urls(records,service_string='urn:x-esri:specification:ServiceType:odp:url'):
    """
    Get all URLs matching a specific ServiceType.

    Unfortunately the scheme strings differ between different CSW-ISO services.
    For example, OpenDAP is specified:
    NODC geoportal: 'urn:x-esri:specification:ServiceType:OPeNDAP'
    NGDC geoportal: 'urn:x-esri:specification:ServiceType:odp:url'

    Parameters
    ----------
    records : mapping
        Maps a record identifier to a record object. Each record must expose
        a ``references`` iterable of dicts carrying a 'url' entry and,
        normally, a 'scheme' entry.
    service_string : str
        Scheme identifier that a reference must match exactly.

    Returns
    -------
    list
        For each record (in the iteration order of ``records``), the URL of
        its first reference whose scheme equals ``service_string``. Records
        without a match, or whose first match has a ``None`` URL, are skipped.
    """
    urls = []
    # Only the record objects are needed; .values() exists on Python 2 and 3
    # mappings alike, unlike the Python-2-only .iteritems().
    for rec in records.values():
        # Lazily scan the references and stop at the first match; fall back to
        # None when nothing matches. .get() tolerates references that carry no
        # 'scheme' key at all.
        url = next(
            (ref['url'] for ref in rec.references
             if ref.get('scheme') == service_string),
            None,
        )
        if url is not None:
            urls.append(url)
    return urls
In [9]:
# Build the search:
# ('roms' OR 'selfe' OR 'adcirc' OR 'ncom' OR 'hycom' OR 'fvcom') AND 'ocean' NOT 'regridded' NOT 'espresso'
# should return 11 records from NODC geoportal
def any_text_like(term):
    """Return a PropertyIsLike filter matching '*term*' against apiso:AnyText."""
    return fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % term),
                              escapeChar='\\',wildCard='*',singleChar='?')

search_text = ['roms','selfe','adcirc','ncom','hycom','fvcom']

# Any one of the model names may match.
filter1 = fes.Or([any_text_like(model) for model in search_text])
# ... and the record must also mention 'ocean'.
filter2 = any_text_like('ocean')
# ... but must not mention 'regridded' or 'espresso'.
filt = any_text_like('regridded')
filter3 = fes.Not([filt])
filt = any_text_like('espresso')
filter4 = fes.Not([filt])

# A one-element list holding the AND of all four parts.
filter_list = [fes.And([filter1, filter2, filter3, filter4])]
In [10]:
# NODC/UAF Geoportal: granule level
endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)
print(csw.version)
In [11]:
# Look up the GetRecords operation and display its constraints as the cell output.
getrecords_op = csw.get_operation_by_name('GetRecords')
getrecords_op.constraints
Out[11]:
In [12]:
# Run the combined filter against the current endpoint (up to 1000 full records),
# then display how many records came back.
csw.getrecords2(
    constraints=filter_list,
    maxrecords=1000,
    esn='full',
)
len(csw.records)
Out[12]:
In [13]:
# Pick one returned record at random and display its references.
record_ids = list(csw.records.keys())
choice = np.random.choice(record_ids)
print(choice)
csw.records[choice].references
In [ ]:
# Collect the URLs whose reference scheme is the ':OPeNDAP' ServiceType string
# and display how many were found.
dap_urls = service_urls(
    csw.records,
    service_string='urn:x-esri:specification:ServiceType:OPeNDAP',
)
len(dap_urls)
In [ ]:
# NGDC/IOOS Geoportal
endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)
# Last expression: the cell displays csw.version.
csw.version
In [ ]:
# Look up the GetRecords operation and display its constraints as the cell output.
getrecords_op = csw.get_operation_by_name('GetRecords')
getrecords_op.constraints
In [ ]:
# Run the combined filter against the current endpoint (up to 1000 full records),
# then display how many records came back.
csw.getrecords2(
    constraints=filter_list,
    maxrecords=1000,
    esn='full',
)
len(csw.records)
In [15]:
# Pick one returned record at random and display its references.
record_ids = list(csw.records.keys())
choice = np.random.choice(record_ids)
print(choice)
csw.records[choice].references
Out[15]:
In [ ]:
# This cell matches on the 'odp:url' ServiceType scheme; the alternative
# 'urn:x-esri:specification:ServiceType:OPeNDAP' scheme is not used here.
dap_urls = service_urls(
    csw.records,
    service_string='urn:x-esri:specification:ServiceType:odp:url',
)
len(dap_urls)
In [ ]:
# catalog.data.gov CSW
endpoint = 'http://uat-catalog-fe-data.reisys.com/csw-all'
csw = CatalogueServiceWeb(endpoint, timeout=60)
# Last expression: the cell displays csw.version.
csw.version
In [ ]:
# Print the supported ISO queryables listed under each GetRecords operation.
for oper in csw.operations:
    if oper.name != 'GetRecords':
        continue
    iso_queryables = oper.constraints['SupportedISOQueryables']['values']
    # Single-argument form: same output as the two-item Python 2 print statement
    # (no space is inserted after the trailing newline of the header).
    print('\nISO Queryables:\n%s' % (iso_queryables,))
In [ ]:
# Run the combined filter against the current endpoint (up to 1000 full records),
# then display how many records came back.
csw.getrecords2(
    constraints=filter_list,
    maxrecords=1000,
    esn='full',
)
len(csw.records)
In [ ]:
# Pick one returned record at random and display its references.
# The stdlib `random` module is never imported in this notebook, so use
# numpy's generator, as the earlier cells do.
choice = np.random.choice(list(csw.records.keys()))
print(choice)
csw.records[choice].references
From the above, we can see that because the 'scheme' is 'None' on all the references, we can't extract the different service types, like OPeNDAP, WCS, etc.
In [ ]:
# Scheme strings used for the other catalogs:
#   NGDC: 'urn:x-esri:specification:ServiceType:odp:url'
#   NODC: 'urn:x-esri:specification:ServiceType:OPeNDAP'
# For CATALOG.DATA.GOV the scheme to use is not known, so a placeholder is passed.
dap_urls = service_urls(
    csw.records,
    service_string='?????????',
)
len(dap_urls)
In [ ]:
# Geoportal CSW hosted at geoport.whoi.edu
endpoint = 'http://geoport.whoi.edu/geoportal/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)
print(csw.version)
In [ ]:
# Print the supported ISO queryables listed under each GetRecords operation.
for oper in csw.operations:
    if oper.name != 'GetRecords':
        continue
    iso_queryables = oper.constraints['SupportedISOQueryables']['values']
    # Single-argument form: same output as the two-item Python 2 print statement
    # (no space is inserted after the trailing newline of the header).
    print('\nISO Queryables:\n%s' % (iso_queryables,))
In [ ]:
# Run the combined search used on the other endpoints. `constraints` must be a
# list of filters; `filt` is a single PropertyIsLike object by this point, so
# pass `filter_list` instead.
csw.getrecords2(constraints=filter_list,maxrecords=1000,esn='full')
len(csw.records.keys())
In [ ]:
# Pick one returned record at random and display its references.
# The stdlib `random` module is never imported in this notebook, so use
# numpy's generator, as the earlier cells do.
choice = np.random.choice(list(csw.records.keys()))
print(choice)
csw.records[choice].references
In [ ]:
In [ ]: