In [106]:
    
import datetime as dt
import random

from owslib import fes
from owslib.csw import CatalogueServiceWeb
    
In [107]:
    
def dateRange(start_date='1900-01-01',stop_date='2100-01-01',constraint='overlaps'):
    """
    Build a pair of FES temporal filters from a start/stop time.

    Parameters
    ----------
    start_date, stop_date : str
        Limits of the time window, passed through unchanged as the filter
        literals.
    constraint : {'overlaps', 'within'}
        'overlaps' -- the record begins on or before ``stop_date`` and ends
        on or after ``start_date``.
        'within'   -- the record begins on or after ``start_date`` and ends
        on or before ``stop_date``.

    Returns
    -------
    (start, stop) : tuple
        Filter on ``apiso:TempExtent_begin`` and filter on
        ``apiso:TempExtent_end``, in that order.

    Raises
    ------
    ValueError
        If ``constraint`` is neither 'overlaps' nor 'within'.
    """
    if constraint == 'overlaps':
        start = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=stop_date)
        stop = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=start_date)
    elif constraint == 'within':
        start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=start_date)
        stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=stop_date)
    else:
        # Fail with a clear message instead of an UnboundLocalError at the return.
        raise ValueError("constraint must be 'overlaps' or 'within', got %r" % (constraint,))
    return start,stop
    
In [108]:
    
def service_urls(records,service_string='urn:x-esri:specification:ServiceType:odp:url'):
    """
    Get all URLs matching a specific ServiceType.

    For every record, the URL of the first reference whose 'scheme' equals
    ``service_string`` is collected; records without a matching reference
    contribute nothing.

    Unfortunately these seem to differ between different CSW-ISO services.
    For example, OpenDAP is specified:
    NODC geoportal: 'urn:x-esri:specification:ServiceType:OPeNDAP'
    NGDC geoportal: 'urn:x-esri:specification:ServiceType:odp:url'

    Parameters
    ----------
    records : mapping
        Mapping whose values expose a ``references`` iterable of dicts with
        'scheme' and 'url' entries.
    service_string : str
        Scheme identifier to match exactly.

    Returns
    -------
    list
        Matching URLs, at most one per record, in the mapping's iteration order.
    """
    urls = []
    # The record keys are not needed; .values() works on Python 2 and 3 mappings.
    for rec in records.values():
        # Take the first matching reference, or None when nothing matches.
        url = next((ref['url'] for ref in rec.references
                    if ref.get('scheme') == service_string), None)
        if url is not None:
            urls.append(url)
    return urls
    
In [72]:
    
# Bounding box, ordered as [lon_min, lat_min, lon_max, lat_max]
box = [
    -76.4751,  # lon_min
    38.3890,   # lat_min
    -71.7432,  # lon_max
    42.9397,   # lat_max
]
    
In [73]:
    
# relative to now
# Read the clock once so both limits share the same reference instant and
# the window is exactly six days wide.
now = dt.datetime.utcnow()
jd_start = now - dt.timedelta(days=3)
jd_stop = now + dt.timedelta(days=3)
    
In [74]:
    
# Fixed window: storm of 10-15 February 2014
jd_start = dt.datetime(year=2014, month=2, day=10)
jd_stop = dt.datetime(year=2014, month=2, day=15)
    
In [76]:
    
# Fixed window: Hurricane Sandy, 26 October - 2 November 2012
jd_start = dt.datetime(year=2012, month=10, day=26)
jd_stop = dt.datetime(year=2012, month=11, day=2)
    
In [77]:
    
# Format both limits as strings truncated to the whole hour (minutes forced
# to 00), then parse them back so jd_start/jd_stop agree with the strings.
start_date = jd_start.strftime('%Y-%m-%d %H:00')
stop_date  = jd_stop.strftime('%Y-%m-%d %H:00')
jd_start = dt.datetime.strptime(start_date,'%Y-%m-%d %H:%M')
jd_stop = dt.datetime.strptime(stop_date,'%Y-%m-%d %H:%M')
# Single pre-formatted argument: same output under Python 2 and Python 3.
print('%s to %s' % (start_date, stop_date))
    
    
In [78]:
    
# Names searched for in the catalogue text, one per line
std_name_list = [
    'water_surface_height_above_reference_datum',
    'sea_surface_height_above_geoid',
    'sea_surface_elevation',
    'sea_surface_height_above_reference_ellipsoid',
    'sea_surface_height_above_sea_level',
    'sea_surface_height',
    'water level',
]
    
In [79]:
    
# Turn the user inputs (time window, bounding box, names) into FES filters
start, stop = dateRange(start_date, stop_date)
bbox = fes.BBox(box)

# Wildcard settings shared by both full-text filters below
like_opts = dict(escapeChar='\\', wildCard='*', singleChar='?')

# Match records whose text contains any one of the names
or_filt = fes.Or([
    fes.PropertyIsLike(propertyname='apiso:AnyText', literal=('*%s*' % val), **like_opts)
    for val in std_name_list
])

# Exclude records whose text contains "Averages"
val = 'Averages'
not_filt = fes.Not([
    fes.PropertyIsLike(propertyname='apiso:AnyText', literal=('*%s*' % val), **like_opts)
])
    
In [80]:
    
# Complex query: every one of the filters must hold at once
combined_filter = fes.And([bbox, start, stop, or_filt, not_filt])
filter_list = [combined_filter]
    
In [81]:
    
# Simple query: full-text match only.
# This assigns filter_list again, so it supersedes any value set earlier.
val = 'gamssa'
text_filter = fes.PropertyIsLike(
    propertyname='apiso:AnyText',
    literal=('*%s*' % val),
    escapeChar='\\',
    wildCard='*',
    singleChar='?')
filter_list = [text_filter]
    
In [82]:
    
# Connect to the CSW endpoint and report the version it exposes
endpoint = 'http://data.nodc.noaa.gov/geoportal/csw'
csw = CatalogueServiceWeb(endpoint,timeout=60)
# Parenthesised single argument: same output under Python 2 and Python 3.
print(csw.version)
    
    
In [83]:
    
# Show the ISO queryables advertised by the GetRecords operation
for oper in csw.operations:
    if oper.name == 'GetRecords':
        # One pre-formatted string keeps the output identical on Python 2 and 3
        # (no separator is inserted after the trailing newline of the header).
        print('\nISO Queryables:\n%s' % (oper.constraints['SupportedISOQueryables']['values'],))
    
    
In [84]:
    
# Run the query with full element sets, then show how many records came back
csw.getrecords2(esn='full', maxrecords=10000, constraints=filter_list)
len(csw.records)
    
    Out[84]:
In [85]:
    
# Pick one returned record at random and display its references
choice = random.choice(list(csw.records.keys()))
# Parenthesised single argument: same output under Python 2 and Python 3.
print(choice)
csw.records[choice].references
    
    
    Out[85]:
In [86]:
    
# Collect the WMS URL of every record that has one, then count them
wms_urls = service_urls(
    csw.records,
    service_string='urn:x-esri:specification:ServiceType:WMS')
len(wms_urls)
    
    Out[86]:
In [87]:
    
# Connect to the CSW endpoint; the cell's value is the version it exposes
endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'
csw = CatalogueServiceWeb(url=endpoint, timeout=60)
csw.version
    
    Out[87]:
In [88]:
    
# Show the ISO queryables advertised by the GetRecords operation
for oper in csw.operations:
    if oper.name == 'GetRecords':
        # One pre-formatted string keeps the output identical on Python 2 and 3
        # (no separator is inserted after the trailing newline of the header).
        print('\nISO Queryables:\n%s' % (oper.constraints['SupportedISOQueryables']['values'],))
    
    
In [89]:
    
# Run the query with full element sets, then show how many records came back
csw.getrecords2(esn='full', maxrecords=10000, constraints=filter_list)
len(csw.records)
    
    Out[89]:
In [90]:
    
# Pick one returned record at random and display its references
choice = random.choice(list(csw.records.keys()))
# Parenthesised single argument: same output under Python 2 and Python 3.
print(choice)
csw.records[choice].references
    
    
    Out[90]:
In [91]:
    
# Collect the WMS URL of every record that has one, then count them
wms_urls = service_urls(
    csw.records,
    service_string='urn:x-esri:specification:ServiceType:WMS')
len(wms_urls)
    
    Out[91]:
In [92]:
    
# catalog.data.gov CSW: connect; the cell's value is the version it exposes
endpoint = 'http://catalog.data.gov/csw'
csw = CatalogueServiceWeb(url=endpoint, timeout=60)
csw.version
    
    Out[92]:
In [93]:
    
# Show the ISO queryables advertised by the GetRecords operation
for oper in csw.operations:
    if oper.name == 'GetRecords':
        # One pre-formatted string keeps the output identical on Python 2 and 3
        # (no separator is inserted after the trailing newline of the header).
        print('\nISO Queryables:\n%s' % (oper.constraints['SupportedISOQueryables']['values'],))
    
    
In [94]:
    
# Run the query with full element sets, then show how many records came back
csw.getrecords2(esn='full', maxrecords=1000, constraints=filter_list)
len(csw.records)
    
    Out[94]:
In [95]:
    
# Pick one returned record at random and display its references
choice = random.choice(list(csw.records.keys()))
# Parenthesised single argument: same output under Python 2 and Python 3.
print(choice)
csw.records[choice].references
    
    
    Out[95]:
From the output above we can see that, because the 'scheme' is 'None' on every reference returned by catalog.data.gov, we cannot tell the different service types (OPeNDAP, WCS, etc.) apart, so no URLs can be extracted by service type.
In [98]:
    
# Collect the WMS URL of every record that has one, then count them
wms_urls = service_urls(
    csw.records,
    service_string='urn:x-esri:specification:ServiceType:WMS')
len(wms_urls)
    
    Out[98]:
In [99]:
    
# Connect to the CSW endpoint and report the version it exposes
endpoint = 'http://catalog.data.gov/csw'
csw = CatalogueServiceWeb(endpoint,timeout=60)
# Parenthesised single argument: same output under Python 2 and Python 3.
print(csw.version)
    
    
In [100]:
    
# Show the ISO queryables advertised by the GetRecords operation
for oper in csw.operations:
    if oper.name == 'GetRecords':
        # One pre-formatted string keeps the output identical on Python 2 and 3
        # (no separator is inserted after the trailing newline of the header).
        print('\nISO Queryables:\n%s' % (oper.constraints['SupportedISOQueryables']['values'],))
    
    
In [103]:
    
# Run the query with full element sets, then show how many records came back
csw.getrecords2(esn='full', maxrecords=1000, constraints=filter_list)
len(csw.records)
    
    Out[103]:
In [104]:
    
# Pick one returned record at random and display its references
choice = random.choice(list(csw.records.keys()))
# Parenthesised single argument: same output under Python 2 and Python 3.
print(choice)
csw.records[choice].references
    
    
    Out[104]:
In [105]:
    
# Collect the WMS URL of every record that has one, then count them
wms_urls = service_urls(
    csw.records,
    service_string='urn:x-esri:specification:ServiceType:WMS')
len(wms_urls)
    
    Out[105]: