Test NGDC Geoportal bbox, start, stop filters


In [1]:
from owslib.csw import CatalogueServiceWeb
from owslib.fes import SortBy, SortProperty
from owslib import fes
import datetime as dt

In [2]:
csw = CatalogueServiceWeb('http://www.ngdc.noaa.gov/geoportal/csw',timeout=60) # NGDC Geoportal
#csw = CatalogueServiceWeb('http://catalog.data.gov/csw-all',timeout=60)

In [3]:
csw.get_operation_by_name('GetRecords').constraints


Out[3]:
[Constraint: SupportedCommonQueryables - ['Subject', 'Title', 'Abstract', 'AnyText', 'Format', 'Identifier', 'Modified', 'Type', 'BoundingBox'],
 Constraint: SupportedISOQueryables - ['apiso:Subject', 'apiso:Title', 'apiso:Abstract', 'apiso:AnyText', 'apiso:Format', 'apiso:Identifier', 'apiso:Modified', 'apiso:Type', 'apiso:BoundingBox', 'apiso:CRS.Authority', 'apiso:CRS.ID', 'apiso:CRS.Version', 'apiso:RevisionDate', 'apiso:AlternateTitle', 'apiso:CreationDate', 'apiso:PublicationDate', 'apiso:OrganizationName', 'apiso:HasSecurityConstraints', 'apiso:Language', 'apiso:ResourceIdentifier', 'apiso:ParentIdentifier', 'apiso:KeywordType', 'apiso:TopicCategory', 'apiso:ResourceLanguage', 'apiso:GeographicDescriptionCode', 'apiso:Denominator', 'apiso:DistanceValue', 'apiso:DistanceUOM', 'apiso:TempExtent_begin', 'apiso:TempExtent_end', 'apiso:ServiceType', 'apiso:ServiceTypeVersion', 'apiso:Operation', 'apiso:OperatesOn', 'apiso:OperatesOnIdentifier', 'apiso:OperatesOnName', 'apiso:CouplingType'],
 Constraint: AdditionalQueryables - ['apiso:Degree', 'apiso:AccessConstraints', 'apiso:OtherConstraints', 'apiso:Classification', 'apiso:ConditionApplyingToAccessAndUse', 'apiso:Lineage', 'apiso:ResponsiblePartyRole', 'apiso:ResponsiblePartyName', 'apiso:SpecificationTitle', 'apiso:SpecificationDate', 'apiso:SpecificationDateType']]

In [4]:
# adjust to match MaxRecordDefault of CSW, if would be cleaner if we pick this up Capabilities XML
# this issue will allow for this: https://github.com/geopython/OWSLib/issues/211
pagesize = 10
sort_property = 'dc:title'  # a supported queryable of the CSW
sort_order = 'ASC'  # should be 'ASC' or 'DESC'

In [5]:
sortby = SortBy([SortProperty(sort_property, sort_order)])
foo=sortby.properties

In [6]:
# hopefully something like this will be implemented in fes soon
def dateRange(start_date='1900-01-01',stop_date='2100-01-01',constraint='overlaps'):
    if constraint == 'overlaps':
        start = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=stop_date)
        stop = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=start_date)
    elif constraint == 'within':
        start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=start_date)
        stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=stop_date)
    return start,stop

In [7]:
val = 'salinity'
box=[-72.0, 41.0, -69.0, 43.0]   # gulf of maine

In [8]:
# specific specific times (UTC) ...

# hurricane sandy
jd_start = dt.datetime(2012,10,26)
jd_stop = dt.datetime(2012,11,2)

# 2014 feb 10-15 storm
jd_start = dt.datetime(2014,2,10)
jd_stop = dt.datetime(2014,2,15)

# 2014 recent
jd_start = dt.datetime(2014,3,8)
jd_stop = dt.datetime(2014,3,11)

# 2014 recent
jd_start = dt.datetime(1988,1,1)
jd_stop = dt.datetime(2012,3,1)

# 2011 
#jd_start = dt.datetime(2013,4,20)
#jd_stop = dt.datetime(2013,4,24)

# ... or relative to now
#jd_now = dt.datetime.utcnow()
#jd_start = jd_now - dt.timedelta(days=3)
#jd_stop = jd_now + dt.timedelta(days=3)

start_date = jd_start.strftime('%Y-%m-%d %H:00')
stop_date  = jd_stop.strftime('%Y-%m-%d %H:00')

jd_start = dt.datetime.strptime(start_date,'%Y-%m-%d %H:%M')
jd_stop = dt.datetime.strptime(stop_date,'%Y-%m-%d %H:%M')

print start_date,'to',stop_date


1988-01-01 00:00 to 2012-03-01 00:00

In [9]:
start,stop = dateRange(start_date,stop_date)
filter1 = fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % val),
                        escapeChar='\\',wildCard='*',singleChar='?')
bbox = fes.BBox(box,crs='urn:ogc:def:crs:OGC:1.3:CRS84')
#filter_list = [fes.And([ bbox, filter1, start,stop]) ]
filter_list = [fes.And([ bbox, filter1]) ]

In [10]:
# you should be okay from here
startposition = 0
maxrecords = 20
while True:
    print 'getting records %d to %d' % (startposition, startposition+pagesize)
    csw.getrecords2(constraints=filter_list,
                    startposition=startposition, maxrecords=pagesize, sortby=sortby)
#    print csw.request
    for rec,item in csw.records.iteritems():
        print item.title
    if csw.results['nextrecord'] == 0:
        break
    startposition += pagesize
    if startposition >= maxrecords:
        break


getting records 0 to 10
A01 Aanderaa - Historic Surface Currents
A01 Accelerometer - Waves
A01 Directional Waves (waves.mstrain Experimental)
A01 Met - Meteorology
A01 Optics - Chlorophyll / Turbidity
A01 Optode - Oxygen
A01 SBE16 - CTD Transmissivity
A01 Sbe16 Oxygen
A01 SBE16 Oxygen
A01 Sbe37 - CTD
getting records 10 to 20
A01 Sbe37 - CTD
Aquarius Level 3 Sea Surface Salinity Standard Mapped Image Daily Data V3.0
Aquarius Level 3 Sea Surface Salinity Standard Mapped Image Daily Data V4.0
Aquarius Level 3 Sea Surface Salinity Standard Mapped Image Monthly Data V3.0
Aquarius Level 3 Sea Surface Salinity Standard Mapped Image Monthly Data V4.0
Aquarius Level 3 Sea Surface Salinity Standard Mapped Image Weekly Data V3.0
Aquarius Level 3 Sea Surface Salinity Standard Mapped Image Weekly Data V4.0
Aquarius Level 3 Wind Speed Standard Mapped Image 7-Day Data V3.0
Aquarius Level 3 Wind Speed Standard Mapped Image Daily Data V3.0
Aquarius Level 3 Wind Speed Standard Mapped Image Daily Data V3.0

In [11]:
filter_list = [fes.And([ bbox, filter1]) ]
csw.getrecords2(constraints=filter_list)
csw.results['matches']


Out[11]:
63

In [12]:
filter_list = [fes.And([ bbox, filter1, start,stop]) ]
csw.getrecords2(constraints=filter_list)
csw.results['matches']


Out[12]:
37

In [13]:
filter_list = [filter1]
csw.getrecords2(constraints=filter_list)
csw.results['matches']


Out[13]:
570