IOOS CSW queries: NANOOS & OOI Endurance tests

12/30/2015. Emilio Mayorga, NANOOS. Much of this is adapted from notebooks from Filipe Fernandes.


In [1]:
import numpy as np
import pandas as pd

In [2]:
from owslib.csw import CatalogueServiceWeb
from owslib import fes

In [3]:
from pprint import pprint
# Banner helper: fmt(text) centers text in a 64-character line padded with '*'.
fmt = '{:*^64}'.format

In [4]:
def fes_date_filter(start, stop, constraint='overlaps'):
    """
    Build the pair of fes temporal filters describing a date range.

    Both ends of the range are inclusive. The datetimes are rendered
    with hour resolution ('%Y-%m-%d %H:00'), so minutes and seconds
    are discarded.

    Parameters
    ----------
    start, stop : datetime-like
        Range limits; only their ``strftime`` method is used.
    constraint : {'overlaps', 'within'}
        'overlaps' selects records whose temporal extent begins on or
        before `stop` and ends on or after `start`.
        'within' selects records whose temporal extent begins on or
        after `start` and ends on or before `stop`.

    Returns
    -------
    begin, end : tuple of fes filters
        Conditions on 'apiso:TempExtent_begin' and
        'apiso:TempExtent_end', in that order.

    Raises
    ------
    NameError
        If `constraint` is neither 'overlaps' nor 'within'.

    Examples
    --------
    >>> from datetime import datetime, timedelta
    >>> stop = datetime(2010, 1, 1, 12, 30, 59)
    >>> start = stop - timedelta(days=7)
    >>> begin, end = fes_date_filter(start, stop, constraint='overlaps')
    >>> begin.literal, end.literal
    ('2010-01-01 12:00', '2009-12-25 12:00')
    >>> begin.propertyoperator, end.propertyoperator
    ('ogc:PropertyIsLessThanOrEqualTo', 'ogc:PropertyIsGreaterThanOrEqualTo')
    >>> begin, end = fes_date_filter(start, stop, constraint='within')
    >>> begin.literal, end.literal
    ('2009-12-25 12:00', '2010-01-01 12:00')
    >>> begin.propertyoperator, end.propertyoperator
    ('ogc:PropertyIsGreaterThanOrEqualTo', 'ogc:PropertyIsLessThanOrEqualTo')

    """
    hour_format = '%Y-%m-%d %H:00'
    start_literal = start.strftime(hour_format)
    stop_literal = stop.strftime(hour_format)

    # Reject unknown constraints before building any filter.
    if constraint not in ('overlaps', 'within'):
        raise NameError('Unrecognized constraint {}'.format(constraint))

    if constraint == 'within':
        # The record's extent must lie entirely inside [start, stop].
        begin = fes.PropertyIsGreaterThanOrEqualTo(
            propertyname='apiso:TempExtent_begin', literal=start_literal)
        end = fes.PropertyIsLessThanOrEqualTo(
            propertyname='apiso:TempExtent_end', literal=stop_literal)
    else:
        # 'overlaps': the record's extent must intersect [start, stop].
        begin = fes.PropertyIsLessThanOrEqualTo(
            propertyname='apiso:TempExtent_begin', literal=stop_literal)
        end = fes.PropertyIsGreaterThanOrEqualTo(
            propertyname='apiso:TempExtent_end', literal=start_literal)

    return begin, end

In [5]:
def run_ngdc_csw(filter_list,
                 endpoint='http://www.ngdc.noaa.gov/geoportal/csw',
                 timeout=60, maxrecords=1000):
    """
    Run a CSW GetRecords query and print a short summary of the response.

    Parameters
    ----------
    filter_list : list
        fes constraints passed to ``getrecords2`` as ``constraints``.
    endpoint : str, optional
        URL of the CSW service. Defaults to the NGDC GeoPortal.
    timeout : int, optional
        Timeout handed to ``CatalogueServiceWeb``.
    maxrecords : int, optional
        ``maxrecords`` value handed to ``getrecords2``.

    Returns
    -------
    CatalogueServiceWeb
        The catalog object, with the fetched records in ``csw.records``.

    Notes
    -----
    The printed "Number of datasets available" is ``len(csw.records)``,
    i.e. the number of records held by the returned object.
    NOTE(review): presumably capped at `maxrecords` -- confirm against
    the OWSLib documentation.
    """
    csw = CatalogueServiceWeb(endpoint, timeout=timeout)
    # esn='full' is the element set name sent with the request.
    csw.getrecords2(constraints=filter_list, maxrecords=maxrecords, esn='full')

    print(fmt(' Catalog information '))
    print("CSW version: {}".format(csw.version))
    print("Number of datasets available: {}".format(len(csw.records)))

    return csw

In [6]:
def service_urls(csw):
    """
    Collect the references of every record in a CSW response.

    Parameters
    ----------
    csw : object
        Must expose ``records``, a mapping whose values each carry a
        ``references`` list. Each reference needs a ``'scheme'`` entry;
        presumably they are dicts with ``'scheme'`` and ``'url'`` keys,
        as OWSLib builds them (verify for other record types).

    Returns
    -------
    pandas.Series
        Reference values indexed by (short scheme, column name), sorted
        by scheme. The short scheme is the second-to-last ':'-separated
        token of the full scheme, e.g.
        'urn:x-esri:specification:ServiceType:sos:url' -> 'sos'.
        An empty Series is returned when no record has any reference.
    """
    references = [ref
                  for rec in csw.records.values()
                  for ref in rec.references]
    # A query with no hits (or records without references) would otherwise
    # fail further down with an unrelated-looking pandas error.
    if not references:
        return pd.Series(dtype=object)

    refs = pd.DataFrame(references)
    refs['scheme'] = [scheme.split(':')[-2] for scheme in refs['scheme']]

    return refs.set_index('scheme').sort_index().stack()

GeoPortal UUIDs for NGDC IOOS Collections


In [7]:
# UUIDs of the IOOS collections in the NGDC GeoPortal.
# The upstream registry file,
#   https://raw.githubusercontent.com/ioos/registry/master/uuid.csv
# uses an inconsistent separator (',\t' in most rows, but not for
# OceanSITES), so a cleaned-up copy is read instead, for now.
uuidscsv_url = 'https://raw.githubusercontent.com/emiliom/stuff/master/ioos_ngdc_registry_uuid.csv'
uuids_df = pd.read_csv(uuidscsv_url).set_index('MetadataCollection')

In [8]:
# Mapping of collection name (the frame's index) to its UUID string.
uuids = uuids_df['UUID'].to_dict()
uuids


Out[8]:
{'AOOS': '{1706F520-2647-4A33-B7BF-592FAFDE4B45}',
 'ATN_DAC': '{07875897-E6A6-4EDB-B111-F5D6BE841ED6}',
 'CARICOOS': '{117F1684-A5E3-400E-98D8-A270BDBA1603}',
 'CENCOOS': '{4BA5624D-A61F-4C7E-BAEE-7F8BDDB8D9C4}',
 'GCOOS': '{003747E7-4818-43CD-937D-44D5B8E2F4E9}',
 'GLOS': '{B664427E-6953-4517-A874-78DDBBD3893E}',
 'Glider_DAC': '{2546E50F-F0C7-4365-9D45-694DD22E5F26}',
 'HFradar_DAC': '{A4A65346-6B65-4ED2-A2DC-5D529074EE6D}',
 'MARACOOS': '{C664F631-6E53-4108-B8DD-EFADF558E408}',
 'MODELING_TESTBED': '{8BF00750-66C7-49FF-8894-4D4F96FD86C0}',
 'NANOOS': '{254CCFC0-E408-4E13-BD62-87567E7586BB}',
 'NAVY': '{3B94DAAE-B7E9-4789-993B-0045AD9149D9}',
 'NDBC': '{828981B0-0039-4360-9788-E788FA6B0875}',
 'NERACOOS': '{E41F4FCD-0297-415D-AC53-967B970C3A3E}',
 'NOS/CO-OPS': '{72E748DF-23B1-4E80-A2C4-81E70783094A}',
 'OceanSITES': '{D479A77C-414E-404A-AF9B-C0BA9975E90D}',
 'Other': '{7EDF86E1-573C-4B3C-A979-AD499A11FD22}',
 'PacIOOS': '{68FF11D8-D66B-45EE-B33A-21919BB26421}',
 'SCCOOS': '{B70B3E3C-3851-4BA9-8E9B-C9F195DCEAC7}',
 'SECOORA': '{B3EA8869-B726-4E39-898A-299E53ABBC98}',
 'USACE': '{73019DFF-2E01-4800-91CD-0B3F812256A7}',
 'USGS/CMGP': '{C6F11F00-C2BD-4AC6-8E2C-013E16F4932E}'}

Query 1: OOI Endurance bbox, datetime range, and SST

Set up filters

OOI Endurance bbox, "now" +/- 5 days, and a set of water temperature variable names (e.g. 'sea_water_temperature')


In [9]:
# Bounding box of the OOI Endurance region used in the queries.
# NOTE(review): presumably [lon_min, lat_min, lon_max, lat_max] in degrees -- confirm against fes.BBox.
bbox = [-127, 43, -123.75, 48]

In [10]:
from datetime import datetime, timedelta

# Half-width, in days, of the time window centered on the current UTC time.
dt = 5

now = datetime.utcnow()
start, stop = now - timedelta(days=dt), now + timedelta(days=dt)

In [11]:
# NOTE(review): sos_name is not referenced by any other cell shown here -- confirm it is still needed.
sos_name = 'sea_water_temperature'

# Temperature-related variable names to search for in the catalog records.
name_list = ['sea_water_temperature',
             'sea_surface_temperature',
             'sea_water_potential_temperature',
             'equivalent_potential_temperature',
             'sea_water_conservative_temperature',
             'pseudo_equivalent_potential_temperature']
# Not including 'Water Temperature'

In [12]:
# Temporal filters for the start/stop window, using the default 'overlaps' constraint.
begin, end = fes_date_filter(start, stop)

In [13]:
# Wildcard conventions for the PropertyIsLike filters, matched against
# the apiso:AnyText property.
kw = {
    'wildCard': '*',
    'escapeChar': '\\',
    'singleChar': '?',
    'propertyname': 'apiso:AnyText',
}

# A record qualifies when any one of the names occurs anywhere in the text.
variable_names_filt = fes.Or(
    [fes.PropertyIsLike(literal='*{}*'.format(val), **kw) for val in name_list]
)

# All conditions must hold: time window, bounding box and variable name.
filter_list = [fes.And([begin, end, fes.BBox(bbox), variable_names_filt])]

Run CSW query


In [14]:
# Run the query; the bare last expression displays the keys of the returned records.
csw = run_ngdc_csw(filter_list)
csw.records.keys()


********************* Catalog information **********************
CSW version: 2.0.2
Number of datasets available: 7
Out[14]:
['NANOOS Sensor Observation Service (SOS), a 52North IOOS SOS server',
 'cwwcNDBCMet',
 'hycom_global',
 'National Data Buoy Center SOS',
 'dhw',
 'CMOP_f33',
 'CA_DAS']

Note that the NOS/CO-OPS SOS is not returned. This is probably due to an improperly configured temporal extent in the ISO metadata record of the CO-OPS SOS.

CSW query w/o the date filter and limited to the NANOOS, NOS/CO-OPS and NDBC collections

Just to double-check what's returned when the date filter is removed, but limited to those 3 collections so that a reasonable and relevant set of records is returned.


In [15]:
# Match records whose dc:source is the UUID of any of these three collections.
collection_uuids = fes.Or([
    fes.PropertyIsEqualTo(propertyname='dc:source', literal=uuids[collection])
    for collection in ('NANOOS', 'NOS/CO-OPS', 'NDBC')
])

# Bounding box and variable names as before, but with no date filters.
filter_list = [fes.And([fes.BBox(bbox), variable_names_filt, collection_uuids])]
csw = run_ngdc_csw(filter_list)
csw.records.keys()


********************* Catalog information **********************
CSW version: 2.0.2
Number of datasets available: 6
Out[15]:
['org.nanoos:OCOS_latest_aggregation',
 'NANOOS Sensor Observation Service (SOS), a 52North IOOS SOS server',
 'osuSstClimate',
 'NOAA.NOS.CO-OPS SOS',
 'National Data Buoy Center SOS',
 'CMOP_f33']

Query 2: IOOS NGDC Collection UUIDs for NANOOS and NOS/CO-OPS


In [16]:
uuid_selection = ['NANOOS', 'NOS/CO-OPS']

# For each collection: fetch all of its records, list their keys, then
# list the distinct OPeNDAP and SOS service URLs they reference.
for collection in uuid_selection:
    collection_uuid = fes.PropertyIsEqualTo(
        propertyname='dc:source', literal=uuids[collection])
    filter_list = [collection_uuid]

    print(fmt(' Collection: %s ' % collection))
    csw = run_ngdc_csw(filter_list)
    pprint(csw.records.keys())

    services = service_urls(csw)
    for header, scheme_key in (('---------- OPeNDAP services:', 'odp'),
                               ('---------- SOS services:', 'sos')):
        print(header)
        # set() collapses URLs repeated across records.
        pprint(set(services[scheme_key].values.tolist()))


********************** Collection: NANOOS **********************
********************* Catalog information **********************
CSW version: 2.0.2
Number of datasets available: 8
['CMOP_f33',
 'org.nanoos:OCOS_latest_aggregation',
 '{725A8A9B-1937-4DBF-ADF4-F6405543DC70}',
 'NANOOS Sensor Observation Service (SOS), a 52North IOOS SOS server',
 'osuSstClimate',
 'osuSstAnom',
 'osuChlaClimate',
 'osuChlaAnom']
---------- OPeNDAP services:
set(['http://amb6400b.stccmop.org:8080/thredds/dodsC/model_data/forecast',
     'http://ona.coas.oregonstate.edu:8080/thredds/dodsC/NANOOS/OCOS'])
---------- SOS services:
set(['http://data.nanoos.org/52nsos/sos/kvp?service=SOS&request=GetCapabilities&acceptVersions=1.0.0'])
******************** Collection: NOS/CO-OPS ********************
********************* Catalog information **********************
CSW version: 2.0.2
Number of datasets available: 11
['gov.noaa.nos.co-ops:TBOFS/fmrc/Aggregated_7_day_TBOFS_Fields_Forecast_best.ncd',
 'gov.noaa.nos.co-ops:SJROFS/fmrc/Aggregated_7_day_SJROFS_Fields_Forecast_best.ncd',
 'gov.noaa.nos.co-ops:NYOFS/fmrc/Aggregated_7_day_NYOFS_Fields_Forecast_best.ncd',
 'gov.noaa.nos.co-ops:LSOFS/fmrc/Aggregated_7_day_LSOFS_Fields_Forecast_best.ncd',
 'gov.noaa.nos.co-ops:LOOFS/fmrc/Aggregated_7_day_LOOFS_Fields_Forecast_best.ncd',
 'gov.noaa.nos.co-ops:LMOFS/fmrc/Aggregated_7_day_LMOFS_Fields_Forecast_best.ncd',
 'gov.noaa.nos.co-ops:LHOFS/fmrc/Aggregated_7_day_LHOFS_Fields_Forecast_best.ncd',
 'gov.noaa.nos.co-ops:LEOFS/fmrc/Aggregated_7_day_LEOFS_Fields_Forecast_best.ncd',
 'gov.noaa.nos.co-ops:DBOFS/fmrc/Aggregated_7_day_DBOFS_Fields_Forecast_best.ncd',
 'gov.noaa.nos.co-ops:CBOFS/fmrc/Aggregated_7_day_CBOFS_Fields_Forecast_best.ncd',
 'NOAA.NOS.CO-OPS SOS']
---------- OPeNDAP services:
set(['http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/CBOFS/fmrc/Aggregated_7_day_CBOFS_Fields_Forecast_best.ncd',
     'http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/DBOFS/fmrc/Aggregated_7_day_DBOFS_Fields_Forecast_best.ncd',
     'http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/LEOFS/fmrc/Aggregated_7_day_LEOFS_Fields_Forecast_best.ncd',
     'http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/LHOFS/fmrc/Aggregated_7_day_LHOFS_Fields_Forecast_best.ncd',
     'http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/LMOFS/fmrc/Aggregated_7_day_LMOFS_Fields_Forecast_best.ncd',
     'http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/LOOFS/fmrc/Aggregated_7_day_LOOFS_Fields_Forecast_best.ncd',
     'http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/LSOFS/fmrc/Aggregated_7_day_LSOFS_Fields_Forecast_best.ncd',
     'http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/NYOFS/fmrc/Aggregated_7_day_NYOFS_Fields_Forecast_best.ncd',
     'http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/SJROFS/fmrc/Aggregated_7_day_SJROFS_Fields_Forecast_best.ncd',
     'http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/TBOFS/fmrc/Aggregated_7_day_TBOFS_Fields_Forecast_best.ncd'])
---------- SOS services:
set(['http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&request=GetCapabilities&acceptVersions=1.0.0'])

NOTE for NANOOS Collection: {725A8A9B-1937-4DBF-ADF4-F6405543DC70} is the NANOOS GeoServer WMS

Examine the NOS/CO-OPS SOS record


In [17]:
# Query the NOS/CO-OPS collection on its own so that `csw` holds that
# collection's records for inspection.
collection_uuid = fes.PropertyIsEqualTo(propertyname='dc:source',
                                        literal=uuids['NOS/CO-OPS'])
filter_list = [collection_uuid]
csw = run_ngdc_csw(filter_list)


********************* Catalog information **********************
CSW version: 2.0.2
Number of datasets available: 11

In [18]:
# Print the `xml` attribute of the record keyed 'NOAA.NOS.CO-OPS SOS'.
print(csw.records['NOAA.NOS.CO-OPS SOS'].xml)


<csw:Record xmlns:csw="http://www.opengis.net/cat/csw/2.0.2" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcmiBox="http://dublincore.org/documents/2000/07/11/dcmi-box/" xmlns:dct="http://purl.org/dc/terms/" xmlns:gml="http://www.opengis.net/gml" xmlns:ows="http://www.opengis.net/ows" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<dc:identifier scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:FileID">NOAA.NOS.CO-OPS SOS</dc:identifier>
<dc:identifier scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:DocID">{EAA709FD-780B-43E3-9729-6D793EBFC261}</dc:identifier>
<dc:title>NOAA.NOS.CO-OPS SOS</dc:title>
<dc:type scheme="urn:x-esri:specification:ServiceType:ArcIMS:Metadata:ContentType">liveData</dc:type>
<dc:subject>Air Temperature</dc:subject>
<dc:subject>Barometric Pressure</dc:subject>
<dc:subject>Conductivity</dc:subject>
<dc:subject>Currents</dc:subject>
<dc:subject>Datum</dc:subject>
<dc:subject>Harmonic Constituents</dc:subject>
<dc:subject>Rain Fall</dc:subject>
<dc:subject>Relative Humidity</dc:subject>
<dc:subject>Salinity</dc:subject>
<dc:subject>Visibility</dc:subject>
<dc:subject>Water Level</dc:subject>
<dc:subject>Water Level Predictions</dc:subject>
<dc:subject>Water Temperature</dc:subject>
<dc:subject>Winds</dc:subject>
<dc:subject>air_temperature</dc:subject>
<dc:subject>air_pressure</dc:subject>
<dc:subject>sea_water_electrical_conductivity</dc:subject>
<dc:subject>currents</dc:subject>
<dc:subject>sea_water_salinity</dc:subject>
<dc:subject>water_surface_height_above_reference_datum</dc:subject>
<dc:subject>sea_surface_height_amplitude_due_to_equilibrium_ocean_tide</dc:subject>
<dc:subject>sea_water_temperature</dc:subject>
<dc:subject>winds</dc:subject>
<dc:subject>harmonic_constituents</dc:subject>
<dc:subject>datums</dc:subject>
<dc:subject>relative_humidity</dc:subject>
<dc:subject>rain_fall</dc:subject>
<dc:subject>visibility</dc:subject>
<dct:modified>2015-12-30T10:51:30-07:00</dct:modified>
<dct:abstract>NOAA.NOS.CO-OPS Sensor Observation Service (SOS) Server</dct:abstract>
<dct:references scheme="urn:x-esri:specification:ServiceType:distribution:url">http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:distribution:url">http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&amp;request=GetCapabilities&amp;acceptVersions=1.0.0</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:sos:url">http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&amp;request=GetCapabilities&amp;acceptVersions=1.0.0</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:sos:url">http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&amp;request=GetCapabilities&amp;acceptVersions=1.0.0</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:sos:url">http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&amp;request=GetCapabilities&amp;acceptVersions=1.0.0</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:download:url">http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS</dct:references>
<dct:references scheme="urn:x-esri:specification:ServiceType:download:url">http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&amp;request=GetCapabilities&amp;acceptVersions=1.0.0</dct:references>
<ows:WGS84BoundingBox>
<ows:LowerCorner>-177.3608 -18.1333</ows:LowerCorner>
<ows:UpperCorner>178.425 71.36</ows:UpperCorner>
</ows:WGS84BoundingBox>
<ows:BoundingBox>
<ows:LowerCorner>-177.3608 -18.1333</ows:LowerCorner>
<ows:UpperCorner>178.425 71.36</ows:UpperCorner>
</ows:BoundingBox>
<dc:source>{72E748DF-23B1-4E80-A2C4-81E70783094A}</dc:source>
</csw:Record>

Query 3: NANOOS in keywords or any text


In [19]:
# Wildcard settings shared by the PropertyIsLike filters of this query;
# 'propertyname' is added per search in the cells that follow.
kw = dict(wildCard='*', escapeChar='\\', singleChar='?')

In keywords, apiso:Subject


In [20]:
# Target the keywords property (this mutates the shared kw dict in place).
kw['propertyname'] = 'apiso:Subject'
# Match records whose keywords contain 'NANOOS' anywhere.
filt_nanoos = fes.PropertyIsLike(literal=('*%s*' % 'NANOOS'), **kw)

filter_list = [filt_nanoos]

In [21]:
# Run the keyword search; the bare last expression displays the keys of the returned records.
csw = run_ngdc_csw(filter_list)
csw.records.keys()


********************* Catalog information **********************
CSW version: 2.0.2
Number of datasets available: 2
Out[21]:
['NANOOS Sensor Observation Service (SOS), a 52North IOOS SOS server',
 'org.nanoos:OCOS_latest_aggregation']

In apiso:AnyText


In [22]:
# Switch the shared kw dict to the apiso:AnyText property (in-place mutation).
kw['propertyname'] = 'apiso:AnyText'
# Match records containing 'NANOOS' anywhere in that property.
filt_nanoos = fes.PropertyIsLike(literal=('*%s*' % 'NANOOS'), **kw)

filter_list = [filt_nanoos]

In [23]:
# Run the any-text search; the bare last expression displays the keys of the returned records.
csw = run_ngdc_csw(filter_list)
csw.records.keys()


********************* Catalog information **********************
CSW version: 2.0.2
Number of datasets available: 4
Out[23]:
['NANOOS Sensor Observation Service (SOS), a 52North IOOS SOS server',
 '{725A8A9B-1937-4DBF-ADF4-F6405543DC70}',
 'org.nanoos:OCOS_latest_aggregation',
 'UCSC']