12/30/2015. Emilio Mayorga, NANOOS. Much of this is adapted from notebooks from Filipe Fernandes.
In [1]:
import numpy as np
import pandas as pd
In [2]:
from owslib.csw import CatalogueServiceWeb
from owslib import fes
In [3]:
from pprint import pprint
# Banner helper: fmt(text) centers `text` in a 64-character field padded with '*'.
fmt = '{:*^64}'.format
In [4]:
def fes_date_filter(start, stop, constraint='overlaps'):
    """
    Build a pair of fes temporal-extent filters for a date range.

    Both bounds are rendered with ``'%Y-%m-%d %H:00'``, so minutes and
    seconds are discarded (the range is inclusive at both ends).

    Parameters
    ----------
    start, stop : datetime-like
        Objects providing ``strftime``.
    constraint : {'overlaps', 'within'}
        * ``'overlaps'``: ``TempExtent_begin <= stop`` and
          ``TempExtent_end >= start``.
        * ``'within'``: ``TempExtent_begin >= start`` and
          ``TempExtent_end <= stop``.

    Returns
    -------
    (begin, end)
        The filter on ``apiso:TempExtent_begin`` followed by the filter on
        ``apiso:TempExtent_end``.

    Raises
    ------
    NameError
        If `constraint` is neither ``'overlaps'`` nor ``'within'``.

    Examples
    --------
    >>> import pytz
    >>> from datetime import datetime, timedelta
    >>> stop = datetime(2010, 1, 1, 12, 30, 59).replace(tzinfo=pytz.utc)
    >>> start = stop - timedelta(days=7)
    >>> begin, end = fes_date_filter(start, stop, constraint='overlaps')
    >>> begin.literal, end.literal
    ('2010-01-01 12:00', '2009-12-25 12:00')
    >>> begin.propertyoperator, end.propertyoperator
    ('ogc:PropertyIsLessThanOrEqualTo', 'ogc:PropertyIsGreaterThanOrEqualTo')
    >>> begin, end = fes_date_filter(start, stop, constraint='within')
    >>> begin.literal, end.literal
    ('2009-12-25 12:00', '2010-01-01 12:00')
    >>> begin.propertyoperator, end.propertyoperator
    ('ogc:PropertyIsGreaterThanOrEqualTo', 'ogc:PropertyIsLessThanOrEqualTo')
    """
    # Format both bounds first, exactly as the filters will carry them.
    start_literal = start.strftime('%Y-%m-%d %H:00')
    stop_literal = stop.strftime('%Y-%m-%d %H:00')

    # Reject unknown constraints before building any filter object.
    if constraint not in ('overlaps', 'within'):
        raise NameError('Unrecognized constraint {}'.format(constraint))

    if constraint == 'within':
        begin = fes.PropertyIsGreaterThanOrEqualTo(
            propertyname='apiso:TempExtent_begin', literal=start_literal)
        end = fes.PropertyIsLessThanOrEqualTo(
            propertyname='apiso:TempExtent_end', literal=stop_literal)
    else:
        # 'overlaps': the record starts before the window closes and ends
        # after the window opens.
        begin = fes.PropertyIsLessThanOrEqualTo(
            propertyname='apiso:TempExtent_begin', literal=stop_literal)
        end = fes.PropertyIsGreaterThanOrEqualTo(
            propertyname='apiso:TempExtent_end', literal=start_literal)
    return begin, end
In [5]:
def run_ngdc_csw(filter_list,
                 endpoint='http://www.ngdc.noaa.gov/geoportal/csw',
                 timeout=60, maxrecords=1000):
    """
    Query a CSW catalog with the given filters and print a short summary.

    Parameters
    ----------
    filter_list : list
        Passed unchanged as ``constraints`` to ``getrecords2``.
    endpoint : str, optional
        URL of the CSW service; defaults to the NGDC geoportal.
    timeout : int, optional
        Passed as ``timeout`` to ``CatalogueServiceWeb``.
    maxrecords : int, optional
        Passed as ``maxrecords`` to ``getrecords2``.

    Returns
    -------
    CatalogueServiceWeb
        The catalog object after the request, with ``records`` populated.

    Notes
    -----
    The printed "Number of datasets available" is ``len(csw.records)``, i.e.
    the number of records held by the returned object.
    """
    csw = CatalogueServiceWeb(endpoint, timeout=timeout)
    csw.getrecords2(constraints=filter_list, maxrecords=maxrecords, esn='full')
    print(fmt(' Catalog information '))
    print("CSW version: {}".format(csw.version))
    print("Number of datasets available: {}".format(len(csw.records)))
    return csw
In [6]:
def service_urls(csw):
    """
    Collect the references of every catalog record into one Series.

    Parameters
    ----------
    csw : object
        Must expose ``records``, a mapping whose values each have a
        ``references`` list. Each reference is expected to be a mapping with
        a ``'scheme'`` entry made of at least two ':'-separated fields
        (NOTE(review): confirm against the OWSLib record class in use).

    Returns
    -------
    pandas.Series
        The reference table indexed by the second-to-last ':'-separated
        field of each scheme, sorted by that index and stacked. An empty
        Series is returned when the catalog holds no references at all,
        instead of failing inside ``pd.concat`` / on the missing column.
    """
    # Flatten all references in one pass; this is equivalent to concatenating
    # one frame per record, and lets the empty case be detected up front.
    references = [ref for rec in csw.records.values() for ref in rec.references]
    if not references:
        return pd.Series(dtype=object)

    refs = pd.DataFrame(references)
    # Keep only the second-to-last field of the scheme string as the label.
    refs['scheme'] = [scheme.split(':')[-2] for scheme in refs['scheme']]
    return refs.set_index('scheme').sort_index().stack()
In [7]:
# IOOS NGDC collection UUIDs.
# The upstream registry file
#   https://raw.githubusercontent.com/ioos/registry/master/uuid.csv
# uses a messy separator (',\t' in most cases, except OceanSITES), so a
# cleaned-up copy is read instead, for now.
uuidscsv_url = 'https://raw.githubusercontent.com/emiliom/stuff/master/ioos_ngdc_registry_uuid.csv'
uuids_df = pd.read_csv(uuidscsv_url).set_index('MetadataCollection')
In [8]:
# Map each MetadataCollection index label to its UUID, then display the mapping.
uuids = uuids_df['UUID'].to_dict()
uuids
Out[8]:
In [9]:
# Bounding box later handed to fes.BBox for the spatial filter.
# NOTE(review): presumably [min_lon, min_lat, max_lon, max_lat] in degrees — confirm.
bbox = [-127, 43, -123.75, 48]
In [10]:
from datetime import datetime, timedelta
# Search window: `dt` days on either side of the current UTC time.
dt = 5
now = datetime.utcnow()
start, stop = now - timedelta(days=dt), now + timedelta(days=dt)
In [11]:
# Variable name used for SOS requests.
sos_name = 'sea_water_temperature'

# Temperature-related variable names to search the catalog for.
# Not including 'Water Temperature'.
name_list = [
    'sea_water_temperature',
    'sea_surface_temperature',
    'sea_water_potential_temperature',
    'equivalent_potential_temperature',
    'sea_water_conservative_temperature',
    'pseudo_equivalent_potential_temperature',
]
In [12]:
# Temporal filters for the [start, stop] window, using fes_date_filter's
# default constraint ('overlaps').
begin, end = fes_date_filter(start, stop)
In [13]:
# Keyword arguments shared by every PropertyIsLike filter built below.
kw = {
    'wildCard': '*',
    'escapeChar': '\\',
    'singleChar': '?',
    'propertyname': 'apiso:AnyText',
}

# One "contains <name>" filter per variable name, OR-ed together.
variable_names_filt = fes.Or(
    [fes.PropertyIsLike(literal=('*%s*' % name), **kw) for name in name_list]
)

# Full query: time window AND bounding box AND any of the variable names.
filter_list = [
    fes.And([begin, end, fes.BBox(bbox), variable_names_filt]),
]
In [14]:
# Run the query built in filter_list and show the keys of the returned records.
csw = run_ngdc_csw(filter_list)
csw.records.keys()
Out[14]:
Note that the CO-OPS SOS is not returned. This is probably because the temporal extent is not set up properly in the ISO metadata record of the CO-OPS SOS.
In [15]:
# Match records whose dc:source equals the UUID of any selected collection.
collection_uuids = fes.Or([
    fes.PropertyIsEqualTo(propertyname='dc:source', literal=uuids[name])
    for name in ['NANOOS', 'NOS/CO-OPS', 'NDBC']
])

# Same spatial and variable-name filters as before, but with the collection
# restriction in place of the time window.
filter_list = [
    fes.And([fes.BBox(bbox), variable_names_filt, collection_uuids]),
]
csw = run_ngdc_csw(filter_list)
csw.records.keys()
Out[15]:
In [16]:
# For each selected collection: fetch all of its records, list their keys,
# then show the distinct OPeNDAP and SOS service URLs they reference.
uuid_selection = ['NANOOS', 'NOS/CO-OPS']
for collection in uuid_selection:
    collection_uuid = fes.PropertyIsEqualTo(
        propertyname='dc:source',
        literal=uuids[collection],
    )
    filter_list = [collection_uuid]

    print(fmt(' Collection: %s ' % collection))
    csw = run_ngdc_csw(filter_list)
    pprint(csw.records.keys())

    services = service_urls(csw)
    # (header line, scheme label) pairs, printed in this order.
    for header, scheme in (('---------- OPeNDAP services:', 'odp'),
                           ('---------- SOS services:', 'sos')):
        print(header)
        pprint(set(services[scheme].values.tolist()))
NOTE for NANOOS Collection: {725A8A9B-1937-4DBF-ADF4-F6405543DC70} is the NANOOS GeoServer WMS
In [17]:
# Fetch every record belonging to the NOS/CO-OPS collection.
collection_uuid = fes.PropertyIsEqualTo(
    propertyname='dc:source',
    literal=uuids['NOS/CO-OPS'],
)
filter_list = [collection_uuid]
csw = run_ngdc_csw(filter_list)
In [18]:
# Print the `xml` attribute of the record keyed 'NOAA.NOS.CO-OPS SOS' for inspection.
print(csw.records['NOAA.NOS.CO-OPS SOS'].xml)
In [19]:
# Base PropertyIsLike options; 'propertyname' is filled in per query below.
kw = {
    'wildCard': '*',
    'escapeChar': '\\',
    'singleChar': '?',
}
In [20]:
# Search for 'NANOOS' anywhere in the apiso:Subject property.
kw.update(propertyname='apiso:Subject')
filt_nanoos = fes.PropertyIsLike(literal='*%s*' % 'NANOOS', **kw)
filter_list = [filt_nanoos]
In [21]:
# Run the apiso:Subject query and show the keys of the returned records.
csw = run_ngdc_csw(filter_list)
csw.records.keys()
Out[21]:
In [22]:
# Search for 'NANOOS' anywhere in the apiso:AnyText property.
kw.update(propertyname='apiso:AnyText')
filt_nanoos = fes.PropertyIsLike(literal='*%s*' % 'NANOOS', **kw)
filter_list = [filt_nanoos]
In [23]:
# Run the apiso:AnyText query and show the keys of the returned records.
csw = run_ngdc_csw(filter_list)
csw.records.keys()
Out[23]: