CSW access with OWSLib using ISO queryables with SOS

Demonstration of how to use the OGC Catalog Services for the Web (CSW) to search for find all datasets containing a specified variable that fall withing a specified date range and geospatial bounding box, and then use the data access service contained in the returned metadata to retrieve and visualize the data.

Here we are accessing a Geoportal Server CSW, but in the future we should be able to point it toward any another CSW service, such as the one provided by catalog.data.gov.


In [83]:
from pylab import *
from owslib.csw import CatalogueServiceWeb
from owslib import fes
from owslib.sos import SensorObservationService
import pdb
from owslib.etree import etree
import random
import netCDF4
import pandas as pd
import datetime as dt

These are tags NGDC would like tested:

apiso:ResponsiblePartyName
apiso:Keywords
apiso:KeywordType
apiso:VertExtent_min
apiso:VertExtent_max
apiso:ServiceType
apiso:ServiceTypeVersion
apiso:OperatesOn
resource.url
website.url

In [84]:
def parse_om_xml(response):
    # extract data and time from OM-XML response
    root = etree.fromstring(response)
    # root.findall(".//{%(om)s}Observation" % root.nsmap )
    values = root.find(".//{%(swe)s}values" % root.nsmap )
    date_value = array( [ (dt.datetime.strptime(d,"%Y-%m-%dT%H:%M:%SZ"),float(v))
            for d,v in [l.split(',') for l in values.text.split()]] )
    time = date_value[:,0]
    data = date_value[:,1]
    return data,time

In [85]:
#from IPython.core.display import HTML
#HTML('<iframe src=http://www.ngdc.noaa.gov/geoportal/ width=950 height=400></iframe>')

In [86]:
# connect to CSW, explore it's properties

endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw' # NGDC Geoportal

#endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'   # NODC Geoportal: granule level
#endpoint = 'http://data.nodc.noaa.gov/geoportal/csw'  # NODC Geoportal: collection level   
#endpoint = 'http://geodiscover.cgdi.ca/wes/serviceManagerCSW/csw'  # NRCAN CUSTOM
#endpoint = 'http://geoport.whoi.edu/gi-cat/services/cswiso' # USGS Woods Hole GI_CAT
#endpoint = 'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw' # USGS CIDA Geonetwork
#endpoint = 'http://cmgds.marine.usgs.gov/geonetwork/srv/en/csw' # USGS Coastal and Marine Program
#endpoint = 'http://geoport.whoi.edu/geoportal/csw' # USGS Woods Hole Geoportal 
#endpoint = 'http://geo.gov.ckan.org/csw'  # CKAN testing site for new Data.gov
#endpoint = 'https://edg.epa.gov/metadata/csw'  # EPA
#endpoint = 'http://cwic.csiss.gmu.edu/cwicv1/discovery'  # CWIC

csw = CatalogueServiceWeb(endpoint,timeout=60)
csw.version


Out[86]:
'2.0.2'

In [87]:
[op.name for op in csw.operations]


Out[87]:
['GetCapabilities',
 'DescribeRecord',
 'GetRecords',
 'GetRecordById',
 'Transaction']

In [88]:
# hopefully something like this will be implemented in fes soon
def dateRange(start_date='1900-01-01',stop_date='2100-01-01',constraint='overlaps'):
    if constraint == 'overlaps':
        start = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=stop_date)
        stop = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=start_date)
    elif constraint == 'within':
        start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=start_date)
        stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=stop_date)
    return start,stop

In [89]:
# hopefully something like this will be implemented in fes soon
def zRange(min_z='-5000', max_z='0', constraint='overlaps'):
    if constraint == 'overlaps':
        zmin = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:VertExtent_min', literal=min_z)
        zmax = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:VertExtent_max', literal=max_z)
    elif constraint == 'within':
        zmin = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:VertExtent_min', literal=min_z)
        zmax = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:VertExtent_max', literal=max_z)
    return zmin,zmax

In [90]:
# User Input for query

#box=[-120, 2.0, -110.0, 6.0] #  oceansites
box=[-160, 19, -156, 23]   # pacioos
start_date='2012-05-01'
stop_date='2012-06-01'
min_z = '-5000'
max_z = '0'
responsible_party = 'Jim Potemra'
responsible_party = 'Margaret McManus'
std_name = 'sea_water_temperature'
service_type='sos' # 'opendap'

In [91]:
# convert User Input into FES filters
start,stop = dateRange(start_date,stop_date)
zmin,zmax = zRange(min_z,max_z,constraint='within')
bbox = fes.BBox(box)
#keywords = fes.PropertyIsLike(propertyname='anyText', literal=std_name)
#keywords = fes.PropertyIsLike(propertyname='apiso:Keywords', literal=std_name)
keywords = fes.PropertyIsEqualTo(propertyname='apiso:Keywords', literal=std_name)
serviceType = fes.PropertyIsLike(propertyname='apiso:ServiceType', literal=('*%s*' % service_type))
ResponsiblePartyName = fes.PropertyIsEqualTo(propertyname='apiso:ResponsiblePartyName', literal=responsible_party)
#serviceType = fes.PropertyIsEqualTo(propertyname='apiso:ServiceType', literal=service_type)

In [92]:
# try simple request using only keywords

csw.getrecords2(constraints=[keywords,serviceType],maxrecords=5,esn='full')
csw.records.keys()


Out[92]:
['NANOOS Sensor Observation Service (SOS)',
 'National Data Buoy Center SOS',
 'Northeastern Regional Association of Coastal Ocean Observing Systems',
 'NOAA.NOS.CO-OPS SOS',
 'Dauphin Island Sea Lab SOS']

In [114]:
csw.getrecords2(constraints=[serviceType],esn='full')
csw.records.keys()


Out[114]:
['testDatasetScan/testBuoy_20130825.nc',
 'Dauphin Island Sea Lab SOS',
 'testDatasetScan/testBuoy_10.nc',
 'testDatasetScan/testBuoy_11.nc',
 'NERRS SOS',
 'testDatasetScan/testBuoy_20130824.nc',
 'id_testBuoy',
 'Louisiana Universities Marine Consortium (LUMCON) SOS',
 'SECOORA SOS',
 'University of South Florida COMPS SOS']

In [74]:
# try request using multiple filters "and" syntax: [[filter1,filter2]]
csw.getrecords2(constraints=[[keywords,start,stop,serviceType,bbox]],maxrecords=5,esn='full')
csw.records.keys()


Out[74]:
['National Data Buoy Center SOS', 'NOAA.NOS.CO-OPS SOS']

In the next cell we take a look at the references for a random record.


In [103]:
choice=random.choice(list(csw.records.keys()))
print choice
csw.records[choice].references


NOAA.NOS.CO-OPS SOS
Out[103]:
[{'scheme': 'urn:x-esri:specification:ServiceType:distribution:url',
  'url': 'http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&request=GetCapabilities'},
 {'scheme': 'urn:x-esri:specification:ServiceType:distribution:url',
  'url': 'http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&request=GetCapabilities'},
 {'scheme': 'urn:x-esri:specification:ServiceType:sos:url',
  'url': 'http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&request=GetCapabilities'},
 {'scheme': 'urn:x-esri:specification:ServiceType:sos:url',
  'url': 'http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&request=GetCapabilities'}]

In [76]:
foo=csw.records[choice]

In the next cell we try using the ResponsiblePartyName, we are looking for 'Jim Potemra', who we see in the ISO record for 'NS10agg' here
http://oos.soest.hawaii.edu/thredds/iso/hioos/nss/ns10agg?catalog=http%3A%2F%2Foos.soest.hawaii.edu%2Fthredds%2Fidd%2Fnss_hioos.html&dataset=NS10agg


In [77]:
# try adding responsible party role 
csw.getrecords2(constraints=[ResponsiblePartyName],maxrecords=10,esn='full')
csw.records.keys()


Out[77]:
['NS05agg',
 'NS10agg',
 'NS09agg',
 'NS15agg',
 'NS04agg',
 'NS06agg',
 'NS03agg',
 'NS11agg',
 'NS07agg',
 'NS08agg']

Now print out some titles


In [104]:
for rec,item in csw.records.iteritems():
    print item.title


NANOOS Sensor Observation Service (SOS)
National Data Buoy Center SOS
Northeastern Regional Association of Coastal Ocean Observing Systems
NOAA.NOS.CO-OPS SOS
Dauphin Island Sea Lab SOS

In the next cell we try using a vertical extent range, which doesn't seem to be working, as we are looking for z values between [-5000, 0] and we see in the ISO record for 'NS10agg' here:
http://oos.soest.hawaii.edu/thredds/iso/hioos/nss/ns10agg?catalog=http%3A%2F%2Foos.soest.hawaii.edu%2Fthredds%2Fidd%2Fnss_hioos.html&dataset=NS10agg
that the vertical extent is from [-2, -2]


In [79]:
csw.getrecords2(constraints=[[keywords,start,stop,serviceType,bbox,zmin,zmax]],maxrecords=5,esn='full')
csw.records.keys()


Out[79]:
[]

Define a function that will return the endpoint for a specified service type


In [79]:


In [105]:
# get specific ServiceType URL from records
def service_urls(records,service_string='urn:x-esri:specification:ServiceType:odp:url'):
    urls=[]
    for key,rec in records.iteritems():
        #create a generator object, and iterate through it until the match is found
        #if not found, gets the default value (here "none")
        url = next((d['url'] for d in rec.references if d['scheme'] == service_string), None)
        if url is not None:
            urls.append(url)
    return urls

Print out all the SOS endpoints


In [106]:
sos_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:sos:url')
print "\n".join(sos_urls)


http://habu.apl.washington.edu/pyws/sos.py?service=SOS&request=GetCapabilities
http://sdf.ndbc.noaa.gov/sos/server.php?service=SOS&request=GetCapabilities
http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&request=GetCapabilities

In [81]:


In [111]:
sos = SensorObservationService(sos_urls[2])
contents = sos.contents
#print(contents)

In [112]:
sos.filters

In [113]:
for url in sos_urls:
    sos = SensorObservationService(url)
    off = sos.offerings[1]
    print sos.filters
#    print(off)


None
None
None

In [ ]:
for url in sos_urls:
    csw = CatalogueServiceWeb(url,timeout=60)

    #pdb.set_trace()
    response = csw.get_observation(offerings=['1211-A1H'],
                                 responseFormat='text/xml;subtype="om/1.0.0"',
                                 observedProperties=['u_1205'],
                                 procedure='urn:ioos:station:gov.usgs:1211-A1H')ncv = netCDF4.Dataset(url).variables

            data_dict[v]=ncv[v][istart:istop].flatten()
        # Create Pandas data frame, with time index
        ts = pd.DataFrame.from_dict(data_dict)
        ts.index=dtime
        ts.plot(figsize=(12,4));
        title(std_name)

In [ ]: