Demonstration of how to use the OGC Catalog Services for the Web (CSW) to search for find all datasets containing a specified variable that fall withing a specified date range and geospatial bounding box, and then use the data access service contained in the returned metadata to retrieve and visualize the data.
Here we are accessing a Geoportal Server CSW, but in the future we should be able to point it toward any another CSW service, such as the one provided by catalog.data.gov.
In [83]:
from pylab import *
from owslib.csw import CatalogueServiceWeb
from owslib import fes
from owslib.sos import SensorObservationService
import pdb
from owslib.etree import etree
import random
import netCDF4
import pandas as pd
import datetime as dt
These are tags NGDC would like tested:
apiso:ResponsiblePartyName
apiso:Keywords
apiso:KeywordType
apiso:VertExtent_min
apiso:VertExtent_max
apiso:ServiceType
apiso:ServiceTypeVersion
apiso:OperatesOn
resource.url
website.url
In [84]:
def parse_om_xml(response):
# extract data and time from OM-XML response
root = etree.fromstring(response)
# root.findall(".//{%(om)s}Observation" % root.nsmap )
values = root.find(".//{%(swe)s}values" % root.nsmap )
date_value = array( [ (dt.datetime.strptime(d,"%Y-%m-%dT%H:%M:%SZ"),float(v))
for d,v in [l.split(',') for l in values.text.split()]] )
time = date_value[:,0]
data = date_value[:,1]
return data,time
In [85]:
#from IPython.core.display import HTML
#HTML('<iframe src=http://www.ngdc.noaa.gov/geoportal/ width=950 height=400></iframe>')
In [86]:
# connect to CSW, explore it's properties
endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw' # NGDC Geoportal
#endpoint = 'http://www.nodc.noaa.gov/geoportal/csw' # NODC Geoportal: granule level
#endpoint = 'http://data.nodc.noaa.gov/geoportal/csw' # NODC Geoportal: collection level
#endpoint = 'http://geodiscover.cgdi.ca/wes/serviceManagerCSW/csw' # NRCAN CUSTOM
#endpoint = 'http://geoport.whoi.edu/gi-cat/services/cswiso' # USGS Woods Hole GI_CAT
#endpoint = 'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw' # USGS CIDA Geonetwork
#endpoint = 'http://cmgds.marine.usgs.gov/geonetwork/srv/en/csw' # USGS Coastal and Marine Program
#endpoint = 'http://geoport.whoi.edu/geoportal/csw' # USGS Woods Hole Geoportal
#endpoint = 'http://geo.gov.ckan.org/csw' # CKAN testing site for new Data.gov
#endpoint = 'https://edg.epa.gov/metadata/csw' # EPA
#endpoint = 'http://cwic.csiss.gmu.edu/cwicv1/discovery' # CWIC
csw = CatalogueServiceWeb(endpoint,timeout=60)
csw.version
Out[86]:
In [87]:
[op.name for op in csw.operations]
Out[87]:
In [88]:
# hopefully something like this will be implemented in fes soon
def dateRange(start_date='1900-01-01',stop_date='2100-01-01',constraint='overlaps'):
if constraint == 'overlaps':
start = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=stop_date)
stop = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=start_date)
elif constraint == 'within':
start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=start_date)
stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=stop_date)
return start,stop
In [89]:
# hopefully something like this will be implemented in fes soon
def zRange(min_z='-5000', max_z='0', constraint='overlaps'):
if constraint == 'overlaps':
zmin = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:VertExtent_min', literal=min_z)
zmax = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:VertExtent_max', literal=max_z)
elif constraint == 'within':
zmin = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:VertExtent_min', literal=min_z)
zmax = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:VertExtent_max', literal=max_z)
return zmin,zmax
In [90]:
# User Input for query
#box=[-120, 2.0, -110.0, 6.0] # oceansites
box=[-160, 19, -156, 23] # pacioos
start_date='2012-05-01'
stop_date='2012-06-01'
min_z = '-5000'
max_z = '0'
responsible_party = 'Jim Potemra'
responsible_party = 'Margaret McManus'
std_name = 'sea_water_temperature'
service_type='sos' # 'opendap'
In [91]:
# convert User Input into FES filters
start,stop = dateRange(start_date,stop_date)
zmin,zmax = zRange(min_z,max_z,constraint='within')
bbox = fes.BBox(box)
#keywords = fes.PropertyIsLike(propertyname='anyText', literal=std_name)
#keywords = fes.PropertyIsLike(propertyname='apiso:Keywords', literal=std_name)
keywords = fes.PropertyIsEqualTo(propertyname='apiso:Keywords', literal=std_name)
serviceType = fes.PropertyIsLike(propertyname='apiso:ServiceType', literal=('*%s*' % service_type))
ResponsiblePartyName = fes.PropertyIsEqualTo(propertyname='apiso:ResponsiblePartyName', literal=responsible_party)
#serviceType = fes.PropertyIsEqualTo(propertyname='apiso:ServiceType', literal=service_type)
In [92]:
# try simple request using only keywords
csw.getrecords2(constraints=[keywords,serviceType],maxrecords=5,esn='full')
csw.records.keys()
Out[92]:
In [114]:
csw.getrecords2(constraints=[serviceType],esn='full')
csw.records.keys()
Out[114]:
In [74]:
# try request using multiple filters "and" syntax: [[filter1,filter2]]
csw.getrecords2(constraints=[[keywords,start,stop,serviceType,bbox]],maxrecords=5,esn='full')
csw.records.keys()
Out[74]:
In the next cell we take a look at the references
for a random record.
In [103]:
choice=random.choice(list(csw.records.keys()))
print choice
csw.records[choice].references
Out[103]:
In [76]:
foo=csw.records[choice]
In the next cell we try using the ResponsiblePartyName
, we are looking for 'Jim Potemra', who we see in the ISO record for 'NS10agg' here
http://oos.soest.hawaii.edu/thredds/iso/hioos/nss/ns10agg?catalog=http%3A%2F%2Foos.soest.hawaii.edu%2Fthredds%2Fidd%2Fnss_hioos.html&dataset=NS10agg
In [77]:
# try adding responsible party role
csw.getrecords2(constraints=[ResponsiblePartyName],maxrecords=10,esn='full')
csw.records.keys()
Out[77]:
Now print out some titles
In [104]:
for rec,item in csw.records.iteritems():
print item.title
In the next cell we try using a vertical extent range, which doesn't seem to be working, as we are looking for z values between [-5000, 0] and we see in the ISO record for 'NS10agg' here:
http://oos.soest.hawaii.edu/thredds/iso/hioos/nss/ns10agg?catalog=http%3A%2F%2Foos.soest.hawaii.edu%2Fthredds%2Fidd%2Fnss_hioos.html&dataset=NS10agg
that the vertical extent is from [-2, -2]
In [79]:
csw.getrecords2(constraints=[[keywords,start,stop,serviceType,bbox,zmin,zmax]],maxrecords=5,esn='full')
csw.records.keys()
Out[79]:
Define a function that will return the endpoint for a specified service type
In [79]:
In [105]:
# get specific ServiceType URL from records
def service_urls(records,service_string='urn:x-esri:specification:ServiceType:odp:url'):
urls=[]
for key,rec in records.iteritems():
#create a generator object, and iterate through it until the match is found
#if not found, gets the default value (here "none")
url = next((d['url'] for d in rec.references if d['scheme'] == service_string), None)
if url is not None:
urls.append(url)
return urls
Print out all the SOS endpoints
In [106]:
sos_urls = service_urls(csw.records,service_string='urn:x-esri:specification:ServiceType:sos:url')
print "\n".join(sos_urls)
In [81]:
In [111]:
sos = SensorObservationService(sos_urls[2])
contents = sos.contents
#print(contents)
In [112]:
sos.filters
In [113]:
for url in sos_urls:
sos = SensorObservationService(url)
off = sos.offerings[1]
print sos.filters
# print(off)
In [ ]:
for url in sos_urls:
csw = CatalogueServiceWeb(url,timeout=60)
#pdb.set_trace()
response = csw.get_observation(offerings=['1211-A1H'],
responseFormat='text/xml;subtype="om/1.0.0"',
observedProperties=['u_1205'],
procedure='urn:ioos:station:gov.usgs:1211-A1H')ncv = netCDF4.Dataset(url).variables
data_dict[v]=ncv[v][istart:istop].flatten()
# Create Pandas data frame, with time index
ts = pd.DataFrame.from_dict(data_dict)
ts.index=dtime
ts.plot(figsize=(12,4));
title(std_name)
In [ ]: