In [1]:
# Notebook-local helpers; css_styles() presumably injects the project's CSS
# theme into the notebook output — defined in the local utilities module
# (TODO confirm; the module is not visible from here).
from utilities import *
css_styles()
Out[1]:
Dissolved oxygen is a common indicator of water quality and measures how much oxygen is dissolved in water samples. This test searches for dissolved oxygen data in real-time sensors and models, and checks whether all of the values are reported in the same units so they can be compared.
In [5]:
from pylab import *
from IPython.display import HTML
In [6]:
import pandas as pd
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 500)
from SPARQLWrapper import SPARQLWrapper, JSON

# Query the MMI ontology registry for IOOS parameters and pull out the
# CF standard-name URIs they are mapped to (via skos match properties).
sparql = SPARQLWrapper("http://mmisw.org/sparql")
# BUG FIX: the query used the `skos:` prefix without declaring it in the
# prologue; SPARQL requires every prefix to be declared, so add it here.
query = """
PREFIX ioos: <http://mmisw.org/ont/ioos/parameter/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT DISTINCT ?cat ?parameter ?property ?value
WHERE {?parameter a ioos:Parameter .
       ?parameter ?property ?value .
       ?cat skos:narrowMatch ?parameter .
       FILTER (regex(str(?property), "Match", "i") && regex(str(?value), "cf", "i") )
      }
ORDER BY ?cat ?parameter
"""
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
j = sparql.query().convert()

# De-duplicate the matched CF URIs; the CF name is the last URI path segment.
cf_standard_uris = list(set([ x["value"]["value"] for x in j.get("results").get("bindings") ]))
cf_standard_names = [uri.split("/")[-1] for uri in cf_standard_uris]
pd.DataFrame.from_records(zip(cf_standard_names, cf_standard_uris), columns=("CF Name", "CF URI",))
Out[6]:
In [7]:
# Geographic subset as [west, south, east, north] in decimal degrees;
# used later for the CSW BBox filter.
bounding_box = [ -81.03, 27.59, -66.14, 44.92] # East Coast
"Geographic subset: {!s}".format(bounding_box)
Out[7]:
In [8]:
from datetime import datetime

# Temporal subset: January through August 2014, rendered with the same
# hour-truncated format the downstream FES temporal filter expects.
DATE_FMT = '%Y-%m-%d %H:00'
start_date = datetime(2014, 1, 1)
end_date = datetime(2014, 8, 1)
start_date_string = start_date.strftime(DATE_FMT)
end_date_string = end_date.strftime(DATE_FMT)
"Temporal subset: ( {!s} to {!s} )".format(start_date_string, end_date_string)
Out[8]:
In [9]:
# Restrict the search to oxygen-related CF names found in MMI, plus a few
# hand-picked extras. (Do we need any, or are they all extractable from MMI?)
custom_variables = ['dissolved_oxygen', 'oxygen']
oxygen_names = [name for name in cf_standard_names if "oxygen" in name]
variables_to_query = oxygen_names + custom_variables
"Variable subset: {!s}".format(" , ".join(variables_to_query))
Out[9]:
In [10]:
# https://github.com/ioos/system-test/wiki/Service-Registries-and-Data-Catalogs
# Candidate CSW (Catalogue Service for the Web) endpoints to interrogate;
# source list: the IOOS system-test wiki page linked above.
known_csw_servers = ['http://data.nodc.noaa.gov/geoportal/csw',
'http://www.nodc.noaa.gov/geoportal/csw',
'http://www.ngdc.noaa.gov/geoportal/csw',
'http://cwic.csiss.gmu.edu/cwicv1/discovery',
'http://geoport.whoi.edu/geoportal/csw',
'https://edg.epa.gov/metadata/csw',
'http://cmgds.marine.usgs.gov/geonetwork/srv/en/csw',
'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw',
'http://geodiscover.cgdi.ca/wes/serviceManagerCSW/csw',
'http://geoport.whoi.edu/gi-cat/services/cswiso',
'https://data.noaa.gov/csw',
]
In [11]:
from owslib import fes
def fes_date_filter(start_date='1900-01-01', stop_date='2100-01-01', constraint='overlaps'):
    """Build an OGC FES temporal filter against the apiso temporal queryables.

    Parameters
    ----------
    start_date, stop_date : str
        ISO-ish date strings compared lexically by the CSW server.
    constraint : str
        'overlaps' — records whose extent intersects [start_date, stop_date];
        'within'   — records whose extent lies entirely inside the window.

    Returns
    -------
    fes.And combining the two property comparisons.

    Raises
    ------
    ValueError
        If `constraint` is not one of the two supported modes.
    """
    if constraint == 'overlaps':
        # Record ends after our start AND begins before our stop.
        start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=start_date)
        stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=stop_date)
    elif constraint == 'within':
        start = fes.PropertyIsGreaterThanOrEqualTo(propertyname='apiso:TempExtent_begin', literal=start_date)
        stop = fes.PropertyIsLessThanOrEqualTo(propertyname='apiso:TempExtent_end', literal=stop_date)
    else:
        # BUG FIX: any other value previously fell through to the return
        # statement with `start`/`stop` unbound, raising UnboundLocalError.
        raise ValueError("constraint must be 'overlaps' or 'within', got %r" % (constraint,))
    return fes.And([start, stop])
In [12]:
# Standard Name filters
# Assemble the compound CSW query: free-text match on any oxygen variable
# name, intersecting the bounding box, within the time window.
cf_name_filters = fes.Or([
    fes.PropertyIsLike(propertyname='apiso:AnyText', literal="*%s*" % cf_name, wildCard='*')
    for cf_name in variables_to_query
])
# Geographic filters
geographic_filter = fes.BBox(bbox=bounding_box)
# Temporal filters
temporal_filter = fes_date_filter(start_date_string, end_date_string)
filters = fes.And([cf_name_filters, geographic_filter, temporal_filter])
In [13]:
from owslib.etree import etree
# Dump the assembled OGC filter as pretty-printed XML for inspection.
print etree.tostring(filters.toXML(), pretty_print=True)
In [14]:
from owslib.csw import CatalogueServiceWeb
bbox_endpoints = []
for url in known_csw_servers:
queryables = []
try:
csw = CatalogueServiceWeb(url, timeout=20)
except BaseException:
print "Failure - %s - Timed out" % url
if "BBOX" in csw.filters.spatial_operators:
print "Success - %s - BBOX Query supported" % url
bbox_endpoints.append(url)
else:
print "Failure - %s - BBOX Query NOT supported" % url
In [15]:
urls = []
service_types = []
servers = []
for url in bbox_endpoints:
print "*", url
try:
csw = CatalogueServiceWeb(url, timeout=20)
csw.getrecords2(constraints=[filters], maxrecords=200, esn='full')
for record, item in csw.records.items():
# Get URLs
service_url, scheme = next(((d['url'], d['scheme']) for d in item.references), None)
if service_url:
if len(item.title) > 100:
title = "{!s}...{!s}".format(item.title[0:50], item.title[-50:])
else:
title = item.title
print " [x] {!s}".format(title)
urls.append(service_url)
service_types.append(scheme)
servers.append(url)
except BaseException as e:
print " [-] FAILED: {!s}".format(e)
In [16]:
# Tabulate the discovered endpoints and drop duplicate rows.
srvs = pd.DataFrame(
    zip(urls, service_types, servers),
    columns=("URL", "Service Type", "Server"),
).drop_duplicates()
pd.set_option('display.max_rows', 10)
srvs
Out[16]:
In [17]:
# Count discovered endpoints per service scheme (SOS, OPeNDAP, ...).
pd.DataFrame(srvs.groupby("Service Type").size(), columns=("Number of services",))
Out[17]:
In [18]:
def find_sos(x):
    """Return `x` when the URL looks like an SOS endpoint, else None.

    A URL qualifies when its lowercased form contains "sos" but not
    "dods" (which would mark it as an OPeNDAP URL instead).
    """
    lowered = x.lower()
    return x if ("sos" in lowered and "dods" not in lowered) else None
In [19]:
# Keep only SOS endpoints: find_sos maps non-matches to None,
# which filter(None, ...) then drops.
sos_servers = filter(None, srvs["URL"].map(find_sos))
sos_servers
Out[19]:
In [54]:
def find_dap(x):
    """Return `x` when the URL looks like an OPeNDAP endpoint, else None.

    Accepts URLs whose lowercased form contains "dap" or "dods", but
    rejects ERDDAP "tabledap" URLs.
    """
    lowered = x.lower()
    if "tabledap" in lowered:
        return None
    if "dap" in lowered or "dods" in lowered:
        return x
    return None
In [55]:
import os

# Keep only the OPeNDAP endpoints (find_dap maps non-matches to None)
# and strip any trailing file extension so the bare DAP URL remains.
dap_servers = [os.path.splitext(u)[0]
               for u in srvs["URL"].map(find_dap)
               if u]
dap_servers
Out[55]:
In [59]:
import iris
import iris.plot as iplt
import matplotlib.pyplot as plt
%matplotlib inline
# Load-time constraint: keep only cubes whose standard_name is one of
# the oxygen variables gathered earlier.
variables = lambda cube: cube.standard_name in variables_to_query
constraint = iris.Constraint(cube_func=variables)
def iris_grid_plot(cube_slice, name=None):
    """Plot one 2-D (lat/lon) slice of an iris cube as a pcolormesh.

    Parameters
    ----------
    cube_slice : iris cube slice with X and Y coords and a 'time' coord
        (assumed 2-D by the time this is called — TODO confirm).
    name : str, optional
        Dataset title used as a prefix in the figure title.
    """
    plt.figure(figsize=(12, 8))
    lat = cube_slice.coord(axis='Y').points
    lon = cube_slice.coord(axis='X').points
    time = cube_slice.coord('time')[0]
    # Aspect ratio compensates for longitude convergence at this latitude;
    # cos/mean/pi/ma come from the earlier `pylab` star import.
    plt.subplot(111, aspect=(1.0 / cos(mean(lat) * pi / 180.0)))
    plt.pcolormesh(lon, lat, ma.masked_invalid(cube_slice.data));
    plt.colorbar()
    plt.grid()
    # Convert the numeric time point back to a datetime for the title.
    date = time.units.num2date(time.points)
    date_str = date[0].strftime('%Y-%m-%d %H:%M:%S %Z')
    plt.title('%s: %s: %s' % (name, cube_slice.long_name, date_str));
    plt.show()
for dap in dap_servers:
print "[*] {!s}".format(dap)
try:
cube = iris.load_cube(dap, constraint)
except BaseException as e:
print " [-] Could not load: {!s}".format(e)
continue
print " [-] Identified as a Grid"
print " [-] {!s}".format(cube.attributes["title"])
try:
try:
cube.coord(axis='T').rename('time')
except:
pass
if len(cube.shape) == 4:
cube = cube[0, -1, ::1, ::1]
elif len(cube.shape) == 3:
cube = cube[0, ::1, ::1]
elif len(cube.shape) == 2:
cube = cube[::1, ::1]
else:
raise ValueError("Dimensions do not adhere to plotting requirements")
iris_grid_plot(cube, cube.attributes["title"])
except ValueError as e:
print " [-] Could not plot: {!s}".format(e)
continue
In [ ]: