7/11,9/2016, Emilio Mayorga, for NANOOS.
Access and parse the SensorML responses from an IOOS SOS M1 service endpoint. Extract all (or most) of the station-level attributes defined as part of the IOOS convention, and populate a GeoDataFrame from all the station responses. The request may cover all station procedures available at the SOS endpoint, or only a subset of stations selected by their station URNs. OWSLib and pyoos are used for the requests and parsing.
In [1]:
from datetime import datetime
from urllib import urlencode
from collections import OrderedDict
import numpy as np
import pandas as pd
from shapely.geometry import Point
import geopandas as gpd
from owslib.sos import SensorObservationService
from owslib.swe.sensor.sml import SensorML, Contact, Documentation
from owslib.util import testXMLValue, testXMLAttribute, nspath_eval
from owslib.namespaces import Namespaces
from pyoos.collectors.ioos.swe_sos import IoosSweSos
from pyoos.parsers.ioos.describe_sensor import IoosDescribeSensor
from pyoos.parsers.ioos.one.describe_sensor import ont
In [2]:
# These functions are all from OWSLib, with minor adaptations
def get_namespaces():
    """Return the namespace map used to parse the SensorML responses.

    The map holds the OWSLib entries for the "sml", "gml", "xlink" and
    "swe" prefixes, plus an "ism" entry added here.
    """
    ns_map = Namespaces().get_namespaces(["sml", "gml", "xlink", "swe"])
    ns_map["ism"] = "urn:us:gov:ic:ism:v2"
    return ns_map
# Namespace map built once and shared by every nsp() call.
namespaces = get_namespaces()


def nsp(path):
    """Evaluate a prefixed path with OWSLib's nspath_eval.

    ``path`` is a prefixed element path such as 'gml:Point/gml:pos'; it is
    evaluated against the module-level ``namespaces`` map and the result is
    what this file hands to ElementTree ``find``/``findall``.
    """
    evaluated = nspath_eval(path, namespaces)
    return evaluated
In [3]:
# Column order of the station table. Must stay in sync with the keys filled
# in by _station_record(); it is kept separately so that an empty result
# still carries the full set of columns.
_STATION_COLUMNS = (
    'station_urn', 'lon', 'lat', 'shortName', 'longName', 'wmoID',
    'platformType', 'parentNetwork', 'sponsor', 'webpage_url',
    'operatorSector', 'operator_org', 'operator_country', 'operator_url',
    'publisher', 'publisher_org', 'publisher_url',
    'starting', 'ending', 'starting_isostr', 'ending_isostr',
    'parameter_uris', 'parameters',
)


def _contact_attr(contacts_by_role, role, attr):
    """Return ``attr`` of the contact stored under ``role``, or None if absent."""
    contact = contacts_by_role.get(role)
    if contact is None:
        return None
    return getattr(contact, attr)


def _station_record(station_urn, sml):
    """Build the ordered attribute record for one station from its SensorML."""
    ds = IoosDescribeSensor(sml._root)
    # The gml:pos text is read as "<lat> <lon>".
    coords = testXMLValue(ds.system.location.find(nsp('gml:Point/gml:pos'))).split()
    system_el = sml._root.findall(nsp('sml:member'))[0].find(nsp('sml:System'))

    # Assume there's a single DocumentList/member; will read the first one only.
    # Assume that member corresponds to xlink:arcrole="urn:ogc:def:role:webPage"
    documents = system_el.findall(nsp('sml:documentation/sml:DocumentList/sml:member'))
    if documents:
        webpage_url = Documentation(documents[0]).documents[0].url
    else:
        webpage_url = None

    # Index the contacts by the last path component of their role
    # (the code below looks up 'operator' and 'publisher').
    contacts_by_role = {}
    for member in system_el.findall(nsp('sml:contact/sml:ContactList/sml:member')):
        contact = Contact(member)
        if not contact.role:
            # A contact without a role cannot be looked up by role; skip it
            # instead of failing on None.split().
            continue
        contacts_by_role[contact.role.split('/')[-1]] = contact

    swe_quants = system_el.findall(nsp('sml:outputs/sml:OutputList/sml:output/swe:Quantity'))
    quant_lst = [quant.attrib['definition'] for quant in swe_quants]
    parameter_lst = [uri.split('/')[-1] for uri in quant_lst]

    station = OrderedDict()
    station['station_urn'] = station_urn
    station['lon'] = float(coords[1])
    station['lat'] = float(coords[0])
    station['shortName'] = ds.shortName
    station['longName'] = ds.longName
    station['wmoID'] = ds.get_ioos_def('wmoID', 'identifier', ont)
    # Beware that a station can have >1 classifier of the same type
    # This code does not accommodate that possibility
    station['platformType'] = ds.platformType
    station['parentNetwork'] = ds.get_ioos_def('parentNetwork', 'classifier', ont)
    station['sponsor'] = ds.get_ioos_def('sponsor', 'classifier', ont)
    station['webpage_url'] = webpage_url
    station['operatorSector'] = ds.get_ioos_def('operatorSector', 'classifier', ont)
    # Contact-derived fields are None when the station lists no such contact.
    station['operator_org'] = _contact_attr(contacts_by_role, 'operator', 'organization')
    station['operator_country'] = _contact_attr(contacts_by_role, 'operator', 'country')
    station['operator_url'] = _contact_attr(contacts_by_role, 'operator', 'url')
    # pull out email address(es) too? (Contact.electronicMailAddress)
    station['publisher'] = ds.get_ioos_def('publisher', 'classifier', ont)
    station['publisher_org'] = _contact_attr(contacts_by_role, 'publisher', 'organization')
    station['publisher_url'] = _contact_attr(contacts_by_role, 'publisher', 'url')
    station['starting'] = ds.starting
    station['ending'] = ds.ending
    station['starting_isostr'] = datetime.isoformat(ds.starting)
    station['ending_isostr'] = datetime.isoformat(ds.ending)
    station['parameter_uris'] = ','.join(quant_lst)
    station['parameters'] = ','.join(parameter_lst)
    return station


def get_stations_df(sos_url, station_urns_sel=None):
    """Return a pandas DataFrame of station-level SensorML attributes.

    One row is built per station from its DescribeSensor (SensorML)
    response. The frame is indexed by the station urn, which is also kept
    as the 'station_urn' column. The result is a plain DataFrame, not a
    GeoDataFrame: it carries 'lon' and 'lat' columns but no geometry.

    Parameters
    ----------
    sos_url : str
        URL of the SOS endpoint.
    station_urns_sel : iterable of str, optional
        Station urn's to describe. When None (the default), every offering
        of the endpoint whose urn has no 'network' component is described.
        An empty selection returns an empty frame without any web request.

    Returns
    -------
    pandas.DataFrame
        Columns are those of ``_STATION_COLUMNS``, in that order. The
        operator_* and publisher_* columns are None for a station that
        lists no operator / publisher contact.
    """
    # LATER: ADD ERROR TEST/CATCH AFTER EACH WEB REQUEST
    oFrmt = 'text/xml; subtype="sensorML/1.0.1/profiles/ioos_sos/1.0"'

    station_recs = []
    if station_urns_sel is None:
        # No selection: describe every non-network offering of the endpoint.
        sos_collector = IoosSweSos(sos_url)
        station_urns = [urn.name for urn in sos_collector.server.offerings
                        if 'network' not in urn.name.split(':')]
        sos_collector.features = station_urns
        sml_lst = sos_collector.metadata(timeout=200)
        for station_urn, sml in zip(station_urns, sml_lst):
            station_recs.append(_station_record(station_urn, sml))
    else:
        station_urns = list(station_urns_sel)
        if station_urns:
            params = {'service': 'SOS', 'request': 'GetCapabilities',
                      'acceptVersions': '1.0.0'}
            sosgc = SensorObservationService(sos_url + '?' + urlencode(params))
            for station_urn in station_urns:
                sml_str = sosgc.describe_sensor(procedure=station_urn,
                                                outputFormat=oFrmt, timeout=200)
                station_recs.append(_station_record(station_urn, SensorML(sml_str)))

    # Explicit columns keep the schema stable even when no station was found.
    stations_df = pd.DataFrame.from_records(station_recs,
                                            columns=list(_STATION_COLUMNS))
    stations_df.index = stations_df['station_urn']
    return stations_df
In [4]:
# SOS KVP endpoint URL for each regional association label.
sos_endpoints = {
    'CeNCOOS': 'http://sos.cencoos.org/sos/sos/kvp',
    'NANOOS': 'http://data.nanoos.org/52nsos/sos/kvp',
}
Select the SOS endpoint (sos_label) and, optionally, a list of station URNs. Setting station_urns = None
queries the SOS GetCapabilities response and extracts all non-network station offerings. Otherwise, pass a list of station URNs in station_urns
to process only those stations.
In [5]:
# Option A: CeNCOOS endpoint with an explicit list of stations.
sos_label = 'CeNCOOS'
station_urns = [
    'urn:ioos:station:cencoos:Tiburon',
    'urn:ioos:station:cencoos:Carquinez',
    'urn:ioos:station:cencoos:Trinidad',
    'urn:ioos:station:cencoos:Humboldt',
    'urn:ioos:station:cencoos:Monterey',
    'urn:ioos:station:cencoos:SantaCruz',
    'urn:ioos:station:mlml:mlml-sea',
]
# Alternatives: a single station, or None to request every station offering.
#station_urns = ['urn:ioos:station:cencoos:Trinidad']
#station_urns = None
In [6]:
# Option B: NANOOS endpoint. This reassigns sos_label and station_urns, so
# when this cell runs after the CeNCOOS cell its values are the ones used.
sos_label = 'NANOOS'
# station_urns = ['urn:ioos:station:nanoos:nerrs_sosecwq']
# None makes get_stations_df describe every non-network offering.
station_urns = None
In [7]:
# Request and parse the SensorML of the selected stations at the chosen endpoint.
stations_df = get_stations_df(sos_endpoints[sos_label], station_urns)
In [8]:
# Assign EPSG:4326 CRS, retrieved from epsg.io
# The OGC WKT crs string is available directly at http://epsg.io/4326.wkt
# or http://spatialreference.org/ref/epsg/4326/ogcwkt/
# The literal closes directly after the final "]]" so that no stray
# character ends up inside the WKT definition.
crs = '''GEOGCS["WGS 84",
DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],
AUTHORITY["EPSG","6326"]],
PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],
UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],
AUTHORITY["EPSG","4326"]]'''
# One point per station, x = lon and y = lat, in the row order of stations_df.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(stations_df.lon, stations_df.lat)
]
stations_gdf = gpd.GeoDataFrame(stations_df, geometry=geometry, crs=crs)
In [9]:
# Preview the first rows of the station table.
stations_gdf.head()
Out[9]:
From Lance, about what's an active/working station: "I meant by an 'older' station a station that does not have data for the past 7 days."
In [10]:
# Count stations whose 'ending' value is later than the fixed cutoff date.
is_active = stations_gdf.ending > '2016-7-3'
active_cnt = len(stations_gdf[is_active])
total_cnt = len(stations_gdf)
summary = "'Active' stations: %d / Total stations: %d" % (active_cnt, total_cnt)
print(summary)
In [11]:
# Show the dtype of every column.
stations_gdf.dtypes
Out[11]:
In [12]:
# Print the Python type of the first value in each column (iterating a
# frame yields its column labels). The first row is taken by position with
# .iloc because the frame is indexed by station urn, not by integers. A
# single formatted string prints the same way whether print is a statement
# or a function.
for d in stations_gdf:
    print('%s %s' % (d, type(stations_gdf[d].iloc[0])))
In [13]:
# Drop the 'starting' and 'ending' columns, which make the export to file
# fail; their ISO-string counterparts ('starting_isostr', 'ending_isostr')
# stay in the output.
stations_gdf_out = stations_gdf.drop(['starting', 'ending'], axis=1)
In [14]:
# Write the station table to a GeoPackage file named after the SOS label.
# NOTE(review): the path starts with '~'; expansion to the home directory is
# left to to_file -- confirm for the geopandas version in use.
fpth = '~/ipythonnotebooks/%s_sossml_stations.gpkg' % sos_label
stations_gdf_out.to_file(fpth, driver='GPKG')
In [ ]: