IOOS SOS SensorML Access and Parsing

7/11,9/2016, Emilio Mayorga, for NANOOS.

Access and parse the SensorML responses from an IOOS SOS M1 service endpoint. Extract all/most station-level attributes defined as part of the IOOS convention, and populate a GeoDataFrame from all the station responses. The request may involve getting all station procedures available in the SOS endpoint, or only a subset of stations based on the station urn's. OWSLib and pyoos are used for the requests and parsing.

Import modules and set up low-level utility functions


In [1]:
from datetime import datetime
from urllib import urlencode
from collections import OrderedDict

import numpy as np
import pandas as pd
from shapely.geometry import Point
import geopandas as gpd

from owslib.sos import SensorObservationService
from owslib.swe.sensor.sml import SensorML, Contact, Documentation
from owslib.util import testXMLValue, testXMLAttribute, nspath_eval
from owslib.namespaces import Namespaces

from pyoos.collectors.ioos.swe_sos import IoosSweSos
from pyoos.parsers.ioos.describe_sensor import IoosDescribeSensor
from pyoos.parsers.ioos.one.describe_sensor import ont

In [2]:
# These functions are all from OWSLib, with minor adaptations

def get_namespaces():
    """Return the namespace mapping used when building SensorML search paths.

    The mapping holds the "sml", "gml", "xlink" and "swe" entries provided
    by OWSLib's ``Namespaces`` registry, plus an added "ism" entry.
    """
    ns_map = Namespaces().get_namespaces(["sml", "gml", "xlink", "swe"])
    # "ism" is not requested from the registry above, so it is added by hand.
    ns_map["ism"] = "urn:us:gov:ic:ism:v2"
    return ns_map

# Module-level namespace mapping, built once and read by nsp() below.
namespaces = get_namespaces()


def nsp(path):
    """Return ``path`` evaluated by OWSLib's ``nspath_eval``.

    The module-level ``namespaces`` mapping is always used, so callers only
    pass the prefixed path (for example ``'sml:member'``).
    """
    qualified_path = nspath_eval(path, namespaces)
    return qualified_path

Workhorse function to issue requests and parse the GetCapabilities and SensorML responses


In [3]:
# Output columns, in order. Held as a constant so that a request yielding no
# stations still returns a correctly shaped (zero-row) table. Keep in sync
# with the keys assigned in _station_record().
_STATION_COLUMNS = (
    'station_urn', 'lon', 'lat',
    'shortName', 'longName', 'wmoID',
    'platformType', 'parentNetwork', 'sponsor', 'webpage_url',
    'operatorSector', 'operator_org', 'operator_country', 'operator_url',
    'publisher', 'publisher_org', 'publisher_url',
    'starting', 'ending', 'starting_isostr', 'ending_isostr',
    'parameter_uris', 'parameters',
)


def _first_webpage_url(system_el):
    """Return the URL of the first document of the first DocumentList member, or None."""
    # Assume there's a single DocumentList/member; only the first one is read.
    # Assume that member corresponds to xlink:arcrole="urn:ogc:def:role:webPage"
    members = system_el.findall(nsp('sml:documentation/sml:DocumentList/sml:member'))
    if not members:
        return None
    docs = Documentation(members[0]).documents
    return docs[0].url if docs else None


def _contacts_by_role(system_el):
    """Return a dict mapping the last '/' segment of each contact role to its Contact."""
    by_role = {}
    for member in system_el.findall(nsp('sml:contact/sml:ContactList/sml:member')):
        contact = Contact(member)
        if not contact.role:
            # A contact without a role cannot be looked up by role; skip it.
            continue
        by_role[contact.role.split('/')[-1]] = contact
    return by_role


def _isoformat_or_none(timestamp):
    """Return the ISO 8601 string for ``timestamp``, or None when it is missing."""
    if timestamp is None:
        return None
    return datetime.isoformat(timestamp)


def _station_record(station_urn, sml):
    """Build the ordered attribute record for one station from its SensorML."""
    # NOTE: sml._root is a private OWSLib attribute; it is the only handle
    # used here to reach the parsed XML tree.
    ds = IoosDescribeSensor(sml._root)
    system_el = sml._root.findall(nsp('sml:member'))[0].find(nsp('sml:System'))

    # gml:pos is read as "lat lon": first value -> lat, second value -> lon.
    pos = testXMLValue(ds.system.location.find(nsp('gml:Point/gml:pos')))
    if pos:
        coords = pos.split()
        lat, lon = float(coords[0]), float(coords[1])
    else:
        # No position in the response: keep the station, with NaN coordinates.
        lat = lon = float('nan')

    contacts = _contacts_by_role(system_el)
    # A missing role yields None here; getattr() then returns None for each field.
    operator = contacts.get('operator')
    publisher = contacts.get('publisher')

    sweQuants = system_el.findall(
        nsp('sml:outputs/sml:OutputList/sml:output/swe:Quantity'))
    quant_lst = [sweQuant.attrib['definition'] for sweQuant in sweQuants]
    parameter_lst = [uri.split('/')[-1] for uri in quant_lst]

    station = OrderedDict()
    station['station_urn'] = station_urn
    station['lon'] = lon
    station['lat'] = lat

    station['shortName'] = ds.shortName
    station['longName'] = ds.longName
    station['wmoID'] = ds.get_ioos_def('wmoID', 'identifier', ont)

    # Beware that a station can have >1 classifier of the same type.
    # This code does not accommodate that possibility.
    station['platformType'] = ds.platformType
    station['parentNetwork'] = ds.get_ioos_def('parentNetwork', 'classifier', ont)
    station['sponsor'] = ds.get_ioos_def('sponsor', 'classifier', ont)
    station['webpage_url'] = _first_webpage_url(system_el)

    station['operatorSector'] = ds.get_ioos_def('operatorSector', 'classifier', ont)
    station['operator_org'] = getattr(operator, 'organization', None)
    station['operator_country'] = getattr(operator, 'country', None)
    station['operator_url'] = getattr(operator, 'url', None)
    # TODO: pull out the operator email address(es) too?

    station['publisher'] = ds.get_ioos_def('publisher', 'classifier', ont)
    station['publisher_org'] = getattr(publisher, 'organization', None)
    station['publisher_url'] = getattr(publisher, 'url', None)
    # TODO: pull out the publisher email address(es) too?

    station['starting'] = ds.starting
    station['ending'] = ds.ending
    station['starting_isostr'] = _isoformat_or_none(ds.starting)
    station['ending_isostr'] = _isoformat_or_none(ds.ending)

    station['parameter_uris'] = ','.join(quant_lst)
    station['parameters'] = ','.join(parameter_lst)
    return station


def get_stations_df(sos_url, station_urns_sel=None, timeout=200):
    """Request and parse station SensorML from an SOS endpoint into a table.

    Parameters
    ----------
    sos_url : str
        URL of the SOS endpoint.
    station_urns_sel : iterable of str, optional
        Station urn's to process. Each one is requested individually with
        DescribeSensor. When None (the default), every offering whose urn
        has no 'network' component is processed, using the pyoos collector
        to fetch all the SensorML documents.
    timeout : int, optional
        Timeout passed to the SensorML requests. Defaults to 200, the value
        previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per station, indexed by 'station_urn', with the columns
        listed in ``_STATION_COLUMNS``. This is a plain DataFrame (it has no
        geometry column). When no stations are processed, the table has
        zero rows but still carries all the columns.

    Notes
    -----
    Attributes that are absent from a station's SensorML (operator or
    publisher contact, web page document, position, start/end time) are
    stored as None (NaN for lon/lat) rather than aborting the whole run.
    Errors raised by the web requests themselves are not caught here.
    """
    # LATER: ADD ERROR TEST/CATCH AFTER EACH WEB REQUEST
    oFrmt = 'text/xml; subtype="sensorML/1.0.1/profiles/ioos_sos/1.0"'

    if station_urns_sel is not None:
        params = {'service': 'SOS', 'request': 'GetCapabilities', 'acceptVersions': '1.0.0'}
        sosgc = SensorObservationService(sos_url + '?' + urlencode(params))
        station_urns = list(station_urns_sel)
        sml_lst = None
    else:
        sos_collector = IoosSweSos(sos_url)
        station_urns = [urn.name for urn in sos_collector.server.offerings
                        if 'network' not in urn.name.split(':')]
        sos_collector.features = station_urns
        # Indexed by position below, i.e. assumed to follow station_urns order.
        sml_lst = sos_collector.metadata(timeout=timeout)

    station_recs = []
    for station_idx, station_urn in enumerate(station_urns):
        if sml_lst is None:
            sml_str = sosgc.describe_sensor(procedure=station_urn, outputFormat=oFrmt,
                                            timeout=timeout)
            sml = SensorML(sml_str)
        else:
            sml = sml_lst[station_idx]
        station_recs.append(_station_record(station_urn, sml))

    stations_df = pd.DataFrame.from_records(station_recs,
                                            columns=list(_STATION_COLUMNS))
    stations_df.index = stations_df['station_urn']

    return stations_df

Select SOS and optionally station urn's, then issue DescribeSensor requests


In [4]:
# SOS endpoint URL for each provider, keyed by the label used to select it.
sos_endpoints = dict(
    CeNCOOS='http://sos.cencoos.org/sos/sos/kvp',
    NANOOS='http://data.nanoos.org/52nsos/sos/kvp',
)

Select SOS (sos_label) and optionally a list of station urn's. Using station_urns = None will result in querying the SOS GetCapabilities to extract all non-network, station offerings. Otherwise, pass a list of station_urns to process only those stations.

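Both of the next two cells assign sos_label and station_urns, so when the notebook is run from top to bottom the values set in the second cell are the ones that take effect.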


In [5]:
# Example selection: the CeNCOOS endpoint, restricted to an explicit station list.
sos_label = 'CeNCOOS'

station_urns = [
    'urn:ioos:station:cencoos:Tiburon',
    'urn:ioos:station:cencoos:Carquinez',
    'urn:ioos:station:cencoos:Trinidad',
    'urn:ioos:station:cencoos:Humboldt',
    'urn:ioos:station:cencoos:Monterey',
    'urn:ioos:station:cencoos:SantaCruz',
    'urn:ioos:station:mlml:mlml-sea',
]

# Alternatives: a single station, or None to process every station offering.
# station_urns = ['urn:ioos:station:cencoos:Trinidad']
# station_urns = None

In [6]:
# Selection for the NANOOS endpoint.
sos_label = 'NANOOS'

# To process a single station instead, assign an explicit list, e.g.:
# station_urns = ['urn:ioos:station:nanoos:nerrs_sosecwq']
# None means every non-network station offering of the endpoint is processed.
station_urns = None

Request and parse SensorML documents (and possibly SOS GetCapabilities)


In [7]:
# Request and parse the SensorML for the selected endpoint; station_urns=None
# makes get_stations_df process all non-network station offerings.
stations_df = get_stations_df(sos_endpoints[sos_label], station_urns)

In [8]:
# Assign EPSG:4326 CRS, retrieved from epsg.io
# The OGC WKT crs string is available directly at http://epsg.io/4326.wkt
# or http://spatialreference.org/ref/epsg/4326/ogcwkt/
# The literal ends immediately after the final closing bracket so that the
# WKT text carries no trailing characters.
crs = '''GEOGCS["WGS 84", 
           DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],
             AUTHORITY["EPSG","6326"]],
           PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],
           UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],
         AUTHORITY["EPSG","4326"]]'''
# One point per station row, built as (lon, lat), i.e. x = longitude.
geometry = [Point(xy) for xy in zip(stations_df.lon, stations_df.lat)]
stations_gdf = gpd.GeoDataFrame(stations_df, geometry=geometry, crs=crs)

In [9]:
# Preview the first rows of the station GeoDataFrame.
stations_gdf.head()


Out[9]:
station_urn lon lat shortName longName wmoID platformType parentNetwork sponsor webpage_url ... publisher publisher_org publisher_url starting ending starting_isostr ending_isostr parameter_uris parameters geometry
station_urn
urn:ioos:station:nanoos:apl_chaba urn:ioos:station:nanoos:apl_chaba -124.949203 47.965900 APL-UW Ćháʔba· (APL-UW) Ćháʔba· UW/NANOOS Moored Buoy nea... None moored_buoy NANOOS None http://wavechasers.apl.washington.edu/projects... ... APL-UW NANOOS http://nanoos.org 2016-03-19 17:41:37+00:00 2016-06-20 06:51:16+00:00 2016-03-19T17:41:37+00:00 2016-06-20T06:51:16+00:00 http://mmisw.org/ont/cf/parameter/sea_water_te... sea_water_temperature,sea_water_salinity,mole_... POINT (-124.9492034912109 47.96590042114258)
urn:ioos:station:nanoos:apl_npb1ptwells urn:ioos:station:nanoos:apl_npb1ptwells -122.397202 47.761200 APL-UW NPB-1 (APL-UW) LSG NPB-1 Profiling Buoy at Pt. Wells 46120 moored_buoy NANOOS None http://orca.ocean.washington.edu/data_pointWel... ... APL-UW NANOOS http://nanoos.org 2015-12-04 06:28:48+00:00 2016-07-11 22:49:38+00:00 2015-12-04T06:28:48+00:00 2016-07-11T22:49:38+00:00 http://mmisw.org/ont/cf/parameter/sea_water_si... sea_water_sigma_t,sea_water_salinity,net_downw... POINT (-122.3972015380859 47.76119995117188)
urn:ioos:station:nanoos:apl_npb2carr urn:ioos:station:nanoos:apl_npb2carr -122.730003 47.279999 APL-UW NPB-2 (APL-UW) LSG NPB-2 Profiling Buoy at Carr Inlet 46121 moored_buoy NANOOS None http://orca.ocean.washington.edu/data_carrInle... ... APL-UW NANOOS http://nanoos.org 2015-12-07 20:09:44+00:00 2016-07-11 22:58:02+00:00 2015-12-07T20:09:44+00:00 2016-07-11T22:58:02+00:00 http://mmisw.org/ont/cf/parameter/mass_concent... mass_concentration_of_oxygen_in_sea_water,sea_... POINT (-122.7300033569336 47.27999877929688)
urn:ioos:station:nanoos:cmop_coaof urn:ioos:station:nanoos:cmop_coaof -123.774002 46.204700 CMOP_Coaof (CMOP) Waste water outfall (City of Astoria) None fixed NANOOS None http://www.stccmop.org/datamart/observation_ne... ... CMOP NANOOS http://nanoos.org 2013-08-16 21:54:00+00:00 2013-08-16 21:54:00+00:00 2013-08-16T21:54:00+00:00 2013-08-16T21:54:00+00:00 http://mmisw.org/ont/cf/parameter/sea_water_te... sea_water_temperature,sea_water_salinity POINT (-123.7740020751953 46.2047004699707)
urn:ioos:station:nanoos:cmop_dsdma urn:ioos:station:nanoos:cmop_dsdma -123.955101 46.225700 CMOP_Dsdma (CMOP) Desdemona Sands Light None fixed NANOOS None http://www.stccmop.org/datamart/observation_ne... ... CMOP NANOOS http://nanoos.org 2014-08-01 00:14:12+00:00 2015-09-07 20:02:44+00:00 2014-08-01T00:14:12+00:00 2015-09-07T20:02:44+00:00 http://mmisw.org/ont/cf/parameter/sea_water_te... sea_water_temperature,air_temperature,wind_fro... POINT (-123.9551010131836 46.22570037841797)

5 rows × 24 columns

From Lance, about what's an active/working station: "I meant by an 'older' station a station that does not have data for the past 7 days."


In [10]:
# A station counts as 'active' when its "ending" timestamp is later than the
# fixed cutoff date below; the total is simply the number of rows.
active_cnt = int((stations_gdf['ending'] > '2016-7-3').sum())
total_cnt = stations_gdf.shape[0]
print("'Active' stations: %d / Total stations: %d" % (active_cnt, total_cnt))


'Active' stations: 39 / Total stations: 63

Export to GeoPackage (gpkg) file


In [11]:
# Inspect the column dtypes before exporting.
stations_gdf.dtypes


Out[11]:
station_urn                      object
lon                             float64
lat                             float64
shortName                        object
longName                         object
wmoID                            object
platformType                     object
parentNetwork                    object
sponsor                          object
webpage_url                      object
operatorSector                   object
operator_org                     object
operator_country                 object
operator_url                     object
publisher                        object
publisher_org                    object
publisher_url                    object
starting            datetime64[ns, UTC]
ending              datetime64[ns, UTC]
starting_isostr                  object
ending_isostr                    object
parameter_uris                   object
parameters                       object
geometry                         object
dtype: object

In [12]:
# Show the Python type of the first value in each column. The frame is
# indexed by station urn strings, so the first row is selected explicitly by
# position with .iloc instead of relying on integer-key fallback.
for col in stations_gdf:
    print(col, type(stations_gdf[col].iloc[0]))


('station_urn', <type 'str'>)
('lon', <type 'numpy.float64'>)
('lat', <type 'numpy.float64'>)
('shortName', <type 'unicode'>)
('longName', <type 'unicode'>)
('wmoID', <type 'NoneType'>)
('platformType', <type 'str'>)
('parentNetwork', <type 'str'>)
('sponsor', <type 'NoneType'>)
('webpage_url', <type 'str'>)
('operatorSector', <type 'str'>)
('operator_org', <type 'str'>)
('operator_country', <type 'str'>)
('operator_url', <type 'str'>)
('publisher', <type 'str'>)
('publisher_org', <type 'str'>)
('publisher_url', <type 'str'>)
('starting', <class 'pandas.tslib.Timestamp'>)
('ending', <class 'pandas.tslib.Timestamp'>)
('starting_isostr', <type 'str'>)
('ending_isostr', <type 'str'>)
('parameter_uris', <type 'str'>)
('parameters', <type 'str'>)
('geometry', <class 'shapely.geometry.point.Point'>)

In [13]:
# Drop the timezone-aware datetime columns, which cause the file export to
# fail; their ISO-string counterparts (starting_isostr, ending_isostr) remain.
stations_gdf_out = stations_gdf.drop(['starting', 'ending'], axis=1)

In [14]:
# Write the export-ready frame to a GeoPackage file named after the SOS label.
fpth = '~/ipythonnotebooks/%s_sossml_stations.gpkg' % sos_label
stations_gdf_out.to_file(fpth, driver='GPKG')

In [ ]: