Demo the capabilities of the pyoos NdbcSos collector.
Starting point was a notebook from Filipe, for the OOI Endurance Array, which has a spatial extent encompassed within the NANOOS domain.
12/23/2015. Emilio Mayorga, NANOOS
In [1]:
import pandas as pd
from pyoos.collectors.ndbc.ndbc_sos import NdbcSos
import owslib.swe.sensor.sml as owslibsml
# Banner helper: centers its argument in a 64-character field padded with '*'.
banner_template = '{:*^64}'
fmt = banner_template.format
In [2]:
# OOI Endurance Array bounding box, as [west, south, east, north] in degrees
west, south = -127, 43
east, north = -123.75, 48
bbox = [west, south, east, north]
In [3]:
from datetime import datetime, timedelta

# Query window: `dt` days on either side of the current UTC time.
dt = 5  # days
now = datetime.utcnow()
half_window = timedelta(days=dt)
start, stop = now - half_window, now + half_window
In [4]:
# Observed property requested from the SOS server; the same name is reused
# further down as the DataFrame column holding the values.
sos_name = 'sea_water_temperature'
In [5]:
# Create the collector and apply the spatial, temporal, and variable filters.
collector_ndbc = NdbcSos()
collector_ndbc.set_bbox(bbox)
collector_ndbc.end_time = stop
collector_ndbc.start_time = start
collector_ndbc.variables = [sos_name]

# Report how many offerings the server advertises.
ndbc_server = collector_ndbc.server
ofrs = ndbc_server.offerings
title = ndbc_server.identification.title
print(fmt(' NDBC Collector offerings '))
print('{}: {} offerings'.format(title, len(ofrs)))
Note that the filters set on the collector don't apply to the server offerings: `server` shows everything that is available, regardless of the filters. That's why there are 964 offerings.
In [6]:
# 'offerings' is a list; here's one entry
server_offerings = collector_ndbc.server.offerings
ofr1 = server_offerings[1]
vars(ofr1)
Out[6]:
In [7]:
# Id, name, and procedures of the sample offering
ofr1.id, ofr1.name, ofr1.procedures
Out[7]:
In [8]:
# 'contents' is a dictionary; here's one entry, looked up by its key
station_content = collector_ndbc.server.contents['station-46211']
vars(station_content)
Out[8]:
In [9]:
# Request the observations from the collector in CSV format
collector_ndbc_raw = collector_ndbc.raw(responseFormat="text/csv")
`collector_ndbc_raw` is the entire CSV response, held as a single string.
In [10]:
# Type and length of the raw response
type(collector_ndbc_raw), len(collector_ndbc_raw)
Out[10]:
In [11]:
# Split the csv text into lines so that a small piece of it can be inspected;
# the cell displays the number of lines.
ndbc_raw_lst = collector_ndbc_raw.splitlines()
len(ndbc_raw_lst)
Out[11]:
In [12]:
# First five lines of the csv response
ndbc_raw_lst[:5]
Out[12]:
Read the time series data into a DataFrame, but keep only the information/columns we'll use for plotting.
In [13]:
# io.BytesIO is available on both Python 2 and Python 3 (the StringIO module
# is Python 2 only).
from io import BytesIO

# read_csv is fed bytes; encode only when the response is not bytes already.
if isinstance(collector_ndbc_raw, bytes):
    raw_csv_bytes = collector_ndbc_raw
else:
    raw_csv_bytes = collector_ndbc_raw.encode('utf-8')

columns = {'station_id': 'station',
           'depth (m)': 'depth_m',
           'sea_water_temperature (C)': sos_name}
unused_columns = ['sensor_id', 'latitude (degree)', 'longitude (degree)']

# 'date_time' is named explicitly: parse_dates=True only tries to parse the
# index, which would leave the timestamps as plain strings.
# The frame is built in a single chain (no in-place edits) so that re-running
# this cell always gives the same result.
datacsv_df = (
    pd.read_csv(BytesIO(raw_csv_bytes), parse_dates=['date_time'])
    .rename(columns=columns)
    .drop(unused_columns, axis=1)
)

# Keep only the last ':'-separated token of the station identifier
# (the same shortened form that is later passed as collector features).
datacsv_df['station'] = datacsv_df['station'].str.split(':').str[-1]
datacsv_df.head(10)
Out[13]:
In [14]:
# Distinct station ids found in the data
stations = datacsv_df['station'].unique().tolist()
len(stations), stations
Out[14]:
In [15]:
# Look up the server's 'describesensor' operation and show its parameters
descsen = collector_ndbc.server.get_operation_by_name('describesensor')
descsen.parameters
Out[15]:
In [16]:
# The metadata method for the NdbcSos collector expects
# features to be in the shortened, "station wmo id" form (eg, 46089),
# rather than the offering name (eg, station-46089)
# or the station urn (eg, urn:ioos:station:wmo:46089)
output_format_values = descsen.parameters['outputFormat']['values']
output_format = output_format_values[0]  # first advertised output format
collector_ndbc.features = stations
ndbc_md_lst = collector_ndbc.metadata(output_format=output_format)
type(ndbc_md_lst), len(ndbc_md_lst), ndbc_md_lst[0]
Out[16]:
In [17]:
# Print the 'stationId' identifier of each SensorML response.
# print(...) with a single argument behaves the same on Python 2 and 3,
# and matches the print calls used earlier in the notebook.
for sml in ndbc_md_lst:
    print(sml.members[0].identifiers['stationId'].value)
One of the SensorML responses, for illustration. The corresponding URL for the DescribeSensor request for `sta0_sml` is http://sdf.ndbc.noaa.gov/sos/server.php?request=DescribeSensor&service=SOS&version=1.0.0&outputformat=text/xml;subtype=%22sensorML/1.0.1%22&procedure=urn:ioos:station:wmo:46211
In [18]:
# Pick one SensorML response for closer inspection.
# NOTE(review): the index 4 is hard-coded while the variable is named "sta0";
# confirm this entry is the intended station and that at least five
# responses are always returned.
sta0_sml = ndbc_md_lst[4].members[0]
sta0_sml
Out[18]:
In [19]:
# Show the attributes of the selected SensorML member
vars(sta0_sml)
Out[19]:
Available contact roles, identifiers, and classifiers:
In [20]:
# Keys of the contacts, identifiers, and classifiers mappings
sta0_sml.contacts.keys(), sta0_sml.identifiers.keys(), sta0_sml.classifiers.keys()
Out[20]:
In [21]:
# Key under which the operator contact is looked up
operator_role = 'http://mmisw.org/ont/ioos/definition/operator'

# Build one metadata record (a dict) per SensorML response.
stations_md_rec = []
for sml_doc in ndbc_md_lst:
    member = sml_doc.members[0]
    urn = member.identifiers['stationId'].value

    # The XPath for the location/point coordinates doesn't follow
    # the IOOS SOS convention.
    coords_elem = member.location.find(owslibsml.nsp('gml:Point/gml:coordinates'))
    # Whitespace-separated coordinate text; the first value is used as lat, the second as lon
    coords = coords_elem.text.split()

    # In some cases the platform type XML element seems to be missing ...
    classifiers = member.classifiers
    platform = (classifiers['platformType'].value
                if 'platformType' in classifiers else 'Unknown')

    stations_md_rec.append({
        'station_id': urn.split(':')[-1],
        'station_urn': urn,
        # Long name is also available in the offering as the offering description
        'longname': member.identifiers['longName'].value,
        'lat': float(coords[0]),
        'lon': float(coords[1]),
        'operator': member.contacts[operator_role].organization,
        'platform_type': platform,
    })
In [22]:
# Assemble the per-station metadata records into a DataFrame indexed by station id
stations_df = pd.DataFrame.from_records(stations_md_rec, index='station_id')
# print(...) with a single argument works on both Python 2 and Python 3
print("Number of stations: %d" % len(stations_df))
stations_df.head(20)
Out[22]:
In [23]:
%matplotlib inline
import seaborn
import matplotlib.pyplot as plt
In [24]:
# Plot one time series per station on a shared axis, labelled with the
# station id and its long name from the metadata table.
fig, ax = plt.subplots(figsize=(14, 5))
colors = seaborn.color_palette("Set2", len(stations))
for k, station in enumerate(stations):
    stadatadf = datacsv_df[datacsv_df.station == station]
    stadatadf = stadatadf.set_index('date_time')
    # Label-based row lookup; .loc replaces the deprecated (and since removed) .ix
    stamddf = stations_df.loc[station]
    label = "%s: %s" % (station, stamddf.longname)
    stadatadf[sos_name].plot(ax=ax, label=label, color=colors[k])
ax.legend(bbox_to_anchor=(1, 1))
ax.set_ylabel(sos_name + ' (C)');
The sefo3 station can predict the future!
In [25]:
# The argument offerings=['urn:ioos:network:noaa.nws.ndbc:all'] is not needed.
# It is passed by default.
om_response_format = 'text/xml;subtype="om/1.0.0"'
collector_ndbc_collect = collector_ndbc.collect(responseFormat=om_response_format)
type(collector_ndbc_collect), len(collector_ndbc_collect), collector_ndbc_collect[0]
Out[25]:
In [26]:
# Print the description of each collected response.
# print(...) with a single argument behaves the same on Python 2 and 3.
for s in collector_ndbc_collect:
    print(s.description)
In [27]:
# The feature object property is not populated,
# so the time series data are not parsed and made available.
# Inspect the first collected response to see this.
ncc = collector_ndbc_collect[0]
vars(ncc)
Out[27]:
In [ ]: