In [1]:
%run utilities.ipynb

IOOS System Test: Extreme Events Theme: Inundation

  • Query CSW to find datasets that match criteria
  • Extract OPeNDAP data endpoints from model datasets and SOS endpoints from observational datasets
  • OPeNDAP model datasets will be granules
  • SOS endpoints may be datasets (from ncSOS) or collections of datasets (from NDBC, CO-OPS SOS servers)
  • Filter SOS services to obtain datasets
  • Extract data from SOS datasets
  • Extract data from model datasets at locations of observations
  • Compare time series data on same vertical datum

In [2]:
# Scientific Stack.
import iris
import pandas as pd
from datetime import datetime, timedelta

# Custom IOOS/ASA modules (available at PyPI).
from owslib import fes
from owslib.csw import CatalogueServiceWeb
from pyoos.collectors.coops.coops_sos import CoopsSos

Specify a bounding box and time range of interest:


In [3]:
box = [-76.4751, 38.3890, -71.7432, 42.9397]

Specific specific times (UTC):


In [4]:
times = dict({'Hurricane Sandy':
              [datetime(2012, 10, 26), datetime(2012, 11, 2)],
              '2014 Feb 10-15 Storm':
              [datetime(2014, 2, 10), datetime(2014, 2, 15)],
              '2014 Recent':
              [datetime(2014, 3, 8), datetime(2014, 3, 11)],
              '2011':
              [datetime(2013, 4, 20), datetime(2013, 4, 24)],
              'Now':
              [datetime.utcnow() - timedelta(days=3),
               datetime.utcnow() + timedelta(days=3)]
              })

jd_start, jd_stop = times['Now']

start_date = jd_start.strftime('%Y-%m-%d %H:00')
stop_date = jd_stop.strftime('%Y-%m-%d %H:00')

jd_start = datetime.strptime(start_date, '%Y-%m-%d %H:%M')
jd_stop = datetime.strptime(stop_date, '%Y-%m-%d %H:%M')

print('%s to %s' % (start_date, stop_date))


2014-04-13 19:00 to 2014-04-19 19:00

Search CSW for datasets of interest:


In [5]:
if False:
    from IPython.core.display import HTML
    HTML('<iframe src=http://www.ngdc.noaa.gov/geoportal/ width=950 height=400></iframe>')

Connect to CSW, explore it's properties:


In [6]:
CSW = dict({'NGDC Geoportal':
            'http://www.ngdc.noaa.gov/geoportal/csw',
            'USGS WHSC Geoportal':
            'http://geoport.whoi.edu/geoportal/csw',
            'NODC Geoportal: granule level':
            'http://www.nodc.noaa.gov/geoportal/csw',
            'NODC Geoportal: collection level':
            'http://data.nodc.noaa.gov/geoportal/csw',
            'NRCAN CUSTOM':
            'http://geodiscover.cgdi.ca/wes/serviceManagerCSW/csw',
            'USGS Woods Hole GI_CAT':
            'http://geoport.whoi.edu/gi-cat/services/cswiso',
            'USGS CIDA Geonetwork':
            'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw',
            'USGS Coastal and Marine Program':
            'http://cmgds.marine.usgs.gov/geonetwork/srv/en/csw',
            'USGS Woods Hole Geoportal':
            'http://geoport.whoi.edu/geoportal/csw',
            'CKAN':
            'http://geo.gov.ckan.org/csw',
            'EPA':
            'https://edg.epa.gov/metadata/csw',
            'CWIC':
            'http://cwic.csiss.gmu.edu/cwicv1/discovery',
            })

endpoint = CSW['NGDC Geoportal']
csw = CatalogueServiceWeb(endpoint, timeout=60)

print(csw.version)


2.0.2

In [7]:
for oper in csw.operations:
    if oper.name == 'GetRecords':
        print('\nISO Queryables:\n\t%s' %
              '\n\t'.join(oper.constraints['SupportedISOQueryables']['values']))


ISO Queryables:
	apiso:Subject
	apiso:Title
	apiso:Abstract
	apiso:AnyText
	apiso:Format
	apiso:Identifier
	apiso:Modified
	apiso:Type
	apiso:BoundingBox
	apiso:CRS.Authority
	apiso:CRS.ID
	apiso:CRS.Version
	apiso:RevisionDate
	apiso:AlternateTitle
	apiso:CreationDate
	apiso:PublicationDate
	apiso:OrganizationName
	apiso:HasSecurityConstraints
	apiso:Language
	apiso:ResourceIdentifier
	apiso:ParentIdentifier
	apiso:KeywordType
	apiso:TopicCategory
	apiso:ResourceLanguage
	apiso:GeographicDescriptionCode
	apiso:Denominator
	apiso:DistanceValue
	apiso:DistanceUOM
	apiso:TempExtent_begin
	apiso:TempExtent_end
	apiso:ServiceType
	apiso:ServiceTypeVersion
	apiso:Operation
	apiso:OperatesOn
	apiso:OperatesOnIdentifier
	apiso:OperatesOnName
	apiso:CouplingType

In [8]:
# Convert User Input into FES filters.
start, stop = dateRange(start_date, stop_date)
bbox = fes.BBox(box)

or_filt = fes.Or([fes.PropertyIsLike(propertyname='apiso:AnyText',
                                     literal=('*%s*' % val),
                                     escapeChar='\\',
                                     wildCard='*',
                                     singleChar='?') for val in std_name_list])

ROMS model output often has Averages and History files. The Averages files are usually averaged over a tidal cycle or more, while the History files are snapshots at that time instant. We are not interested in averaged data for this test, so in the cell below we remove any Averages files here by removing any datasets that have the term "Averages" in the metadata text. A better approach would be to look at the cell_methods attributes propagated through to some term in the ISO metadata, but this is not implemented yet, as far as I know.


In [9]:
val = 'Averages'
not_filt = fes.Not([fes.PropertyIsLike(propertyname='apiso:AnyText',
                                       literal=('*%s*' % val),
                                       escapeChar='\\',
                                       wildCard='*',
                                       singleChar='?')])


filter_list = [fes.And([bbox, start, stop, or_filt, not_filt])]

# Try request using multiple filters "and" syntax: [[filter1,filter2]].
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')

print(len(csw.records.keys()))


16

Now print out some titles:


In [10]:
for rec, item in csw.records.items():
    print(item.title)


DBOFS - Delaware Bay Operational Forecast System - NOAA CO-OPS - POM
HRECOS Aggregated Station HRPIER84 Data
HRECOS Aggregated Station HRWSTPTH Data
NECOFS GOM3 (FVCOM) - Northeast US - Latest Forecast
CBOFS - Chesapeake Bay Operational Forecast System - NOAA CO-OPS - POM
HRECOS Aggregated Station HRMARPH Data
HRECOS Aggregated Station HRPMNTH Data
ESTOFS Storm Surge Model - Atlantic - v1.0.0 - NOAA - NCEP - ADCIRC
HRECOS Aggregated Station HRLCK8H Data
HRECOS Aggregated Station HRALBPH Data
ROMS ESPRESSO Real-Time Operational IS4DVAR Forecast System Version 2 (NEW) 2013-present FMRC History (Best)
Barotropic Tide Model for the Pacific Basin
TEST: NOAA.NOS.CO-OPS SOS
NYOFS - New York and New Jersey Operational Forecast System - NOAA CO-OPS - POM
HYbrid Coordinate Ocean Model (HYCOM): Global
NECOFS GOM3 Wave - Northeast US - Latest Forecast

In [11]:
service = 'urn:x-esri:specification:ServiceType:odp:url'

dap_urls = service_urls(csw.records, service=service)

print("\n".join(dap_urls))


http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/DBOFS/fmrc/Aggregated_7_day_DBOFS_Fields_Forecast_best.ncd
http://sos.maracoos.org/stable/dodsC/stationHRPIER84-agg.ncml
http://sos.maracoos.org/stable/dodsC/stationHRWSTPTH-agg.ncml
http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc
http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/CBOFS/fmrc/Aggregated_7_day_CBOFS_Fields_Forecast_best.ncd
http://sos.maracoos.org/stable/dodsC/stationHRMARPH-agg.ncml
http://sos.maracoos.org/stable/dodsC/stationHRPMNTH-agg.ncml
http://geoport-dev.whoi.edu/thredds/dodsC/estofs/atlantic
http://sos.maracoos.org/stable/dodsC/stationHRLCK8H-agg.ncml
http://sos.maracoos.org/stable/dodsC/stationHRALBPH-agg.ncml
http://tds.marine.rutgers.edu/thredds/dodsC/roms/espresso/2013_da/his_Best/ESPRESSO_Real-Time_v2_History_Best_Available_best.ncd
http://oos.soest.hawaii.edu/thredds/dodsC/hioos/tide_pac
http://opendap.co-ops.nos.noaa.gov/thredds/dodsC/NYOFS/fmrc/Aggregated_7_day_NYOFS_Fields_Forecast_best.ncd
http://oos.soest.hawaii.edu/thredds/dodsC/pacioos/hycom/global
http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_WAVE_FORECAST.nc

In [12]:
service = 'urn:x-esri:specification:ServiceType:sos:url'

sos_urls = service_urls(csw.records, service=service)

print("\n".join(sos_urls))


http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&request=GetCapabilities

1. Get observations from SOS:


In [13]:
collector = CoopsSos()
collector.server.identification.title

collector.start_time = jd_start
collector.end_time = jd_stop
collector.variables = [sos_name]

ofrs = collector.server.offerings

print(len(ofrs))
for p in ofrs[700:710]:
    print(p)


1018
Offering id: station-8447386, name: urn:ioos:station:NOAA.NOS.CO-OPS:8447386
Offering id: station-8447387, name: urn:ioos:station:NOAA.NOS.CO-OPS:8447387
Offering id: station-8447435, name: urn:ioos:station:NOAA.NOS.CO-OPS:8447435
Offering id: station-8447930, name: urn:ioos:station:NOAA.NOS.CO-OPS:8447930
Offering id: station-8449130, name: urn:ioos:station:NOAA.NOS.CO-OPS:8449130
Offering id: station-8452660, name: urn:ioos:station:NOAA.NOS.CO-OPS:8452660
Offering id: station-8452944, name: urn:ioos:station:NOAA.NOS.CO-OPS:8452944
Offering id: station-8452951, name: urn:ioos:station:NOAA.NOS.CO-OPS:8452951
Offering id: station-8454000, name: urn:ioos:station:NOAA.NOS.CO-OPS:8454000
Offering id: station-8454049, name: urn:ioos:station:NOAA.NOS.CO-OPS:8454049

Find the SOS stations within our bounding box and time extent.

We would like to just use a filter on a collection to get a new collection, but PYOOS doesn't do that yet. So we do a GetObservation request for a collection, including a bounding box, and asking for one value at the start of the time period of interest. We use that to do a bounding box filter on the SOS server, which returns 1 point for each station found. So for 3 stations, we get back 3 records, in CSV format. We can strip the station ids from the CSV, and then we have a list of stations we can use with pyoos. The template for the GetObservation query for the bounding box filtered collection was generated using the GUI.


In [14]:
iso_start = jd_start.strftime('%Y-%m-%dT%H:%M:%SZ')
box_str = ','.join(str(e) for e in box)

print('%s\n%s' % (iso_start, box_str))


2014-04-13T19:00:00Z
-76.4751,38.389,-71.7432,42.9397

In [15]:
url = ('http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?'
       'service=SOS&request=GetObservation&version=1.0.0&'
       'observedProperty=%s&offering='
       'urn:ioos:network:NOAA.NOS.CO-OPS:WaterLevelActive&'
       'featureOfInterest=BBOX:%s&responseFormat=text/csv&eventTime=%s&'
       'result=VerticalDatum==urn:ogc:def:datum:epsg::5103&'
       'dataType=PreliminarySixMinute') % (sos_name, box_str, iso_start)

print(url)


http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&request=GetObservation&version=1.0.0&observedProperty=water_surface_height_above_reference_datum&offering=urn:ioos:network:NOAA.NOS.CO-OPS:WaterLevelActive&featureOfInterest=BBOX:-76.4751,38.389,-71.7432,42.9397&responseFormat=text/csv&eventTime=2014-04-13T19:00:00Z&result=VerticalDatum==urn:ogc:def:datum:epsg::5103&dataType=PreliminarySixMinute

In [16]:
obs_loc_df = pd.read_csv(url)
obs_loc_df.head()


Out[16]:
station_id sensor_id latitude (degree) longitude (degree) date_time water_surface_height_above_reference_datum (m) datum_id vertical_position (m)
0 urn:ioos:station:NOAA.NOS.CO-OPS:8461490 urn:ioos:sensor:NOAA.NOS.CO-OPS:8461490:A1 41.3614 -72.0900 2014-04-13T19:00:00Z -0.445 urn:ogc:def:datum:epsg::5103 1.634
1 urn:ioos:station:NOAA.NOS.CO-OPS:8467150 urn:ioos:sensor:NOAA.NOS.CO-OPS:8467150:A1 41.1733 -73.1817 2014-04-13T19:00:00Z -0.622 urn:ogc:def:datum:epsg::5103 1.775
2 urn:ioos:station:NOAA.NOS.CO-OPS:8510560 urn:ioos:sensor:NOAA.NOS.CO-OPS:8510560:A1 41.0483 -71.9600 2014-04-13T19:00:00Z -0.379 urn:ogc:def:datum:epsg::5103 1.655
3 urn:ioos:station:NOAA.NOS.CO-OPS:8518750 urn:ioos:sensor:NOAA.NOS.CO-OPS:8518750:A1 40.7006 -74.0142 2014-04-13T19:00:00Z -0.669 urn:ogc:def:datum:epsg::5103 1.848
4 urn:ioos:station:NOAA.NOS.CO-OPS:8531680 urn:ioos:sensor:NOAA.NOS.CO-OPS:8531680:A1 40.4669 -74.0094 2014-04-13T19:00:00Z -0.518 urn:ogc:def:datum:epsg::5103 1.624

5 rows × 8 columns


In [17]:
stations = [sta.split(':')[-1] for sta in obs_loc_df['station_id']]
obs_lon = list(obs_loc_df['longitude (degree)'])
obs_lat = list(obs_loc_df['latitude (degree)'])

print(stations)


['8461490', '8467150', '8510560', '8518750', '8531680', '8534720', '8536110', '8545240', '8551910', '8557380', '8571892']

In [18]:
def time(start=jd_start, end=jd_stop, freq='6Min'):
    """Generate a uniform 6-min time base for model/data comparison."""
    ts_rng = pd.date_range(start=jd_start, end=jd_stop, freq='6Min')
    return pd.DataFrame(index=ts_rng)

ts = time(start=jd_start, end=jd_stop, freq='6Min')

print(ts.index)
print(len(ts))


<class 'pandas.tseries.index.DatetimeIndex'>
[2014-04-13 19:00:00, ..., 2014-04-19 19:00:00]
Length: 1441, Freq: 6T, Timezone: None
1441

Create a list of obs DataFrames, one for each station:


In [19]:
obs_df = []
for sta in stations:
    b = coops2df(collector, sta, sos_name)
    print(b.name)
    # Limit interpolation to 10 points (10 @ 6min = 1 hour).
    obs_df.append(pd.DataFrame(
        pd.concat([b, ts], axis=1).interpolate(limit=10)['Observed Data']))
    obs_df[-1].name = b.name


New London, CT
Bridgeport, CT
Montauk, NY
The Battery, NY
Sandy Hook, NJ
Atlantic City, NJ
Cape May, NJ
Philadelphia, PA
Reedy Point, DE
Lewes, DE
Cambridge, MD

Get model output from OPeNDAP URLS.

Try to open all the OPeNDAP URLS using Iris from the British Met Office. If 1D, assume dataset is data, if 2D assume dataset is an unstructured grid model, and if 3D, assume it's a structured grid model.


In [20]:
for url in dap_urls:
    try:
        a = iris.load_cube(url, constraint)
        # Take first 20 chars for model name.
        mod_name = a.attributes['title'][0:20]
        r = a.shape
        timevar = find_timevar(a)
        lat = a.coord(axis='Y').points
        lon = a.coord(axis='X').points
        jd = timevar.units.num2date(timevar.points)
        start = timevar.units.date2num(jd_start)
        stop = timevar.units.date2num(jd_stop)
        istart = timevar.nearest_neighbour_index(start)
        istop = timevar.nearest_neighbour_index(stop)

        # Only proceed if we have data in the range requested.
        if istart != istop:
            print(url)
            nsta = len(obs_lon)
            if len(r) == 3:
                d = a[0, ...].data
                # Find the closest non-land point from a structured grid model.
                j, i, dd = find_ij(lon, lat, d, obs_lon, obs_lat)
                for n in range(nsta):
                    # Only use if model cell is within 0.1 degree of req, loc.
                    if dd[n] < 0.1:
                        arr = a[istart:istop, j[n], i[n]].data
                        c = mod_df(arr, timevar, istart, istop, mod_name, ts)
                        name = obs_df[n].name
                        obs_df[n] = pd.concat([obs_df[n], c], axis=1)
                        obs_df[n].name = name
            elif len(r) == 2:
                # Find the closest point from an unstructured grid model.
                index, dd = nearxy(lon.flatten(), lat.flatten(),
                                   obs_lon, obs_lat)
                for n in range(nsta):
                    # Only use if model cell is within 0.1 degree of req. loc.
                    if dd[n] < 0.1:
                        arr = a[istart:istop, index[n]].data
                        c = mod_df(arr, timevar, istart, istop, mod_name, ts)
                        name = obs_df[n].name
                        obs_df[n] = pd.concat([obs_df[n], c], axis=1)
                        obs_df[n].name = name
    except:
        print("No data in the requested range.")
        pass


/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1240: UserWarning: Gracefully filling 'time' dimension coordinate masked points
  warnings.warn(msg.format(str(cf_coord_var.cf_name)))
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'sea_water_turbidity' invalid units 'FNU'
  warnings.warn(msg.format(msg_name, msg_units))
No data in the requested range.
No data in the requested range.
http://sos.maracoos.org/stable/dodsC/stationHRWSTPTH-agg.ncml
No data in the requested range.
No data in the requested range.
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'relative_humidity' invalid units 'pct'
  warnings.warn(msg.format(msg_name, msg_units))
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'sea_water_salinity' invalid units 'psu'
  warnings.warn(msg.format(msg_name, msg_units))
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'wind_from_direction' invalid units 'deg'
  warnings.warn(msg.format(msg_name, msg_units))
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'fractional_saturation_of_oxygen_in_sea_water' invalid units 'pct'
  warnings.warn(msg.format(msg_name, msg_units))
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'sea_water_turbidity' invalid units 'NTU'
  warnings.warn(msg.format(msg_name, msg_units))
http://sos.maracoos.org/stable/dodsC/stationHRMARPH-agg.ncml
No data in the requested range.
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'nbdv' invalid units 'nondimensional'
  warnings.warn(msg.format(msg_name, msg_units))
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'neta' invalid units 'nondimensional'
  warnings.warn(msg.format(msg_name, msg_units))
http://geoport-dev.whoi.edu/thredds/dodsC/estofs/atlantic
http://sos.maracoos.org/stable/dodsC/stationHRLCK8H-agg.ncml
http://sos.maracoos.org/stable/dodsC/stationHRALBPH-agg.ncml
No data in the requested range.
http://oos.soest.hawaii.edu/thredds/dodsC/hioos/tide_pac
No data in the requested range.
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'ibtypee' invalid units 'nondimensional'
  warnings.warn(msg.format(msg_name, msg_units))
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'nvell' invalid units 'nondimensional'
  warnings.warn(msg.format(msg_name, msg_units))
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'ibtype' invalid units 'nondimensional'
  warnings.warn(msg.format(msg_name, msg_units))
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'nbvv' invalid units 'nondimensional'
  warnings.warn(msg.format(msg_name, msg_units))
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'nvel' invalid units 'nondimensional'
  warnings.warn(msg.format(msg_name, msg_units))
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'nvdll' invalid units 'nondimensional'
  warnings.warn(msg.format(msg_name, msg_units))
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'mask' invalid units 'nondimensional'
  warnings.warn(msg.format(msg_name, msg_units))
No data in the requested range.
http://oos.soest.hawaii.edu/thredds/dodsC/pacioos/hycom/global
No data in the requested range.
No data in the requested range.
/home/filipe/.virtualenvs/iris/lib/python2.7/site-packages/iris/fileformats/_pyke_rules/compiled_krb/fc_rules_cf_fc.py:1171: UserWarning: Ignoring netCDF variable 'sigma' invalid units 'sigma_level'
  warnings.warn(msg.format(msg_name, msg_units))

In [21]:
for df in obs_df:
    ax = df.plot(figsize=(14, 6), title=df.name)
    ax.set_ylabel('m')