In [1]:
import time


start_time = time.time()

In [2]:
import pytz
from datetime import datetime, timedelta


# Choose the date range (e.g: stop = datetime(2014, 7, 7, 12)).
stop = datetime(2015, 1, 30, 12)
run_name = '{:%Y-%m-%d}'.format(stop)

stop = stop.replace(tzinfo=pytz.utc)
start = stop - timedelta(days=7)

# NERACOOS Mass Bay Region.
bbox = [-72.0, 41.0, -69.0, 43.0]

# CF-names to look for (Sea Surface Height).
name_list = ['sea_surface_height',
             'sea_surface_elevation',
             'sea_surface_height_above_geoid',
             'sea_surface_height_above_sea_level',
             'water_surface_height_above_reference_datum',
             'sea_surface_height_above_reference_ellipsoid']

In [3]:
import iris
import pyoos
import owslib

import logging as log
reload(log)

fmt = '{:*^64}'.format
log.captureWarnings(True)
LOG_FILENAME = 'log.txt'
log.basicConfig(filename=LOG_FILENAME,
                filemode='w',
                format='%(asctime)s %(levelname)s: %(message)s',
                datefmt='%I:%M:%S',
                level=log.INFO,
                stream=None)

log.info(fmt(' Run information '))
log.info('Run date: {:%Y-%m-%d %H:%M:%S}'.format(datetime.utcnow()))
log.info('Download start: {:%Y-%m-%d %H:%M:%S}'.format(start))
log.info('Download stop: {:%Y-%m-%d %H:%M:%S}'.format(stop))
log.info('Bounding box: {0:3.2f}, {1:3.2f},'
         '{2:3.2f}, {3:3.2f}'.format(*bbox))
log.info(fmt(' Software version '))
log.info('Iris version: {}'.format(iris.__version__))
log.info('owslib version: {}'.format(owslib.__version__))
log.info('pyoos version: {}'.format(pyoos.__version__))

In [4]:
def fes_date_filter(start, stop, constraint='overlaps'):
    """Take datetime-like objects and returns a fes filter for date range.
    NOTE: Truncates the minutes!"""
    start = start.strftime('%Y-%m-%d %H:00')
    stop = stop.strftime('%Y-%m-%d %H:00')
    if constraint == 'overlaps':
        propertyname = 'apiso:TempExtent_begin'
        begin = fes.PropertyIsLessThanOrEqualTo(propertyname=propertyname,
                                                literal=stop)
        propertyname = 'apiso:TempExtent_end'
        end = fes.PropertyIsGreaterThanOrEqualTo(propertyname=propertyname,
                                                 literal=start)
    elif constraint == 'within':
        propertyname = 'apiso:TempExtent_begin'
        begin = fes.PropertyIsGreaterThanOrEqualTo(propertyname=propertyname,
                                                   literal=start)
        propertyname = 'apiso:TempExtent_end'
        end = fes.PropertyIsLessThanOrEqualTo(propertyname=propertyname,
                                              literal=stop)
    else:
        raise NameError('Unrecognized constraint {}'.format(constraint))
    return begin, end

In [5]:
from owslib import fes

kw = dict(wildCard='*',
          escapeChar='\\',
          singleChar='?',
          propertyname='apiso:AnyText')

or_filt = fes.Or([fes.PropertyIsLike(literal=('*%s*' % val), **kw)
                  for val in name_list])

# Exculde ROMS Averages and History files.
not_filt = fes.Not([fes.PropertyIsLike(literal='*Averages*', **kw)])

begin, end = fes_date_filter(start, stop)
filter_list = [fes.And([fes.BBox(bbox), begin, end, or_filt, not_filt])]

In [6]:
from owslib.csw import CatalogueServiceWeb

endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')

log.info(fmt(' Catalog information '))
log.info("URL: {}".format(endpoint))
log.info("CSW version: {}".format(csw.version))
log.info("Number of datasets available: {}".format(len(csw.records.keys())))

In [7]:
def service_urls(records, service='odp:url'):
    """Extract service_urls of a specific type (DAP, SOS) from records."""
    service_string = 'urn:x-esri:specification:ServiceType:' + service
    urls = []
    for key, rec in records.items():
        # Create a generator object, and iterate through it until the match is
        # found if not found, gets the default value (here "none").
        url = next((d['url'] for d in rec.references if
                    d['scheme'] == service_string), None)
        if url is not None:
            urls.append(url)
    urls = sorted(set(urls))
    return urls

In [8]:
dap_urls = service_urls(csw.records, service='odp:url')
sos_urls = service_urls(csw.records, service='sos:url')

log.info(fmt(' CSW '))
for rec, item in csw.records.items():
    log.info('{}'.format(item.title))

log.info(fmt(' DAP '))
for url in dap_urls:
    log.info('{}.html'.format(url))

log.info(fmt(' SOS '))
for url in sos_urls:
    log.info('{}'.format(url))

In [9]:
from pyoos.collectors.coops.coops_sos import CoopsSos

collector = CoopsSos()
sos_name = 'water_surface_height_above_reference_datum'

datum = 'NAVD'
collector.set_datum(datum)
collector.end_time = stop
collector.start_time = start
collector.variables = [sos_name]

ofrs = collector.server.offerings
title = collector.server.identification.title
log.info(fmt(' Collector offerings '))
log.info('{}: {} offerings'.format(title, len(ofrs)))

In [10]:
import requests
from urlparse import urlparse


# Web-parsing.
def parse_url(url):
    """This will preserve any given scheme but will add http if none is
    provided."""
    if not urlparse(url).scheme:
        url = "http://{}".format(url)
    return url


def sos_request(url='opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS', **kw):
    url = parse_url(url)
    offering = 'urn:ioos:network:NOAA.NOS.CO-OPS:CurrentsActive'
    params = dict(service='SOS',
                  request='GetObservation',
                  version='1.0.0',
                  offering=offering,
                  responseFormat='text/csv')
    params.update(kw)
    r = requests.get(url, params=params)
    r.raise_for_status()
    content = r.headers['Content-Type']
    if 'excel' in content or 'csv' in content:
        return r.url
    else:
        raise TypeError('Bad url {}'.format(r.url))

In [11]:
from pandas import read_csv

params = dict(observedProperty=sos_name,
              eventTime=start.strftime('%Y-%m-%dT%H:%M:%SZ'),
              featureOfInterest='BBOX:{0},{1},{2},{3}'.format(*bbox),
              offering='urn:ioos:network:NOAA.NOS.CO-OPS:WaterLevelActive')

uri = 'http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS'
url = sos_request(uri, **params)
observations = read_csv(url)

log.info('SOS URL request: {}'.format(url))

Clean the dataframe (visualization purpose only)


In [12]:
from lxml import etree
from urllib import urlopen
from IPython.display import HTML


def get_coops_longname(station):
    """Get longName for specific station from COOPS SOS using DescribeSensor
    request."""
    url = ('opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?service=SOS&'
           'request=DescribeSensor&version=1.0.0&'
           'outputFormat=text/xml;subtype="sensorML/1.0.1"&'
           'procedure=urn:ioos:station:NOAA.NOS.CO-OPS:%s') % station
    url = parse_url(url)
    tree = etree.parse(urlopen(url))
    root = tree.getroot()
    path = "//sml:identifier[@name='longName']/sml:Term/sml:value/text()"
    namespaces = dict(sml="http://www.opengis.net/sensorML/1.0.1")
    longName = root.xpath(path, namespaces=namespaces)
    if len(longName) == 0:
        longName = station
    return longName[0]

In [13]:
columns = {'datum_id': 'datum',
           'sensor_id': 'sensor',
           'station_id': 'station',
           'latitude (degree)': 'lat',
           'longitude (degree)': 'lon',
           'vertical_position (m)': 'height',
           'water_surface_height_above_reference_datum (m)': 'ssh above datum'}

observations.rename(columns=columns, inplace=True)

observations['datum'] = [s.split(':')[-1] for s in observations['datum']]
observations['sensor'] = [s.split(':')[-1] for s in observations['sensor']]
observations['station'] = [s.split(':')[-1] for s in observations['station']]
observations['name'] = [get_coops_longname(s) for s in observations['station']]

observations.set_index('name', inplace=True)

observations.head()


Out[13]:
station sensor lat lon date_time ssh above datum datum height
name
Boston, MA 8443970 B1 42.3548 -71.0534 2015-01-23T12:00:00Z -0.341 MLLW 1.074
Fall River, MA 8447386 B1 41.7043 -71.1641 2015-01-23T12:00:00Z 0.555 MLLW 6.356
Chatham, MA 8447435 A1 41.6885 -69.9511 2015-01-23T12:00:00Z 0.403 MLLW 1.003
Woods Hole, MA 8447930 A1 41.5233 -70.6717 2015-01-23T12:00:00Z 0.100 MLLW 0.797
Nantucket Island, MA 8449130 B1 41.2850 -70.0967 2015-01-23T12:00:00Z 0.004 MLLW 0.915

Generate a uniform 6-min time base for model/data comparison


In [14]:
from io import BytesIO
from iris.pandas import as_cube


def coops2df(collector, coops_id):
    """Request CSV response from SOS and convert to Pandas DataFrames."""
    collector.features = [coops_id]
    long_name = get_coops_longname(coops_id)
    response = collector.raw(responseFormat="text/csv")
    kw = dict(parse_dates=True, index_col='date_time')
    data_df = read_csv(BytesIO(response.encode('utf-8')), **kw)
    data_df.name = long_name
    return data_df


def save_timeseries(df, outfile, standard_name, **kw):
    """http://cfconventions.org/Data/cf-convetions/cf-conventions-1.6/build
    /cf-conventions.html#idp5577536"""
    cube = as_cube(df, calendars={1: iris.unit.CALENDAR_GREGORIAN})
    cube.coord("index").rename("time")
    cube.coord("columns").rename("station name")
    cube.rename(standard_name)

    longitude = kw.get("longitude")
    latitude = kw.get("latitude")
    if longitude is not None:
        longitude = iris.coords.AuxCoord(longitude,
                                         var_name="lon",
                                         standard_name="longitude",
                                         long_name="station longitude",
                                         units=iris.unit.Unit("degrees"))
    cube.add_aux_coord(longitude, data_dims=1)

    if latitude is not None:
        latitude = iris.coords.AuxCoord(latitude,
                                        var_name="lat",
                                        standard_name="latitude",
                                        long_name="station latitude",
                                        units=iris.unit.Unit("degrees"))
        cube.add_aux_coord(latitude, data_dims=1)

    # Work around iris to get String instead of np.array object.
    string_list = cube.coord("station name").points.tolist()
    cube.coord("station name").points = string_list
    cube.coord("station name").var_name = 'station'

    station_attr = kw.get("station_attr")
    if station_attr is not None:
        cube.coord("station name").attributes.update(station_attr)

    cube_attr = kw.get("cube_attr")
    if cube_attr is not None:
        cube.attributes.update(cube_attr)

    iris.save(cube, outfile)

In [15]:
import iris
from pandas import DataFrame
from owslib.ows import ExceptionReport

iris.FUTURE.netcdf_promote = True

log.info(fmt(' Observations '))
fname = '{:%Y-%m-%d}-OBS_DATA.nc'.format(stop)

log.info(fmt(' Downloading to file {} '.format(fname)))
data = dict()
bad_datum = []
for station in observations.station:
    try:
        df = coops2df(collector, station)
        col = 'water_surface_height_above_reference_datum (m)'
        data.update({station: df[col]})
    except ExceptionReport as e:
        bad_datum.append(station)
        name = get_coops_longname(station)
        log.warning("[{}] {}:\n{}".format(station, name, e))
obs_data = DataFrame.from_dict(data)

# Split good and bad vertical datum stations.
pattern = '|'.join(bad_datum)
if pattern:
    non_navd = observations.station.str.contains(pattern)
    bad_datum = observations[non_navd]
    observations = observations[~non_navd]

comment = "Several stations from http://opendap.co-ops.nos.noaa.gov"
kw = dict(longitude=observations.lon,
          latitude=observations.lat,
          station_attr=dict(cf_role="timeseries_id"),
          cube_attr=dict(featureType='timeSeries',
                         Conventions='CF-1.6',
                         standard_name_vocabulary='CF-1.6',
                         cdm_data_type="Station",
                         comment=comment,
                         datum=datum,
                         url=url))

save_timeseries(obs_data, outfile=fname,
                standard_name=sos_name, **kw)

obs_data.head()


Out[15]:
8443970 8447435 8447930 8452660 8454000 8510560
date_time
2015-01-23 12:00:00 -2.020 -0.628 -0.315 -0.108 -0.195 -0.271
2015-01-23 12:06:00 -2.017 -0.651 -0.290 -0.071 -0.151 -0.254
2015-01-23 12:12:00 -2.004 -0.673 -0.264 -0.030 -0.099 -0.224
2015-01-23 12:18:00 -1.992 -0.696 -0.218 0.011 -0.045 -0.205
2015-01-23 12:24:00 -1.966 -0.719 -0.202 0.055 -0.006 -0.190

Loop discovered models and save the nearest time-series


In [16]:
import signal
from contextlib import contextmanager

import numpy as np
from oceans import wrap_lon180

from iris import Constraint
from iris.cube import CubeList
from iris.exceptions import CoordinateMultiDimError

water_level = ['sea_surface_height',
               'sea_surface_elevation',
               'sea_surface_height_above_geoid',
               'sea_surface_height_above_sea_level',
               'water_surface_height_above_reference_datum',
               'sea_surface_height_above_reference_ellipsoid']


class TimeoutException(Exception):
    """
    Example
    -------
    >>> def long_function_call():
    >>>     import time
    >>>     sec = 0
    >>>>    while True:
    >>>         sec += 1
    >>>         print(sec)
    >>>         time.sleep(1)
    >>>
    >>> try:
    >>>     with time_limit(10):
    >>>     long_function_call()
    >>> except TimeoutException as msg:
    >>>     print("Timed out!")
    """
    pass


@contextmanager
def time_limit(seconds=10):
    def signal_handler(signum, frame):
        raise TimeoutException("Timed out!")
    signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)


# Iris.
def z_coord(cube):
    """Heuristic way to return **one** the vertical coordinate."""
    try:
        z = cube.coord(axis='Z')
    except CoordinateNotFoundError:
        z = None
        for coord in cube.coords(axis='Z'):
            if coord.name() not in water_level:
                z = coord
    return z


def get_surface(cube):
    """Work around `iris.cube.Cube.slices` error:
    The requested coordinates are not orthogonal."""
    z = z_coord(cube)
    if z:
        positive = z.attributes.get('positive', None)
        if positive == 'up':
            idx = np.unique(z.points.argmax(axis=0))[0]
        else:
            idx = np.unique(z.points.argmin(axis=0))[0]
        return cube[:, idx, ...]
    else:
        return cube


def time_coord(cube):
    """Return the variable attached to time axis and rename it to time."""
    try:
        cube.coord(axis='T').rename('time')
    except CoordinateNotFoundError:
        pass
    timevar = cube.coord('time')
    return timevar


def time_near(cube, datetime):
    """Return the nearest index to a `datetime`."""
    timevar = time_coord(cube)
    try:
        time = timevar.units.date2num(datetime)
        idx = timevar.nearest_neighbour_index(time)
    except IndexError:
        idx = -1
    return idx


def time_slice(cube, start, stop=None):
    """Slice time by indexes using a nearest criteria.
    NOTE: Assumes time is the first dimension!"""
    istart = time_near(cube, start)
    if stop:
        istop = time_near(cube, stop)
        if istart == istop:
            raise ValueError('istart must be different from istop! '
                             'Got istart {!r} and '
                             ' istop {!r}'.format(istart, istop))
        return cube[istart:istop, ...]
    else:
        return cube[istart, ...]


def time_constraint(cube, start, stop):
    """Slice time by constraint."""
    begin = lambda cell: cell >= start
    end = lambda cell: cell <= stop
    constraint = Constraint(begin & end)
    return cube.extract(constraint)


def minmax(v):
    return np.min(v), np.max(v)


def bbox_extract_2Dcoords(cube, bbox):
    """Extract a sub-set of a cube inside a lon, lat bounding box
    bbox=[lon_min lon_max lat_min lat_max].
    NOTE: This is a work around too subset an iris cube that has
    2D lon, lat coords."""
    lons = cube.coord('longitude').points
    lats = cube.coord('latitude').points
    lons = wrap_lon180(lons)

    inregion = np.logical_and(np.logical_and(lons > bbox[0],
                                             lons < bbox[2]),
                              np.logical_and(lats > bbox[1],
                                             lats < bbox[3]))
    region_inds = np.where(inregion)
    imin, imax = minmax(region_inds[0])
    jmin, jmax = minmax(region_inds[1])
    return cube[..., imin:imax+1, jmin:jmax+1]


def bbox_extract_1Dcoords(cube, bbox):
    lat = Constraint(latitude=lambda cell: bbox[1] <= cell < bbox[3])
    lon = Constraint(longitude=lambda cell: bbox[0] <= cell <= bbox[2])
    cube = cube.extract(lon & lat)
    return cube


def subset(cube, bbox):
    """Sub sets cube with 1D or 2D lon, lat coords.
    Using `intersection` instead of `extract` we deal with 0--360
    longitudes automagically."""
    if (cube.coord(axis='X').ndim == 1 and cube.coord(axis='Y').ndim == 1):
        # Workaround `cube.intersection` hanging up on FVCOM models.
        title = cube.attributes.get('title', None)
        featureType = cube.attributes.get('featureType', None)
        if (('FVCOM' in title) or ('ESTOFS' in title) or
           featureType == 'timeSeries'):
            cube = bbox_extract_1Dcoords(cube, bbox)
        else:
            cube = cube.intersection(longitude=(bbox[0], bbox[2]),
                                     latitude=(bbox[1], bbox[3]))
    elif (cube.coord(axis='X').ndim == 2 and
          cube.coord(axis='Y').ndim == 2):
        cube = bbox_extract_2Dcoords(cube, bbox)
    else:
        msg = "Cannot deal with X:{!r} and Y:{!r} dimensions."
        raise CoordinateMultiDimError(msg.format(cube.coord(axis='X').ndim),
                                      cube.coord(axis='y').ndim)
    return cube


def get_cube(url, name_list, bbox=None, time=None, units=None, callback=None,
             constraint=None):
    """Only `url` and `name_list` are mandatory.  The kw args are:
    `bbox`, `callback`, `time`, `units`, `constraint`."""

    cubes = iris.load_raw(url, callback=callback)

    in_list = lambda cube: cube.standard_name in name_list
    cubes = CubeList([cube for cube in cubes if in_list(cube)])
    if not cubes:
        raise ValueError('Cube does not contain {!r}'.format(name_list))
    else:
        cube = cubes.merge_cube()

    if constraint:
        cube = cube.extract(constraint)
        if not cube:
            raise ValueError('No cube using {!r}'.format(constraint))
    if bbox:
        cube = subset(cube, bbox)
        if not cube:
            raise ValueError('No cube using {!r}'.format(bbox))
    if time:
        if isinstance(time, datetime):
            start, stop = time, None
        elif isinstance(time, tuple):
            start, stop = time[0], time[1]
        else:
            raise ValueError('Time must be start or (start, stop).'
                             '  Got {!r}'.format(time))
        cube = time_slice(cube, start, stop)
    if units:
        if cube.units != units:
            cube.convert_units(units)
    return cube


def get_model_name(cube, url):
    url = parse_url(url)
    try:
        model_full_name = cube.attributes['title']
    except AttributeError:
        model_full_name = url
    mod_name = model_full_name.split()[0]
    return mod_name, model_full_name

In [17]:
import warnings
from iris.exceptions import (CoordinateNotFoundError, ConstraintMismatchError,
                             MergeError)


log.info(fmt(' Models '))
cubes = dict()

with warnings.catch_warnings():
    # Suppress iris warnings :
    warnings.simplefilter("ignore")
    for k, url in enumerate(dap_urls):
        log.info('\n[Reading url {}/{}]: {}'.format(k+1, len(dap_urls), url))
        try:
            with time_limit(60*5):
                cube = get_cube(url, name_list=name_list,
                                bbox=bbox, time=(start, stop),
                                units=iris.unit.Unit('meters'))
            # TODO: Need a better way to identify model data and observed data.
            if cube.ndim > 1:
                mod_name, model_full_name = get_model_name(cube, url)
                cubes.update({mod_name: cube})
            else:
                log.warning('url {} is probably a timeSeries!'.format(url))
        except (RuntimeError, ValueError, MergeError, TimeoutException,
                ConstraintMismatchError, CoordinateNotFoundError) as e:
            log.warning('Cannot get cube for: {}\n{}'.format(url, e))

In [18]:
import numpy.ma as ma
from scipy.spatial import cKDTree as KDTree


def standardize_fill_value(cube):
    """Work around default `fill_value` when obtaining
    `_CubeSignature` (iris) using `lazy_data()` (biggus).
    Warning use only when you DO KNOW that the slices should
    have the same `fill_value`!!!"""
    if ma.isMaskedArray(cube._my_data):
        fill_value = ma.empty(0, dtype=cube._my_data.dtype).fill_value
        cube._my_data.fill_value = fill_value
    return cube


def make_tree(cube):
    """Create KDTree."""
    lon = cube.coord(axis='X').points
    lat = cube.coord(axis='Y').points
    # Structured models with 1D lon, lat.
    if (lon.ndim == 1) and (lat.ndim == 1) and (cube.ndim == 3):
        lon, lat = np.meshgrid(lon, lat)
    # Unstructure are already paired!
    tree = KDTree(zip(lon.ravel(), lat.ravel()))
    return tree, lon, lat


def get_nearest_water(cube, tree, xi, yi, k=10, max_dist=0.04, min_var=0.01):
    """Find `k` nearest model data points from an iris `cube` at station
    lon: `xi`, lat: `yi` up to `max_dist` in degrees.  Must provide a Scipy's
    KDTree `tree`."""
    # TODO: Use rtree instead of KDTree.
    # NOTE: Based on the iris `_nearest_neighbour_indices_ndcoords`.

    distances, indices = tree.query(np.array([xi, yi]).T, k=k)
    if indices.size == 0:
        raise ValueError("No data found.")
    # Get data up to specified distance.
    mask = distances <= max_dist
    distances, indices = distances[mask], indices[mask]
    if distances.size == 0:
        msg = "No data near ({}, {}) max_dist={}.".format
        raise ValueError(msg(xi, yi, max_dist))
    # Unstructured model.
    if (cube.coord(axis='X').ndim == 1) and (cube.ndim == 2):
        i = j = indices
        unstructured = True
    # Structured model.
    else:
        unstructured = False
        if cube.coord(axis='X').ndim == 2:  # CoordinateMultiDim
            i, j = np.unravel_index(indices, cube.coord(axis='X').shape)
        else:
            shape = (cube.coord(axis='Y').shape[0],
                     cube.coord(axis='X').shape[0])
            i, j = np.unravel_index(indices, shape)
    # Use only data where the standard deviation of the time series exceeds
    # 0.01 m (1 cm) this eliminates flat line model time series that come from
    # land points that should have had missing values.
    series, dist, idx = None, None, None
    for dist, idx in zip(distances, zip(i, j)):
        if unstructured:  # NOTE: This would be so elegant in py3k!
            idx = (idx[0],)
        # This weird syntax allow for idx to be len 1 or 2.
        series = cube[(slice(None),)+idx]
        # Accounting for wet-and-dry models.
        arr = ma.masked_invalid(series.data).filled(fill_value=0)
        if arr.std() <= min_var:
            series = None
            break
    return series, dist, idx


def add_station(cube, station):
    """Add a station Auxiliary Coordinate and its name."""
    kw = dict(var_name="station", long_name="station name")
    coord = iris.coords.AuxCoord(station, **kw)
    cube.add_aux_coord(coord)
    return cube


def ensure_timeseries(cube):
    """Ensure that the cube is CF-timeSeries compliant."""
    if not cube.coord('time').shape == cube.shape[0]:
        cube.transpose()
    make_aux_coord(cube, axis='Y')
    make_aux_coord(cube, axis='X')

    cube.attributes.update({'featureType': 'timeSeries'})
    cube.coord("station name").attributes = dict(cf_role='timeseries_id')
    return cube


def make_aux_coord(cube, axis='Y'):
    """Make any given coordinate an Auxiliary Coordinate."""
    coord = cube.coord(axis=axis)
    cube.remove_coord(coord)
    if cube.ndim == 2:
        cube.add_aux_coord(coord, 1)
    else:
        cube.add_aux_coord(coord)
    return cube

In [19]:
from iris.pandas import as_series


for mod_name, cube in cubes.items():
    fname = '{:%Y-%m-%d}-{}.nc'.format(stop, mod_name)
    log.info(fmt(' Saving to file {} '.format(fname)))
    # NOTE: 2D coords KDtree.  (Iris can only do 1D coords KDtree for now.)
    try:
        tree, lon, lat = make_tree(cube)
    except CoordinateNotFoundError as e:
        log.warning('Cannot create KDTree for: {}'.format(mod_name))
        continue
    # Get model series at observed locations.
    raw_series = dict()
    for station, obs in observations.iterrows():
        try:
            kw = dict(k=10, max_dist=0.04, min_var=0.01)
            args = cube, tree, obs.lon, obs.lat
            series, dist, idx = get_nearest_water(*args, **kw)
        except ValueError as e:
            log.warning(e)
            continue
        if not series:
            status = "Land "
        else:
            raw_series.update({obs['station']: series})
            series = as_series(series)
            status = "Water"

        log.info('[{}] {}'.format(status, obs.name))

    if raw_series:  # Save cube.
        for station, cube in raw_series.items():
            cube = standardize_fill_value(cube)
            cube = add_station(cube, station)
        try:
            cube = iris.cube.CubeList(raw_series.values()).merge_cube()
        except MergeError as e:
            log.warning(e)

        ensure_timeseries(cube)
        iris.save(cube, fname)
        del cube

    log.info('Finished processing [{}]: {}'.format(mod_name, url))

Add extra stations.


In [20]:
include = dict({'Scituate, MA': dict(lon=-70.7166, lat=42.9259),
                'Wells, ME': dict(lon=-70.583883, lat=43.272411)})

models = dict()
extra_series = dict()
for station, obs in include.items():
    for mod_name, cube in cubes.items():
        mod_name, model_full_name = get_model_name(cube, url)
        try:
            tree, lon, lat = make_tree(cube)
        except CoordinateNotFoundError as e:
            log.warning('Cannot create KDTree for: {}'.format(mod_name))
            continue
        # Get model series at observed locations.
        try:
            kw = dict(k=10, max_dist=0.04, min_var=0.01)
            args = cube, tree, obs['lon'], obs['lat']
            series, dist, idx = get_nearest_water(*args, **kw)
        except ValueError as e:
            log.warning(e)
            continue
        if not series:
            status = "Land "
        else:
            status = "Water"
            models.update({mod_name: series})

        log.info('[{}] {}'.format(status, station))
    extra_series.update({station: models})
    models = dict()

Load saved files and interpolate to the observations time interval


In [21]:
from iris.pandas import as_data_frame


def nc2df(fname):
    cube = iris.load_cube(fname)
    for coord in cube.coords(dimensions=[0]):
        name = coord.name()
        if name != 'time':
            cube.remove_coord(name)
    for coord in cube.coords(dimensions=[1]):
        name = coord.name()
        if name != 'station name':
            cube.remove_coord(name)
    df = as_data_frame(cube)
    if cube.ndim == 1:  # Horrible work around iris.
        station = cube.coord('station name').points[0]
        df.columns = [station]
    return df

In [22]:
from glob import glob
from operator import itemgetter

from pandas import Panel

fname = '{}-OBS_DATA.nc'.format(run_name)
OBS_DATA = nc2df(fname)
OBS_DATA.index = OBS_DATA.index.tz_localize(start.tzinfo)

from pandas import date_range
index = date_range(start=start, end=stop, freq='6min', tz=start.tzinfo)

dfs = dict(OBS_DATA=OBS_DATA)
for fname in glob("*.nc"):
    if 'OBS_DATA' in fname:
        continue
    else:
        model = fname.split('.')[0].split('-')[-1]
        df = nc2df(fname)
        if len(df.index.values) != len(np.unique(df.index.values)):
            # FIXME: Horrible work around duplicate times.
            opts = dict(cols='index', take_last=True)
            df = df.reset_index().drop_duplicates(**opts).set_index('index')
        df.index = df.index.tz_localize(start.tzinfo)
        if False:  # if True interpolates to 6 min series.
            kw = dict(method='time', limit=30)
            df = df.reindex(index).interpolate(**kw).ix[index]
        dfs.update({model: df})

dfs = Panel.fromDict(dfs).swapaxes(0, 2)

Bias


In [23]:
from pandas import DataFrame

panel = dfs.copy()
means = dict()
for station, df in panel.iteritems():
    df.dropna(axis=1, how='all', inplace=True)
    mean = df.mean()
    df = df - mean + mean['OBS_DATA']
    means.update({station: mean.drop('OBS_DATA') - mean['OBS_DATA']})

bias = DataFrame.from_dict(means).dropna(axis=1, how='all')
bias = bias.applymap('{:.2f}'.format).replace('nan', '--')

columns = dict()
[columns.update({station: get_coops_longname(station)}) for
 station in bias.columns.values]

bias.rename(columns=columns, inplace=True)

bias.T


Out[23]:
COAWST ESTOFS HYbrid NECOFS ROMS
Boston, MA -- -- -- 0.21 --
Chatham, MA -- -0.15 -0.74 0.01 --
Woods Hole, MA 0.13 -0.06 -- 0.23 --
Newport, RI 0.16 -0.09 -- 0.31 -0.35
Providence, RI -- -- -- 0.28 --
Montauk, NY -- -0.11 -- 0.28 --

Model skill


In [24]:
def both_valid(x, y):
    """Returns a mask where both series are valid."""
    mask_x = np.isnan(x)
    mask_y = np.isnan(y)
    return np.logical_and(~mask_x, ~mask_y)

In [25]:
from scipy.stats.stats import pearsonr


skills = dict()
for station, df in panel.iteritems():
    obs = df['OBS_DATA']
    skill = dict()
    for model, y in df.iteritems():
        if 'OBS_DATA' not in model:
            mask = both_valid(obs, y)
            r, p = pearsonr(obs[mask]-obs.mean(), y[mask]-y.mean())
            skill.update({model: r})
    skills.update({station: skill})

df = DataFrame.from_dict(skills)

columns = dict()
[columns.update({station: get_coops_longname(station)}) for
 station in df.columns.values]

df.rename(columns=columns, inplace=True)

In [26]:
fname = 'skill.html'

df_skill = df.applymap('{:.2f}'.format).replace('nan', '--')

df_skill.T


Out[26]:
COAWST ESTOFS HYbrid NECOFS ROMS
Boston, MA -- -- -- 0.99 --
Chatham, MA -- 0.90 0.32 0.95 --
Woods Hole, MA 0.79 0.83 -- 0.70 --
Newport, RI 0.83 0.90 -- 0.97 0.88
Providence, RI -- -- -- 0.95 --
Montauk, NY -- 0.93 -- 0.95 --

Map


In [27]:
from folium.folium import Map
import matplotlib.pyplot as plt
from IPython.display import IFrame


def get_coordinates(bbox):
    """Create bounding box coordinates for the map.  It takes flat or
    nested list/numpy.array and returns 4 points for the map corners."""
    bbox = np.asanyarray(bbox).ravel()
    if bbox.size == 4:
        bbox = bbox.reshape(2, 2)
        coordinates = []
        coordinates.append([bbox[0][1], bbox[0][0]])
        coordinates.append([bbox[0][1], bbox[1][0]])
        coordinates.append([bbox[1][1], bbox[1][0]])
        coordinates.append([bbox[1][1], bbox[0][0]])
        coordinates.append([bbox[0][1], bbox[0][0]])
    else:
        raise ValueError('Wrong number corners.'
                         '  Expected 4 got {}'.format(bbox.size))
    return coordinates


def inline_map(m):
    """Takes a folium instance or a html path and load into an iframe."""
    if isinstance(m, Map):
        m._build_map()
        srcdoc = m.HTML.replace('"', '&quot;')
        embed = HTML('<iframe srcdoc="{srcdoc}" '
                     'style="width: 100%; height: 500px; '
                     'border: none"></iframe>'.format(srcdoc=srcdoc))
    elif isinstance(m, str):
        embed = IFrame(m, width=750, height=500)
    return embed


def make_map(bbox, **kw):
    """Creates a folium map instance."""
    line = kw.pop('line', True)
    zoom_start = kw.pop('zoom_start', 7)

    lon, lat = np.array(bbox).reshape(2, 2).mean(axis=0)
    m = Map(width=750, height=500,
            location=[lat, lon], zoom_start=zoom_start)
    if line:
        # Create the map and add the bounding box line.
        kw = dict(line_color='#FF0000', line_weight=2)
        m.line(get_coordinates(bbox), **kw)
    return m


def plot_series():
    fig, ax = plt.subplots(figsize=(width, height))
    ax.set_ylabel('Sea surface height (m)')
    ax.set_ylim(-2.5, 2.5)
    ax.grid(True)
    return fig, ax

In [28]:
# Clusters.
big_list = []
for fname in glob("*.nc"):
    if 'OBS_DATA' in fname:
        continue
    nc = iris.load_cube(fname)
    model = fname.split('-')[-1].split('.')[0]
    lons = nc.coord(axis='X').points
    lats = nc.coord(axis='Y').points
    stations = nc.coord('station name').points
    models = [model]*lons.size
    lista = zip(models, lons.tolist(), lats.tolist(), stations.tolist())
    big_list.extend(lista)

big_list.sort(key=itemgetter(3))
df = DataFrame(big_list, columns=['name', 'lon', 'lat', 'station'])
df.set_index('station', drop=True, inplace=True)
groups = df.groupby(df.index)

In [29]:
from mpld3 import save_html
from mpld3.plugins import LineLabelTooltip, connect

mapa = make_map(bbox, line=True, states=False)

# Clusters.
for station, info in groups:
    station = get_coops_longname(station)
    for lat, lon, name in zip(info.lat, info.lon, info.name):
        location = lat, lon
        popup = '<b>{}</b>\n{}'.format(station, name)
        mapa.simple_marker(location=location, popup=popup,
                           clustered_marker=True)

# Model and observations.
resolution, width, height = 75, 7, 3
for station in dfs:
    sta_name = get_coops_longname(station)
    # This will eliminate all NaNs columns.
    df = dfs[station].dropna(axis=1, how='all')

    fig, ax = plot_series()
    labels = []
    for col in df.columns:
        # This restore the series to its original "index."
        # Not needed if interpolating the series.
        serie = df[col].dropna()
        lines = ax.plot(serie.index, serie, label=col,
                        linewidth=2.5, alpha=0.5)
        if 'OBS_DATA' not in col:
            text0 = col
            text1 = bias[sta_name][col]
            text2 = df_skill[sta_name][col]
            tooltip = '{}:\nbias {}\nskill: {}'.format
            labels.append(tooltip(text0, text1, text2))
        else:
            labels.append('OBS_DATA')

    kw = dict(loc='upper center', bbox_to_anchor=(0.5, 1.05), numpoints=1,
              ncol=2, framealpha=0)
    l = ax.legend(**kw)
    l.set_title("")  # Workaround str(None).

    [connect(fig, LineLabelTooltip(line, name))
     for line, name in zip(ax.lines, labels)]

    html = 'station_{}.html'.format(station)
    save_html(fig, '{}'.format(html))

    popup = "<div align='center'> {} <br><iframe src='{}' alt='image'"
    popup += "width='{}px' height='{}px' frameBorder='0'></div>"
    popup = popup.format('{}'.format(sta_name), html,
                         (width*resolution)+75, (height*resolution)+50)
    kw = dict(popup=popup, width=(width*resolution)+75)

    if (df.columns == 'OBS_DATA').all():
        kw.update(dict(marker_color="blue", marker_icon="ok"))
    else:
        kw.update(dict(marker_color="green", marker_icon="ok-sign"))
    obs = observations[observations['station'] == station].squeeze()
    mapa.simple_marker(location=[obs['lat'], obs['lon']], **kw)

# Bad datum.
if isinstance(bad_datum, DataFrame):
    for station, obs in bad_datum.iterrows():
        popup = '<b>Station:</b> {}<br><b>Datum:</b> {}<br>'
        popup = popup.format(station, obs['datum'])
        kw = dict(popup=popup, marker_color="red", marker_icon="question-sign")
        mapa.simple_marker(location=[obs['lat'], obs['lon']], **kw)

In [30]:
for station in include.keys():
    models = extra_series[station]
    if models:
        fig, ax = plot_series()
        labels = []
        for model, cube in models.items():
            t = time_coord(cube)
            t = t.units.num2date(t.points)
            lines = ax.plot(t, cube.data, linewidth=2.5, alpha=0.5,
                            label=model)
            labels.append(model)

        kw = dict(loc='upper center', bbox_to_anchor=(0.5, 1.05), numpoints=1,
                  ncol=2, framealpha=0)
        l = ax.legend(**kw)
        l.set_title("")  # Workaround str(None).

        [connect(fig, LineLabelTooltip(line, name))
         for line, name in zip(ax.lines, labels)]

        html = 'station_{}.html'.format
        html = html(station.lower().replace(' ', '_').replace(',', ''))
        save_html(fig, '{}'.format(html))

        popup = "<div align='center'> {} <br><iframe src='{}' alt='image'"
        popup += "width='{}px' height='{}px' frameBorder='0'></div>"
        popup = popup.format('{}'.format(sta_name), html,
                             (width*resolution)+75, (height*resolution)+50)
        kw = dict(popup=popup, width=(width*resolution)+75)

        kw.update(dict(marker_color="green", marker_icon="ok"))
        obs = observations[observations['station'] == station].squeeze()
        mapa.simple_marker(location=[include[station]['lat'],
                                     include[station]['lon']], **kw)

In [31]:
mapa.create_map(path='mapa.html')
inline_map('mapa.html')


Out[31]:

In [32]:
elapsed = time.time() - start_time
log.info('{:.2f} minutes'.format(elapsed/60.))
log.info('EOF')

In [33]:
with open('log.txt') as f:
    print(f.read())


10:38:47 INFO: *********************** Run information ************************
10:38:47 INFO: Run date: 2015-01-27 13:38:47
10:38:47 INFO: Download start: 2015-01-23 12:00:00
10:38:47 INFO: Download stop: 2015-01-30 12:00:00
10:38:47 INFO: Bounding box: -72.00, 41.00,-69.00, 43.00
10:38:47 INFO: *********************** Software version ***********************
10:38:47 INFO: Iris version: 1.7.2-DEV
10:38:47 INFO: owslib version: 0.8-dev
10:38:47 INFO: pyoos version: 0.6.2
10:38:52 INFO: ********************* Catalog information **********************
10:38:52 INFO: URL: http://www.ngdc.noaa.gov/geoportal/csw
10:38:52 INFO: CSW version: 2.0.2
10:38:52 INFO: Number of datasets available: 7
10:38:52 INFO: ***************************** CSW ******************************
10:38:52 INFO: NECOFS Massachusetts (FVCOM) - Massachusetts Coastal - Latest Forecast
10:38:52 INFO: ROMS ESPRESSO Real-Time Operational IS4DVAR Forecast System Version 2 (NEW) 2013-present FMRC History (Best)
10:38:52 INFO: NECOFS GOM3 (FVCOM) - Northeast US - Latest Forecast
10:38:52 INFO: COAWST Forecast System : USGS : US East Coast and Gulf of Mexico (Experimental)
10:38:52 INFO: Barotropic Tide Model for the Pacific Basin
10:38:52 INFO: ESTOFS Storm Surge Model - Atlantic - v1.0.0 - NOAA - NCEP - ADCIRC
10:38:52 INFO: HYbrid Coordinate Ocean Model (HYCOM): Global
10:38:52 INFO: ***************************** DAP ******************************
10:38:52 INFO: http://geoport-dev.whoi.edu/thredds/dodsC/estofs/atlantic.html
10:38:52 INFO: http://geoport.whoi.edu/thredds/dodsC/coawst_4/use/fmrc/coawst_4_use_best.ncd.html
10:38:52 INFO: http://oos.soest.hawaii.edu/thredds/dodsC/hioos/tide_pac.html
10:38:52 INFO: http://oos.soest.hawaii.edu/thredds/dodsC/pacioos/hycom/global.html
10:38:52 INFO: http://tds.marine.rutgers.edu/thredds/dodsC/roms/espresso/2013_da/his_Best/ESPRESSO_Real-Time_v2_History_Best_Available_best.ncd.html
10:38:52 INFO: http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_FVCOM_OCEAN_MASSBAY_FORECAST.nc.html
10:38:52 INFO: http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc.html
10:38:52 INFO: ***************************** SOS ******************************
10:39:00 INFO: ********************* Collector offerings **********************
10:39:00 INFO: NOAA.NOS.CO-OPS SOS: 1030 offerings
10:39:01 INFO: Starting new HTTP connection (1): opendap.co-ops.nos.noaa.gov
10:39:02 INFO: SOS URL request: http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?eventTime=2015-01-23T12%3A00%3A00Z&service=SOS&offering=urn%3Aioos%3Anetwork%3ANOAA.NOS.CO-OPS%3AWaterLevelActive&request=GetObservation&version=1.0.0&responseFormat=text%2Fcsv&featureOfInterest=BBOX%3A-72.0%2C41.0%2C-69.0%2C43.0&observedProperty=water_surface_height_above_reference_datum
10:39:11 INFO: ************************* Observations *************************
10:39:11 INFO: ********** Downloading to file 2015-01-30-OBS_DATA.nc **********
10:39:16 WARNING: [8447386] Fall River, MA:
'Wrong Datum for this station: The correct Datum values are: MHHW, MHW, MTL, MSL, MLW, MLLW, STND'
10:39:26 WARNING: [8449130] Nantucket Island, MA:
'Wrong Datum for this station: The correct Datum values are: MHHW, MHW, MTL, MSL, MLW, MLLW, STND'
10:39:32 WARNING: [8452944] Conimicut Light, RI:
'Wrong Datum for this station: The correct Datum values are: MHHW, MHW, MTL, MSL, MLW, MLLW, STND'
10:39:43 INFO: **************************** Models ****************************
10:39:43 INFO: 
[Reading url 1/7]: http://geoport-dev.whoi.edu/thredds/dodsC/estofs/atlantic
10:41:31 INFO: 
[Reading url 2/7]: http://geoport.whoi.edu/thredds/dodsC/coawst_4/use/fmrc/coawst_4_use_best.ncd
10:42:36 INFO: 
[Reading url 3/7]: http://oos.soest.hawaii.edu/thredds/dodsC/hioos/tide_pac
10:42:48 INFO: 
[Reading url 4/7]: http://oos.soest.hawaii.edu/thredds/dodsC/pacioos/hycom/global
10:42:52 INFO: 
[Reading url 5/7]: http://tds.marine.rutgers.edu/thredds/dodsC/roms/espresso/2013_da/his_Best/ESPRESSO_Real-Time_v2_History_Best_Available_best.ncd
10:43:06 INFO: 
[Reading url 6/7]: http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_FVCOM_OCEAN_MASSBAY_FORECAST.nc
10:43:28 INFO: 
[Reading url 7/7]: http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc
10:43:41 INFO: ************* Saving to file 2015-01-30-ESTOFS.nc **************
10:43:41 WARNING: No data near (-71.0534, 42.3548) max_dist=0.04.
10:43:46 INFO: [Water] Chatham, MA
10:43:57 INFO: [Water] Woods Hole, MA
10:44:07 INFO: [Water] Newport, RI
10:44:07 WARNING: No data near (-71.4012, 41.8071) max_dist=0.04.
10:44:18 INFO: [Water] Montauk, NY
10:44:18 INFO: Finished processing [ESTOFS]: http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc
10:44:18 INFO: ************** Saving to file 2015-01-30-ROMS.nc ***************
10:44:21 INFO: [Land ] Boston, MA
10:44:24 INFO: [Land ] Chatham, MA
10:44:27 INFO: [Land ] Woods Hole, MA
10:44:30 INFO: [Water] Newport, RI
10:44:33 INFO: [Land ] Providence, RI
10:44:36 INFO: [Land ] Montauk, NY
10:44:39 INFO: Finished processing [ROMS]: http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc
10:44:39 INFO: *********** Saving to file 2015-01-30-Barotropic.nc ************
10:44:39 WARNING: No data near (-71.0534, 42.3548) max_dist=0.04.
10:44:39 WARNING: No data near (-69.9511, 41.6885) max_dist=0.04.
10:44:39 WARNING: No data near (-70.6717, 41.5233) max_dist=0.04.
10:44:39 WARNING: No data near (-71.3267, 41.505) max_dist=0.04.
10:44:39 WARNING: No data near (-71.4012, 41.8071) max_dist=0.04.
10:44:39 WARNING: No data near (-71.96, 41.0483) max_dist=0.04.
10:44:39 INFO: Finished processing [Barotropic]: http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc
10:44:39 INFO: ************* Saving to file 2015-01-30-COAWST.nc **************
10:44:48 INFO: [Land ] Boston, MA
10:44:54 INFO: [Land ] Chatham, MA
10:44:59 INFO: [Water] Woods Hole, MA
10:45:11 INFO: [Water] Newport, RI
10:45:17 INFO: [Land ] Providence, RI
10:45:29 INFO: [Land ] Montauk, NY
10:45:41 INFO: Finished processing [COAWST]: http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc
10:45:41 INFO: ************* Saving to file 2015-01-30-HYbrid.nc **************
10:45:45 INFO: [Land ] Boston, MA
10:45:47 INFO: [Water] Chatham, MA
10:45:47 WARNING: No data near (-70.6717, 41.5233) max_dist=0.04.
10:45:51 INFO: [Land ] Newport, RI
10:45:51 WARNING: No data near (-71.4012, 41.8071) max_dist=0.04.
10:45:51 WARNING: No data near (-71.96, 41.0483) max_dist=0.04.
10:45:51 INFO: Finished processing [HYbrid]: http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc
10:45:51 INFO: ************* Saving to file 2015-01-30-NECOFS.nc **************
10:46:05 INFO: [Water] Boston, MA
10:46:19 INFO: [Water] Chatham, MA
10:46:33 INFO: [Water] Woods Hole, MA
10:46:47 INFO: [Water] Newport, RI
10:47:00 INFO: [Water] Providence, RI
10:47:16 INFO: [Water] Montauk, NY
10:47:16 INFO: Finished processing [NECOFS]: http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc
10:47:28 INFO: [Water] Scituate, MA
10:47:28 WARNING: No data near (-70.7166, 42.9259) max_dist=0.04.
10:47:28 WARNING: No data near (-70.7166, 42.9259) max_dist=0.04.
10:47:42 INFO: [Water] Scituate, MA
10:47:45 INFO: [Water] Scituate, MA
10:47:59 INFO: [Water] Scituate, MA
10:47:59 WARNING: No data near (-70.583883, 43.272411) max_dist=0.04.
10:47:59 WARNING: No data near (-70.583883, 43.272411) max_dist=0.04.
10:47:59 WARNING: No data near (-70.583883, 43.272411) max_dist=0.04.
10:48:05 INFO: [Land ] Wells, ME
10:48:05 WARNING: No data near (-70.583883, 43.272411) max_dist=0.04.
10:48:05 WARNING: No data near (-70.583883, 43.272411) max_dist=0.04.
10:48:05 WARNING: /home/filipe/miniconda/envs/SECOORA/lib/python2.7/site-packages/pandas/util/decorators.py:81: FutureWarning: the 'cols' keyword is deprecated, use 'subset' instead
  warnings.warn(msg, FutureWarning)

10:48:32 INFO: 9.76 minutes
10:48:32 INFO: EOF