In [1]:
%run utilities.ipynb
In [2]:
# Scientific Stack.
import iris
import pandas as pd
from datetime import datetime, timedelta
# Custom IOOS/ASA modules (available on PyPI).
from owslib import fes
from owslib.csw import CatalogueServiceWeb
from pyoos.collectors.coops.coops_sos import CoopsSos
In [3]:
box = [-76.4751, 38.3890, -71.7432, 42.9397]  # [lon_min, lat_min, lon_max, lat_max]
In [4]:
times = {'Hurricane Sandy':
         [datetime(2012, 10, 26), datetime(2012, 11, 2)],
         '2014 Feb 10-15 Storm':
         [datetime(2014, 2, 10), datetime(2014, 2, 15)],
         '2014 Recent':
         [datetime(2014, 3, 8), datetime(2014, 3, 11)],
         '2013':
         [datetime(2013, 4, 20), datetime(2013, 4, 24)],
         'Now':
         [datetime.utcnow() - timedelta(days=3),
          datetime.utcnow() + timedelta(days=3)]}
jd_start, jd_stop = times['Now']
# Round the endpoints down to the top of the hour.
start_date = jd_start.strftime('%Y-%m-%d %H:00')
stop_date = jd_stop.strftime('%Y-%m-%d %H:00')
jd_start = datetime.strptime(start_date, '%Y-%m-%d %H:%M')
jd_stop = datetime.strptime(stop_date, '%Y-%m-%d %H:%M')
print('%s to %s' % (start_date, stop_date))
In [5]:
if False:
    from IPython.core.display import HTML
    HTML('<iframe src="http://www.ngdc.noaa.gov/geoportal/" width=950 height=400></iframe>')
In [6]:
CSW = {'NGDC Geoportal':
       'http://www.ngdc.noaa.gov/geoportal/csw',
       'USGS WHSC Geoportal':
       'http://geoport.whoi.edu/geoportal/csw',
       'NODC Geoportal: granule level':
       'http://www.nodc.noaa.gov/geoportal/csw',
       'NODC Geoportal: collection level':
       'http://data.nodc.noaa.gov/geoportal/csw',
       'NRCAN CUSTOM':
       'http://geodiscover.cgdi.ca/wes/serviceManagerCSW/csw',
       'USGS Woods Hole GI_CAT':
       'http://geoport.whoi.edu/gi-cat/services/cswiso',
       'USGS CIDA Geonetwork':
       'http://cida.usgs.gov/gdp/geonetwork/srv/en/csw',
       'USGS Coastal and Marine Program':
       'http://cmgds.marine.usgs.gov/geonetwork/srv/en/csw',
       'USGS Woods Hole Geoportal':
       'http://geoport.whoi.edu/geoportal/csw',
       'CKAN':
       'http://geo.gov.ckan.org/csw',
       'EPA':
       'https://edg.epa.gov/metadata/csw',
       'CWIC':
       'http://cwic.csiss.gmu.edu/cwicv1/discovery'}
endpoint = CSW['NGDC Geoportal']
csw = CatalogueServiceWeb(endpoint, timeout=60)
print(csw.version)
In [7]:
for oper in csw.operations:
    if oper.name == 'GetRecords':
        print('\nISO Queryables:\n\t%s' %
              '\n\t'.join(oper.constraints['SupportedISOQueryables']['values']))
In [8]:
# Convert User Input into FES filters.
start, stop = dateRange(start_date, stop_date)
bbox = fes.BBox(box)
or_filt = fes.Or([fes.PropertyIsLike(propertyname='apiso:AnyText',
                                     literal=('*%s*' % val),
                                     escapeChar='\\',
                                     wildCard='*',
                                     singleChar='?') for val in std_name_list])
ROMS model output often comes as Averages and History files. The Averages files are usually averaged over a tidal cycle or more, while the History files are snapshots at a single time instant. We are not interested in averaged data for this test, so in the cell below we remove any datasets whose metadata text contains the term "Averages". A better approach would be to look at the cell_methods attributes propagated through to some term in the ISO metadata, but as far as I know this is not implemented yet; a sketch of the idea follows.
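For illustration, here is a minimal sketch of that cell_methods approach, assuming the dataset loads as an iris cube and that time-averaged ROMS output records a CF "mean" cell method over the time coordinate (the helper name is hypothetical, and this cell is not part of the workflow):
In [ ]:
def is_time_averaged(cube):
    # Hypothetical helper: inspect the CF cell_methods instead of
    # text-matching "Averages" in the metadata. Assumes averaging is
    # recorded as a 'mean' cell method over the time coordinate.
    return any(cm.method == 'mean' and 'time' in cm.coord_names
               for cm in cube.cell_methods)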
In [9]:
val = 'Averages'
not_filt = fes.Not([fes.PropertyIsLike(propertyname='apiso:AnyText',
                                       literal=('*%s*' % val),
                                       escapeChar='\\',
                                       wildCard='*',
                                       singleChar='?')])
filter_list = [fes.And([bbox, start, stop, or_filt, not_filt])]
# Issue the CSW request; all filters are combined with a logical AND.
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
print(len(csw.records.keys()))
In [10]:
for rec, item in csw.records.items():
    print(item.title)
In [11]:
service = 'urn:x-esri:specification:ServiceType:odp:url'
dap_urls = service_urls(csw.records, service=service)
print("\n".join(dap_urls))
In [12]:
service = 'urn:x-esri:specification:ServiceType:sos:url'
sos_urls = service_urls(csw.records, service=service)
print("\n".join(sos_urls))
In [13]:
collector = CoopsSos()
print(collector.server.identification.title)
collector.start_time = jd_start
collector.end_time = jd_stop
collector.variables = [sos_name]
ofrs = collector.server.offerings
print(len(ofrs))
for p in ofrs[700:710]:
    print(p)
We would like to simply apply a filter to a collection and get a new collection back, but pyoos doesn't support that yet. Instead, we issue a GetObservation request for the collection with a bounding box, asking for one value at the start of the time period of interest. The bounding box filter on the SOS server returns one point for each station found, so for three stations we get back three records in CSV format. We then strip the station ids from the CSV, which gives us a list of stations we can use with pyoos. The template for the bounding-box-filtered GetObservation query was generated using the GUI; a sketch of assembling the same query string programmatically follows the request below.
In [14]:
iso_start = jd_start.strftime('%Y-%m-%dT%H:%M:%SZ')
box_str = ','.join(str(e) for e in box)
print('%s\n%s' % (iso_start, box_str))
In [15]:
url = ('http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?'
       'service=SOS&request=GetObservation&version=1.0.0&'
       'observedProperty=%s&offering='
       'urn:ioos:network:NOAA.NOS.CO-OPS:WaterLevelActive&'
       'featureOfInterest=BBOX:%s&responseFormat=text/csv&eventTime=%s&'
       'result=VerticalDatum==urn:ogc:def:datum:epsg::5103&'
       'dataType=PreliminarySixMinute') % (sos_name, box_str, iso_start)
print(url)
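The same query string can also be assembled from a parameter dict instead of hand-formatting; a minimal sketch, reusing the sos_name, box_str, and iso_start defined above (urlencode percent-encodes the colons and commas, which the server decodes on receipt):
In [ ]:
try:  # Python 3
    from urllib.parse import urlencode
except ImportError:  # Python 2
    from urllib import urlencode

# Same KVP parameters as the hand-built request above.
params = {'service': 'SOS',
          'request': 'GetObservation',
          'version': '1.0.0',
          'observedProperty': sos_name,
          'offering': 'urn:ioos:network:NOAA.NOS.CO-OPS:WaterLevelActive',
          'featureOfInterest': 'BBOX:%s' % box_str,
          'responseFormat': 'text/csv',
          'eventTime': iso_start,
          'result': 'VerticalDatum==urn:ogc:def:datum:epsg::5103',
          'dataType': 'PreliminarySixMinute'}
url = ('http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?' +
       urlencode(params))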
In [16]:
obs_loc_df = pd.read_csv(url)
obs_loc_df.head()
In [17]:
stations = [sta.split(':')[-1] for sta in obs_loc_df['station_id']]
obs_lon = list(obs_loc_df['longitude (degree)'])
obs_lat = list(obs_loc_df['latitude (degree)'])
print(stations)
In [18]:
def time_base(start=jd_start, end=jd_stop, freq='6Min'):
    """Generate a uniform 6-min time base for model/data comparison."""
    ts_rng = pd.date_range(start=start, end=end, freq=freq)
    return pd.DataFrame(index=ts_rng)

ts = time_base(start=jd_start, end=jd_stop, freq='6Min')
print(ts.index)
print(len(ts))
In [19]:
obs_df = []
for sta in stations:
    b = coops2df(collector, sta, sos_name)
    print(b.name)
    # Limit interpolation to 10 points (10 @ 6min = 1 hour).
    obs_df.append(pd.DataFrame(
        pd.concat([b, ts], axis=1).interpolate(limit=10)['Observed Data']))
    obs_df[-1].name = b.name
In [20]:
for url in dap_urls:
    try:
        a = iris.load_cube(url, constraint)
        # Take first 20 chars for model name.
        mod_name = a.attributes['title'][0:20]
        r = a.shape
        timevar = find_timevar(a)
        lat = a.coord(axis='Y').points
        lon = a.coord(axis='X').points
        jd = timevar.units.num2date(timevar.points)
        start = timevar.units.date2num(jd_start)
        stop = timevar.units.date2num(jd_stop)
        istart = timevar.nearest_neighbour_index(start)
        istop = timevar.nearest_neighbour_index(stop)
        # Only proceed if we have data in the range requested.
        if istart != istop:
            print(url)
            nsta = len(obs_lon)
            if len(r) == 3:
                d = a[0, ...].data
                # Find the closest non-land point from a structured grid model.
                j, i, dd = find_ij(lon, lat, d, obs_lon, obs_lat)
                for n in range(nsta):
                    # Only use if model cell is within 0.1 degree of req. loc.
                    if dd[n] < 0.1:
                        arr = a[istart:istop, j[n], i[n]].data
                        c = mod_df(arr, timevar, istart, istop, mod_name, ts)
                        name = obs_df[n].name
                        obs_df[n] = pd.concat([obs_df[n], c], axis=1)
                        obs_df[n].name = name
            elif len(r) == 2:
                # Find the closest point from an unstructured grid model.
                index, dd = nearxy(lon.flatten(), lat.flatten(),
                                   obs_lon, obs_lat)
                for n in range(nsta):
                    # Only use if model cell is within 0.1 degree of req. loc.
                    if dd[n] < 0.1:
                        arr = a[istart:istop, index[n]].data
                        c = mod_df(arr, timevar, istart, istop, mod_name, ts)
                        name = obs_df[n].name
                        obs_df[n] = pd.concat([obs_df[n], c], axis=1)
                        obs_df[n].name = name
    except Exception:
        # Dataset failed to load or has no data in the requested range.
        print("No data in the requested range.")
In [21]:
for df in obs_df:
    ax = df.plot(figsize=(14, 6), title=df.name)
    ax.set_ylabel('m')