IOOS System Test: Extreme Events Theme: Coastal Inundation
This notebook is based on the IOOS System Test: Inundation notebook.
Methodology:
In [1]:
import datetime as dt
from warnings import warn

import folium
from IPython.display import HTML
import iris
from iris.exceptions import CoordinateNotFoundError, ConstraintMismatchError
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
from owslib import fes
from owslib.csw import CatalogueServiceWeb
import pandas as pd
from pyoos.collectors.coops.coops_sos import CoopsSos
import requests

from utilities import (fes_date_filter, coops2df, find_timevar, find_ij, nearxy,
                       service_urls, mod_df, get_coordinates, inline_map,
                       get_Coops_longName)
In [2]:
# Spatial and temporal search domain for the catalog/SOS queries below.
bounding_box_type = "box"
# Bounding Box [lon_min, lat_min, lon_max, lat_max]
area = {'Hawaii': [-160.0, 18.0, -154., 23.0],
        'Gulf of Maine': [-72.0, 41.0, -69.0, 43.0],
        'New York harbor region': [-75., 39., -71., 41.5],
        'Puerto Rico': [-75, 12, -55, 26],
        'East Coast': [-77, 36, -73, 38],
        'North West': [-130, 38, -121, 50]}
# Region used by the rest of the notebook.
bounding_box = area['East Coast']
#temporal range
# Window is the last 4 days ending "now" (UTC); the commented tail suggests a
# forecast extension (+3 days) was tried at some point.
jd_now = dt.datetime.utcnow()
jd_start, jd_stop = jd_now - dt.timedelta(days=4), jd_now #+ dt.timedelta(days=3)
# The strftime/strptime round-trip truncates both endpoints to the top of the
# hour so jd_start/jd_stop match the string forms exactly.
start_date = jd_start.strftime('%Y-%m-%d %H:00')
stop_date = jd_stop.strftime('%Y-%m-%d %H:00')
jd_start = dt.datetime.strptime(start_date, '%Y-%m-%d %H:%M')
jd_stop = dt.datetime.strptime(stop_date, '%Y-%m-%d %H:%M')
print start_date,'to',stop_date
In [3]:
#put the names in a dict for ease of access
# CF standard-name aliases for the current components, plus the pyoos SOS
# variable name.  Wildcarded entries are used verbatim in CSW full-text filters.
u_velocity_names = ['eastward_sea_water_velocity_assuming_no_tide',
                    'surface_eastward_sea_water_velocity',
                    '*surface_eastward_sea_water_velocity*',
                    'eastward_sea_water_velocity']
v_velocity_names = ['northward_sea_water_velocity_assuming_no_tide',
                    'surface_northward_sea_water_velocity',
                    '*surface_northward_sea_water_velocity*',
                    'northward_sea_water_velocity']
sos_name = 'Currents'
data_dict = {'currents': {"u_names": u_velocity_names,
                          "v_names": v_velocity_names,
                          "sos_name": ['currents']}}
In [4]:
# Alternative CSW endpoints kept for reference; NGDC Geoportal is the one used.
# endpoint = 'http://geo.gov.ckan.org/csw' # data.gov
# endpoint = 'https://data.noaa.gov/csw' # data.noaa.gov
# endpoint = 'http://www.nodc.noaa.gov/geoportal/csw' # nodc
endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw' # NGDC Geoportal
csw = CatalogueServiceWeb(endpoint,timeout=60)
# List the ISO queryables the GetRecords operation supports, so the
# 'apiso:AnyText' property used in the filters below can be sanity-checked.
for oper in csw.operations:
    if oper.name == 'GetRecords':
        cnstr = oper.constraints['SupportedISOQueryables']['values']
        print('\nISO Queryables:%s\n' % '\n'.join(cnstr))
In [5]:
# convert User Input into FES filters
# convert User Input into FES filters
start,stop = fes_date_filter(start_date,stop_date)
bbox = fes.BBox(bounding_box)
#use the search name to create search filter
# OR together a wildcard match on each u-component standard name.
# NOTE(review): only u_names are searched; datasets exposing only v names
# would be missed -- presumably u and v always appear together. Verify.
or_filt = fes.Or([fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % val),
                                     escapeChar='\\',wildCard='*',singleChar='?') for val in data_dict['currents']['u_names']])
# Exclude records mentioning 'Averages' (e.g. long-term mean products).
val = 'Averages'
not_filt = fes.Not([fes.PropertyIsLike(propertyname='apiso:AnyText',
                                       literal=('*%s*' % val),
                                       escapeChar='\\',
                                       wildCard='*',
                                       singleChar='?')])
filter_list = [fes.And([ bbox, start, stop, or_filt, not_filt]) ]
# connect to CSW, explore it's properties
# try request using multiple filters "and" syntax: [[filter1,filter2]]
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
print str(len(csw.records)) + " csw records found"
for rec, item in csw.records.items():
    print(item.title)
DAP URLs
In [6]:
# Pull the OPeNDAP endpoints out of the CSW records found above.
dap_urls = service_urls(csw.records)
#remove duplicates and organize
dap_urls = sorted(set(dap_urls))
print "\n".join(dap_urls)
SOS URLs
In [7]:
# Pull the SOS endpoints out of the same CSW records.
sos_urls = service_urls(csw.records,service='sos:url')
#Add known NDBC SOS
sos_urls.append("http://sdf.ndbc.noaa.gov/sos/server.php") #?request=GetCapabilities&service=SOS
sos_urls = sorted(set(sos_urls))
print "Total SOS:",len(sos_urls)
print "\n".join(sos_urls)
In [8]:
# Re-parse the window endpoints and render them as ISO-8601 UTC strings,
# the format the SOS GetObservation request expects.
_FMT_LOCAL = '%Y-%m-%d %H:%M'
_FMT_ISO = '%Y-%m-%dT%H:%M:%SZ'
start_time = dt.datetime.strptime(start_date, _FMT_LOCAL)
end_time = dt.datetime.strptime(stop_date, _FMT_LOCAL)
iso_start = start_time.strftime(_FMT_ISO)
iso_end = end_time.strftime(_FMT_ISO)
In [9]:
# Configure a pyoos collector against the CO-OPS SOS for the chosen window
# and variable set.
collector = CoopsSos()
collector.start_time = start_time
collector.end_time = end_time
collector.variables = data_dict["currents"]["sos_name"]
# Bare attribute access: in a notebook this displays the server title.
collector.server.identification.title
print collector.start_time,":", collector.end_time
ofrs = collector.server.offerings
In [10]:
print "Date: ",iso_start," to ", iso_end
box_str=','.join(str(e) for e in bounding_box)
print "Lat/Lon Box: ",box_str
# Hand-built GetObservation request against the CurrentsActive network,
# restricted to bin 1 and the bounding box; response is CSV so pandas can
# read it directly from the URL.
url = (('http://opendap.co-ops.nos.noaa.gov/ioos-dif-sos/SOS?'
        'service=SOS&request=GetObservation&version=1.0.0&'
        'observedProperty=%s&bin=1&'
        'offering=urn:ioos:network:NOAA.NOS.CO-OPS:CurrentsActive&'
        'featureOfInterest=BBOX:%s&responseFormat=text/csv') % (sos_name, box_str))
print url
obs_loc_df = pd.read_csv(url)
In [11]:
# Keep one row per station by retaining only the first depth bin.
obs_loc_df = obs_loc_df[obs_loc_df['bin (count)'] == 1]
obs_loc_df.head()
Out[11]:
In [12]:
# Station ids arrive as URNs ('urn:ioos:station:NOAA.NOS.CO-OPS:<id>');
# keep only the trailing id segment, plus plain lists of coordinates.
stations = obs_loc_df['station_id'].str.split(':').str[-1].tolist()
obs_lon = list(obs_loc_df['longitude (degree)'])
obs_lat = list(obs_loc_df['latitude (degree)'])
In [13]:
# Fetch the full time series for each station via the pyoos collector.
# obs_df collects one DataFrame per station; current_speed_df keeps a
# speed-only copy of each.
obs_df = []
current_speed_df = []
sta_names = []
sta_failed = []
for sta in stations:
    try:
        df = coops2df(collector, sta, sos_name, iso_start, iso_end)
    except Exception as e:
        # Best-effort: skip stations that fail to download/parse.
        print "Error" + str(e)
        continue
    # coops2df attaches the station long name as df.name.
    name = df.name
    sta_names.append(name)
#     if df.empty:
#         sta_failed.append(name)
#         df = DataFrame(np.arange(len(ts)) * np.NaN, index=ts.index, columns=['Observed Data'])
#         df.name = name
    obs_df.append(df)
    obs_df[-1].name = name
    # Create a separate dataframe for only sea water speed
    current_speed_df.append(pd.DataFrame(df['sea_water_speed (cm/s)']))
    current_speed_df[-1].name = name
In [14]:
# For each station, build a 2-D (depth bin x time) matrix of current speed
# and render it as a pseudocolor plot.
for df in obs_df:
    num_bins = df['number_of_bins'][0]
    depth = df['bin_distance (m)'].values[0:num_bins]
    # Timestamps come from the bin-1 rows; strip fractional seconds before parsing.
    time = df.loc[df['bin (count)'] == (1)].index.values
    xdates = [dt.datetime.strptime(str(date).split('.')[0], '%Y-%m-%dT%H:%M:%S') for date in time]
    dates = mdates.date2num(xdates)
    # Extract data from each depth bin to create a 2D matrix of current speeds
    # (depth x time).  Floor division keeps the column count an int under both
    # Python 2 and Python 3 (the original relied on Python 2's integer '/').
    data = np.zeros((num_bins, len(df.index) // num_bins))
    for n in range(num_bins):
        data[n, :] = df.loc[df['bin (count)'] == (n + 1), 'sea_water_speed (cm/s)'].values
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d/%Y'))
    ax.xaxis.set_major_locator(mdates.DayLocator())
    im = ax.pcolor(dates, depth, data, vmin=abs(data).min(), vmax=abs(data).max())
    cb = fig.colorbar(im, ax=ax)
    # BUG FIX: set_ylabel/set_title are methods. The original assigned to them
    # (ax.set_ylabel = '...'), which silently replaced the methods and left the
    # axes unlabeled and untitled. Call them instead.
    ax.set_ylabel('bin_distance (m)')
    ax.set_title(df.name)
In [15]:
# Plot bin-1 current speed (blue, left axis) and direction (green, right
# axis) for each station on a shared time axis.
for df in obs_df:
    fig, axes = plt.subplots(1, 1, figsize=(20, 6))
    # Only plot the first bin
    axes = df.loc[df['bin (count)'] == (1), 'sea_water_speed (cm/s)'].plot(title=df.name, legend=False, color='b')
    plt.setp(axes.lines[0], linewidth=1.0, zorder=1)
    axes.set_ylabel('Current Speed (cm/s)')
    for tl in axes.get_yticklabels():
        tl.set_color('b')
    axes.yaxis.label.set_color('blue')
    # Direction goes on a twin y-axis sharing the time axis.
    ax2 = axes.twinx()
    axes2 = df.loc[df['bin (count)'] == (1), 'direction_of_sea_water_velocity (degree)'].plot(title=df.name, legend=False, ax=ax2, color='g')
    # BUG FIX: the original called plt.setp(axes.lines[0], ...) here a second
    # time, restyling the speed line again; the intent was clearly to style the
    # direction line that was just plotted on axes2.
    plt.setp(axes2.lines[0], linewidth=1.0, zorder=1)
    axes2.set_ylabel('Current Direction (degrees)')
    for tl in ax2.get_yticklabels():
        tl.set_color('g')
    axes2.yaxis.label.set_color('green')
In [16]:
# Plot bin-1 sea water temperature for each station.
for df in obs_df:
    # BUG FIX: bare figure() is undefined unless the notebook was run under
    # %pylab; use the imported pyplot namespace explicitly.
    plt.figure()
    # Only plot the first bin
    ax = df.loc[df['bin (count)'] == (1), 'sea_water_temperature (C)'].plot(figsize=(14, 6), title=df.name, legend=False)
    plt.setp(ax.lines[0], linewidth=1.0, zorder=1)
    ax.legend()
    ax.set_ylabel('sea_water_temperature (C)')
In [17]:
# Cube-level constraints that select model variables by CF standard_name
# against the alias lists in data_dict.  Named functions instead of the
# original's twice-reassigned lambda (each Constraint captured its own
# function object, so behavior is identical).
def _is_u_cube(cube):
    """Return True if the cube is a u (eastward) velocity variable."""
    return cube.standard_name in data_dict['currents']['u_names']


def _is_v_cube(cube):
    """Return True if the cube is a v (northward) velocity variable."""
    return cube.standard_name in data_dict['currents']['v_names']


u_constraint = iris.Constraint(cube_func=_is_u_cube)
v_constraint = iris.Constraint(cube_func=_is_v_cube)
In [18]:
# # Create time index for model DataFrame
ts_rng = pd.date_range(start=jd_start, end=jd_stop, freq='6Min')
ts = pd.DataFrame(index=ts_rng)
# Create list of model DataFrames for each station
model_df = []
for df in obs_df:
    model_df.append(pd.DataFrame(index=ts.index))
    model_df[-1].name = df.name
model_lat = []
model_lon = []
# Use only data within 1 degrees.
max_dist = 1
# Use only data where the standard deviation of the time series exceeds 0.01 m (1 cm).
# This eliminates flat line model time series that come from land points that should have had missing values.
min_var = 0.01
for url in dap_urls:
    if 'hfrnet' in url:
        print 'Skipping HF Radar Obs Data'
        continue
    # NOTE(review): only URLs containing 'NECOFS' are processed at all; every
    # other DAP endpoint found by the catalog search is silently skipped.
    # Confirm this narrowing is intentional.
    if 'NECOFS' in url:
        try:
            print 'Attemping to load {0}'.format(url)
            u = iris.load_cube(url, u_constraint)
            v = iris.load_cube(url, v_constraint)
            # take first 20 chars for model name
            mod_name = u.attributes['title'][0:20]
            r = u.shape
            timevar = find_timevar(u)
            lat = u.coord(axis='Y').points
            lon = u.coord(axis='X').points
            jd = timevar.units.num2date(timevar.points)
            # Nearest time indices bracketing the requested window.
            start = timevar.units.date2num(jd_start)
            istart = timevar.nearest_neighbour_index(start)
            stop = timevar.units.date2num(jd_stop)
            istop = timevar.nearest_neighbour_index(stop)
            # Only proceed if we have data in the range requested.
            if istart != istop:
                nsta = len(stations)
                if len(r) == 4:
                    #HYCOM and ROMS
                    # Dimensions are time, elevation, lat, lon
                    d = u[0, 0:1, :, :].data
                    # Find the closest non-land point from a structured grid model.
                    if len(lon.shape) == 1:
                        lon, lat = np.meshgrid(lon, lat)
                    j, i, dd = find_ij(lon, lat, d, obs_lon, obs_lat)
                    for n in range(nsta):
                        # Only use if model cell is within max_dist of station
                        if dd[n] <= max_dist:
                            u_arr = u[istart:istop, 0:1, j[n], i[n]].data
                            v_arr = v[istart:istop, 0:1, j[n], i[n]].data
                            # Speed from components.
                            # NOTE(review): unlike the 3-D branch below, no
                            # m/s -> cm/s conversion is applied here; confirm
                            # the 4-D sources are already in cm/s.
                            arr = np.sqrt(u_arr**2 + v_arr**2)
                            if u_arr.std() >= min_var:
                                c = mod_df(arr, timevar, istart, istop,
                                           mod_name, ts)
                                name = obs_df[n].name
                                model_df[n] = pd.concat([model_df[n], c], axis=1)
                                model_df[n].name = name
                            else:
                                print 'min_var error'
                        else:
                            print 'Max dist error'
                if len(r) == 3:
                    #NECOFS
                    # NOTE(review): label says 'Structured grid model' but the
                    # branch comment says NECOFS (unstructured); the nearxy
                    # lookup below treats the grid as a flat point cloud.
                    print('[Structured grid model]:', url)
                    d = u[0, 0:1, :].data
                    # Find the closest non-land point from a structured grid model.
                    index, dd = nearxy(lon.flatten(), lat.flatten(),
                                       obs_lon, obs_lat)
                    # Keep the lat lon of the grid point
                    model_lat = lat[index].tolist()
                    model_lon = lon[index].tolist()
                    for n in range(nsta):
                        # Only use if model cell is within max_dist of station
                        if dd[n] <= max_dist:
                            u_arr = u[istart:istop, 0:1, index[n]].data
                            v_arr = v[istart:istop, 0:1, index[n]].data
                            # Model data is in m/s so convert to cm/s
                            arr = np.sqrt((u_arr*100.0)**2 + (v_arr*100.0)**2)
                            if u_arr.std() >= min_var:
                                c = mod_df(arr, timevar, istart, istop,
                                           mod_name, ts)
                                name = obs_df[n].name
                                model_df[n] = pd.concat([model_df[n], c], axis=1)
                                model_df[n].name = name
                            else:
                                print 'min_var error'
                        else:
                            print 'Max dist error'
                elif len(r) == 2:
                    print('[Unstructured grid model]:', url)
                    # Find the closest point from an unstructured grid model.
                    index, dd = nearxy(lon.flatten(), lat.flatten(),
                                       obs_lon, obs_lat)
                    for n in range(nsta):
                        # Only use if model cell is within max_dist of station
                        if dd[n] <= max_dist:
                            arr = u[istart:istop, index[n]].data
                            print arr
                            if arr.std() >= min_var:
                                c = mod_df(arr, timevar, istart, istop,
                                           mod_name, ts)
                                name = obs_df[n].name
                                model_df[n] = pd.concat([model_df[n], c], axis=1)
                                model_df[n].name = name
                            else:
                                print 'min_var error'
                        else:
                            print 'Max dist error'
                elif len(r) == 1:
                    print('[Data]:', url)
            else:
                print 'No data in range'
        # Skip datasets that fail to load or lack the expected coordinates.
        except (ValueError, RuntimeError, CoordinateNotFoundError,
                ConstraintMismatchError) as e:
            warn("\n%s\n" % e)
            pass
In [19]:
#find center of bounding box
lat_center = abs(bounding_box[3] - bounding_box[1])/2 + bounding_box[1]
lon_center = abs(bounding_box[0]-bounding_box[2])/2 + bounding_box[0]
# Map of observation stations (markers) and matched model grid points
# (red circles).  Uses the legacy folium 0.1-era API (simple_marker, etc.).
m = folium.Map(location=[lat_center, lon_center], zoom_start=7)
for n in range(len(stations)):
    #get the station data from the sos end point
    name = stations[n]
    longname = obs_df[n].name
    lat = obs_lat[n]
    lon = obs_lon[n]
    popup_string = ('<b>Station:</b><br>'+ longname)
    m.simple_marker([lat, lon], popup=popup_string)
    popup_string = ('<b>Model Grid Point</b>')
    # NOTE(review): model_lat/model_lon are only filled by the 3-D NECOFS
    # branch above; if that branch never ran this indexing raises IndexError.
    m.circle_marker([model_lat[n], model_lon[n]], popup=popup_string, fill_color='#ff0000', radius=5000, line_color='#ff0000')
m.line(get_coordinates(bounding_box, bounding_box_type), line_color='#FF0000', line_weight=5)
inline_map(m)
Out[19]:
In [20]:
# Overlay, per station, all matched model series (grey) with the observed
# bin-1 current speed.
# for df in model_df:
for n in range(len(obs_df)):
    ax = model_df[n].plot(figsize=(14, 6), title=model_df[n].name, legend=False)
    plt.setp(ax.lines[0], linewidth=3, color='0.7', zorder=1)
    ax.legend()
#     ax.set_ylabel('Current speed m/s')
    # Overlay the obs data plot the first bin
    ax = obs_df[n].loc[obs_df[n]['bin (count)']==(1),'sea_water_speed (cm/s)'].plot(figsize=(14, 6), legend=False)
    # NOTE(review): ax.lines[1] assumes the model frame contributed at least
    # one line; an empty model_df[n] would make this raise IndexError.
    plt.setp(ax.lines[1], linewidth=1.0, zorder=1)
    ax.legend()
    ax.set_ylabel('Current Speed (cm/s)')
plt.show()