Extract ESTOFS water levels using NetCDF4-Python and analyze/visualize with Pandas


In [1]:
# Plot forecast water levels from NECOFS model from list of lon,lat locations
# (uses the nearest point, no interpolation)
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import netCDF4
import datetime as dt
import pandas as pd
from StringIO import StringIO
import matplotlib.tri as Tri

In [2]:
#NECOFS MassBay grid
#url='http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_FVCOM_OCEAN_MASSBAY_FORECAST.nc'
# GOM3 Grid
#model='GOM3'
url='http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc'
#ESTOFS
url='http://geoport-dev.whoi.edu/thredds/dodsC/estofs/atlantic'

In [3]:
# open NECOFS remote OPeNDAP dataset 
nc=netCDF4.Dataset(url)
ncv = nc.variables
print nc.title


ESTOFS Storm Surge Model - Atlantic - v1.0.0 - NOAA - NCEP - ADCIRC

In [4]:
# Get lon,lat coordinates for nodes (depth)
lat = ncv['y'][:]
lon = ncv['x'][:]
h = ncv['depth'][:]
# Get Connectivity array
nv = ncv['element'][:].T - 1

In [5]:
tri = Tri.Triangulation(lon,lat, triangles=nv.T)

In [22]:
# Plot model domain
plt.figure(figsize=(18,10))
plt.subplot(111,adjustable='box',aspect=(1.0/np.cos(lat.mean()*np.pi/180.0)))
plt.tricontourf(tri, -h,50,shading='flat');



In [7]:
# Enter desired (Station, Lat, Lon) values here:
x = '''
Station, Lat, Lon
Boston,             42.368186, -71.047984
Scituate Harbor,    42.199447, -70.720090
Scituate Beach,     42.209973, -70.724523
Falmouth Harbor,    41.541575, -70.608020
Marion,             41.689008, -70.746576
Marshfield,         42.108480, -70.648691
Provincetown,       42.042745, -70.171180
Sandwich,           41.767990, -70.466219
Hampton Bay,        42.900103, -70.818510
Gloucester,         42.610253, -70.660570
'''

In [8]:
# Create a Pandas DataFrame
obs=pd.read_csv(StringIO(x.strip()), sep=",\s*",index_col='Station', engine='python')

In [9]:
obs


Out[9]:
Lat Lon
Station
Boston 42.368186 -71.047984
Scituate Harbor 42.199447 -70.720090
Scituate Beach 42.209973 -70.724523
Falmouth Harbor 41.541575 -70.608020
Marion 41.689008 -70.746576
Marshfield 42.108480 -70.648691
Provincetown 42.042745 -70.171180
Sandwich 41.767990 -70.466219
Hampton Bay 42.900103 -70.818510
Gloucester 42.610253 -70.660570

In [10]:
# find the indices of the points in (x,y) closest to the points in (xi,yi)
def nearxy(x,y,xi,yi):
    ind = np.ones(len(xi),dtype=int)
    for i in xrange(len(xi)):
        dist = np.sqrt((x-xi[i])**2+(y-yi[i])**2)
        ind[i] = dist.argmin()
    return ind

In [11]:
# find closest NECOFS nodes to station locations
obs['0-Based Index'] = nearxy(ncv['x'][:],ncv['y'][:],obs['Lon'],obs['Lat'])
obs


Out[11]:
Lat Lon 0-Based Index
Station
Boston 42.368186 -71.047984 243466
Scituate Harbor 42.199447 -70.720090 242945
Scituate Beach 42.209973 -70.724523 243002
Falmouth Harbor 41.541575 -70.608020 241269
Marion 41.689008 -70.746576 241770
Marshfield 42.108480 -70.648691 242771
Provincetown 42.042745 -70.171180 242590
Sandwich 41.767990 -70.466219 241962
Hampton Bay 42.900103 -70.818510 244860
Gloucester 42.610253 -70.660570 244036

In [12]:
# Desired time for snapshot
# ....right now (or some number of hours from now) ...
start = dt.datetime.utcnow() + dt.timedelta(hours=-72)
stop = dt.datetime.utcnow() + dt.timedelta(hours=+72)
# ... or specific time (UTC)
#start = dt.datetime(2014,9,9,0,0,0) + dt.timedelta(hours=-1)

In [13]:
timev = ncv['time']
istart = netCDF4.date2index(start,timev,select='nearest')
istop = netCDF4.date2index(stop,timev,select='nearest')
jd = netCDF4.num2date(timev[istart:istop],timev.units)

In [14]:
# get all time steps of water level from each station
nsta = len(obs)
z =  np.ones((len(jd),nsta))
for i in range(nsta):
    z[:,i] = ncv['zeta'][istart:istop,obs['0-Based Index'][i]]

In [15]:
# make a DataFrame out of the interpolated time series at each location
zvals=pd.DataFrame(z,index=jd,columns=obs.index)

In [16]:
# list out a few values
zvals.head()


Out[16]:
Station Boston Scituate Harbor Scituate Beach Falmouth Harbor Marion Marshfield Provincetown Sandwich Hampton Bay Gloucester
2015-10-16 21:00:00 0.073420 0.115456 0.109958 -0.157214 -0.608952 0.134633 0.283680 0.250731 -0.081096 0.043873
2015-10-16 22:00:00 -0.738094 -0.651503 -0.659418 -0.228015 -0.624685 -0.625568 -0.455452 -0.532430 -0.793677 -0.725401
2015-10-16 23:00:00 -1.418356 -1.340293 -1.340159 -0.296796 -0.329881 -1.359304 -1.350628 -1.433547 -1.326128 -1.320291
2015-10-17 00:00:00 -1.603425 -1.590671 -1.563854 -0.289037 -0.039774 -1.609673 -1.732387 -1.771660 -1.553141 -1.530750
2015-10-17 01:00:00 -1.483670 -1.418544 -1.413577 -0.173519 0.089801 -1.413193 -1.409485 -1.373876 -1.384603 -1.394331

In [17]:
# plotting at DataFrame is easy!
ax = zvals.plot(figsize=(16,6),grid=True,title=('Forecast Water Level from %s Forecast' % nc.title),legend=False);
# read units from dataset for ylabel
plt.ylabel(ncv['zeta'].units)
# plotting the legend outside the axis is a bit tricky
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5));



In [18]:
# make a new DataFrame of maximum water levels at all stations
b=pd.DataFrame(zvals.idxmax(),columns=['time of max water level (UTC)'])
# create heading for new column containing max water level
zmax_heading='zmax (%s)' % ncv['zeta'].units
# Add new column to DataFrame
b[zmax_heading]=zvals.max()

In [19]:
b


Out[19]:
time of max water level (UTC) zmax (m)
Station
Boston 2015-10-17 18:00:00 1.340194
Scituate Harbor 2015-10-17 18:00:00 1.364624
Scituate Beach 2015-10-17 18:00:00 1.358795
Falmouth Harbor 2015-10-22 20:00:00 0.285599
Marion 2015-10-22 20:00:00 0.688453
Marshfield 2015-10-17 18:00:00 1.390489
Provincetown 2015-10-17 18:00:00 1.480635
Sandwich 2015-10-17 18:00:00 1.514342
Hampton Bay 2015-10-19 20:00:00 1.289487
Gloucester 2015-10-17 18:00:00 1.311621

In [20]:
zvals.describe()


Out[20]:
Boston Scituate Harbor Scituate Beach Falmouth Harbor Marion Marshfield Provincetown Sandwich Hampton Bay Gloucester
count 138.000000 138.000000 138.000000 138.000000 138.000000 138.000000 138.000000 138.000000 138.000000 138.000000
mean -0.129733 -0.123426 -0.121282 -0.089558 -0.082652 -0.120893 -0.112812 -0.118678 -0.123419 -0.119666
std 0.928489 0.916941 0.913691 0.172090 0.329215 0.921806 0.956773 0.972588 0.911033 0.901188
min -1.603425 -1.602071 -1.585171 -0.537982 -0.730660 -1.615116 -1.732387 -1.771660 -1.578518 -1.559164
25% -1.067143 -1.041524 -1.039178 -0.220131 -0.312825 -1.036162 -1.029204 -1.033618 -1.034770 -1.030328
50% -0.059158 -0.076557 -0.068495 -0.110418 -0.086708 -0.069131 -0.135028 -0.106642 -0.078302 -0.044668
75% 0.711722 0.728458 0.734624 0.050448 0.146584 0.706947 0.697725 0.688555 0.743467 0.716298
max 1.340194 1.364624 1.358795 0.285599 0.688453 1.390489 1.480635 1.514342 1.289487 1.311621

In [20]:


In [20]: