Extract ESTOFS water levels using NetCDF4-Python and analyze/visualize with Pandas


In [1]:
# Plot forecast water levels from NECOFS model from list of lon,lat locations
# (uses the nearest point, no interpolation)
from pylab import *
import netCDF4
import datetime as dt
import pandas as pd
from StringIO import StringIO
import matplotlib.tri as Tri

In [2]:
#NECOFS MassBay grid
#url='http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_FVCOM_OCEAN_MASSBAY_FORECAST.nc'
# GOM3 Grid
#model='GOM3'
url='http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc'
#ESTOFS
url='http://geoport-dev.whoi.edu/thredds/dodsC/estofs/atlantic'

In [3]:
# open NECOFS remote OPeNDAP dataset 
nc=netCDF4.Dataset(url)
ncv = nc.variables
print nc.title


ESTOFS Storm Surge Model ATLANTIC V1.0.0 NCEP - ADCIRC

In [4]:
# Get lon,lat coordinates for nodes (depth)
lat = ncv['y'][:]
lon = ncv['x'][:]
h = ncv['depth'][:]
# Get Connectivity array
nv = ncv['element'][:].T - 1

In [5]:
tri = Tri.Triangulation(lon,lat, triangles=nv.T)

In [6]:
# Plot model domain
figure(figsize=(18,10))
subplot(111,aspect=(1.0/cos(mean(lat)*pi/180.0)))
tricontourf(tri, -h,50,shading='faceted')


Out[6]:
<matplotlib.tri.tricontour.TriContourSet instance at 0x311c7e8>

In [7]:
def dms2dd(d,m,s):
    return d+(m+s/60.)/60.

In [8]:
dms2dd(41,33,15.7)


Out[8]:
41.55436111111111

In [9]:
-dms2dd(70,30,20.2)


Out[9]:
-70.50561111111111

In [10]:
x = '''
Station, Lat, Lon
Falmouth Harbor,    41.541575, -70.608020
Sage Lot Pond, 41.554361, -70.505611
'''

In [11]:
x = '''
Station, Lat, Lon
Boston,             42.368186, -71.047984
Carolyn Seep Spot,    39.8083, -69.5917
Falmouth Harbor,  41.541575, -70.608020
'''

In [12]:
# Enter desired (Station, Lat, Lon) values here:
x = '''
Station, Lat, Lon
Boston,             42.368186, -71.047984
Scituate Harbor,    42.199447, -70.720090
Scituate Beach,     42.209973, -70.724523
Falmouth Harbor,    41.541575, -70.608020
Marion,             41.689008, -70.746576
Marshfield,         42.108480, -70.648691
Provincetown,       42.042745, -70.171180
Sandwich,           41.767990, -70.466219
Hampton Bay,        42.900103, -70.818510
Gloucester,         42.610253, -70.660570
'''

In [13]:
# Create a Pandas DataFrame
obs=pd.read_csv(StringIO(x.strip()), sep=",\s*",index_col='Station')

In [14]:
obs


Out[14]:
Lat Lon
Station
Boston 42.368186 -71.047984
Scituate Harbor 42.199447 -70.720090
Scituate Beach 42.209973 -70.724523
Falmouth Harbor 41.541575 -70.608020
Marion 41.689008 -70.746576
Marshfield 42.108480 -70.648691
Provincetown 42.042745 -70.171180
Sandwich 41.767990 -70.466219
Hampton Bay 42.900103 -70.818510
Gloucester 42.610253 -70.660570

10 rows × 2 columns


In [15]:
# find the indices of the points in (x,y) closest to the points in (xi,yi)
def nearxy(x,y,xi,yi):
    ind=ones(len(xi),dtype=int)
    for i in arange(len(xi)):
        dist=sqrt((x-xi[i])**2+(y-yi[i])**2)
        ind[i]=dist.argmin()
    return ind

In [16]:
# find closest NECOFS nodes to station locations
obs['0-Based Index'] = nearxy(ncv['x'][:],ncv['y'][:],obs['Lon'],obs['Lat'])
obs


Out[16]:
Lat Lon 0-Based Index
Station
Boston 42.368186 -71.047984 243466
Scituate Harbor 42.199447 -70.720090 242945
Scituate Beach 42.209973 -70.724523 243002
Falmouth Harbor 41.541575 -70.608020 241269
Marion 41.689008 -70.746576 241770
Marshfield 42.108480 -70.648691 242771
Provincetown 42.042745 -70.171180 242590
Sandwich 41.767990 -70.466219 241962
Hampton Bay 42.900103 -70.818510 244860
Gloucester 42.610253 -70.660570 244036

10 rows × 3 columns


In [17]:
# get time values and convert to datetime objects
times = ncv['time']
jd = netCDF4.num2date(times[:],times.units)

In [18]:
# get all time steps of water level from each station
nsta=len(obs)
z=ones((len(jd),nsta))
for i in range(nsta):
    z[:,i] = ncv['zeta'][:,obs['0-Based Index'][i]]

In [19]:
# make a DataFrame out of the interpolated time series at each location
zvals=pd.DataFrame(z,index=jd,columns=obs.index)

In [20]:
# list out a few values
zvals.head()


Out[20]:
Station Boston Scituate Harbor Scituate Beach Falmouth Harbor Marion Marshfield Provincetown Sandwich Hampton Bay Gloucester
2014-03-01 19:00:00 0.198002 0.203072 0.195180 -0.107492 -0.658724 0.224922 0.417438 0.367310 0.042653 0.121332
2014-03-01 20:00:00 -0.731439 -0.679839 -0.698904 -0.165489 -0.596432 -0.675422 -0.499667 -0.574952 -0.862382 -0.760289
2014-03-01 21:00:00 -1.619715 -1.567605 -1.556519 -0.213468 -0.244892 -1.570220 -1.515124 -1.637617 -1.502166 -1.512689
2014-03-01 22:00:00 -1.787492 -1.789059 -1.765949 -0.178055 0.149842 -1.825946 -1.984561 -2.063187 -1.710904 -1.715951
2014-03-01 23:00:00 -1.604457 -1.574856 -1.572709 0.021960 0.387697 -1.571055 -1.615170 -1.603416 -1.549846 -1.528072

5 rows × 10 columns


In [21]:
# plotting at DataFrame is easy!
ax=zvals.plot(figsize=(16,6),grid=True,title=('Forecast Water Level from %s Forecast' % nc.title),legend=False);
# read units from dataset for ylabel
ylabel(ncv['zeta'].units)
# plotting the legend outside the axis is a bit tricky
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))


Out[21]:
<matplotlib.legend.Legend at 0x3647ad0>

In [22]:
# make a new DataFrame of maximum water levels at all stations
b=pd.DataFrame(zvals.idxmax(),columns=['time of max water level (UTC)'])
# create heading for new column containing max water level
zmax_heading='zmax (%s)' % ncv['zeta'].units
# Add new column to DataFrame
b[zmax_heading]=zvals.max()

In [23]:
b


Out[23]:
time of max water level (UTC) zmax (m)
Station
Boston 2014-03-02 16:00:00 1.700322
Scituate Harbor 2014-03-02 04:00:00 1.688818
Scituate Beach 2014-03-02 04:00:00 1.673804
Falmouth Harbor 2014-03-02 01:00:00 0.459704
Marion 2014-03-02 01:00:00 1.000175
Marshfield 2014-03-02 04:00:00 1.716233
Provincetown 2014-03-02 04:00:00 1.864590
Sandwich 2014-03-02 04:00:00 1.882568
Hampton Bay 2014-03-02 04:00:00 1.614229
Gloucester 2014-03-02 04:00:00 1.632494

10 rows × 2 columns


In [24]:
zvals.describe()


Out[24]:
Station Boston Scituate Harbor Scituate Beach Falmouth Harbor Marion Marshfield Provincetown Sandwich Hampton Bay Gloucester
count 306.000000 306.000000 306.000000 306.000000 306.000000 306.000000 306.000000 306.000000 306.000000 306.000000
mean -0.022654 -0.015337 -0.016184 -0.002502 -0.035572 -0.012492 -0.014931 0.002208 -0.025705 -0.023169
std 0.968731 0.961313 0.957816 0.175327 0.361950 0.965627 1.001372 1.019241 0.950835 0.940929
min -1.937293 -1.920853 -1.894133 -0.426884 -0.939393 -1.954278 -2.106213 -2.136763 -1.870852 -1.851696
25% -0.903872 -0.890297 -0.898304 -0.126966 -0.273862 -0.894925 -0.892208 -0.896025 -0.897439 -0.888660
50% 0.006759 -0.001119 0.007965 0.002416 -0.041165 -0.001131 0.002490 0.057178 0.033499 0.023986
75% 0.832179 0.824625 0.827631 0.121912 0.199332 0.824982 0.815585 0.826172 0.818883 0.814383
max 1.700322 1.688818 1.673804 0.459704 1.000175 1.716233 1.864590 1.882568 1.614229 1.632494

8 rows × 10 columns


In [23]: