Extract NECOFS water levels using NetCDF4-Python and analyze/visualize with Pandas


In [45]:
# Plot forecast water levels from NECOFS model from list of lon,lat locations
# (uses the nearest point, no interpolation)
from pylab import *
import netCDF4
import datetime as dt
import pandas as pd
from StringIO import StringIO

In [46]:
#NECOFS MassBay grid forecast
model='Massbay'
url='http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_FVCOM_OCEAN_MASSBAY_FORECAST.nc'

In [47]:
#NECOFS GOM3 grid forecast
model='GOM3'
url='http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc'

In [48]:
# FVCOM GOM3v11 NECOFS Forecast Archive (Apr 01,2010-Nov 10,2011)
model='GOM3'
url='http://www.smast.umassd.edu:8080/thredds/dodsC/fvcom/archives/necofs_gom3v11'

In [49]:
# FVCOM GOM3v12 NECOFS Forecast Archive (Nov 11,2011-May 08,2013)
model='GOM3'
url='http://www.smast.umassd.edu:8080/thredds/dodsC/fvcom/archives/necofs_gom3v12'

In [50]:
# FVCOM GOM3v13 NECOFS Forecast Archive (May 09,2013-Jul 31,2013)
model='GOM3'
url='http://www.smast.umassd.edu:8080/thredds/dodsC/fvcom/archives/necofs_gom3v13'

In [51]:
# Enter desired (Station, Lat, Lon) values here:
x = '''
Station, Lat, Lon
Boston,             42.368186, -71.047984
Scituate Harbor,    42.199447, -70.720090
Scituate Beach,     42.209973, -70.724523
Falmouth Harbor,    41.541575, -70.608020
Marion,             41.689008, -70.746576
Marshfield,         42.108480, -70.648691
Provincetown,       42.042745, -70.171180
Sandwich,           41.767990, -70.466219
Hampton Bay,        42.900103, -70.818510
Gloucester,         42.610253, -70.660570
'''

In [52]:
def dms2dd(d,m,s):
    return d+(m+s/60.)/60.

In [53]:
dms2dd(41,33,15.7)


Out[53]:
41.55436111111111

In [54]:
-dms2dd(70,30,20.2)


Out[54]:
-70.50561111111111

In [55]:
x = '''
Station, Lat, Lon
Falmouth Harbor,    41.541575, -70.608020
Sage Lot Pond, 41.554361, -70.505611
'''

In [56]:
x = '''
Station, Lat, Lon
Boston,             42.368186, -71.047984
Carolyn Seep Spot,    39.8083, -69.5917
Falmouth Harbor,  41.541575, -70.608020
'''

In [57]:
# Create a Pandas DataFrame
obs=pd.read_csv(StringIO(x.strip()), sep=",\s*",index_col='Station')

In [58]:
obs


Out[58]:
Lat Lon
Station
Boston 42.368186 -71.047984
Carolyn Seep Spot 39.808300 -69.591700
Falmouth Harbor 41.541575 -70.608020

In [59]:
# find the indices of the points in (x,y) closest to the points in (xi,yi)
def nearxy(x,y,xi,yi):
    ind=ones(len(xi),dtype=int)
    for i in arange(len(xi)):
        dist=sqrt((x-xi[i])**2+(y-yi[i])**2)
        ind[i]=dist.argmin()
    return ind

In [60]:
# open NECOFS remote OPeNDAP dataset 
nc=netCDF4.Dataset(url).variables

In [61]:
# find closest NECOFS nodes to station locations
obs['0-Based Index'] = nearxy(nc['lon'][:],nc['lat'][:],obs['Lon'],obs['Lat'])
obs


Out[61]:
Lat Lon 0-Based Index
Station
Boston 42.368186 -71.047984 49907
Carolyn Seep Spot 39.808300 -69.591700 5074
Falmouth Harbor 41.541575 -70.608020 43593

In [62]:
# get time values and convert to datetime objects
time_var = nc['time']

# all time steps
istart = 0
istop = len(time_var)-1

# near now
start = dt.datetime.utcnow() - dt.timedelta(days=2)
istart = netCDF4.date2index(start,time_var,select='nearest')
stop = dt.datetime.utcnow() + dt.timedelta(days=2)
istop = netCDF4.date2index(start,time_var,select='nearest')

# specific times (UTC)

In [63]:
start = dt.datetime(2012,2,16,0,0,0)
istart = netCDF4.date2index(start,time_var,select='nearest')
stop = dt.datetime(2012,2,18,0,0,0)
istop = netCDF4.date2index(stop,time_var,select='nearest')

In [64]:
start = dt.datetime(2013,6,2,0,0,0)
istart = netCDF4.date2index(start,time_var,select='nearest')
stop = dt.datetime(2013,6,4,0,0,0)
istop = netCDF4.date2index(stop,time_var,select='nearest')

In [65]:
# get all time steps of water level from each station
nsta=len(obs)
ii=range(istart,istop)
z=ones((len(ii),nsta))
for i in range(nsta):
    z[:,i] = nc['zeta'][ii,obs['0-Based Index'][i]]

In [66]:
jd = netCDF4.num2date(time_var[ii],time_var.units)

In [67]:
# make a DataFrame out of the interpolated time series at each location
zvals=pd.DataFrame(z,index=jd,columns=obs.index)

In [68]:
# list out a few values
zvals.head()


Out[68]:
Station Boston Carolyn Seep Spot Falmouth Harbor
2013-06-02 00:00:00 1.136924 -0.298893 0.396693
2013-06-02 01:01:53 0.611228 -0.316746 0.298929
2013-06-02 01:58:08 -0.079664 -0.233548 0.158852
2013-06-02 03:00:00 -0.607635 -0.082271 0.026821
2013-06-02 04:01:53 -1.003246 0.120820 0.003202

In [69]:
# plotting at DataFrame is easy!
ax=zvals.plot(figsize=(16,4),grid=True,title=('NECOFS Forecast Water Level from %s Forecast' % model),legend=False);
# read units from dataset for ylabel
ylabel(nc['zeta'].units)
# plotting the legend outside the axis is a bit tricky
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5));



In [70]:
# make a new DataFrame of maximum water levels at all stations
b=pd.DataFrame(zvals.idxmax(),columns=['time of max water level (UTC)'])
# create heading for new column containing max water level
zmax_heading='zmax (%s)' % nc['zeta'].units
# Add new column to DataFrame
b[zmax_heading]=zvals.max()

In [71]:
b


Out[71]:
time of max water level (UTC) zmax (meters)
Station
Boston 2013-06-02 10:58:08 1.683046
Carolyn Seep Spot 2013-06-03 19:58:08 0.548166
Falmouth Harbor 2013-06-03 22:01:53 0.540688