Extract NECOFS water levels using NetCDF4-Python and analyze/visualize with Pandas


In [1]:
# Plot forecast water levels from NECOFS model from list of lon,lat locations
# (uses the nearest point, no interpolation)
from pylab import *
import netCDF4
import datetime as dt
import pandas as pd
from StringIO import StringIO
%matplotlib inline

In [2]:
#NECOFS MassBay grid
model='Massbay'
url='http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_FVCOM_OCEAN_MASSBAY_FORECAST.nc'
# GOM3 Grid
#model='GOM3'
#url='http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc'

In [3]:
# Enter desired (Station, Lat, Lon) values here:
x = '''
Station, Lat, Lon
Boston,             42.368186, -71.047984
Scituate Harbor,    42.199447, -70.720090
Scituate Beach,     42.209973, -70.724523
Falmouth Harbor,    41.541575, -70.608020
Marion,             41.689008, -70.746576
Marshfield,         42.108480, -70.648691
Provincetown,       42.042745, -70.171180
Sandwich,           41.767990, -70.466219
Hampton Bay,        42.900103, -70.818510
Gloucester,         42.610253, -70.660570
'''

In [4]:
# Create a Pandas DataFrame
obs=pd.read_csv(StringIO(x.strip()), sep=",\s*",index_col='Station')


/home/usgs/anaconda/lib/python2.7/site-packages/pandas/io/parsers.py:624: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support regex separators; you can avoid this warning by specifying engine='python'.
  ParserWarning)

In [5]:
obs


Out[5]:
Lat Lon
Station
Boston 42.368186 -71.047984
Scituate Harbor 42.199447 -70.720090
Scituate Beach 42.209973 -70.724523
Falmouth Harbor 41.541575 -70.608020
Marion 41.689008 -70.746576
Marshfield 42.108480 -70.648691
Provincetown 42.042745 -70.171180
Sandwich 41.767990 -70.466219
Hampton Bay 42.900103 -70.818510
Gloucester 42.610253 -70.660570

In [6]:
# find the indices of the points in (x,y) closest to the points in (xi,yi)
def nearxy(x,y,xi,yi):
    ind=ones(len(xi),dtype=int)
    for i in arange(len(xi)):
        dist=sqrt((x-xi[i])**2+(y-yi[i])**2)
        ind[i]=dist.argmin()
    return ind

In [7]:
# open NECOFS remote OPeNDAP dataset 
nc=netCDF4.Dataset(url).variables

In [8]:
# find closest NECOFS nodes to station locations
obs['0-Based Index'] = nearxy(nc['lon'][:],nc['lat'][:],obs['Lon'],obs['Lat'])
obs


Out[8]:
Lat Lon 0-Based Index
Station
Boston 42.368186 -71.047984 90913
Scituate Harbor 42.199447 -70.720090 37964
Scituate Beach 42.209973 -70.724523 28474
Falmouth Harbor 41.541575 -70.608020 47470
Marion 41.689008 -70.746576 49654
Marshfield 42.108480 -70.648691 24272
Provincetown 42.042745 -70.171180 26595
Sandwich 41.767990 -70.466219 38036
Hampton Bay 42.900103 -70.818510 13022
Gloucester 42.610253 -70.660570 22082

In [9]:
# get time values and convert to datetime objects
times = nc['time']
jd = netCDF4.num2date(times[:],times.units)

In [10]:
# get all time steps of water level from each station
nsta=len(obs)
z=ones((len(jd),nsta))
for i in range(nsta):
    z[:,i] = nc['zeta'][:,obs['0-Based Index'][i]]

In [11]:
# make a DataFrame out of the interpolated time series at each location
zvals=pd.DataFrame(z,index=jd,columns=obs.index)

In [12]:
# list out a few values
zvals.head()


Out[12]:
Station Boston Scituate Harbor Scituate Beach Falmouth Harbor Marion Marshfield Provincetown Sandwich Hampton Bay Gloucester
2015-01-23 00:00:00 -1.647568 0.486000 -0.640000 0.134952 0.489599 -1.405723 -1.457119 -1.460698 -0.289000 -1.393647
2015-01-23 01:01:53 -0.948652 0.486000 -0.640000 0.433659 1.149074 -0.907857 -1.017890 -0.988066 -0.289000 -0.857342
2015-01-23 01:58:08 -0.284563 0.486000 -0.198581 0.682866 1.514852 -0.199026 -0.249463 -0.191480 -0.206848 -0.162681
2015-01-23 03:00:00 0.509363 0.643395 0.669003 0.708308 1.400437 0.644069 0.538067 0.581143 0.680393 0.707169
2015-01-23 04:01:53 1.504024 1.410724 1.423928 0.604528 0.968546 1.410803 1.347864 1.394954 1.369548 1.429520

In [13]:
# plotting at DataFrame is easy!
ax=zvals.plot(figsize=(16,4),grid=True,title=('NECOFS Forecast Water Level from %s Forecast' % model),legend=False);
# read units from dataset for ylabel
ylabel(nc['zeta'].units)
# plotting the legend outside the axis is a bit tricky
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5));



In [14]:
# what is the maximum water level at Scituate over this period?
zvals['Scituate Harbor'].max()


Out[14]:
2.3371098041534424

In [15]:
# make a new DataFrame of maximum water levels at all stations
b=pd.DataFrame(zvals.idxmax(),columns=['time of max water level (UTC)'])
# create heading for new column containing max water level
zmax_heading='zmax (%s)' % nc['zeta'].units
# Add new column to DataFrame
b[zmax_heading]=zvals.max()

In [16]:
b


Out[16]:
time of max water level (UTC) zmax (meters)
Station
Boston 2015-01-27 09:00:00 2.439042
Scituate Harbor 2015-01-27 09:00:00 2.337110
Scituate Beach 2015-01-27 09:00:00 2.316742
Falmouth Harbor 2015-01-27 07:01:53 0.943780
Marion 2015-01-23 01:58:08 1.514852
Marshfield 2015-01-27 09:00:00 2.322528
Provincetown 2015-01-27 10:01:53 2.246832
Sandwich 2015-01-27 09:00:00 2.421149
Hampton Bay 2015-01-27 09:00:00 2.246337
Gloucester 2015-01-27 09:00:00 2.196114