Extract NECOFS water levels using NetCDF4-Python and analyze/visualize with Pandas


In [9]:
# Plot forecast water levels from NECOFS model from list of lon,lat locations
# (uses the nearest point, no interpolation)
import matplotlib.pyplot as plt
import numpy as np
import netCDF4
import datetime as dt
import pandas as pd
from cStringIO import StringIO
%matplotlib inline

In [10]:
#NECOFS MassBay grid
#model='Massbay'
#url='http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_FVCOM_OCEAN_MASSBAY_FORECAST.nc'
# GOM3 Grid
model='GOM3'
url='http://www.smast.umassd.edu:8080/thredds/dodsC/FVCOM/NECOFS/Forecasts/NECOFS_GOM3_FORECAST.nc'

In [11]:
def dms2dd(d,m,s):
    return d+(m+s/60.)/60.

In [12]:
dms2dd(41,33,15.7)


Out[12]:
41.55436111111111

In [13]:
-dms2dd(70,30,20.2)


Out[13]:
-70.50561111111111

In [14]:
# Enter desired (Station, Lat, Lon) values here:
x = '''
Station, Lat, Lon
Boston,             42.368186, -71.047984
Scituate Harbor,    42.199447, -70.720090
Scituate Beach,     42.209973, -70.724523
Falmouth Harbor,    41.541575, -70.608020
Marion,             41.689008, -70.746576
Marshfield,         42.108480, -70.648691
Provincetown,       42.042745, -70.171180
Sandwich,           41.767990, -70.466219
Hampton Bay,        42.900103, -70.818510
Gloucester,         42.610253, -70.660570
'''

In [15]:
# Create a Pandas DataFrame
obs=pd.read_csv(StringIO(x.strip()), sep=",\s*",index_col='Station')

In [16]:
obs


Out[16]:
Lat Lon
Station
Boston 42.368186 -71.047984
Scituate Harbor 42.199447 -70.720090
Scituate Beach 42.209973 -70.724523
Falmouth Harbor 41.541575 -70.608020
Marion 41.689008 -70.746576
Marshfield 42.108480 -70.648691
Provincetown 42.042745 -70.171180
Sandwich 41.767990 -70.466219
Hampton Bay 42.900103 -70.818510
Gloucester 42.610253 -70.660570

In [17]:
# find the indices of the points in (x,y) closest to the points in (xi,yi)
def nearxy(x,y,xi,yi):
    ind=ones(len(xi),dtype=int)
    for i in arange(len(xi)):
        dist=sqrt((x-xi[i])**2+(y-yi[i])**2)
        ind[i]=dist.argmin()
    return ind

In [18]:
# open NECOFS remote OPeNDAP dataset 
nc=netCDF4.Dataset(url).variables

In [19]:
# find closest NECOFS nodes to station locations
obs['0-Based Index'] = nearxy(nc['lon'][:],nc['lat'][:],obs['Lon'],obs['Lat'])
obs


Out[19]:
Lat Lon 0-Based Index
Station
Boston 42.368186 -71.047984 49907
Scituate Harbor 42.199447 -70.720090 46132
Scituate Beach 42.209973 -70.724523 46127
Falmouth Harbor 41.541575 -70.608020 43593
Marion 41.689008 -70.746576 48179
Marshfield 42.108480 -70.648691 42223
Provincetown 42.042745 -70.171180 40802
Sandwich 41.767990 -70.466219 45220
Hampton Bay 42.900103 -70.818510 47160
Gloucester 42.610253 -70.660570 39307

In [20]:
# get time values and convert to datetime objects
times = nc['time']
jd = netCDF4.num2date(times[:],times.units)

In [21]:
# get all time steps of water level from each station
nsta=len(obs)
z=ones((len(jd),nsta))
for i in range(nsta):
    z[:,i] = nc['zeta'][:,obs['0-Based Index'][i]]

In [22]:
# make a DataFrame out of the interpolated time series at each location
zvals=pd.DataFrame(z,index=jd,columns=obs.index)

In [23]:
# list out a few values
zvals.head()


Out[23]:
Station Boston Scituate Harbor Scituate Beach Falmouth Harbor Marion Marshfield Provincetown Sandwich Hampton Bay Gloucester
2014-11-03 00:00:00 1.428371 1.403064 1.436073 0.049156 -0.258989 1.447364 1.496953 1.565573 1.350644 1.329161
2014-11-03 01:01:53 1.176544 1.146818 1.174457 0.041758 -0.494846 1.202951 1.312003 1.358622 1.018364 1.035092
2014-11-03 01:58:08 0.593437 0.582742 0.610145 -0.118447 -0.586351 0.628440 0.725912 0.732516 0.469484 0.481327
2014-11-03 03:00:00 -0.088418 -0.131552 -0.104804 -0.335123 -0.732184 -0.087666 -0.002740 -0.004370 -0.255540 -0.234291
2014-11-03 04:01:53 -0.847808 -0.817936 -0.791864 -0.537964 -0.851829 -0.772306 -0.695273 -0.699899 -0.923514 -0.908058

In [24]:
# plotting at DataFrame is easy!
ax=zvals.plot(figsize=(16,4),grid=True,title=('NECOFS Forecast Water Level from %s Forecast' % model),legend=False);
# read units from dataset for ylabel
ylabel(nc['zeta'].units)
# plotting the legend outside the axis is a bit tricky
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5));



In [27]:
# what is the maximum water level over this period?
zvals['Sandwich'].max()


Out[27]:
1.8315410614013672

In [28]:
# make a new DataFrame of maximum water levels at all stations
b=pd.DataFrame(zvals.idxmax(),columns=['time of max water level (UTC)'])
# create heading for new column containing max water level
zmax_heading='zmax (%s)' % nc['zeta'].units
# Add new column to DataFrame
b[zmax_heading]=zvals.max()

In [29]:
b


Out[29]:
time of max water level (UTC) zmax (meters)
Station
Boston 2014-11-07 16:01:53 1.796400
Scituate Harbor 2014-11-07 16:01:53 1.741577
Scituate Beach 2014-11-07 16:01:53 1.751451
Falmouth Harbor 2014-11-07 13:01:53 0.523534
Marion 2014-11-05 10:58:08 1.129240
Marshfield 2014-11-07 16:01:53 1.756355
Provincetown 2014-11-07 16:01:53 1.805107
Sandwich 2014-11-07 16:01:53 1.831541
Hampton Bay 2014-11-06 15:00:00 1.729943
Gloucester 2014-11-06 15:00:00 1.717201

In [29]:


In [ ]: