Reading NDBC's heirarchical NetCDF4 buoy data

NDBC decided in 2011 to go with NetCDF4 files using groups to store buoy data. Can we read them using standard tools?


In [2]:
import urllib
import netCDF4
from IPython.core.display import HTML
import pandas as pd

If you visit http://www.nodc.noaa.gov/BUOY/ and select the "41013" buoy, then select a one of the links from the "Buoy Data (netCDF)" table, you will find yourself on a THREDDS dataset page like the one below


In [3]:
HTML('<iframe src=http://data.nodc.noaa.gov/thredds/catalog/ndbc/cmanwx/2013/08/catalog.html?dataset=ndbc/cmanwx/2013/08/NDBC_41013_201308_D2_v00.nc width=900 height=500></iframe>')


Out[3]:

In [4]:
# try to open DAP URL
dap_url='http://data.nodc.noaa.gov/thredds/dodsC/ndbc/cmanwx/2013/08/NDBC_41013_201308_D2_v00.nc'
nc = netCDF4.Dataset(dap_url)


---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-4-0b3628dfc3e2> in <module>()
      1 # try to open DAP URL
      2 dap_url='http://data.nodc.noaa.gov/thredds/dodsC/ndbc/cmanwx/2013/08/NDBC_41013_201308_D2_v00.nc'
----> 3 nc = netCDF4.Dataset(dap_url)

/home/local/python27_epd/lib/python2.7/site-packages/netCDF4.so in netCDF4.Dataset.__init__ (netCDF4.c:15904)()

RuntimeError: NetCDF: String match to name in use

In [5]:
# since DAP didn't work, download the entire NetCDF file
urllib.urlretrieve('http://data.nodc.noaa.gov/thredds/fileServer/ndbc/cmanwx/2013/08/NDBC_41013_201308_D2_v00.nc',
                                                                                    'NDBC_41013_201308_D2_v00.nc')


Out[5]:
('NDBC_41013_201308_D2_v00.nc', <httplib.HTTPMessage instance at 0x37daef0>)

In [6]:
# open the local netcdf file
nc = netCDF4.Dataset('NDBC_41013_201308_D2_v00.nc')

In [7]:
# see if there are variables at the top level
nc.variables.keys()


Out[7]:
[u'time', u'time10', u'timem', u'time_wpm_20', u'wave_wpm', u'wave_wpm_bnds']

In [8]:
# convert time to Python datetime objects
time_var = nc.variables['time']
print time_var
jd = netCDF4.num2date(time_var[:],time_var.units)


<type 'netCDF4.Variable'>
int32 time(u'time',)
    long_name: time
    standard_name: time
    units: seconds since 1970-01-01 00:00:00 UTC
unlimited dimensions = ()
current size = (744,)


In [9]:
# see if there are groups
nc.groups.keys()


Out[9]:
[u'payload_1']

In [10]:
# only one group. Let's take a look:
payload1= nc.groups['payload_1']

In [11]:
# are there variables in payload_1?
payload1.variables.keys()


Out[11]:
[]

In [12]:
# are there more groups in payload_1?
payload1.groups.keys()


Out[12]:
[u'anemometer_1',
 u'anemometer_2',
 u'barometer_1',
 u'barometer_2',
 u'air_temperature_sensor_1',
 u'air_temperature_sensor_2',
 u'humidity_sensor_1',
 u'ocean_temperature_sensor_1',
 u'gps_1',
 u'wave_sensor_1']

In [13]:
# let's pick 'anemometer_2' from 'payload_1'
payload1_anemometer2 = payload1.groups['anemometer_2']

In [14]:
# any groups in group 2?
payload1_anemometer2.groups.keys()


Out[14]:
[]

In [15]:
# any variables in group 2?
payload1_anemometer2.variables.keys()


Out[15]:
[u'wind_speed',
 u'wind_speed_qc',
 u'wind_speed_detail_qc',
 u'wind_speed_release',
 u'wind_direction',
 u'wind_direction_qc',
 u'wind_direction_detail_qc',
 u'wind_direction_release',
 u'wind_gust',
 u'wind_gust_qc',
 u'wind_gust_detail_qc',
 u'wind_gust_release',
 u'continuous_wind_speed',
 u'continuous_wind_speed_qc',
 u'continuous_wind_speed_detail_qc',
 u'continuous_wind_speed_release',
 u'continuous_wind_direction',
 u'continuous_wind_direction_qc',
 u'continuous_wind_direction_detail_qc',
 u'continuous_wind_direction_release',
 u'hourly_max_gust',
 u'hourly_max_gust_qc',
 u'hourly_max_gust_detail_qc',
 u'hourly_max_gust_release',
 u'direction_of_hourly_max_gust',
 u'direction_of_hourly_max_gust_qc',
 u'direction_of_hourly_max_gust_detail_qc',
 u'direction_of_hourly_max_gust_release']

In [16]:
# open the wind speed variable from /payload_1/anemometer_2
var = payload1_anemometer2.variables['wind_speed']

In [17]:
print var


<type 'netCDF4.Variable'>
float32 wind_speed(u'time',)
    long_name: wind_speed
    standard_name: wind_speed
    units: m/s
path = /payload_1/anemometer_2
unlimited dimensions = ()
current size = (744,)


In [18]:
# Create Pandas time series object, using the time from group 0, and data from group 12
ts = pd.Series(var,index=jd)

In [19]:
# Make a Plot
ts.plot(title=var.long_name,figsize=(12,4))
ylabel(var.units)


Out[19]:
<matplotlib.text.Text at 0x3604550>

In [19]: