In [1]:
import datetime
import json 
import logging
logging.root.setLevel(logging.DEBUG)
import netCDF4
import pandas
import numpy as np
import dateutil.parser

In [2]:
# Path of the indexed copy of the data file. The nc3tonc4 cell below creates
# it; later cells read from it and append the resampled series to it.
filename = 'id1-VLISSGN-indexed.nc'

In [3]:
%%bash
# Copy id1-VLISSGN.nc to id1-VLISSGN-indexed.nc with nc3tonc4.
# The output name must stay in sync with `filename` defined in the previous cell.
# NOTE(review): -o presumably overwrites an existing output file and
# --classic 0 presumably selects the non-classic NETCDF4 format (a later cell
# creates a group in this file) -- confirm against `nc3tonc4 --help`.
nc3tonc4 -o --classic 0 id1-VLISSGN.nc id1-VLISSGN-indexed.nc


copying global attributes ..
copying dimensions ..
copying variable platform_id
copying variable platform_name
copying variable lon
copying variable lat
copying variable wgs84
copying variable epsg
copying variable x
copying variable y
copying variable z
copying variable time
copying variable sea_surface_height

In [4]:
# Read the time axis and the sea surface height record from the netCDF file
# and combine them into a pandas series indexed by date.
ds = netCDF4.Dataset(filename)
try:
    time_var = ds.variables['time']
    # Decode the numeric time values into datetime objects using the
    # variable's own `units` attribute.
    # NOTE(review): recent netCDF4/cftime releases may return cftime objects
    # here instead of datetime.datetime -- confirm with the installed version.
    dates = netCDF4.num2date(time_var[:], time_var.units)
    # Take index 0 along the first dimension of sea_surface_height
    # (presumably the single platform in this file -- TODO confirm).
    h = ds.variables['sea_surface_height'][0, :]
finally:
    # Release the file handle even when reading fails.
    ds.close()
# pandas.TimeSeries was a deprecated alias of pandas.Series and has been
# removed from pandas; a Series with a datetime index is the equivalent.
ts = pandas.Series(data=h, index=dates)

In [5]:
# Drop missing values, then compute one mean-aggregated series per resampling
# frequency. The results are collected in `indices`, keyed by offset alias.
ts_na = ts.dropna()
# pandas offset aliases: year start, month start, day, hour.
# NOTE(review): newer pandas releases deprecate 'AS' and 'H' in favour of
# 'YS' and 'h'; they are kept here because the alias is also used as the
# variable name written to the netCDF file.
offset_aliases = ['AS' ,'MS', 'D', 'H']
logging.debug('%s -> %d', 'raw', len(ts_na))
indices = {}
for alias in offset_aliases:
    # The `how=` keyword of resample() was deprecated and later removed from
    # pandas; calling .mean() on the resampler is the supported spelling and
    # yields the same result.
    index = ts_na.resample(alias, closed='left').mean()
    logging.debug('%s -> %d', alias, len(index))
    indices[alias] = index


DEBUG:root:raw -> 1797800
DEBUG:root:AS -> 152
DEBUG:root:MS -> 1815
DEBUG:root:D -> 55228
DEBUG:root:H -> 1325441

In [6]:
# Store every resampled series in an 'index' group of the netCDF file.
# For each alias this creates a dimension named after the alias, a
# 'time_<alias>' variable holding the timestamps and a '<alias>' variable
# holding the averaged values.
units = 'seconds since 1970-01-01'
ds = netCDF4.Dataset(filename, 'a')
try:
    group = ds.createGroup('index')
    for alias, index in indices.items():
        time_name = 'time_%s' % (alias,)
        group.createDimension(alias, len(index))

        # Timestamps of the resampled bins, encoded relative to `units`.
        time_var = group.createVariable(time_name, datatype='float64', dimensions=(alias,), zlib=True)
        time_var.units = units
        time_var[:] = netCDF4.date2num(index.index.to_pydatetime(), units=units)

        # Averaged values, linked to their time variable through `coordinates`.
        value_var = group.createVariable('%s' % (alias,), datatype='float64', dimensions=(alias,), zlib=True)
        # NOTE(review): the CF conventions name this attribute `cell_methods`
        # (e.g. 'time: mean'); left unchanged so existing readers of the file
        # keep working -- confirm whether it should be renamed.
        value_var.cell_method = 'mean'
        value_var.coordinates = time_name
        value_var[:] = index.values
finally:
    # Always close the file opened in append mode, even if a write fails, so
    # the handle is not left open in the kernel.
    ds.close()

In [7]:



Out[7]:
array([datetime.datetime(1863, 9, 1, 0, 0),
       datetime.datetime(1863, 10, 1, 0, 0),
       datetime.datetime(1863, 11, 1, 0, 0), ...,
       datetime.datetime(2014, 9, 1, 0, 0),
       datetime.datetime(2014, 10, 1, 0, 0),
       datetime.datetime(2014, 11, 1, 0, 0)], dtype=object)

In [ ]: