In [1]:
import datetime
import json
import logging
logging.root.setLevel(logging.DEBUG)
import netCDF4
import pandas
import numpy as np
import dateutil.parser
In [2]:
# NetCDF file this notebook works on: it is the output name of the nc3tonc4
# conversion cell, is opened read-only to build the time series, and is later
# re-opened in append mode to store the resampled series.
filename = 'id1-VLISSGN-indexed.nc'
In [3]:
%%bash
# Convert id1-VLISSGN.nc into id1-VLISSGN-indexed.nc with the nc3tonc4 utility.
# Per the nc3tonc4 help text, -o overwrites an existing output file and
# "--classic 0" selects the full NETCDF4 format instead of NETCDF4_CLASSIC
# (NOTE(review): confirm against the installed nc3tonc4 version).
# The output name has to stay in sync with `filename` in the Python cells,
# which later append a group to this file.
nc3tonc4 -o --classic 0 id1-VLISSGN.nc id1-VLISSGN-indexed.nc
In [4]:
# Read the time axis and the sea surface height record from the NetCDF file.
# The context manager closes the dataset even when one of the reads raises,
# so the file handle is never leaked.
with netCDF4.Dataset(filename) as ds:
    time_var = ds.variables['time']
    # Decode the numeric time axis into datetime objects, using the units
    # string stored on the time variable itself.
    dates = netCDF4.num2date(time_var[:], time_var.units)
    # Take the first row of the 2-D variable.
    # NOTE(review): presumably row 0 is the only station in this file -- confirm.
    h = ds.variables['sea_surface_height'][0, :]

# pandas.TimeSeries was only an alias of pandas.Series and has been removed
# from pandas; a Series with a datetime index is the drop-in replacement.
ts = pandas.Series(data=h, index=dates)
In [5]:
# Time series with the missing observations removed, so that they do not
# take part in the averages below.
ts_na = ts.dropna()

# pandas offset aliases to aggregate on: year start, month start, day, hour.
# NOTE(review): these strings are also used as dimension/variable names when
# the results are written to the NetCDF file, so renaming them changes the
# layout of the output file.
offset_aliases = ['AS', 'MS', 'D', 'H']
logging.debug('%s -> %d', 'raw', len(ts_na))

# Maps each offset alias to the series of per-bin means at that frequency.
indices = {}
for alias in offset_aliases:
    # The ``how=`` keyword of resample() was deprecated in pandas 0.18 and
    # later removed; calling .mean() on the resampler is the equivalent
    # supported spelling and yields the same per-bin means.
    index = ts_na.resample(alias, closed='left').mean()
    logging.debug('%s -> %d', alias, len(index))
    indices[alias] = index
In [6]:
# Units used to encode the timestamps of every resampled series.
units = 'seconds since 1970-01-01'

# Re-open the file in append mode and store each resampled series in an
# 'index' group: one dimension per offset alias, a 'time_<alias>' coordinate
# variable and an '<alias>' variable holding the means.
# The context manager closes the dataset even when one of the writes raises,
# so a failure half-way through does not leave the file handle open.
with netCDF4.Dataset(filename, 'a') as ds:
    group = ds.createGroup('index')
    for alias, index in indices.items():
        time_name = 'time_%s' % (alias,)
        group.createDimension(alias, len(index))

        # Time coordinate: bin timestamps encoded as numbers in `units`.
        var = group.createVariable(time_name, datatype='float64',
                                   dimensions=(alias,), zlib=True)
        var.units = units
        var[:] = netCDF4.date2num(index.index.to_pydatetime(), units=units)

        # Aggregated values, linked to their time coordinate by name.
        var = group.createVariable(alias, datatype='float64',
                                   dimensions=(alias,), zlib=True)
        # NOTE(review): the CF conventions spell this attribute `cell_methods`
        # (e.g. 'time: mean'). The original name is kept so existing readers
        # of the file keep working -- confirm which one is intended.
        var.cell_method = 'mean'
        var.coordinates = time_name
        var[:] = index.values
In [7]:
Out[7]:
In [ ]: