In [146]:
import datetime
import json
import netCDF4
import pandas
import numpy as np
import dateutil.parser
In [156]:
def load_timeseries(filename='id1-DELFZL.nc'):
    """Read the sea surface height record of a netCDF file as a time-indexed series.

    Parameters
    ----------
    filename : str
        Path of the netCDF file to open. The file must provide a ``time``
        variable carrying a ``units`` attribute and a ``sea_surface_height``
        variable.

    Returns
    -------
    pandas.Series
        The values at index 0 of the first dimension of
        ``sea_surface_height``, indexed by the dates decoded from ``time``.
    """
    ds = netCDF4.Dataset(filename)
    try:
        time_var = ds.variables['time']
        # decode the numeric time axis into date objects using the variable's own units
        dates = netCDF4.num2date(time_var[:], time_var.units)
        # index 0 of the leading dimension -- presumably the station axis; TODO confirm
        h = ds.variables['sea_surface_height'][0, :]
    finally:
        # release the file handle even when reading one of the variables fails
        ds.close()
    # pandas.TimeSeries was only an alias of pandas.Series and no longer exists
    # in newer pandas releases; Series builds the same object everywhere.
    return pandas.Series(data=h, index=dates)
In [157]:
def make_levels(timeseries):
    """Precompute the series at several time resolutions.

    Parameters
    ----------
    timeseries : pandas.Series
        Time-indexed series to aggregate.

    Returns
    -------
    dict
        Maps a level key to a series:

        * ``'r'`` -- the raw values with missing entries dropped
        * ``'m'`` -- 10-minute (600 s) means, gaps padded
        * ``'H'`` -- hourly means, gaps padded
        * ``'D'`` -- daily means, gaps padded
        * ``'M'`` -- monthly means, left label shifted by one day
        * ``'A'`` -- annual means, left label shifted by one day
    """
    # NOTE(review): how=/fill_method=/loffset= are the legacy pandas resample
    # keywords; newer pandas releases dropped them -- confirm the target version.
    levels = {}
    # Every level is derived from the argument; the body previously read an
    # undefined global ``ts`` and ignored the parameter.
    levels['r'] = timeseries.dropna()
    levels['m'] = timeseries.resample('600s', how='mean', label='left', convention='s', fill_method='pad')
    levels['H'] = timeseries.resample('H', how='mean', convention='s', fill_method='pad')
    levels['D'] = timeseries.resample('D', how='mean', convention='s', fill_method='pad')
    levels['M'] = timeseries.resample('M', how='mean', label='left', loffset='1d')
    levels['A'] = timeseries.resample('A', how='mean', label='left', loffset='1d')
    return levels
In [78]:
In [151]:
def choose_level(start, end):
    """Pick the resolution key that suits the time window ``[start, end)``.

    The window length in seconds is scaled by 0.8 and compared (strictly
    greater than) against a descending list of thresholds; the first
    threshold exceeded decides the key.

    Parameters
    ----------
    start, end : datetime.datetime
        Bounds of the requested window; ``end - start`` must be a timedelta.

    Returns
    -------
    str
        ``'A'`` above ten years, ``'M'`` above one year, ``'D'`` above 31
        days, ``'m'`` above one day, otherwise ``'r'`` (raw data).
    """
    day = 60 * 60 * 24
    year = day * 365
    scaled = (end - start).total_seconds() * 0.8
    thresholds = (
        (year * 10, 'A'),  # decades -> annual data
        (year, 'M'),       # years -> monthly data
        (day * 31, 'D'),   # months -> daily data
        (day, 'm'),        # days -> 10-minute data
    )
    for limit, key in thresholds:
        if scaled > limit:
            return key
    # anything shorter gets the raw data
    return 'r'
def subset(start, end):
    """Return the slice of the best-fitting level that lies in ``[start, end)``.

    Parameters
    ----------
    start, end : str
        Date strings parsed with ``dateutil.parser.parse``. ``start`` is
        inclusive, ``end`` is exclusive.

    Returns
    -------
    The entries of the selected level whose index falls inside the window.
    The level is looked up in the module-level ``levels`` dict using the key
    returned by ``choose_level``.
    """
    start = dateutil.parser.parse(start)
    end = dateutil.parser.parse(end)
    level_key = choose_level(start, end)
    level = levels[level_key]
    in_window = np.logical_and(level.index >= start, level.index < end)
    # .loc with a boolean mask selects the same rows as the deprecated .ix
    # indexer did, and exists in every pandas release.
    return level.loc[in_window]
def subset2txt(timeseries):
    """Serialize a time-indexed series to JSON text.

    Parameters
    ----------
    timeseries : pandas.Series
        Series whose index entries expose ``.value`` (pandas timestamps).

    Returns
    -------
    str
        A JSON array with one ``{"t": ..., "h": ...}`` object per entry:
        ``t`` is the timestamp's ``.value`` divided by 1e6 and ``h`` is the
        entry converted to ``float``. An empty series yields ``"[]"``.
    """
    # NOTE(review): a NaN height is written as the bare token NaN, which strict
    # JSON parsers reject -- confirm whether the levels can contain NaN.
    data = [
        # Timestamp.value counts nanoseconds since the epoch, so /1e6 gives milliseconds
        {"t": t.value / 1e6, "h": float(h)}
        # .items() replaces .iteritems(), which newer pandas releases removed
        for t, h in timeseries.items()
    ]
    return json.dumps(data)
In [159]:
# Build the module-level state that subset() reads: load the full record,
# then precompute every resolution level from it.
# This is a bit slow, but it is only done once.
timeseries = load_timeseries()
levels = make_levels(timeseries)
In [160]:
%%timeit
# Time one query end to end: select a two-month window, then serialize it to JSON text.
selection = subset('2000-01-01', '2000-03-01')
subset2txt(selection)
In [ ]: