In [146]:
import datetime
import json 

import netCDF4
import pandas
import numpy as np
import dateutil.parser

In [156]:
def load_timeseries(filename='id1-DELFZL.nc'):
    """Read the sea surface height record from a netCDF file into a pandas Series.

    Parameters
    ----------
    filename : str
        Path of the netCDF file. It must contain a ``time`` variable with a
        ``units`` attribute and a ``sea_surface_height`` variable.

    Returns
    -------
    pandas.Series
        Heights indexed by the decoded timestamps.
    """
    ds = netCDF4.Dataset(filename)
    try:
        time_var = ds.variables['time']
        # decode the numeric time axis using the units stored in the file
        dates = netCDF4.num2date(time_var[:], time_var.units)
        # only row 0 of the variable is read
        # NOTE(review): presumably the first (or only) station -- confirm
        h = ds.variables['sea_surface_height'][0, :]
    finally:
        # always release the file handle, even if decoding or reading fails
        ds.close()
    # pandas.TimeSeries is a deprecated alias; Series with a datetime index is equivalent
    return pandas.Series(data=h, index=dates)

In [157]:
def make_levels(timeseries):
    """Precompute the series at several temporal resolutions.

    Parameters
    ----------
    timeseries : pandas.Series
        Time-indexed series to aggregate.

    Returns
    -------
    dict
        Maps a level key to a series:

        * ``'r'`` -- the input with missing values dropped
        * ``'m'`` -- 10-minute (``'600s'``) means, left-labelled, padded
        * ``'H'`` -- hourly means, padded
        * ``'D'`` -- daily means, padded
        * ``'M'`` -- monthly means, left-labelled with a ``'1d'`` label offset
        * ``'A'`` -- annual means, left-labelled with a ``'1d'`` label offset

    Notes
    -----
    The ``how`` / ``fill_method`` / ``loffset`` keywords are the legacy pandas
    resample API. NOTE(review): they were removed in later pandas releases, so
    these calls need porting before upgrading pandas.
    """
    # Fix: use the argument. The original body read a global `ts` that is not
    # defined anywhere in the notebook, so it only worked with leftover kernel state.
    levels = {}
    levels['r'] = timeseries.dropna()
    levels['m'] = timeseries.resample('600s', how='mean', label='left', convention='s', fill_method='pad')
    levels['H'] = timeseries.resample('H', how='mean', convention='s', fill_method='pad')
    levels['D'] = timeseries.resample('D', how='mean', convention='s', fill_method='pad')
    # the monthly and annual levels are not padded (no fill_method is passed)
    levels['M'] = timeseries.resample('M', how='mean', label='left', loffset='1d')
    levels['A'] = timeseries.resample('A', how='mean', label='left', loffset='1d')
    return levels

In [78]:


In [151]:
def choose_level(start, end):
    """Pick the resolution key to serve for the window ``[start, end)``.

    The window length in seconds is scaled by 0.8 and compared, from coarsest
    to finest, against fixed thresholds. The first threshold that is strictly
    exceeded decides the key:

    * more than 10 years (of 365 days) -> ``'A'`` (annual)
    * more than 1 year (365 days)      -> ``'M'`` (monthly)
    * more than 31 days                -> ``'D'`` (daily)
    * more than 1 day                  -> ``'m'`` (10-minute)
    * otherwise                        -> ``'r'`` (raw)

    ``start`` and ``end`` must support subtraction yielding an object with
    ``total_seconds()`` (e.g. ``datetime.datetime``).
    """
    one_day = 60 * 60 * 24
    scaled_span = (end - start).total_seconds() * 0.8
    # ordered from coarsest to finest; the first match wins
    thresholds = (
        (one_day * 365 * 10, 'A'),
        (one_day * 365, 'M'),
        (one_day * 31, 'D'),
        (one_day, 'm'),
    )
    for lower_bound, level_key in thresholds:
        if scaled_span > lower_bound:
            return level_key
    # short windows get the raw data
    return 'r'
def subset(start, end):
    """Return the slice of the best-fitting precomputed level for a time window.

    Parameters
    ----------
    start, end : str
        Date strings understood by ``dateutil.parser.parse``. The window is
        half-open: ``start`` is included, ``end`` is excluded.

    Returns
    -------
    The entries of the chosen level whose index falls in ``[start, end)``.

    Relies on the module-level ``levels`` dict and on ``choose_level`` to pick
    the resolution.
    """
    start = dateutil.parser.parse(start)
    end = dateutil.parser.parse(end)
    level = levels[choose_level(start, end)]
    in_window = np.logical_and(level.index >= start, level.index < end)
    # .loc with a boolean mask replaces the deprecated .ix indexer
    return level.loc[in_window]
def subset2txt(timeseries):
    """Serialise a time-indexed series to JSON text.

    Parameters
    ----------
    timeseries : pandas.Series
        Series whose index elements expose ``.value`` (nanoseconds since the
        epoch for pandas Timestamps).

    Returns
    -------
    str
        A JSON array of ``{"t": <milliseconds since epoch>, "h": <height>}``
        objects. Non-finite heights (NaN/inf) are emitted as ``null``, because
        the bare ``NaN``/``Infinity`` tokens ``json.dumps`` would otherwise
        produce are not valid JSON.
    """
    data = []
    # iterate index/values directly instead of Series.iteritems(), which newer
    # pandas releases no longer provide
    for t, h in zip(timeseries.index, timeseries.values):
        height = float(h)
        data.append({
            # nanoseconds -> milliseconds
            "t": t.value / 1e6,
            "h": height if np.isfinite(height) else None,
        })
    return json.dumps(data)

In [159]:
# Load the full series and precompute every resampling level up front.
# This is a bit slow but only done once; subset() then reads the module-level `levels`.
timeseries = load_timeseries()
levels = make_levels(timeseries)

In [160]:
%%timeit
# time one end-to-end request: select a two-month window, then serialise it to JSON text
selection = subset('2000-01-01', '2000-03-01')
subset2txt(selection)


1000 loops, best of 3: 623 µs per loop

In [ ]: