In [1]:
url = 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/waterbase/27_Waterhoogte_in_cm_t.o.v._normaal_amsterdams_peil_in_oppervlaktewater/nc/id1-BROUWHVSGT08.nc'

In [2]:
import datetime

import pandas as pd
import numpy as np

import xarray
import matplotlib.pyplot as plt

import statsmodels.regression.linear_model 
import statsmodels.api as sm

In [3]:
# open dataset
ds = xarray.open_dataset(url)

In [4]:
# get first location (only 1 location in file)
ssh = ds.sea_surface_height[0]

In [5]:
# convert to dataframe
df = pd.DataFrame(ssh.to_pandas(), columns=['ssh'])

In [6]:
# check number of measurements per year
# this data is up to date until 2018-09
# after that waterbase was is no longer available.
# the updates based on DDL are not yet available
counts = df.resample('A', label='left').count()
print(counts.head(n=3))
print(counts.tail(n=3))


             ssh
time            
1978-12-31     2
1979-12-31  8784
1980-12-31  8760
              ssh
time             
2015-12-31  52704
2016-12-31  52560
2017-12-31  35943

In [7]:
# drop incomplete years
df = df[df.index > datetime.datetime(1980, 1, 1)]
df = df[df.index < datetime.datetime(2018, 1, 1)]

In [8]:
df.tail()


Out[8]:
ssh
time
2017-12-31 23:19:59.999998976 1.80
2017-12-31 23:30:00.000003328 1.80
2017-12-31 23:39:59.999997696 1.87
2017-12-31 23:50:00.000002304 1.89
2017-12-31 23:59:59.999996672 1.88

In [9]:
# double check date calculation (different timezones)
# calculation is done while in UK, double check if back in NL
test_df = df[
    np.logical_and(
        df.index >= datetime.datetime(2015, 1, 1),
        df.index < datetime.datetime(2016, 1, 1)
    )
]
# this should only show 2015
test_df.resample('A').mean().index.year


Out[9]:
Int64Index([2015], dtype='int64', name='time')

In [10]:
# define some extra variables
annual_mean_df = df.resample('A', label='right').mean()
annual_max_df = df.resample('A', label='right').max()
year = [x.year for x in annual_mean_df.index]
annual_mean_df['year'] = year
year = [x.year for x in annual_max_df.index]
annual_max_df['year'] = year
# m to mm, sealevelmonitor is in mm
annual_mean_df['height'] = annual_mean_df['ssh'] * 1000
annual_max_df['height'] = annual_max_df['ssh'] * 1000

In [11]:
# plot measured data (labels are a bit off)
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(annual_mean_df.year, annual_mean_df.height, 'k.')
ax.set_ylabel("{} [{}]".format(ssh.standard_name, 'mm'))
ax.set_title('Annual mean');



In [12]:
# plot annual maxima
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(annual_max_df['year'], annual_max_df['height'], 'k.')
ax.set_ylabel("{} [{}]".format(ssh.standard_name, 'mm'))
ax.set_title('Annual max (values at end of period)');



In [13]:
# define the statistical model
def linear_model(df, with_ar=True):
    y = df['height']
    X = np.c_[
        df['year']-1970, 
        np.cos(2*np.pi*(df['year']-1970)/18.613),
        np.sin(2*np.pi*(df['year']-1970)/18.613)
    ]
    month = np.mod(df['year'], 1) * 12.0
    names = ['Constant', 'Trend', 'Nodal U', 'Nodal V']
    X = sm.add_constant(X)
    if with_ar:
        model = sm.GLSAR(y, X, missing='drop', rho=1)
    else:
        model = sm.OLS(y, X, missing='drop')
    fit = model.fit(cov_type='HC0')
    return fit, names

In [14]:
# print the fit properties
fit, names = linear_model(annual_mean_df)
prediction = fit.get_prediction()
fit.summary(xname=names)


Out[14]:
GLSAR Regression Results
Dep. Variable: height R-squared: 0.317
Model: GLSAR Adj. R-squared: 0.255
Method: Least Squares F-statistic: 5.949
Date: Fri, 10 May 2019 Prob (F-statistic): 0.00233
Time: 15:58:08 Log-Likelihood: -179.32
No. Observations: 37 AIC: 366.6
Df Residuals: 33 BIC: 373.1
Df Model: 3
Covariance Type: HC0
coef std err z P>|z| [0.025 0.975]
Constant -54.8593 17.873 -3.069 0.002 -89.889 -19.829
Trend 2.0208 0.522 3.874 0.000 0.998 3.043
Nodal U -1.5794 8.248 -0.191 0.848 -17.745 14.586
Nodal V -19.1110 6.761 -2.827 0.005 -32.361 -5.861
Omnibus: 1.283 Durbin-Watson: 1.341
Prob(Omnibus): 0.526 Jarque-Bera (JB): 0.876
Skew: -0.377 Prob(JB): 0.645
Kurtosis: 2.967 Cond. No. 98.6


Warnings:
[1] Standard Errors are heteroscedasticity robust (HC0)

In [15]:
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(annual_mean_df.year, annual_mean_df.height, 'k.')
ax.set_ylabel("{} [{}]".format(ssh.standard_name, 'mm'))
ax.set_title('Annual mean')
conf_int = prediction.conf_int()
pred_int = prediction.conf_int(obs=True)
ax.fill_between(annual_mean_df.year, pred_int[:, 0], pred_int[:, 1], alpha=0.1, color='blue', label='prediction interval')
ax.fill_between(annual_mean_df.year, conf_int[:, 0], conf_int[:, 1], alpha=0.3, color='blue', label='confidence interval')
ax.plot(annual_mean_df.year, fit.predict(), color='blue', label='fitted trend')
ax.legend()
ax.grid(True)
fig.savefig('nodal.pdf')



In [16]:
exog = fit.model.exog.copy()
exog[:, 2:4] = 0 
prediction = fit.get_prediction(exog=exog)

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(annual_mean_df.year, annual_mean_df.height, 'k.')
ax.set_ylabel("{} [{}]".format(ssh.standard_name, 'mm'))
ax.set_title('Annual mean')
conf_int = prediction.conf_int()
pred_int = prediction.conf_int(obs=True)
ax.fill_between(annual_mean_df.year, pred_int[:, 0], pred_int[:, 1], alpha=0.1, color='blue', label='prediction interval')
ax.fill_between(annual_mean_df.year, conf_int[:, 0], conf_int[:, 1], alpha=0.3, color='blue', label='confidence interval')
ax.plot(annual_mean_df.year, fit.predict(exog=exog), color='blue', label='fitted trend')
ax.legend()
ax.grid(True)
fig.savefig('linear.pdf')