In [1]:
url = 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/waterbase/27_Waterhoogte_in_cm_t.o.v._normaal_amsterdams_peil_in_oppervlaktewater/nc/id1-BROUWHVSGT08.nc'
In [2]:
import datetime
import pandas as pd
import numpy as np
import xarray
import matplotlib.pyplot as plt
import statsmodels.regression.linear_model
import statsmodels.api as sm
In [3]:
# open dataset
ds = xarray.open_dataset(url)
In [4]:
# get first location (only 1 location in file)
ssh = ds.sea_surface_height[0]
In [5]:
# convert to dataframe
df = pd.DataFrame(ssh.to_pandas(), columns=['ssh'])
In [6]:
# check number of measurements per year
# this data is up to date until 2018-09
# after that waterbase was is no longer available.
# the updates based on DDL are not yet available
counts = df.resample('A', label='left').count()
print(counts.head(n=3))
print(counts.tail(n=3))
In [7]:
# drop incomplete years
df = df[df.index > datetime.datetime(1980, 1, 1)]
df = df[df.index < datetime.datetime(2018, 1, 1)]
In [8]:
df.tail()
Out[8]:
In [9]:
# double check date calculation (different timezones)
# calculation is done while in UK, double check if back in NL
test_df = df[
np.logical_and(
df.index >= datetime.datetime(2015, 1, 1),
df.index < datetime.datetime(2016, 1, 1)
)
]
# this should only show 2015
test_df.resample('A').mean().index.year
Out[9]:
In [10]:
# define some extra variables
annual_mean_df = df.resample('A', label='right').mean()
annual_max_df = df.resample('A', label='right').max()
year = [x.year for x in annual_mean_df.index]
annual_mean_df['year'] = year
year = [x.year for x in annual_max_df.index]
annual_max_df['year'] = year
# m to mm, sealevelmonitor is in mm
annual_mean_df['height'] = annual_mean_df['ssh'] * 1000
annual_max_df['height'] = annual_max_df['ssh'] * 1000
In [11]:
# plot measured data (labels are a bit off)
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(annual_mean_df.year, annual_mean_df.height, 'k.')
ax.set_ylabel("{} [{}]".format(ssh.standard_name, 'mm'))
ax.set_title('Annual mean');
In [12]:
# plot annual maxima
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(annual_max_df['year'], annual_max_df['height'], 'k.')
ax.set_ylabel("{} [{}]".format(ssh.standard_name, 'mm'))
ax.set_title('Annual max (values at end of period)');
In [13]:
# define the statistical model
def linear_model(df, with_ar=True):
y = df['height']
X = np.c_[
df['year']-1970,
np.cos(2*np.pi*(df['year']-1970)/18.613),
np.sin(2*np.pi*(df['year']-1970)/18.613)
]
month = np.mod(df['year'], 1) * 12.0
names = ['Constant', 'Trend', 'Nodal U', 'Nodal V']
X = sm.add_constant(X)
if with_ar:
model = sm.GLSAR(y, X, missing='drop', rho=1)
else:
model = sm.OLS(y, X, missing='drop')
fit = model.fit(cov_type='HC0')
return fit, names
In [14]:
# print the fit properties
fit, names = linear_model(annual_mean_df)
prediction = fit.get_prediction()
fit.summary(xname=names)
Out[14]:
In [15]:
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(annual_mean_df.year, annual_mean_df.height, 'k.')
ax.set_ylabel("{} [{}]".format(ssh.standard_name, 'mm'))
ax.set_title('Annual mean')
conf_int = prediction.conf_int()
pred_int = prediction.conf_int(obs=True)
ax.fill_between(annual_mean_df.year, pred_int[:, 0], pred_int[:, 1], alpha=0.1, color='blue', label='prediction interval')
ax.fill_between(annual_mean_df.year, conf_int[:, 0], conf_int[:, 1], alpha=0.3, color='blue', label='confidence interval')
ax.plot(annual_mean_df.year, fit.predict(), color='blue', label='fitted trend')
ax.legend()
ax.grid(True)
fig.savefig('nodal.pdf')
In [16]:
exog = fit.model.exog.copy()
exog[:, 2:4] = 0
prediction = fit.get_prediction(exog=exog)
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(annual_mean_df.year, annual_mean_df.height, 'k.')
ax.set_ylabel("{} [{}]".format(ssh.standard_name, 'mm'))
ax.set_title('Annual mean')
conf_int = prediction.conf_int()
pred_int = prediction.conf_int(obs=True)
ax.fill_between(annual_mean_df.year, pred_int[:, 0], pred_int[:, 1], alpha=0.1, color='blue', label='prediction interval')
ax.fill_between(annual_mean_df.year, conf_int[:, 0], conf_int[:, 1], alpha=0.3, color='blue', label='confidence interval')
ax.plot(annual_mean_df.year, fit.predict(exog=exog), color='blue', label='fitted trend')
ax.legend()
ax.grid(True)
fig.savefig('linear.pdf')