notebook.community

Edit and run



In [1]:

    
url = 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/waterbase/27_Waterhoogte_in_cm_t.o.v._normaal_amsterdams_peil_in_oppervlaktewater/nc/id1-BROUWHVSGT08.nc'



In [2]:

    
import datetime

import pandas as pd
import numpy as np

import xarray
import matplotlib.pyplot as plt

import statsmodels.regression.linear_model 
import statsmodels.api as sm



In [3]:

    
# open dataset
ds = xarray.open_dataset(url)



In [4]:

    
# get first location (only 1 location in file)
ssh = ds.sea_surface_height[0]



In [5]:

    
# convert to dataframe
df = pd.DataFrame(ssh.to_pandas(), columns=['ssh'])



In [6]:

    
# check number of measurements per year
# this data is up to date until 2018-09
# after that waterbase was is no longer available.
# the updates based on DDL are not yet available
counts = df.resample('A', label='left').count()
print(counts.head(n=3))
print(counts.tail(n=3))









    



             ssh
time            
1978-12-31     2
1979-12-31  8784
1980-12-31  8760
              ssh
time             
2015-12-31  52704
2016-12-31  52560
2017-12-31  35943



In [7]:

    
# drop incomplete years
df = df[df.index > datetime.datetime(1980, 1, 1)]
df = df[df.index < datetime.datetime(2018, 1, 1)]



In [8]:

    
df.tail()









    Out[8]:







  
    
      
      ssh
    
    
      time
      
    
  
  
    
      2017-12-31 23:19:59.999998976
      1.80
    
    
      2017-12-31 23:30:00.000003328
      1.80
    
    
      2017-12-31 23:39:59.999997696
      1.87
    
    
      2017-12-31 23:50:00.000002304
      1.89
    
    
      2017-12-31 23:59:59.999996672
      1.88



In [9]:

    
# double check date calculation (different timezones)
# calculation is done while in UK, double check if back in NL
test_df = df[
    np.logical_and(
        df.index >= datetime.datetime(2015, 1, 1),
        df.index < datetime.datetime(2016, 1, 1)
    )
]
# this should only show 2015
test_df.resample('A').mean().index.year









    Out[9]:





Int64Index([2015], dtype='int64', name='time')



In [10]:

    
# define some extra variables
annual_mean_df = df.resample('A', label='right').mean()
annual_max_df = df.resample('A', label='right').max()
year = [x.year for x in annual_mean_df.index]
annual_mean_df['year'] = year
year = [x.year for x in annual_max_df.index]
annual_max_df['year'] = year
# m to mm, sealevelmonitor is in mm
annual_mean_df['height'] = annual_mean_df['ssh'] * 1000
annual_max_df['height'] = annual_max_df['ssh'] * 1000



In [11]:

    
# plot measured data (labels are a bit off)
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(annual_mean_df.year, annual_mean_df.height, 'k.')
ax.set_ylabel("{} [{}]".format(ssh.standard_name, 'mm'))
ax.set_title('Annual mean');



In [12]:

    
# plot annual maxima
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(annual_max_df['year'], annual_max_df['height'], 'k.')
ax.set_ylabel("{} [{}]".format(ssh.standard_name, 'mm'))
ax.set_title('Annual max (values at end of period)');



In [13]:

    
# define the statistical model
def linear_model(df, with_ar=True):
    y = df['height']
    X = np.c_[
        df['year']-1970, 
        np.cos(2*np.pi*(df['year']-1970)/18.613),
        np.sin(2*np.pi*(df['year']-1970)/18.613)
    ]
    month = np.mod(df['year'], 1) * 12.0
    names = ['Constant', 'Trend', 'Nodal U', 'Nodal V']
    X = sm.add_constant(X)
    if with_ar:
        model = sm.GLSAR(y, X, missing='drop', rho=1)
    else:
        model = sm.OLS(y, X, missing='drop')
    fit = model.fit(cov_type='HC0')
    return fit, names



In [14]:

    
# print the fit properties
fit, names = linear_model(annual_mean_df)
prediction = fit.get_prediction()
fit.summary(xname=names)









    Out[14]:





GLSAR Regression Results

  Dep. Variable:          height         R-squared:             0.317


  Model:                   GLSAR         Adj. R-squared:        0.255


  Method:              Least Squares     F-statistic:           5.949


  Date:              Fri, 10 May 2019    Prob (F-statistic):   0.00233


  Time:                  15:58:08        Log-Likelihood:      -179.32


  No. Observations:           37         AIC:                   366.6


  Df Residuals:               33         BIC:                   373.1


  Df Model:                    3                                     


  Covariance Type:          HC0                                      




              coef      std err       z       P>|z|   [0.025     0.975]  


  Constant    -54.8593     17.873     -3.069   0.002    -89.889    -19.829


  Trend         2.0208      0.522      3.874   0.000      0.998      3.043


  Nodal U      -1.5794      8.248     -0.191   0.848    -17.745     14.586


  Nodal V     -19.1110      6.761     -2.827   0.005    -32.361     -5.861




  Omnibus:         1.283    Durbin-Watson:         1.341


  Prob(Omnibus):   0.526    Jarque-Bera (JB):      0.876


  Skew:           -0.377    Prob(JB):              0.645


  Kurtosis:        2.967    Cond. No.               98.6



Warnings:
[1] Standard Errors are heteroscedasticity robust (HC0)



In [15]:

    
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(annual_mean_df.year, annual_mean_df.height, 'k.')
ax.set_ylabel("{} [{}]".format(ssh.standard_name, 'mm'))
ax.set_title('Annual mean')
conf_int = prediction.conf_int()
pred_int = prediction.conf_int(obs=True)
ax.fill_between(annual_mean_df.year, pred_int[:, 0], pred_int[:, 1], alpha=0.1, color='blue', label='prediction interval')
ax.fill_between(annual_mean_df.year, conf_int[:, 0], conf_int[:, 1], alpha=0.3, color='blue', label='confidence interval')
ax.plot(annual_mean_df.year, fit.predict(), color='blue', label='fitted trend')
ax.legend()
ax.grid(True)
fig.savefig('nodal.pdf')



In [16]:

    
exog = fit.model.exog.copy()
exog[:, 2:4] = 0 
prediction = fit.get_prediction(exog=exog)

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(annual_mean_df.year, annual_mean_df.height, 'k.')
ax.set_ylabel("{} [{}]".format(ssh.standard_name, 'mm'))
ax.set_title('Annual mean')
conf_int = prediction.conf_int()
pred_int = prediction.conf_int(obs=True)
ax.fill_between(annual_mean_df.year, pred_int[:, 0], pred_int[:, 1], alpha=0.1, color='blue', label='prediction interval')
ax.fill_between(annual_mean_df.year, conf_int[:, 0], conf_int[:, 1], alpha=0.3, color='blue', label='confidence interval')
ax.plot(annual_mean_df.year, fit.predict(exog=exog), color='blue', label='fitted trend')
ax.legend()
ax.grid(True)
fig.savefig('linear.pdf')

	ssh
time
2017-12-31 23:19:59.999998976	1.80
2017-12-31 23:30:00.000003328	1.80
2017-12-31 23:39:59.999997696	1.87
2017-12-31 23:50:00.000002304	1.89
2017-12-31 23:59:59.999996672	1.88

Dep. Variable:	height	R-squared:	0.317
Model:	GLSAR	Adj. R-squared:	0.255
Method:	Least Squares	F-statistic:	5.949
Date:	Fri, 10 May 2019	Prob (F-statistic):	0.00233
Time:	15:58:08	Log-Likelihood:	-179.32
No. Observations:	37	AIC:	366.6
Df Residuals:	33	BIC:	373.1
Df Model:	3
Covariance Type:	HC0

	coef	std err	z	P>\|z\|	[0.025	0.975]
Constant	-54.8593	17.873	-3.069	0.002	-89.889	-19.829
Trend	2.0208	0.522	3.874	0.000	0.998	3.043
Nodal U	-1.5794	8.248	-0.191	0.848	-17.745	14.586
Nodal V	-19.1110	6.761	-2.827	0.005	-32.361	-5.861

Omnibus:	1.283	Durbin-Watson:	1.341
Prob(Omnibus):	0.526	Jarque-Bera (JB):	0.876
Skew:	-0.377	Prob(JB):	0.645
Kurtosis:	2.967	Cond. No.	98.6