Sea-level rise


In [48]:
# We need a few libraries
import io

import numpy as np
import pandas
import requests

import matplotlib.pyplot as plt
import matplotlib.style
matplotlib.style.use('ggplot')
%matplotlib inline

In [49]:
stations = [
    ['Vlissingen', 20, lambda x: x - (6976-46)],
    ['Hoek van Holland', 22, lambda x:x - (6994 - 121)],
    ['Den Helder', 23, lambda x: x - (6988-42)],
    ['Delfzijl', 24, lambda x: x - (6978-155)],
    ['Harlingen', 25, lambda x: x - (7036-122)],
    ['IJmuiden', 32, lambda x: x - (7033-83)],
]

# you could use the monthly data
monthly_url = 'http://www.psmsl.org/data/obtaining/rlr.monthly.data/%d.rlrdata'
# and you can use the annual dat
annual_url = 'http://www.psmsl.org/data/obtaining/rlr.annual.data/%d.rlrdata'

In [50]:
# Let's download them both
monthly_dfs = []
annual_dfs = []

for station, id, to_local in stations:
    f = io.BytesIO(requests.get(monthly_url % (id,)).content)
    df = pandas.read_csv(f, sep=';', names=('year', 'waterlevel', 'code', 'another code'))
    # add a column with the station name
    df['station'] = station
    # convert to local coordinate system
    df['nap'] = df['waterlevel'].apply(to_local)
    # ignore the part before 1890
    df = df[df.year >= 1890]
    monthly_dfs.append(df)
    
    f = io.BytesIO(requests.get(annual_url % (id,)).content)
    df = pandas.read_csv(f, sep=';', names=('year', 'waterlevel', 'code', 'another code'))
    df['station'] = station
    df['nap'] = df['waterlevel'].apply(to_local)
    df = df[df.year >= 1890]
    annual_dfs.append(df)

Now we have downloaded the stations, we can create a new station with the mean. You can use many other techniques, but the mean works just fine here.


In [51]:
# compute the monthly mean
monthly_df = pandas.concat(monthly_dfs)
mean = monthly_df.groupby('year').mean().reset_index()
mean['station'] = 'mean'
monthly_df = pandas.concat([monthly_df, mean[['year', 'waterlevel', 'nap', 'station']]])
monthly_df = monthly_df.reset_index()
# add the year number 
monthly_df['year_floor'] = np.floor(monthly_df['year'])

annual_df = pandas.concat(annual_dfs)
mean = annual_df.groupby('year').mean().reset_index()
mean['station'] = 'mean'
annual_df = pandas.concat([annual_df, mean[['year', 'waterlevel', 'nap', 'station']]])
annual_df = annual_df.reset_index()

# save the files to json
# add the annual mean also to the monthly means
monthly_df = monthly_df.merge(annual_df[['year', 'nap']], left_on='year_floor', right_on='year', suffixes=['', '_annual'])
monthly_df[['year', 'nap', 'nap_annual', 'station']].to_json('monthly.json', orient='records')
annual_df[['year', 'nap', 'station']].to_json('annual.json', orient='records')

df = annual_df

In [52]:
# in memory file
grouped = df.groupby(['station'])

In [53]:
fig, ax = plt.subplots(figsize=(10,7))
for station, df in grouped:
    annual_jitter = np.random.uniform(-0.5, 0.5, size=len(df))
    monthly_jitter = np.random.uniform(-0.5*1/12.0, 0.5*1/12.0, size=len(df))
    if station == 'mean':
        pch = '-'
        alpha = 1.0
        ax.plot(df['year'] + annual_jitter, df['nap']/10 , pch, alpha=alpha, label=station)
    else:
        pch = '.'
        alpha = 0.5
        ax.plot(df['year'] + annual_jitter, df['nap']/10 , pch, alpha=alpha, label=station)
    ax.set_ylabel('waterlevel [cm NAP]')
    ax.set_xlabel('year')
legend = ax.legend(loc='best')
legend.legendPatch.set_alpha(0.5)
legend.legendPatch.set_facecolor('white')

fig.savefig('stations.png')



In [54]:
import statsmodels.api as sm
df = grouped.get_group('mean')

In [55]:
y = df['waterlevel']
X = df['year']
X = sm.add_constant(X)

In [56]:
model = sm.OLS(y, X)
results = model.fit()
from IPython.display import display
display(results.summary())


OLS Regression Results
Dep. Variable: waterlevel R-squared: 0.828
Model: OLS Adj. R-squared: 0.827
Method: Least Squares F-statistic: 589.0
Date: Sun, 16 Nov 2014 Prob (F-statistic): 1.60e-48
Time: 23:49:03 Log-Likelihood: -598.81
No. Observations: 124 AIC: 1202.
Df Residuals: 122 BIC: 1207.
Df Model: 1
coef std err t P>|t| [95.0% Conf. Int.]
const 3219.4017 149.443 21.543 0.000 2923.564 3515.239
year 1.8581 0.077 24.269 0.000 1.707 2.010
Omnibus: 3.101 Durbin-Watson: 1.544
Prob(Omnibus): 0.212 Jarque-Bera (JB): 2.704
Skew: -0.357 Prob(JB): 0.259
Kurtosis: 3.120 Cond. No. 1.06e+05

In [57]:
def fit(startyear):
    df_selected = df[df['year'] > startyear]
    y = df_selected['waterlevel']
    X = df_selected['year']
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    display(results.summary())

In [58]:
from IPython.html.widgets import interactive

In [59]:
interactive(fit, startyear=(1890, 1980, 10))


OLS Regression Results
Dep. Variable: waterlevel R-squared: 0.681
Model: OLS Adj. R-squared: 0.677
Method: Least Squares F-statistic: 172.6
Date: Sun, 16 Nov 2014 Prob (F-statistic): 8.98e-22
Time: 23:49:06 Log-Likelihood: -403.92
No. Observations: 83 AIC: 811.8
Df Residuals: 81 BIC: 816.7
Df Model: 1
coef std err t P>|t| [95.0% Conf. Int.]
const 3107.2138 287.403 10.811 0.000 2535.372 3679.056
year 1.9146 0.146 13.138 0.000 1.625 2.205
Omnibus: 3.114 Durbin-Watson: 1.547
Prob(Omnibus): 0.211 Jarque-Bera (JB): 2.643
Skew: -0.434 Prob(JB): 0.267
Kurtosis: 3.101 Cond. No. 1.62e+05

In [60]:
def plot(startyear):
    df_selected = df[df['year'] > startyear]
    y = df_selected['waterlevel']
    X = df_selected['year']
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    fig, ax = plt.subplots()
    ax.plot(df['year'], df['waterlevel'], '.', alpha=0.1)
    ax.plot(df_selected['year'], results.fittedvalues)
    
    ax.set_ylim(6400,7300)

In [61]:
interactive(plot, startyear=(1860, 1980, 10))



In [62]:
from statsmodels.sandbox.regression.predstd import wls_prediction_std

def plot(startyear):
    df_selected = df[df['year'] > startyear]
    y = df_selected['waterlevel']
    X = df_selected['year']
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    fig, ax = plt.subplots()
    ax.plot(df['year'], df['waterlevel'], '.', alpha=0.1)
    ax.plot(df_selected['year'], results.fittedvalues)
    fit, lower, upper = wls_prediction_std(results)

    plt.fill_between(df_selected['year'], lower, upper, alpha=0.3)
    
    ax.set_ylim(6400,7300)

In [63]:
import statsmodels.sandbox.regression.predstd

In [64]:
interactive(plot, startyear=(1860, 1980, 10))



In [65]:
%load_ext rpy2.ipython


The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython

In [66]:
%Rpush df
%R library('ggplot2')


Out[66]:
<StrVector - Python:0x114c18998 / R:0x7ff2298824a8>
[str, str, str, ..., str, str, str]

In [67]:
def test(startyear):
    df_selected = df[df['year']>startyear]
    %Rpush df_selected
    %R print(ggplot(df, aes(year, waterlevel)) + geom_point(alpha=0.3) + stat_smooth(data=df_selected, method='loess'))
interactive(test, startyear=(1860, 1980, 10))



In [68]:
def plot(startyear):
    df_selected = df[(df['year'] >= startyear) & (df['year'] < (startyear + 18.6*2))]
    
    y = df_selected['waterlevel']
    X = np.c_[df_selected['year']-1970, (df_selected['year']-1970)**2]
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    fig, ax = plt.subplots()
    ax.plot(df['year'], df['waterlevel'], '.', alpha=0.3)
    ax.plot(df_selected['year'], results.fittedvalues, linewidth=5)

In [69]:
interactive(plot, startyear=(1860, 1980, 5))



In [70]:
def plot(startyear, n_periods):

    df_selected = df[(df['year'] >= startyear) & (df['year'] < (startyear + 18.6*n_periods))]
    
    y = df_selected['waterlevel']
    X = np.c_[
        df_selected['year']-1970, 
        np.cos(2*np.pi*(df_selected['year']-1970)/18.613),
        np.sin(2*np.pi*(df_selected['year']-1970)/18.613)

    ]
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    # e^{i\theta} = \cos\theta + i\sin\theta
    phase = np.angle(complex(results.params['x2'], results.params['x3']))
    ampl = np.sqrt(results.params['x2']**2 + results.params['x3']**2)
    
    # plot it
    fig, ax = plt.subplots()
    ax.set_title("Phase (rad): %.1f, amplitude: %.1f\nA(cos): %.1f, B(sin): %.1f" % (phase, ampl, results.params['x2'], results.params['x3']))
    ax.plot(df['year'], df['waterlevel'], '.', alpha=0.3)
    ax.plot(df_selected['year'], results.fittedvalues, linewidth=5)

In [71]:
interactive(plot, startyear=(1860, 1980, 5), n_periods=(2,5))



In [72]:
df = annual_df[annual_df['station']=='mean']
hkv = pandas.read_csv('full_output_station_gemiddeld.csv', sep=';')
df = df.merge(hkv[['year', 'U2sin', 'U2cos']], on='year', suffixes=('', '_hkv'))
df = df.reset_index()
y = df['nap']
X = np.c_[
    df['year']-1970, 
    np.cos(2*np.pi*(df['year']-1970)/18.613),
    np.sin(2*np.pi*(df['year']-1970)/18.613),
    df['U2cos'], 
    df['U2sin']
]
X = sm.add_constant(X)
model = sm.OLS(y, X)
results = model.fit()
linear = results.params['const'] + results.params['x1'] * (df['year']-1970)
nodal = results.params['x2'] * np.cos(2*np.pi*(df['year']-1970)/18.613) + results.params['x3'] * np.sin(2*np.pi*(df['year']-1970)/18.613)
wind = results.params['x4'] * df['U2cos'] + results.params['x5'] * df['U2sin']
df['linear'] = linear
df['nodal'] = nodal
df['wind'] = wind
df['residual'] = results.resid
df['loess'] = sm.nonparametric.lowess(df['residual'], df['year'], frac=60.0/len(df))[:,1]
df['residual_loess'] = df['residual'] - df['loess']

In [73]:
df[['year', 'nap', 'station', 'linear', 'nodal', 'residual', 'wind', 'loess', 'residual_loess']].to_json('fit.json', orient='records')

In [74]:
y = df['nap']
X = np.c_[
    np.ones(shape=(len(df),))
]
# compute semipartial correlations
rsquares = []
terms = [df['year']-1970, 
              np.c_[np.cos(2*np.pi*(df['year']-1970)/18.613), np.sin(2*np.pi*(df['year']-1970)/18.613)],
              np.c_[df['U2cos'], df['U2sin']]]
names = ['linear', 'nodal', 'wind']
rsquare = 0
for name, term in zip(names, terms):
    X = np.c_[X, term]
    model = sm.OLS(y, X)
    results = model.fit()
    rsquares.append((name, results.rsquared - rsquare))
    rsquare = results.rsquared
rsquares.append(('loess', 1- np.var(df['residual_loess'])/np.var(df['nap'])- rsquare))
rsquare = results.rsquared
rsquares.append(('residual', np.var(df['residual_loess'])/np.var(df['nap'])))


import json
json.dump(dict(rsquares), open('rsquares.json', 'w'))
rsquares


Out[74]:
[('linear', 0.82769308510876782),
 ('nodal', 0.019216129938318116),
 ('wind', 0.043763073036397149),
 ('loess', 0.0036103727985814515),
 ('residual', 0.10571733911793549)]

In [75]:
a = results.summary(yname='waterlevel', xname=['constant', 'linear', 'nodal cos', 'nodal sin', 'wind cos', 'wind sin'])
#print(a.as_latex())
a


Out[75]:
OLS Regression Results
Dep. Variable: waterlevel R-squared: 0.891
Model: OLS Adj. R-squared: 0.886
Method: Least Squares F-statistic: 190.6
Date: Sun, 16 Nov 2014 Prob (F-statistic): 1.71e-54
Time: 23:49:24 Log-Likelihood: -566.06
No. Observations: 123 AIC: 1144.
Df Residuals: 117 BIC: 1161.
Df Model: 5
coef std err t P>|t| [95.0% Conf. Int.]
constant -44.6608 6.507 -6.863 0.000 -57.548 -31.774
linear 1.7577 0.067 26.278 0.000 1.625 1.890
nodal cos 3.7984 3.233 1.175 0.242 -2.604 10.201
nodal sin -11.8578 3.138 -3.778 0.000 -18.073 -5.642
wind cos 1.2649 0.189 6.694 0.000 0.891 1.639
wind sin -0.4672 0.266 -1.753 0.082 -0.995 0.060
Omnibus: 0.139 Durbin-Watson: 1.435
Prob(Omnibus): 0.933 Jarque-Bera (JB): 0.311
Skew: 0.013 Prob(JB): 0.856
Kurtosis: 2.755 Cond. No. 122.

In [76]:
def plot(degree):
    fitted = pandas.DataFrame(dict(fit=results.resid, year=df['year']))
    %Rpush fitted
    %Rpush degree
    %R l = predict(loess(fitted, degree=degree, control=loess.control(surface='direct')), se=TRUE, newdata=data.frame(year=seq(1890,2100)))
    l = %Rget l
    import pandas.rpy.common as com
    l = com.load_data('l')
    plt.fill_between(np.arange(1890, 2101), l['fit'] - l['se.fit']*1.96, l['fit'] + l['se.fit']*1.96, alpha=0.5)
    plt.plot(np.arange(1890, 2101), l['fit'])
    plt.ylim(-200,400)
    plt.title('Loess model with degree %d' % degree)
    plt.ylabel("sea surface height residual ['mm']")
interactive(plot, degree=(1,2))



In [77]:
# The broken linear model
y = df['nap']
X = np.c_[
    df['year']-1970, 
    np.cos(2*np.pi*(df['year']-1970)/18.613),
    np.sin(2*np.pi*(df['year']-1970)/18.613),
    df['U2cos'], 
    df['U2sin'],
    (df['year']-1990 > 0)*(df['year']-1990)
]
X = sm.add_constant(X)
model = sm.OLS(y, X)
fit = model.fit()

In [78]:
%Rpush df 
%R fit <- lm(nap ~ year + I((year > 1990)*(year-1990)) + I(cos((2*pi*year-1970)/18.613)) + I(sin((2*pi*year-1970)/18.613) + U2cos + U2sin), data=df)
%R years <- seq(1890, 2100)
%R pred <- predict(fit, interval="confidence", newdata=data.frame(year=years, U2cos=rep(mean(df$U2cos), length(years)), U2sin=rep(mean(df$U2sin), length(years))))
pred = %Rget pred
%R print(summary(fit))


Call:
lm(formula = nap ~ year + I((year > 1990) * (year - 1990)) + 
    I(cos((2 * pi * year - 1970)/18.613)) + I(sin((2 * pi * year - 
    1970)/18.613) + U2cos + U2sin), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-66.682 -19.736   2.081  19.674  63.102 

Coefficients:
                                                        Estimate Std. Error
(Intercept)                                           -3.382e+03  1.867e+02
year                                                   1.688e+00  9.715e-02
I((year > 1990) * (year - 1990))                       8.555e-01  6.372e-01
I(cos((2 * pi * year - 1970)/18.613))                  5.595e+00  3.709e+00
I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin)  7.036e-01  1.751e-01
                                                      t value Pr(>|t|)    
(Intercept)                                           -18.112  < 2e-16 ***
year                                                   17.378  < 2e-16 ***
I((year > 1990) * (year - 1990))                        1.343 0.181947    
I(cos((2 * pi * year - 1970)/18.613))                   1.508 0.134162    
I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin)   4.018 0.000104 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 28.58 on 118 degrees of freedom
Multiple R-squared:  0.8528,	Adjusted R-squared:  0.8478 
F-statistic: 170.9 on 4 and 118 DF,  p-value: < 2.2e-16


In [79]:
def compute(split):
    %Rpush split
    %Rpush df 
    %R fit <- lm(nap ~  year + I((year > split)*(year-split)) + I(cos((2*pi*year-1970)/18.613)) + I(sin((2*pi*year-1970)/18.613) + U2cos + U2sin), data=df)
    %R years <- seq(1890, 2100)
    %R pred <- predict(fit, interval="confidence", newdata=data.frame(year=years, U2cos=rep(mean(df$U2cos), length(years)), U2sin=rep(mean(df$U2sin), length(years))))
    pred = %Rget pred
    %R print(summary(fit))
interactive(compute, split=(1980, 2000))


Call:
lm(formula = nap ~ year + I((year > split) * (year - split)) + 
    I(cos((2 * pi * year - 1970)/18.613)) + I(sin((2 * pi * year - 
    1970)/18.613) + U2cos + U2sin), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-66.682 -19.736   2.081  19.674  63.102 

Coefficients:
                                                        Estimate Std. Error
(Intercept)                                           -3.382e+03  1.867e+02
year                                                   1.688e+00  9.715e-02
I((year > split) * (year - split))                     8.555e-01  6.372e-01
I(cos((2 * pi * year - 1970)/18.613))                  5.595e+00  3.709e+00
I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin)  7.036e-01  1.751e-01
                                                      t value Pr(>|t|)    
(Intercept)                                           -18.112  < 2e-16 ***
year                                                   17.378  < 2e-16 ***
I((year > split) * (year - split))                      1.343 0.181947    
I(cos((2 * pi * year - 1970)/18.613))                   1.508 0.134162    
I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin)   4.018 0.000104 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 28.58 on 118 degrees of freedom
Multiple R-squared:  0.8528,	Adjusted R-squared:  0.8478 
F-statistic: 170.9 on 4 and 118 DF,  p-value: < 2.2e-16


In [80]:
plt.fill_between(np.arange(1890, 2101), np.array(pred)[:,1], np.array(pred)[:,2], alpha=0.5)
plt.plot(np.arange(1890, 2101), np.array(pred)[:,0])


Out[80]:
[<matplotlib.lines.Line2D at 0x1140ae490>]

In [81]:
# The linear model
%Rpush df 
%R fit <- lm(nap ~ year + I(cos((2*pi*year-1970)/18.613)) + I(sin((2*pi*year-1970)/18.613)) + U2cos + U2sin, data=df)
%R years <- seq(1890, 2100)
# Padd with means
%R U2cos = df$U2cos
%R U2cos[years>2012] = mean(df$U2cos)
%R U2sin = df$U2cos
%R U2sin[years>2012] = mean(df$U2sin)
%R pred <- predict(fit, interval="confidence", newdata=data.frame(year=years, U2cos=U2cos, U2sin=U2sin))
pred = %Rget pred
%R print(summary(fit))


Call:
lm(formula = nap ~ year + I(cos((2 * pi * year - 1970)/18.613)) + 
    I(sin((2 * pi * year - 1970)/18.613)) + U2cos + U2sin, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-59.919 -17.607   1.216  15.672  59.747 

Coefficients:
                                        Estimate Std. Error t value Pr(>|t|)
(Intercept)                           -3.507e+03  1.289e+02 -27.207  < 2e-16
year                                   1.758e+00  6.689e-02  26.278  < 2e-16
I(cos((2 * pi * year - 1970)/18.613))  3.427e+00  3.233e+00   1.060  0.29120
I(sin((2 * pi * year - 1970)/18.613)) -1.197e+01  3.139e+00  -3.814  0.00022
U2cos                                  1.265e+00  1.890e-01   6.694 7.94e-10
U2sin                                 -4.672e-01  2.665e-01  -1.753  0.08214
                                         
(Intercept)                           ***
year                                  ***
I(cos((2 * pi * year - 1970)/18.613))    
I(sin((2 * pi * year - 1970)/18.613)) ***
U2cos                                 ***
U2sin                                 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 24.73 on 117 degrees of freedom
Multiple R-squared:  0.8907,	Adjusted R-squared:  0.886 
F-statistic: 190.6 on 5 and 117 DF,  p-value: < 2.2e-16


In [124]:
fitted = pandas.DataFrame(dict(fit=results.resid, year=df['year']))
degree = 1
%Rpush fitted
%Rpush degree
%R l = predict(loess(fitted, degree=degree, control=loess.control(surface='direct')), se=TRUE, newdata=data.frame(year=seq(1890,2100)))
l = %Rget l
import pandas.rpy.common as com
l = com.load_data('l')
ci_loess = pandas.DataFrame(data=dict(year=np.arange(1890, 2101), se=l['se.fit'], fit=l['fit'], lwr=l['fit'] - l['se.fit']*1.96, upr=l['fit'] + l['se.fit']*1.96))

# compute angle
beta = (ci_loess[ci_loess['year'] == 2100].fit.item() - ci_loess[ci_loess['year'] == 2013].fit.item())/(2100-2013)
# compute std error (1.96 -> z 5%, 2 two-tailed)
se = ((ci_loess[ci_loess['year'] == 2100].upr.item() - ci_loess[ci_loess['year'] == 2100].lwr.item()))/1.96/2.0/(2100-2013)
beta, se


Out[124]:
(0.27802282630813135, 0.23436251419913748)

In [114]:
fitted = pandas.DataFrame(dict(fit=results.resid, year=df['year']))
degree = 2
%Rpush fitted
%Rpush degree
%R fit <- loess(fitted, degree=degree, control=loess.control(surface='direct'))
%R l <- predict(fit, se=TRUE, newdata=data.frame(year=seq(1890,2100)))

import pandas.rpy.common as com
l = com.load_data('l')
fit = %Rget fit

ci_loess2 = pandas.DataFrame(data=dict(year=np.arange(1890, 2101),  fit=l['fit'], lwr=l['fit'] - l['se.fit']*1.96, upr=l['fit'] + l['se.fit']*1.96))
#ci_loess = ci_loess.set_index('year')

In [84]:
ci_linear = com.load_data("pred")
ci_linear['year'] = np.arange(1890, 2101)
#ci_linear= ci_linear.set_index('year')

In [85]:
ci = pandas.merge(ci_loess, ci_loess2, on='year', suffixes=('_loess', '_loess2'))
ci = pandas.merge(ci_linear, ci, on='year', suffixes=('_linear', ''))
#ci['fit'].name('fit_linear')
#ci['lwr'].rename('lwr_linear')
#ci['upr'].rename('upr_linear')
ci.rename(columns={'fit':'fit_linear', 'lwr': 'lwr_linear', 'upr': 'upr_linear'}, inplace=True)
ci.to_csv('ci.csv')

In [86]:
plt.fill_between(np.arange(1890,2101), ci_linear['lwr'] + ci_loess['lwr'], ci_linear['upr'] + ci_loess['upr'])
plt.plot(np.arange(1890,2101), ci_linear['fit'] + ci_loess['fit'])
plt.plot(np.arange(1890,2101), ci_linear['fit'], color='black')


Out[86]:
[<matplotlib.lines.Line2D at 0x114c1b7d0>]

In [40]:
plt.fill_between(np.arange(1890,2101), ci_linear['lwr'] , ci_linear['upr'] )
plt.plot(np.arange(1890,2101), ci_linear['fit'])
plt.fill_between(np.arange(1890,2101), ci_loess['lwr'] , ci_loess['upr'] )
plt.plot(np.arange(1890,2101), ci_loess['fit'])


Out[40]:
[<matplotlib.lines.Line2D at 0x11390aad0>]

In [40]:


In [41]:
df = annual_df[annual_df['station']=='mean']
hkv = pandas.read_csv('full_output_station_gemiddeld.csv', sep=';')
df = df.merge(hkv[['year', 'U2sin', 'U2cos']], on='year', suffixes=('', '_hkv'))
df = df.reset_index()
y = df['nap']
plt.subplots(figsize=(10,6))
for split in np.arange(1990, 2000):
    X = np.c_[
        df['year']-1970, 
        (df['year']-split)*(df['year']>split),
        np.cos(2*np.pi*(df['year']-1970)/18.613),
        np.sin(2*np.pi*(df['year']-1970)/18.613),
        df['U2cos'], 
        df['U2sin']
    ]
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    plt.plot(df['year'], results.fittedvalues, label=str(split), alpha=0.3)



In [42]:
# Based on the records of Amsterdam/Den Helder
df = pandas.read_csv('/Users/baart_f/src/oetpython/applications/sealevel//sealevel/static/data/extra/9000.csv')

# Create a plot
fig, ax1 = plt.subplots(1,1, figsize=(10,6))
# compute a trend
def trend(index):
    # Select based on index
    df_selected = df.ix[index]
    # Create a linear model
    y = df_selected['waterlevel']
    X = np.c_[
        df_selected['year.month'],
        np.cos(2*np.pi*(df_selected['year.month']-1970)/18.613),
        np.sin(2*np.pi*(df_selected['year.month']-1970)/18.613)
    ]
    X = statsmodels.regression.linear_model.add_constant(X)
    model= statsmodels.regression.linear_model.OLS(y, X)
    # fit the model
    result = model.fit()
    # ignore if we have missings
    if np.isnan(df_selected['year.month']).any():
        return np.nan
    # create a plot
    ax1.plot(df_selected['year.month'], result.fittedvalues, 'k-', alpha=0.2, linewidth=3)
    # return the trend
    return result.params['x1']
df['waterlevel'][df['waterlevel'] == -99999] = np.nan
ax1.plot(df['year.month'], df['waterlevel'], '.')
ax1.set_xlabel('tijd [jaar]')
ax1.set_ylabel('zeespiegelniveau [m boven NAP]')
df = df[df['year.month']<1890]
betas = pandas.rolling_apply(np.array(df.index, dtype='int'),20, trend)
fig.savefig('amsterdam.pdf')



In [43]:
# standard error of beta_1 and confidence range 
np.std(betas[~np.isnan(betas)]), 1.96 * 2 * np.std(betas[~np.isnan(betas)])


Out[43]:
(0.17157195469765205, 0.67256206241479599)

In [44]:
# the same for the whole of amsterdam
resp = requests.get('http://www.psmsl.org/data/longrecords/amsterdam.sea.level')
f = io.BytesIO(resp.content)
df = pandas.read_fwf(f, widths=(10,20), names=('year.month', 'waterlevel'), skiprows=1)
df['waterlevel']/=10.0
# Create a plot
fig, ax1 = plt.subplots(1,1, figsize=(10,6))
ax1.plot(df['year.month'], df['waterlevel'], '.')
ax1.set_xlabel('tijd [jaar]')
ax1.set_ylabel('zeespiegelniveau [m boven NAP]')

betas = pandas.rolling_apply(np.array(df.index, dtype='int'),20, trend)
np.std(betas[~np.isnan(betas)]), 1.96 * 2 * np.std(betas[~np.isnan(betas)])


Out[44]:
(0.17411785038363373, 0.68254197350384416)

In [44]: