Sea-level rise



In [48]:

    
# We need a few libraries
import io

import numpy as np
import pandas
import requests

import matplotlib.pyplot as plt
import matplotlib.style
matplotlib.style.use('ggplot')
%matplotlib inline

Let's start by downloading the data from the different tide gauges. We use the 6 main Dutch stations.

http://www.psmsl.org/data/obtaining/stations/23.php Den Helder
http://www.psmsl.org/data/obtaining/stations/32.php IJmuiden
http://www.psmsl.org/data/obtaining/stations/20.php Vlissingen
http://www.psmsl.org/data/obtaining/stations/25.php Harlingen
http://www.psmsl.org/data/obtaining/stations/22.php Hoek van Holland
http://www.psmsl.org/data/obtaining/stations/24.php Delfzijl



In [49]:

    
def _offset_converter(offset):
    """Return a function that subtracts `offset` from a water level."""
    def convert(level):
        return level - offset
    return convert


# One entry per station: [name, PSMSL station id, converter for the raw levels].
# The converters feed the 'nap' column; presumably the constants are the
# RLR-datum and NAP offsets in mm -- TODO confirm against the PSMSL station pages.
stations = [
    ['Vlissingen', 20, _offset_converter(6976 - 46)],
    ['Hoek van Holland', 22, _offset_converter(6994 - 121)],
    ['Den Helder', 23, _offset_converter(6988 - 42)],
    ['Delfzijl', 24, _offset_converter(6978 - 155)],
    ['Harlingen', 25, _offset_converter(7036 - 122)],
    ['IJmuiden', 32, _offset_converter(7033 - 83)],
]

# URL templates (filled with the station id) for the monthly ...
monthly_url = 'http://www.psmsl.org/data/obtaining/rlr.monthly.data/%d.rlrdata'
# ... and for the annual data
annual_url = 'http://www.psmsl.org/data/obtaining/rlr.annual.data/%d.rlrdata'



In [50]:

    
# Download both the monthly and the annual record of every station.

# Flag value PSMSL uses for missing observations (the same flag is masked for
# the Amsterdam record further down in this notebook).
PSMSL_MISSING = -99999


def _read_psmsl(url_template, station, station_id, to_local, first_year=1890):
    """Download one PSMSL RLR file and return it as a DataFrame.

    Parameters
    ----------
    url_template : str
        URL with a ``%d`` placeholder for the station id.
    station : str
        Station name, stored in the 'station' column.
    station_id : int
        PSMSL station id.
    to_local : callable
        Converts a raw water level to the local datum (stored in 'nap').
    first_year : number
        Rows with ``year`` before this value are dropped.

    Raises
    ------
    requests.HTTPError
        If the server does not answer with a success status.
    """
    response = requests.get(url_template % (station_id,), timeout=60)
    # fail loudly instead of parsing an error page as data
    response.raise_for_status()
    frame = pandas.read_csv(
        io.BytesIO(response.content),
        sep=';',
        names=('year', 'waterlevel', 'code', 'another code'),
    )
    # missing observations must not end up in the means computed later on
    frame['waterlevel'] = frame['waterlevel'].replace(PSMSL_MISSING, np.nan)
    # add a column with the station name
    frame['station'] = station
    # convert to local coordinate system
    frame['nap'] = frame['waterlevel'].apply(to_local)
    # ignore the part before first_year
    return frame[frame.year >= first_year]


monthly_dfs = []
annual_dfs = []
for station, station_id, to_local in stations:
    monthly_dfs.append(_read_psmsl(monthly_url, station, station_id, to_local))
    annual_dfs.append(_read_psmsl(annual_url, station, station_id, to_local))

Now that we have downloaded the stations, we can create a new station from their mean. You could use many other techniques, but the mean works just fine here.



In [51]:

    
def _with_station_mean(frames):
    """Stack the station frames and append a pseudo-station called 'mean'.

    The mean is taken per value of 'year' over the numeric level columns only;
    the frames also carry string columns ('station', flag codes) that cannot
    be averaged. The old index is kept as an 'index' column, as before.
    """
    stacked = pandas.concat(frames)
    station_mean = stacked.groupby('year')[['waterlevel', 'nap']].mean().reset_index()
    station_mean['station'] = 'mean'
    stacked = pandas.concat([stacked, station_mean[['year', 'waterlevel', 'nap', 'station']]])
    return stacked.reset_index()


# compute the monthly mean
monthly_df = _with_station_mean(monthly_dfs)
# add the year number
monthly_df['year_floor'] = np.floor(monthly_df['year'])

annual_df = _with_station_mean(annual_dfs)

# save the files to json
# add the annual value of the same station and year to each monthly row;
# joining on the year alone would pair every monthly row with all stations
monthly_df = monthly_df.merge(
    annual_df[['year', 'station', 'nap']],
    left_on=['year_floor', 'station'],
    right_on=['year', 'station'],
    suffixes=('', '_annual'),
)
monthly_df[['year', 'nap', 'nap_annual', 'station']].to_json('monthly.json', orient='records')
annual_df[['year', 'nap', 'station']].to_json('annual.json', orient='records')

# the rest of the notebook works on the annual data
df = annual_df



In [52]:

    
# Group the annual data per station. Group by the plain column name, not a
# one-element list: with a list newer pandas versions hand out 1-tuple keys,
# which breaks comparisons such as `station == 'mean'` and get_group('mean').
grouped = df.groupby('station')



In [53]:

    
# Plot every station as jittered points and the station mean as a line.
fig, ax = plt.subplots(figsize=(10, 7))
# fixed seed so that the saved figure is reproducible
jitter_rng = np.random.RandomState(0)
for station, station_df in grouped:
    # a list-valued groupby key yields 1-tuples on newer pandas versions
    name = station[0] if isinstance(station, tuple) else station
    if name == 'mean':
        # the mean is drawn as a line, so it must keep its exact x positions
        ax.plot(station_df['year'], station_df['nap'] / 10, '-', alpha=1.0, label=name)
    else:
        # spread the points within their year to reduce overplotting
        jitter = jitter_rng.uniform(-0.5, 0.5, size=len(station_df))
        ax.plot(station_df['year'] + jitter, station_df['nap'] / 10, '.', alpha=0.5, label=name)
ax.set_ylabel('waterlevel [cm NAP]')
ax.set_xlabel('year')
legend = ax.legend(loc='best')
legend.get_frame().set_alpha(0.5)
legend.get_frame().set_facecolor('white')

fig.savefig('stations.png')



In [54]:

    
import statsmodels.api as sm
# From here on `df` is the 'mean' pseudo-station only (rebinds the module-level `df`)
df = grouped.get_group('mean')



In [55]:

    
# Response: water level of the mean station; regressors: intercept plus year
y = df['waterlevel']
X = sm.add_constant(df['year'])



In [56]:

    
from IPython.display import display

# Ordinary least squares of the water level on the year, with the summary shown below
model = sm.OLS(y, X)
results = model.fit()
display(results.summary())









    





OLS Regression Results

  Dep. Variable:        waterlevel       R-squared:             0.828


  Model:                    OLS          Adj. R-squared:        0.827


  Method:              Least Squares     F-statistic:           589.0


  Date:              Sun, 16 Nov 2014    Prob (F-statistic):  1.60e-48


  Time:                  23:49:03        Log-Likelihood:      -598.81


  No. Observations:          124         AIC:                   1202.


  Df Residuals:              122         BIC:                   1207.


  Df Model:                    1                                     




           coef      std err       t       P>|t|  [95.0% Conf. Int.] 


  const   3219.4017    149.443     21.543   0.000   2923.564  3515.239


  year       1.8581      0.077     24.269   0.000      1.707     2.010




  Omnibus:         3.101    Durbin-Watson:         1.544


  Prob(Omnibus):   0.212    Jarque-Bera (JB):      2.704


  Skew:           -0.357    Prob(JB):              0.259


  Kurtosis:        3.120    Cond. No.           1.06e+05



In [57]:

    
def fit(startyear):
    """Display the OLS summary of water level against year for recent data.

    Reads the module-level frame `df` and keeps only the rows whose year is
    strictly greater than `startyear`. Nothing is returned; the summary is
    shown with `display`.
    """
    recent = df[df['year'] > startyear]
    design = sm.add_constant(recent['year'])
    trend_fit = sm.OLS(recent['waterlevel'], design).fit()
    display(trend_fit.summary())



In [58]:

    
# The notebook widgets moved out of IPython into the separate ipywidgets
# package; fall back to the old location on installations that predate the move.
try:
    from ipywidgets import interactive
except ImportError:
    from IPython.html.widgets import interactive



In [59]:

    
# Slider over the start year (1890 to 1980 in steps of 10); each change re-runs `fit`
interactive(fit, startyear=(1890, 1980, 10))









    





OLS Regression Results

  Dep. Variable:        waterlevel       R-squared:             0.681


  Model:                    OLS          Adj. R-squared:        0.677


  Method:              Least Squares     F-statistic:           172.6


  Date:              Sun, 16 Nov 2014    Prob (F-statistic):  8.98e-22


  Time:                  23:49:06        Log-Likelihood:      -403.92


  No. Observations:           83         AIC:                   811.8


  Df Residuals:               81         BIC:                   816.7


  Df Model:                    1                                     




           coef      std err       t       P>|t|  [95.0% Conf. Int.] 


  const   3107.2138    287.403     10.811   0.000   2535.372  3679.056


  year       1.9146      0.146     13.138   0.000      1.625     2.205




  Omnibus:         3.114    Durbin-Watson:         1.547


  Prob(Omnibus):   0.211    Jarque-Bera (JB):      2.643


  Skew:           -0.434    Prob(JB):              0.267


  Kurtosis:        3.101    Cond. No.           1.62e+05



In [60]:

    
def plot(startyear):
    """Plot all observations and the linear trend fitted after `startyear`.

    Reads the module-level frame `df`; the regression uses the rows whose
    year is strictly greater than `startyear`. This name is redefined by
    later cells of the notebook.
    """
    recent = df[df['year'] > startyear]
    trend_fit = sm.OLS(recent['waterlevel'], sm.add_constant(recent['year'])).fit()

    fig, ax = plt.subplots()
    # all observations, faded, with the fitted line on top
    ax.plot(df['year'], df['waterlevel'], '.', alpha=0.1)
    ax.plot(recent['year'], trend_fit.fittedvalues)
    ax.set_ylim(6400, 7300)



In [61]:

    
# Slider over the start year; the data were cut at 1890 on download, so lower values select every row
interactive(plot, startyear=(1860, 1980, 10))



In [62]:

    
from statsmodels.sandbox.regression.predstd import wls_prediction_std

def plot(startyear):
    """Plot the linear trend fitted after `startyear` with its prediction band.

    Reads the module-level frame `df`; the regression uses the rows whose
    year is strictly greater than `startyear`. Replaces the earlier `plot`.
    """
    recent = df[df['year'] > startyear]
    trend_fit = sm.OLS(recent['waterlevel'], sm.add_constant(recent['year'])).fit()

    fig, ax = plt.subplots()
    ax.plot(df['year'], df['waterlevel'], '.', alpha=0.1)
    ax.plot(recent['year'], trend_fit.fittedvalues)

    # the first element returned is not needed for the band
    _, band_lower, band_upper = wls_prediction_std(trend_fit)
    ax.fill_between(recent['year'], band_lower, band_upper, alpha=0.3)

    ax.set_ylim(6400, 7300)



In [63]:

    
import statsmodels.sandbox.regression.predstd



In [64]:

    
# Same slider, now driving the version of `plot` that also draws the prediction band
interactive(plot, startyear=(1860, 1980, 10))



In [65]:

    
# Load the rpy2 extension; the %R, %Rpush and %Rget magics below depend on it
%load_ext rpy2.ipython









    



The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython



In [66]:

    
# Copy the Python frame `df` into the R session and load ggplot2 for the plots below
%Rpush df
%R library('ggplot2')









    Out[66]:





<StrVector - Python:0x114c18998 / R:0x7ff2298824a8>
[str, str, str, ..., str, str, str]



In [67]:

    
def test(startyear):
    """Draw all points with ggplot2 and a loess smoother over the years after `startyear`.

    Reads the module-level `df`; the R code uses the `df` that an earlier
    cell pushed into the R session plus the subset pushed here.
    """
    # rows strictly after startyear feed the smoother
    df_selected = df[df['year']>startyear]
    %Rpush df_selected
    %R print(ggplot(df, aes(year, waterlevel)) + geom_point(alpha=0.3) + stat_smooth(data=df_selected, method='loess'))
# slider over the start year, 1860 to 1980 in steps of 10
interactive(test, startyear=(1860, 1980, 10))



In [68]:

    
def plot(startyear):
    """Fit a quadratic trend on a window of 2 * 18.6 years starting at `startyear`.

    Reads the module-level frame `df`; the window is inclusive at the start
    and exclusive at the end. Replaces the earlier `plot`.
    """
    window = 18.6*2
    in_window = (df['year'] >= startyear) & (df['year'] < (startyear + window))
    df_selected = df[in_window]

    # time relative to 1970, linear and squared
    years_since_1970 = df_selected['year']-1970
    design = sm.add_constant(np.c_[years_since_1970, years_since_1970**2])
    quadratic_fit = sm.OLS(df_selected['waterlevel'], design).fit()

    fig, ax = plt.subplots()
    ax.plot(df['year'], df['waterlevel'], '.', alpha=0.3)
    ax.plot(df_selected['year'], quadratic_fit.fittedvalues, linewidth=5)



In [69]:

    
# Slide the start of the fitting window from 1860 to 1980 in steps of 5 years
interactive(plot, startyear=(1860, 1980, 5))



In [70]:

    
def plot(startyear, n_periods):
    """Fit a linear trend plus an 18.613-year cycle on a window of the data.

    The window starts at `startyear` (inclusive) and spans 18.6 * `n_periods`
    years of the module-level frame `df`. The phase and amplitude of the
    fitted cycle are shown in the title. Replaces the earlier `plot`.
    """
    window_end = startyear + 18.6*n_periods
    df_selected = df[(df['year'] >= startyear) & (df['year'] < window_end)]

    years_since_1970 = df_selected['year']-1970
    cycle_angle = 2*np.pi*years_since_1970/18.613
    design = sm.add_constant(np.c_[years_since_1970, np.cos(cycle_angle), np.sin(cycle_angle)])
    nodal_fit = sm.OLS(df_selected['waterlevel'], design).fit()

    # e^{i\theta} = \cos\theta + i\sin\theta
    cos_coef = nodal_fit.params['x2']
    sin_coef = nodal_fit.params['x3']
    phase = np.angle(complex(cos_coef, sin_coef))
    ampl = np.sqrt(cos_coef**2 + sin_coef**2)

    # plot it
    fig, ax = plt.subplots()
    title = "Phase (rad): %.1f, amplitude: %.1f\nA(cos): %.1f, B(sin): %.1f" % (phase, ampl, cos_coef, sin_coef)
    ax.set_title(title)
    ax.plot(df['year'], df['waterlevel'], '.', alpha=0.3)
    ax.plot(df_selected['year'], nodal_fit.fittedvalues, linewidth=5)



In [71]:

    
# Sliders for the window start (1860 to 1980, step 5) and its length in 18.6-year periods (2 to 5)
interactive(plot, startyear=(1860, 1980, 5), n_periods=(2,5))



In [72]:

    
# Regress the station mean on a linear trend, the 18.613-year cycle and the
# wind terms read from the HKV file, then store every component on `df`.
hkv = pandas.read_csv('full_output_station_gemiddeld.csv', sep=';')
df = annual_df[annual_df['station']=='mean']
df = df.merge(hkv[['year', 'U2sin', 'U2cos']], on='year', suffixes=('', '_hkv')).reset_index()

# regressors shared by the fit and by the component sums below
years_since_1970 = df['year']-1970
nodal_cos = np.cos(2*np.pi*years_since_1970/18.613)
nodal_sin = np.sin(2*np.pi*years_since_1970/18.613)

y = df['nap']
X = sm.add_constant(np.c_[years_since_1970, nodal_cos, nodal_sin, df['U2cos'], df['U2sin']])
model = sm.OLS(y, X)
results = model.fit()

# contribution of each group of terms to the fitted water level
coefficients = results.params
linear = coefficients['const'] + coefficients['x1'] * years_since_1970
nodal = coefficients['x2'] * nodal_cos + coefficients['x3'] * nodal_sin
wind = coefficients['x4'] * df['U2cos'] + coefficients['x5'] * df['U2sin']
df['linear'] = linear
df['nodal'] = nodal
df['wind'] = wind
df['residual'] = results.resid
# smooth the residual with lowess and keep what the smoother leaves over
smoothed = sm.nonparametric.lowess(df['residual'], df['year'], frac=60.0/len(df))
df['loess'] = smoothed[:,1]
df['residual_loess'] = df['residual'] - df['loess']



In [73]:

    
# Write the observations and the fitted components to fit.json, one record per year
fit_columns = ['year', 'nap', 'station', 'linear', 'nodal', 'residual', 'wind', 'loess', 'residual_loess']
df[fit_columns].to_json('fit.json', orient='records')



In [74]:

    
import json

# Share of the variance of the water level added by each group of terms.
# The groups enter the model one after the other, so each share is the gain
# in R-squared over the model that precedes it.
y = df['nap']
X = np.c_[
    np.ones(shape=(len(df),))
]
# compute semipartial correlations
rsquares = []
terms = [df['year']-1970,
              np.c_[np.cos(2*np.pi*(df['year']-1970)/18.613), np.sin(2*np.pi*(df['year']-1970)/18.613)],
              np.c_[df['U2cos'], df['U2sin']]]
names = ['linear', 'nodal', 'wind']
rsquare = 0
for name, term in zip(names, terms):
    X = np.c_[X, term]
    model = sm.OLS(y, X)
    results = model.fit()
    rsquares.append((name, results.rsquared - rsquare))
    rsquare = results.rsquared
# what the loess smoother of the residual explains on top of the full model ...
unexplained = np.var(df['residual_loess'])/np.var(df['nap'])
rsquares.append(('loess', 1- unexplained- rsquare))
# ... and the share that remains unexplained
rsquares.append(('residual', unexplained))

# close the file deterministically instead of leaving the handle open
with open('rsquares.json', 'w') as rsquares_file:
    json.dump(dict(rsquares), rsquares_file)
rsquares









    Out[74]:





[('linear', 0.82769308510876782),
 ('nodal', 0.019216129938318116),
 ('wind', 0.043763073036397149),
 ('loess', 0.0036103727985814515),
 ('residual', 0.10571733911793549)]



In [75]:

    
# Summary of the last fitted model with readable names for its six terms
term_labels = ['constant', 'linear', 'nodal cos', 'nodal sin', 'wind cos', 'wind sin']
a = results.summary(yname='waterlevel', xname=term_labels)
#print(a.as_latex())
a









    Out[75]:





OLS Regression Results

  Dep. Variable:        waterlevel       R-squared:             0.891


  Model:                    OLS          Adj. R-squared:        0.886


  Method:              Least Squares     F-statistic:           190.6


  Date:              Sun, 16 Nov 2014    Prob (F-statistic):  1.71e-54


  Time:                  23:49:24        Log-Likelihood:      -566.06


  No. Observations:          123         AIC:                   1144.


  Df Residuals:              117         BIC:                   1161.


  Df Model:                    5                                     




               coef      std err       t       P>|t|  [95.0% Conf. Int.] 


  constant     -44.6608      6.507     -6.863   0.000    -57.548   -31.774


  linear         1.7577      0.067     26.278   0.000      1.625     1.890


  nodal cos      3.7984      3.233      1.175   0.242     -2.604    10.201


  nodal sin    -11.8578      3.138     -3.778   0.000    -18.073    -5.642


  wind cos       1.2649      0.189      6.694   0.000      0.891     1.639


  wind sin      -0.4672      0.266     -1.753   0.082     -0.995     0.060




  Omnibus:         0.139    Durbin-Watson:         1.435


  Prob(Omnibus):   0.933    Jarque-Bera (JB):      0.311


  Skew:            0.013    Prob(JB):              0.856


  Kurtosis:        2.755    Cond. No.               122.



In [76]:

    
def plot(degree):
    """Extrapolate the regression residuals to 2100 with an R loess fit of the given degree.

    Reads the module-level `results` and `df`; replaces the earlier `plot`.
    NOTE(review): `results` is whichever model was fitted last, so the output
    depends on the order in which the cells were run.
    """
    fitted = pandas.DataFrame(dict(fit=results.resid, year=df['year']))
    %Rpush fitted
    %Rpush degree
    # the data frame itself is passed where loess takes its formula
    # NOTE(review): presumably R reads it as fit ~ year -- confirm
    %R l = predict(loess(fitted, degree=degree, control=loess.control(surface='direct')), se=TRUE, newdata=data.frame(year=seq(1890,2100)))
    # this value is overwritten two lines below by com.load_data
    l = %Rget l
    import pandas.rpy.common as com
    l = com.load_data('l')
    # band of 1.96 standard errors around the fit
    plt.fill_between(np.arange(1890, 2101), l['fit'] - l['se.fit']*1.96, l['fit'] + l['se.fit']*1.96, alpha=0.5)
    plt.plot(np.arange(1890, 2101), l['fit'])
    plt.ylim(-200,400)
    plt.title('Loess model with degree %d' % degree)
    plt.ylabel("sea surface height residual ['mm']")
# choose between a locally linear (1) and a locally quadratic (2) fit
interactive(plot, degree=(1,2))



In [77]:

    
# The broken linear model: the earlier regressors plus a ramp that is zero
# up to 1990 and grows by one per year afterwards.
# Note: the result is stored in `fit`, which rebinds the function of that name.
years_since_1970 = df['year']-1970
years_since_1990 = df['year']-1990
y = df['nap']
X = sm.add_constant(np.c_[
    years_since_1970,
    np.cos(2*np.pi*years_since_1970/18.613),
    np.sin(2*np.pi*years_since_1970/18.613),
    df['U2cos'],
    df['U2sin'],
    (years_since_1990 > 0)*years_since_1990
])
model = sm.OLS(y, X)
fit = model.fit()



In [78]:

    
%Rpush df 
%R fit <- lm(nap ~ year + I((year > 1990)*(year-1990)) + I(cos((2*pi*year-1970)/18.613)) + I(sin((2*pi*year-1970)/18.613) + U2cos + U2sin), data=df)
%R years <- seq(1890, 2100)
%R pred <- predict(fit, interval="confidence", newdata=data.frame(year=years, U2cos=rep(mean(df$U2cos), length(years)), U2sin=rep(mean(df$U2sin), length(years))))
pred = %Rget pred
%R print(summary(fit))









    





Call:
lm(formula = nap ~ year + I((year > 1990) * (year - 1990)) + 
    I(cos((2 * pi * year - 1970)/18.613)) + I(sin((2 * pi * year - 
    1970)/18.613) + U2cos + U2sin), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-66.682 -19.736   2.081  19.674  63.102 

Coefficients:
                                                        Estimate Std. Error
(Intercept)                                           -3.382e+03  1.867e+02
year                                                   1.688e+00  9.715e-02
I((year > 1990) * (year - 1990))                       8.555e-01  6.372e-01
I(cos((2 * pi * year - 1970)/18.613))                  5.595e+00  3.709e+00
I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin)  7.036e-01  1.751e-01
                                                      t value Pr(>|t|)    
(Intercept)                                           -18.112  < 2e-16 ***
year                                                   17.378  < 2e-16 ***
I((year > 1990) * (year - 1990))                        1.343 0.181947    
I(cos((2 * pi * year - 1970)/18.613))                   1.508 0.134162    
I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin)   4.018 0.000104 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 28.58 on 118 degrees of freedom
Multiple R-squared:  0.8528,	Adjusted R-squared:  0.8478 
F-statistic: 170.9 on 4 and 118 DF,  p-value: < 2.2e-16



In [79]:

    
def compute(split):
    %Rpush split
    %Rpush df 
    %R fit <- lm(nap ~  year + I((year > split)*(year-split)) + I(cos((2*pi*year-1970)/18.613)) + I(sin((2*pi*year-1970)/18.613) + U2cos + U2sin), data=df)
    %R years <- seq(1890, 2100)
    %R pred <- predict(fit, interval="confidence", newdata=data.frame(year=years, U2cos=rep(mean(df$U2cos), length(years)), U2sin=rep(mean(df$U2sin), length(years))))
    pred = %Rget pred
    %R print(summary(fit))
interactive(compute, split=(1980, 2000))









    





Call:
lm(formula = nap ~ year + I((year > split) * (year - split)) + 
    I(cos((2 * pi * year - 1970)/18.613)) + I(sin((2 * pi * year - 
    1970)/18.613) + U2cos + U2sin), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-66.682 -19.736   2.081  19.674  63.102 

Coefficients:
                                                        Estimate Std. Error
(Intercept)                                           -3.382e+03  1.867e+02
year                                                   1.688e+00  9.715e-02
I((year > split) * (year - split))                     8.555e-01  6.372e-01
I(cos((2 * pi * year - 1970)/18.613))                  5.595e+00  3.709e+00
I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin)  7.036e-01  1.751e-01
                                                      t value Pr(>|t|)    
(Intercept)                                           -18.112  < 2e-16 ***
year                                                   17.378  < 2e-16 ***
I((year > split) * (year - split))                      1.343 0.181947    
I(cos((2 * pi * year - 1970)/18.613))                   1.508 0.134162    
I(sin((2 * pi * year - 1970)/18.613) + U2cos + U2sin)   4.018 0.000104 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 28.58 on 118 degrees of freedom
Multiple R-squared:  0.8528,	Adjusted R-squared:  0.8478 
F-statistic: 170.9 on 4 and 118 DF,  p-value: < 2.2e-16



In [80]:

    
# Columns of `pred` as used here: 0 = fit, 1 and 2 = bounds of the confidence band
prediction = np.array(pred)
prediction_years = np.arange(1890, 2101)
plt.fill_between(prediction_years, prediction[:,1], prediction[:,2], alpha=0.5)
plt.plot(prediction_years, prediction[:,0])









    Out[80]:





[<matplotlib.lines.Line2D at 0x1140ae490>]



In [81]:

    
# The linear model
%Rpush df 
%R fit <- lm(nap ~ year + I(cos((2*pi*year-1970)/18.613)) + I(sin((2*pi*year-1970)/18.613)) + U2cos + U2sin, data=df)
%R years <- seq(1890, 2100)
# Padd with means
%R U2cos = df$U2cos
%R U2cos[years>2012] = mean(df$U2cos)
%R U2sin = df$U2cos
%R U2sin[years>2012] = mean(df$U2sin)
%R pred <- predict(fit, interval="confidence", newdata=data.frame(year=years, U2cos=U2cos, U2sin=U2sin))
pred = %Rget pred
%R print(summary(fit))









    





Call:
lm(formula = nap ~ year + I(cos((2 * pi * year - 1970)/18.613)) + 
    I(sin((2 * pi * year - 1970)/18.613)) + U2cos + U2sin, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-59.919 -17.607   1.216  15.672  59.747 

Coefficients:
                                        Estimate Std. Error t value Pr(>|t|)
(Intercept)                           -3.507e+03  1.289e+02 -27.207  < 2e-16
year                                   1.758e+00  6.689e-02  26.278  < 2e-16
I(cos((2 * pi * year - 1970)/18.613))  3.427e+00  3.233e+00   1.060  0.29120
I(sin((2 * pi * year - 1970)/18.613)) -1.197e+01  3.139e+00  -3.814  0.00022
U2cos                                  1.265e+00  1.890e-01   6.694 7.94e-10
U2sin                                 -4.672e-01  2.665e-01  -1.753  0.08214
                                         
(Intercept)                           ***
year                                  ***
I(cos((2 * pi * year - 1970)/18.613))    
I(sin((2 * pi * year - 1970)/18.613)) ***
U2cos                                 ***
U2sin                                 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 24.73 on 117 degrees of freedom
Multiple R-squared:  0.8907,	Adjusted R-squared:  0.886 
F-statistic: 190.6 on 5 and 117 DF,  p-value: < 2.2e-16



In [124]:

    
# Extrapolate the regression residuals to 2100 with a locally linear (degree 1) loess fit in R.
# NOTE(review): `results` is whichever model was fitted last; this cell was run out of order.
fitted = pandas.DataFrame(dict(fit=results.resid, year=df['year']))
degree = 1
%Rpush fitted
%Rpush degree
%R l = predict(loess(fitted, degree=degree, control=loess.control(surface='direct')), se=TRUE, newdata=data.frame(year=seq(1890,2100)))
# this value is overwritten two lines below by com.load_data
l = %Rget l
import pandas.rpy.common as com
l = com.load_data('l')
# fit, standard error and a band of 1.96 standard errors for every year 1890..2100
ci_loess = pandas.DataFrame(data=dict(year=np.arange(1890, 2101), se=l['se.fit'], fit=l['fit'], lwr=l['fit'] - l['se.fit']*1.96, upr=l['fit'] + l['se.fit']*1.96))

# slope of the loess fit between 2013 and 2100, per year
beta = (ci_loess[ci_loess['year'] == 2100].fit.item() - ci_loess[ci_loess['year'] == 2013].fit.item())/(2100-2013)
# width of the band in 2100, turned back into one standard error (divide by 1.96 and by 2) and expressed per year
se = ((ci_loess[ci_loess['year'] == 2100].upr.item() - ci_loess[ci_loess['year'] == 2100].lwr.item()))/1.96/2.0/(2100-2013)
beta, se









    Out[124]:





(0.27802282630813135, 0.23436251419913748)



In [114]:

    
# Same extrapolation of the residuals with a locally quadratic (degree 2) loess fit.
# NOTE(review): `results` is whichever model was fitted last; this cell was run out of order.
fitted = pandas.DataFrame(dict(fit=results.resid, year=df['year']))
degree = 2
%Rpush fitted
%Rpush degree
%R fit <- loess(fitted, degree=degree, control=loess.control(surface='direct'))
%R l <- predict(fit, se=TRUE, newdata=data.frame(year=seq(1890,2100)))

import pandas.rpy.common as com
l = com.load_data('l')
# note: this rebinds the Python name `fit` to the R loess object
fit = %Rget fit

# fit and a band of 1.96 standard errors for every year 1890..2100
ci_loess2 = pandas.DataFrame(data=dict(year=np.arange(1890, 2101),  fit=l['fit'], lwr=l['fit'] - l['se.fit']*1.96, upr=l['fit'] + l['se.fit']*1.96))
#ci_loess = ci_loess.set_index('year')



In [84]:

    
# Fetch the R prediction `pred` as a DataFrame and label its rows with the prediction years
prediction_years = np.arange(1890, 2101)
ci_linear = com.load_data("pred")
ci_linear['year'] = prediction_years
#ci_linear= ci_linear.set_index('year')



In [85]:

    
# Combine the two loess extrapolations and the linear prediction into one table per year
loess_both = pandas.merge(ci_loess, ci_loess2, on='year', suffixes=('_loess', '_loess2'))
ci = pandas.merge(ci_linear, loess_both, on='year', suffixes=('_linear', ''))
# the columns of the linear prediction get an explicit suffix as well
linear_names = {'fit':'fit_linear', 'lwr': 'lwr_linear', 'upr': 'upr_linear'}
ci = ci.rename(columns=linear_names)
ci.to_csv('ci.csv')



In [86]:

    
# Linear prediction plus the degree-1 loess extrapolation of the residuals,
# with the linear prediction alone drawn in black
prediction_years = np.arange(1890,2101)
band_lower = ci_linear['lwr'] + ci_loess['lwr']
band_upper = ci_linear['upr'] + ci_loess['upr']
plt.fill_between(prediction_years, band_lower, band_upper)
plt.plot(prediction_years, ci_linear['fit'] + ci_loess['fit'])
plt.plot(prediction_years, ci_linear['fit'], color='black')









    Out[86]:





[<matplotlib.lines.Line2D at 0x114c1b7d0>]



In [40]:

    
# The linear prediction and the loess extrapolation, each with its own band
prediction_years = np.arange(1890,2101)
for interval in (ci_linear, ci_loess):
    plt.fill_between(prediction_years, interval['lwr'] , interval['upr'] )
    lines = plt.plot(prediction_years, interval['fit'])
lines









    Out[40]:





[<matplotlib.lines.Line2D at 0x11390aad0>]



In [40]:



In [41]:

    
# Fit the broken-linear model for every break year 1990..1999 and overlay the fits.
df = annual_df[annual_df['station']=='mean']
hkv = pandas.read_csv('full_output_station_gemiddeld.csv', sep=';')
df = df.merge(hkv[['year', 'U2sin', 'U2cos']], on='year', suffixes=('', '_hkv'))
df = df.reset_index()
y = df['nap']
fig, ax = plt.subplots(figsize=(10,6))
for split in np.arange(1990, 2000):
    X = np.c_[
        df['year']-1970,
        # ramp: zero up to the break year, one per year afterwards
        (df['year']-split)*(df['year']>split),
        np.cos(2*np.pi*(df['year']-1970)/18.613),
        np.sin(2*np.pi*(df['year']-1970)/18.613),
        df['U2cos'],
        df['U2sin']
    ]
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    results = model.fit()
    ax.plot(df['year'], results.fittedvalues, label=str(split), alpha=0.3)
# the lines carry labels, so show them, and name the axes
ax.set_xlabel('year')
ax.set_ylabel('waterlevel [mm NAP]')
ax.legend(loc='best', title='break year')



In [42]:

    
# Based on the records of Amsterdam/Den Helder
# NOTE(review): absolute, machine-specific path; the notebook cannot be re-run elsewhere as is
amsterdam_csv = '/Users/baart_f/src/oetpython/applications/sealevel//sealevel/static/data/extra/9000.csv'
df = pandas.read_csv(amsterdam_csv)

# Figure and axes that `trend` draws its fitted segments into
fig, ax1 = plt.subplots(nrows=1, ncols=1, figsize=(10,6))
# compute a trend
def trend(index):
    """Fit a linear trend plus an 18.613-year cycle to one window of rows.

    Meant as the callback of a rolling window over the row labels of the
    module-level frame `df`. The fitted values of the window are drawn into
    the module-level axes `ax1`.

    Parameters
    ----------
    index : array-like
        Row labels of `df` that make up the window.

    Returns
    -------
    float
        The coefficient of 'year.month', or NaN when the window contains
        missing values.
    """
    # Select based on index; the labels may arrive as floats from the rolling window
    labels = np.asarray(index, dtype=int)
    df_selected = df.loc[labels]
    # ignore if we have missings; check before fitting so that no model is
    # ever built on incomplete data
    if df_selected[['year.month', 'waterlevel']].isnull().values.any():
        return np.nan
    # Create a linear model
    y = df_selected['waterlevel']
    X = np.c_[
        df_selected['year.month'],
        np.cos(2*np.pi*(df_selected['year.month']-1970)/18.613),
        np.sin(2*np.pi*(df_selected['year.month']-1970)/18.613)
    ]
    # use the public statsmodels API, as the rest of the notebook does
    X = sm.add_constant(X)
    model = sm.OLS(y, X)
    # fit the model
    result = model.fit()
    # create a plot
    ax1.plot(df_selected['year.month'], result.fittedvalues, 'k-', alpha=0.2, linewidth=3)
    # return the trend
    return result.params['x1']
# Mask the missing-value flag. Assign the whole column instead of writing
# through chained indexing, which may silently act on a temporary copy.
df['waterlevel'] = df['waterlevel'].replace(-99999, np.nan)
ax1.plot(df['year.month'], df['waterlevel'], '.')
ax1.set_xlabel('tijd [jaar]')
ax1.set_ylabel('zeespiegelniveau [m boven NAP]')
# only the part before 1890 is used for the rolling trends
df = df[df['year.month']<1890]
# slope of every window of 20 consecutive rows (`trend` also draws each fit)
betas = pandas.rolling_apply(np.array(df.index, dtype='int'),20, trend)
fig.savefig('amsterdam.pdf')



In [43]:

    
# Spread of the rolling slopes: standard deviation and the width of a band
# of 1.96 standard deviations on either side
valid_betas = betas[~np.isnan(betas)]
np.std(valid_betas), 1.96 * 2 * np.std(valid_betas)









    Out[43]:





(0.17157195469765205, 0.67256206241479599)



In [44]:

    
# the same for the whole of amsterdam
resp = requests.get('http://www.psmsl.org/data/longrecords/amsterdam.sea.level', timeout=60)
# fail loudly instead of parsing an error page as data
resp.raise_for_status()
f = io.BytesIO(resp.content)
df = pandas.read_fwf(f, widths=(10,20), names=('year.month', 'waterlevel'), skiprows=1)
# NOTE(review): the levels are divided by 10 while the axis label below says metres -- confirm the unit
df['waterlevel']/=10.0
# Create a plot (`trend` draws into the module-level `ax1` rebound here)
fig, ax1 = plt.subplots(1,1, figsize=(10,6))
ax1.plot(df['year.month'], df['waterlevel'], '.')
ax1.set_xlabel('tijd [jaar]')
ax1.set_ylabel('zeespiegelniveau [m boven NAP]')

# slope of every window of 20 consecutive rows, then the spread of those slopes
betas = pandas.rolling_apply(np.array(df.index, dtype='int'),20, trend)
valid_betas = betas[~np.isnan(betas)]
np.std(valid_betas), 1.96 * 2 * np.std(valid_betas)









    Out[44]:





(0.17411785038363373, 0.68254197350384416)



In [44]:

Dep. Variable:	waterlevel	R-squared:	0.828
Model:	OLS	Adj. R-squared:	0.827
Method:	Least Squares	F-statistic:	589.0
Date:	Sun, 16 Nov 2014	Prob (F-statistic):	1.60e-48
Time:	23:49:03	Log-Likelihood:	-598.81
No. Observations:	124	AIC:	1202.
Df Residuals:	122	BIC:	1207.
Df Model:	1

	coef	std err	t	P>\|t\|	[95.0% Conf. Int.]
const	3219.4017	149.443	21.543	0.000	2923.564 3515.239
year	1.8581	0.077	24.269	0.000	1.707 2.010

Omnibus:	3.101	Durbin-Watson:	1.544
Prob(Omnibus):	0.212	Jarque-Bera (JB):	2.704
Skew:	-0.357	Prob(JB):	0.259
Kurtosis:	3.120	Cond. No.	1.06e+05

Omnibus:	3.114	Durbin-Watson:	1.547
Prob(Omnibus):	0.211	Jarque-Bera (JB):	2.643
Skew:	-0.434	Prob(JB):	0.267
Kurtosis:	3.101	Cond. No.	1.62e+05

Omnibus:	0.139	Durbin-Watson:	1.435
Prob(Omnibus):	0.933	Jarque-Bera (JB):	0.311
Skew:	0.013	Prob(JB):	0.856
Kurtosis:	2.755	Cond. No.	122.