SECOORA Notebook 2

Sea Surface Height time-series model skill

This notebook calculates several skill scores for the SECOORA models' weekly time-series saved by 00-fetch_data.ipynb.

Load configuration


In [1]:
import os
try:
    import cPickle as pickle  # Python 2: faster C implementation.
except ImportError:
    import pickle


run_name = '2014-07-07'
fname = os.path.join(run_name, 'config.pkl')
with open(fname, 'rb') as f:
    config = pickle.load(f)

In [2]:
import numpy as np
from pandas import DataFrame, read_csv
from utilities import (load_secoora_ncs, to_html,
                       save_html, apply_skill)


fname = '{}-all_obs.csv'.format(run_name)
all_obs = read_csv(os.path.join(run_name, fname), index_col='name')


def rename_cols(df):
    """Replace the station IDs in the column labels with the
    human-readable station names stored in the all_obs table."""
    columns = dict()
    for station in df.columns:
        mask = all_obs['station'] == station
        name = all_obs['station'][mask].index[0]
        columns.update({station: name})
    return df.rename(columns=columns)

Skill 1: Model Bias (or Mean Bias)

The bias skill compares the model mean elevation against the observed mean. Because the observations are referenced to the NAVD datum, this deviation is usually just the datum offset between the model forcing and the observations. (This can be confirmed by the nearly constant bias seen across different runs.)

$$ \text{MB} = \mathbf{\overline{m}} - \mathbf{\overline{o}}$$
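
For reference, a minimal sketch of such a metric (utilities.mean_bias is not shown in this notebook; the argument order and NaN handling here are assumptions):

import numpy as np

def mean_bias(obs, model):
    """Model mean minus observed mean, ignoring missing values."""
    obs = np.asarray(obs, dtype=float)
    model = np.asarray(model, dtype=float)
    return np.nanmean(model) - np.nanmean(obs)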

In [3]:
from utilities import mean_bias

dfs = load_secoora_ncs(run_name)

df = apply_skill(dfs, mean_bias, remove_mean=False, filter_tides=False)
df = rename_cols(df)
skill_score = dict(mean_bias=df.copy())

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

html = to_html(df.T)
fname = os.path.join(run_name, 'mean_bias.html')
save_html(fname, html)
html


Out[3]:
Station COAWST_4 ESTOFS HYCOM ROMS_ESPRESSO SABGOM_ARCHIVE USF_FVCOM USF_ROMS
Duck, NC -- 0.07 -- -- 0.41 -- --
Oregon Inlet Marina, NC 0.19 0.25 0.58 0.46 0.65 -- --
Wrightsville Beach, NC 0.13 0.11 0.44 -- 0.49 -- --
Springmaid Pier, SC 0.16 0.14 -- -- 0.47 -- --
Oyster Landing (N Inlet Estuary), SC -- 0.25 -- -- 0.57 -- --
Charleston, SC 0.10 -- -- -- -- -- --
Fernandina Beach, FL -- 0.19 -- -- -- -- --
Mayport (Bar Pilots Dock), FL -- 0.14 -- -- -- -- --
Trident Pier, FL -- 0.20 -- -- 0.36 -- --
Lake Worth Pier, FL 0.32 0.18 0.22 -- 0.35 -- --
Virginia Key, FL 0.31 0.14 -- -- 0.37 -- --
Vaca Key, FL -- 0.16 -- -- 0.17 0.15 0.05
Key West, FL 0.34 0.17 -- -- 0.20 0.13 --
Naples, FL 0.26 0.11 -- -- -- 0.15 0.17
Fort Myers, FL -- -- -- -- -- 0.18 0.25
Port Manatee, FL -- 0.08 -- -- 0.36 0.20 0.12
Cedar Key, FL 0.13 0.08 -- -- 0.42 0.27 0.25
Apalachicola, FL 0.08 0.09 -- -- -- 0.27 0.21
Panama City, FL 0.06 -- -- -- -- -- 0.20
Pensacola, FL 0.04 -- -- -- -- -- --
Bing's Landing station -- 0.37 -- -- -- -- --
Binney Dock station 0.26 0.23 -- -- -- -- --
Gordon River Inlet station -- 0.11 -- -- -- 0.16 0.15
Melbourne station -- 0.38 -- -- -- -- --
Naples Bay station -- 0.12 -- -- -- 0.17 0.15
Ponce De Leon South station 0.19 0.19 0.20 -- -- -- --
St Lucie Inlet station 1.01 0.97 -- -- 0.28 -- --
Tolomato River station -- 0.28 -- -- -- -- --
Vero Beach station 0.32 0.32 -- -- -- -- --
Aripeka 3.92 3.74 -- -- -- 3.56 0.57
Big Carlos Pass 0.74 0.57 -- -- -- 0.34 0.48
Fred Howard Park -- 15.95 -- -- 12.73 15.96 0.27
Shell Point 0.73 0.54 -- -- 0.28 0.35 0.51
usf_tas_ngwlms -- 0.67 -- -- -- 0.45 0.47

Skill 2: Central Root Mean Squared Error

Root Mean Squared Error of the deviations from the mean (i.e., of the de-biased series).

$$ \text{CRMS} = \sqrt{\overline{\left(\mathbf{m'} - \mathbf{o'}\right)^2}}$$

where $\mathbf{m'} = \mathbf{m} - \mathbf{\overline{m}}$ and $\mathbf{o'} = \mathbf{o} - \mathbf{\overline{o}}$
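
Again a minimal sketch (the real utilities.rmse may differ; apply_skill with remove_mean=True is assumed to subtract each series' mean before calling it):

import numpy as np

def rmse(obs, model):
    """Root mean squared error between two series, ignoring missing values."""
    obs = np.asarray(obs, dtype=float)
    model = np.asarray(model, dtype=float)
    return np.sqrt(np.nanmean((model - obs) ** 2))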


In [4]:
from utilities import rmse

dfs = load_secoora_ncs(run_name)

df = apply_skill(dfs, rmse, remove_mean=True, filter_tides=False)
df = rename_cols(df)
skill_score['rmse'] = df.copy()

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

html = to_html(df.T)
fname = os.path.join(run_name, 'rmse.html')
save_html(fname, html)
html


Out[4]:
Station COAWST_4 ESTOFS HYCOM ROMS_ESPRESSO SABGOM_ARCHIVE USF_FVCOM USF_ROMS
Duck, NC -- 0.09 -- -- 0.16 -- --
Oregon Inlet Marina, NC 0.25 0.34 0.21 0.23 0.18 -- --
Wrightsville Beach, NC 0.16 0.15 0.28 -- 0.15 -- --
Springmaid Pier, SC 0.18 0.23 -- -- 0.18 -- --
Oyster Landing (N Inlet Estuary), SC -- 0.32 -- -- 0.21 -- --
Charleston, SC 0.13 -- -- -- -- -- --
Fernandina Beach, FL -- 0.22 -- -- -- -- --
Mayport (Bar Pilots Dock), FL -- 0.15 -- -- -- -- --
Trident Pier, FL -- 0.07 -- -- 0.12 -- --
Lake Worth Pier, FL 0.10 0.09 0.26 -- 0.17 -- --
Virginia Key, FL 0.06 0.08 -- -- 0.12 -- --
Vaca Key, FL -- 0.11 -- -- 0.12 0.12 0.04
Key West, FL 0.05 0.07 -- -- 0.05 0.04 --
Naples, FL 0.10 0.10 -- -- -- 0.06 0.18
Fort Myers, FL -- -- -- -- -- 0.16 0.05
Port Manatee, FL -- 0.10 -- -- 0.07 0.06 0.11
Cedar Key, FL 0.13 0.08 -- -- 0.28 0.11 0.23
Apalachicola, FL 0.06 0.08 -- -- -- 0.06 0.10
Panama City, FL 0.05 -- -- -- -- -- 0.06
Pensacola, FL 0.05 -- -- -- -- -- --
Bing's Landing station -- 0.41 -- -- -- -- --
Binney Dock station 0.10 0.12 -- -- -- -- --
Gordon River Inlet station -- 0.11 -- -- -- 0.06 0.17
Melbourne station -- 0.44 -- -- -- -- --
Naples Bay station -- 0.14 -- -- -- 0.09 0.18
Ponce De Leon South station 0.17 0.22 0.21 -- -- -- --
St Lucie Inlet station 26.49 26.49 -- -- 0.15 -- --
Tolomato River station -- 0.30 -- -- -- -- --
Vero Beach station 0.27 0.29 -- -- -- -- --
Aripeka 51.26 51.26 -- -- -- 51.26 0.14
Big Carlos Pass 0.12 0.13 -- -- -- 0.08 0.17
Fred Howard Park -- 114.33 -- -- 104.50 114.34 0.06
Shell Point 0.14 0.10 -- -- 0.26 0.10 0.17
usf_tas_ngwlms -- 0.22 -- -- -- 0.16 0.01
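
Skill 3: R$^2$

The negative values in the table below indicate that this R$^2$ is the coefficient of determination rather than a squared correlation: 1 is a perfect score, 0 is no better than predicting the observed mean, and negative values are worse than the mean. A minimal sketch of such a metric (the actual utilities.r2 may differ in argument order and NaN handling):

import numpy as np

def r2(obs, model):
    """Coefficient of determination: 1 - SS_res / SS_tot."""
    obs = np.asarray(obs, dtype=float)
    model = np.asarray(model, dtype=float)
    ss_res = np.nansum((obs - model) ** 2)
    ss_tot = np.nansum((obs - np.nanmean(obs)) ** 2)
    return 1 - ss_res / ss_tot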

In [5]:
from utilities import r2

dfs = load_secoora_ncs(run_name)

df = apply_skill(dfs, r2, remove_mean=True, filter_tides=False)
df = rename_cols(df)
skill_score['r2'] = df.copy()

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

html = to_html(df.T)
fname = os.path.join(run_name, 'r2.html')
save_html(fname, html)
html


Out[5]:
Station COAWST_4 ESTOFS HYCOM ROMS_ESPRESSO SABGOM_ARCHIVE USF_FVCOM USF_ROMS
Duck, NC -- 0.92 -- -- 0.72 -- --
Oregon Inlet Marina, NC -0.81 -2.35 -3.73 -0.50 -4.63 -- --
Wrightsville Beach, NC 0.80 0.82 -0.21 -- 0.81 -- --
Springmaid Pier, SC 0.84 0.76 -- -- 0.81 -- --
Oyster Landing (N Inlet Estuary), SC -- 0.37 -- -- 0.66 -- --
Charleston, SC 0.93 -- -- -- -- -- --
Fernandina Beach, FL -- 0.84 -- -- -- -- --
Mayport (Bar Pilots Dock), FL -- 0.87 -- -- -- -- --
Trident Pier, FL -- 0.94 -- -- 0.82 -- --
Lake Worth Pier, FL 0.85 0.89 0.12 -- 0.62 -- --
Virginia Key, FL 0.89 0.81 -- -- 0.53 -- --
Vaca Key, FL -- -1.80 -- -- -2.27 -2.61 0.00
Key West, FL 0.83 0.64 -- -- 0.86 0.87 --
Naples, FL 0.69 0.72 -- -- -- 0.89 -0.40
Fort Myers, FL -- -- -- -- -- -1.69 0.00
Port Manatee, FL -- 0.50 -- -- 0.75 0.82 0.27
Cedar Key, FL 0.79 0.92 -- -- 0.14 0.85 0.16
Apalachicola, FL 0.74 0.55 -- -- -- 0.70 -1.92
Panama City, FL 0.58 -- -- -- -- -- -2.51
Pensacola, FL 0.49 -- -- -- -- -- --
Bing's Landing station -- -5.89 -- -- -- -- --
Binney Dock station 0.79 0.70 -- -- -- -- --
Gordon River Inlet station -- 0.61 -- -- -- 0.87 -0.38
Melbourne station -- -5.10 -- -- -- -- --
Naples Bay station -- 0.43 -- -- -- 0.77 -0.36
Ponce De Leon South station 0.64 0.44 0.14 -- -- -- --
St Lucie Inlet station 0.00 0.00 -- -- 0.62 -- --
Tolomato River station -- 0.41 -- -- -- -- --
Vero Beach station -3.69 -4.64 -- -- -- -- --
Aripeka -0.00 -0.00 -- -- -- -0.00 -0.00
Big Carlos Pass 0.47 0.33 -- -- -- 0.74 -0.66
Fred Howard Park -- 0.00 -- -- 0.00 0.00 0.00
Shell Point 0.66 0.82 -- -- 0.02 0.84 -0.00
usf_tas_ngwlms -- -38.83 -- -- -- -20.46 0.00

The same skill computed on the low-passed time-series, i.e. with the tidal signal filtered out (filter_tides=True):

In [6]:
from utilities import r2

dfs = load_secoora_ncs(run_name)

df = apply_skill(dfs, r2, remove_mean=True, filter_tides=True)
df = rename_cols(df)
skill_score['low_pass_r2'] = df.copy()

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

html = to_html(df.T)
fname = os.path.join(run_name, 'low_pass_r2.html')
save_html(fname, html)
html


Out[6]:
Station COAWST_4 ESTOFS HYCOM ROMS_ESPRESSO SABGOM_ARCHIVE USF_FVCOM USF_ROMS
Duck, NC -- 0.86 -- -- -1.73 -- --
Oregon Inlet Marina, NC 0.02 -0.56 -23.22 -0.33 -7.50 -- --
Wrightsville Beach, NC 0.74 0.13 0.84 -- -4.98 -- --
Springmaid Pier, SC 0.78 0.03 -- -- -5.19 -- --
Oyster Landing (N Inlet Estuary), SC -- 0.00 -- -- -3.85 -- --
Charleston, SC 0.66 -- -- -- -- -- --
Fernandina Beach, FL -- 0.26 -- -- -- -- --
Mayport (Bar Pilots Dock), FL -- 0.01 -- -- -- -- --
Trident Pier, FL -- 0.74 -- -- -0.22 -- --
Lake Worth Pier, FL 0.67 0.60 0.45 -- 0.06 -- --
Virginia Key, FL 0.78 0.56 -- -- -4.64 -- --
Vaca Key, FL -- -10.12 -- -- -9.80 -1.56 --
Key West, FL -1.37 -4.22 -- -- -3.26 -0.30 --
Naples, FL -0.17 -1.48 -- -- -- 0.33 -1.31
Fort Myers, FL -- -- -- -- -- 0.10 --
Port Manatee, FL -- -1.33 -- -- 0.14 0.42 -0.32
Cedar Key, FL 0.11 -1.37 -- -- -2.06 0.57 -0.90
Apalachicola, FL 0.35 -2.06 -- -- -- 0.51 -7.17
Panama City, FL 0.49 -- -- -- -- -- -6.02
Pensacola, FL 0.44 -- -- -- -- -- --
Bing's Landing station -- 0.44 -- -- -- -- --
Binney Dock station 0.87 0.88 -- -- -- -- --
Gordon River Inlet station -- -1.41 -- -- -- 0.36 -1.25
Melbourne station -- -8.32 -- -- -- -- --
Naples Bay station -- -1.72 -- -- -- 0.22 -0.85
Ponce De Leon South station 0.06 -0.60 -1.20 -- -- -- --
St Lucie Inlet station 0.06 0.05 -- -- 0.06 -- --
Tolomato River station -- 0.33 -- -- -- -- --
Vero Beach station 0.79 0.88 -- -- -- -- --
Aripeka 0.00 0.01 -- -- -- 0.00 --
Big Carlos Pass -0.16 -1.33 -- -- -- 0.18 -1.38
Fred Howard Park -- -0.00 -- -- 0.00 0.00 --
Shell Point 0.06 -1.35 -- -- -1.17 0.61 --
usf_tas_ngwlms -- -4.54 -- -- -- -0.48 --

Skill 4: Low-passed and re-sampled (3H) R$^2$

The series are re-sampled to 3 hours because that is the time step of the coarsest model output (SABGOM). See the discussion in https://github.com/ioos/secoora/issues/183
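
The swapaxes round-trip below is needed because Panel.resample operates along the major (time) axis. A Panel-free sketch of the same step, assuming dfs iterates as (station, DataFrame) pairs (.resample('3H').mean() is the modern pandas spelling of the default mean aggregation used here):

# Hypothetical equivalent without pandas.Panel: resample each
# station's time-indexed DataFrame to 3-hourly means.
resampled = {station: df.resample('3H').mean()
             for station, df in dfs.iteritems()}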


In [7]:
from utilities import r2

dfs = load_secoora_ncs(run_name)

# SABGOM dt = 3 hours.
dfs = dfs.swapaxes('items', 'major').resample('3H').swapaxes('items', 'major')

df = apply_skill(dfs, r2, remove_mean=True, filter_tides=False)
df = rename_cols(df)
skill_score['low_pass_resampled_3H_r2'] = df.copy()

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

html = to_html(df.T)
fname = os.path.join(run_name, 'low_pass_resampled_3H_r2.html')
save_html(fname, html)
html


Out[7]:
Station COAWST_4 ESTOFS HYCOM ROMS_ESPRESSO SABGOM_ARCHIVE USF_FVCOM USF_ROMS
Duck, NC -- 0.93 -- -- 0.32 -- --
Oregon Inlet Marina, NC -0.73 -2.21 -3.66 -0.48 -5.44 -- --
Wrightsville Beach, NC 0.80 0.84 -0.20 -- 0.42 -- --
Springmaid Pier, SC 0.82 0.75 -- -- 0.35 -- --
Oyster Landing (N Inlet Estuary), SC -- 0.36 -- -- 0.31 -- --
Charleston, SC 0.89 -- -- -- -- -- --
Fernandina Beach, FL -- 0.84 -- -- -- -- --
Mayport (Bar Pilots Dock), FL -- 0.86 -- -- -- -- --
Trident Pier, FL -- 0.96 -- -- 0.51 -- --
Lake Worth Pier, FL 0.74 0.87 0.11 -- 0.17 -- --
Virginia Key, FL 0.83 0.81 -- -- -0.24 -- --
Vaca Key, FL -- -1.74 -- -- -3.14 -2.54 0.00
Key West, FL 0.81 0.62 -- -- 0.60 0.82 --
Naples, FL 0.73 0.73 -- -- -- 0.90 -0.58
Fort Myers, FL -- -- -- -- -- -1.63 0.00
Port Manatee, FL -- 0.54 -- -- 0.66 0.82 0.21
Cedar Key, FL 0.74 0.90 -- -- -0.20 0.84 0.11
Apalachicola, FL 0.75 0.53 -- -- -- 0.71 -0.87
Panama City, FL 0.65 -- -- -- -- -- -2.24
Pensacola, FL 0.56 -- -- -- -- -- --
Bing's Landing station -- -5.57 -- -- -- -- --
Binney Dock station 0.73 0.72 -- -- -- -- --
Gordon River Inlet station -- 0.58 -- -- -- 0.86 -0.61
Melbourne station -- -4.98 -- -- -- -- --
Naples Bay station -- 0.42 -- -- -- 0.77 -0.51
Ponce De Leon South station 0.56 0.41 0.10 -- -- -- --
St Lucie Inlet station 0.03 0.03 -- -- 0.30 -- --
Tolomato River station -- 0.40 -- -- -- -- --
Vero Beach station -3.14 -4.14 -- -- -- -- --
Aripeka -0.00 -0.00 -- -- -- -0.00 0.00
Big Carlos Pass 0.53 0.38 -- -- -- 0.74 -0.93
Fred Howard Park -- 0.00 -- -- 0.00 0.00 0.00
Shell Point 0.60 0.77 -- -- -0.12 0.81 0.00
usf_tas_ngwlms -- -34.41 -- -- -- -18.61 0.00

Save scores


In [8]:
fname = os.path.join(run_name, 'skill_score.pkl')
with open(fname, 'wb') as f:
    pickle.dump(skill_score, f)
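
The pickled scores can then be restored in a later session with:

with open(os.path.join(run_name, 'skill_score.pkl'), 'rb') as f:
    skill_score = pickle.load(f)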

Normalized Taylor diagrams

The radius is the model standard deviation normalized by the observed standard deviation, the azimuth is the arc-cosine of the correlation coefficient (R), and the distance to the point (1, 0) on the abscissa is the centered RMS error.
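
These three statistics fit on a single 2-D diagram because they are linked by the law of cosines; in the normalized form used here ($\sigma_o = 1$):

$$ \text{CRMS}^2 = \sigma_m^2 + \sigma_o^2 - 2\,\sigma_m\,\sigma_o\,R$$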


In [9]:
%matplotlib inline
import matplotlib.pyplot as plt
from utilities.taylor_diagram import TaylorDiagram


def make_taylor(samples):
    fig = plt.figure(figsize=(9, 9))
    dia = TaylorDiagram(samples['std']['OBS_DATA'],
                        fig=fig,
                        label="Observation")
    colors = plt.matplotlib.cm.jet(np.linspace(0, 1, len(samples)))
    # Add samples to the Taylor diagram, one color per model.
    samples.drop('OBS_DATA', inplace=True)
    for (model, row), color in zip(samples.iterrows(), colors):
        dia.add_sample(row['std'], row['corr'], marker='s', ls='',
                       color=color, label=model)
    # Add RMS contours, and label them.
    contours = dia.add_contours(colors='0.5')
    plt.clabel(contours, inline=1, fontsize=10)
    # Add a figure legend.
    kw = dict(prop=dict(size='small'), loc='upper right')
    leg = fig.legend(dia.samplePoints,
                     [p.get_label() for p in dia.samplePoints],
                     numpoints=1, **kw)
    return fig

In [10]:
dfs = load_secoora_ncs(run_name)

# Bin and interpolate all series to 1 hour.
freq = '1H'
for station, df in list(dfs.iteritems()):
    df = df.resample(freq).interpolate().dropna(axis=1)
    # Skip stations that have no observed series to compare against.
    if 'OBS_DATA' not in df:
        continue
    samples = DataFrame.from_dict(dict(std=df.std(),
                                       corr=df.corr()['OBS_DATA']))
    # Negative correlations fall outside the diagram's quadrant.
    samples[samples < 0] = np.NaN
    samples.dropna(inplace=True)
    if len(samples) <= 2:  # Only the observation and a single model left.
        continue
    fig = make_taylor(samples)
    fig.savefig(os.path.join(run_name, '{}.png'.format(station)))
    plt.close(fig)