The Boston Light Swim

Sea Surface Temperature time-series model skill

Load configuration


In [1]:
import warnings


# Suppresing warnings for a "pretty output."
# Remove this line to debug any possible issues.
warnings.simplefilter('ignore')

In [2]:
import os
from ioos_tools.ioos import parse_config

config = parse_config('config.yaml')

save_dir = os.path.join(os.path.abspath(config['run_name']))

Skill 1: Model Bias (or Mean Bias)

The bias skill compares the model mean temperature against the observations. It is possible to introduce a Mean Bias in the model due to a mismatch of the boundary forcing and the model interior.

$$ \text{MB} = \mathbf{\overline{m}} - \mathbf{\overline{o}}$$

In [3]:
from ioos_tools.ioos import stations_keys


def rename_cols(df, config):
    cols = stations_keys(config, key='station_name')
    return df.rename(columns=cols)

In [4]:
from ioos_tools.ioos import load_ncs
from ioos_tools.skill_score import mean_bias, apply_skill

dfs = load_ncs(config)

df = apply_skill(dfs, mean_bias, remove_mean=False, filter_tides=False)
skill_score = dict(mean_bias=df.to_dict())

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

if not df.empty:
    df = rename_cols(df, config)

df.T


Out[4]:
SECOORA_NCSU_CNAPS coawst_4_use_best dhw_5km global
BOSTON 16 NM East of Boston, MA -0.81 -0.83 -0.30 -0.61

Skill 2: Central Root Mean Squared Error

Root Mean Squared Error of the deviations from the mean.

$$ \text{CRMS} = \sqrt{\left(\mathbf{m'} - \mathbf{o'}\right)^2}$$

where: $\mathbf{m'} = \mathbf{m} - \mathbf{\overline{m}}$ and $\mathbf{o'} = \mathbf{o} - \mathbf{\overline{o}}$


In [5]:
from ioos_tools.skill_score import rmse

dfs = load_ncs(config)

df = apply_skill(dfs, rmse, remove_mean=True, filter_tides=False)
skill_score['rmse'] = df.to_dict()

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

if not df.empty:
    df = rename_cols(df, config)

df.T


Out[5]:
SECOORA_NCSU_CNAPS coawst_4_use_best dhw_5km global
BOSTON 16 NM East of Boston, MA 0.63 0.68 0.21 0.64

In [6]:
from ioos_tools.skill_score import r2

dfs = load_ncs(config)

df = apply_skill(dfs, r2, remove_mean=True, filter_tides=False)
skill_score['r2'] = df.to_dict()

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

if not df.empty:
    df = rename_cols(df, config)

df.T


Out[6]:
SECOORA_NCSU_CNAPS coawst_4_use_best dhw_5km global
BOSTON 16 NM East of Boston, MA 0.20 0.14 0.04 0.33

In [7]:
import json

fname = os.path.join(save_dir, 'skill_score.json')

# Stringfy keys for json.
for key in skill_score.keys():
    skill_score[key] = {str(k): v for k, v in skill_score[key].items()}

with open(fname, 'w') as f:
    f.write(json.dumps(skill_score))

Normalized Taylor diagrams

The radius is model standard deviation error divided by observations deviation, azimuth is arc-cosine of cross correlation (R), and distance to point (1, 0) on the abscissa is Centered RMS.


In [8]:
%matplotlib inline
import matplotlib.pyplot as plt
from ioos_tools.taylor_diagram import TaylorDiagram


def make_taylor(samples):
    fig = plt.figure(figsize=(9, 9))
    dia = TaylorDiagram(samples['std']['OBS_DATA'],
                        fig=fig,
                        label='Observation')
    # Add samples to Taylor diagram.
    samples.drop('OBS_DATA', inplace=True)
    for model, row in samples.iterrows():
        dia.add_sample(row['std'], row['corr'], marker='s', ls='',
                       label=model)
    # Add RMS contours, and label them.
    contours = dia.add_contours(colors='0.5')
    plt.clabel(contours, inline=1, fontsize=10)
    # Add a figure legend.
    kw = dict(prop=dict(size='small'), loc='upper right')
    fig.legend(dia.samplePoints,
               [p.get_label() for p in dia.samplePoints],
               numpoints=1, **kw)
    return fig

In [9]:
import numpy as np
import pandas as pd

dfs = load_ncs(config)

# Bin and interpolate all series.
freq = '30min'
for station, df in list(dfs.iteritems()):
    df = df.resample(freq).interpolate().dropna(axis=1)
    if 'OBS_DATA' in df:
        samples = pd.DataFrame.from_dict(dict(std=df.std(),
                                              corr=df.corr()['OBS_DATA']))
    else:
        continue
    samples[samples < 0] = np.NaN
    samples.dropna(inplace=True)
    if len(samples) <= 2:  # 1 obs 1 model.
        continue
    fig = make_taylor(samples)
    fig.savefig(os.path.join(save_dir, '{}.png'.format(station)))
    plt.close(fig)