Sea Surface Temperature time-series model skill

Load configuration



In [1]:

    
import os
try:
    import cPickle as pickle
except ImportError:
    import pickle


run_name = '2015-08-17'
fname = os.path.join(run_name, 'config.pkl')
with open(fname, 'rb') as f:
    config = pickle.load(f)



In [2]:

    
import numpy as np
from pandas import DataFrame, read_csv
from utilities import to_html, save_html, apply_skill


fname = '{}-all_obs.csv'.format(run_name)
all_obs = read_csv(os.path.join(run_name, fname), index_col='name')


def rename_cols(df):
    columns = dict()
    for station in df.columns:
        mask = all_obs['station'].astype(str) == station
        name = all_obs['station'][mask].index[0]
        columns.update({station: name})
    return df.rename(columns=columns)



In [3]:

    
from glob import glob
from pandas import Panel
from utilities import nc2df


def load_ncs(run_name):
    fname = '{}-{}.nc'.format
    ALL_OBS_DATA = nc2df(os.path.join(run_name,
                                      fname(run_name, 'OBS_DATA')))
    index = ALL_OBS_DATA.index
    dfs = dict(OBS_DATA=ALL_OBS_DATA)
    for fname in glob(os.path.join(run_name, "*.nc")):
        if 'OBS_DATA' in fname:
            continue
        else:
            model = fname.split('.')[0].split('-')[-1]
            df = nc2df(fname)
            # FIXME: Horrible work around duplicate times.
            if len(df.index.values) != len(np.unique(df.index.values)):
                kw = dict(subset='index', take_last=True)
                df = df.reset_index().drop_duplicates(**kw).set_index('index')
            kw = dict(method='time', limit=30)
            df = df.reindex(index).interpolate(**kw).ix[index]
            dfs.update({model: df})

    return Panel.fromDict(dfs).swapaxes(0, 2)

Skill 1: Model Bias (or Mean Bias)

The bias skill compares the model mean temperature against the observations. It is possible to introduce a Mean Bias in the model due to a mismatch of the boundary forcing and the model interior.

$$ \text{MB} = \mathbf{\overline{m}} - \mathbf{\overline{o}}$$



In [4]:

    
from utilities import mean_bias

dfs = load_ncs(run_name)

df = apply_skill(dfs, mean_bias, remove_mean=False, filter_tides=False)
df = rename_cols(df)
skill_score = dict(mean_bias=df.copy())

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

html = to_html(df.T)
fname = os.path.join(run_name, 'mean_bias.html'.format(run_name))
save_html(fname, html)
html









    Out[4]:





  
    
      
      COAWST_4
      G1SST__SST
      HYCOM
      NECOFS_FVCOM
      NECOFS_GOM3_FVCOM
    
  
  
    
      Boston 16 Nm East Of Boston
      0.78
      0.37
      2.08
      8.16
      0.60
    
    
      Buoy A01
      0.75
      0.39
      0.98
      9.14
      0.78
    
    
      Boston, MA
      5.68
      0.45
      --
      3.84
      0.92

Skill 2: Central Root Mean Squared Error

Root Mean Squared Error of the deviations from the mean.

$$ \text{CRMS} = \sqrt{\left(\mathbf{m'} - \mathbf{o'}\right)^2}$$

where: $\mathbf{m'} = \mathbf{m} - \mathbf{\overline{m}}$ and $\mathbf{o'} = \mathbf{o} - \mathbf{\overline{o}}$



In [5]:

    
from utilities import rmse

dfs = load_ncs(run_name)

df = apply_skill(dfs, rmse, remove_mean=True, filter_tides=False)
df = rename_cols(df)
skill_score['rmse'] = df.copy()

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

html = to_html(df.T)
fname = os.path.join(run_name, 'rmse.html'.format(run_name))
save_html(fname, html)
html









    Out[5]:





  
    
      
      COAWST_4
      G1SST__SST
      HYCOM
      NECOFS_FVCOM
      NECOFS_GOM3_FVCOM
    
  
  
    
      Boston 16 Nm East Of Boston
      0.70
      0.34
      0.72
      0.58
      0.73
    
    
      Buoy A01
      0.78
      0.46
      0.70
      0.71
      0.67
    
    
      Boston, MA
      0.43
      0.55
      --
      0.43
      0.40

Skill 3: R$^2$

https://en.wikipedia.org/wiki/Coefficient_of_determination



In [6]:

    
from utilities import r2

dfs = load_ncs(run_name)

df = apply_skill(dfs, r2, remove_mean=True, filter_tides=False)
df = rename_cols(df)
skill_score['r2'] = df.copy()

# Filter out stations with no valid comparison.
df.dropna(how='all', axis=1, inplace=True)
df = df.applymap('{:.2f}'.format).replace('nan', '--')

html = to_html(df.T)
fname = os.path.join(run_name, 'r2.html'.format(run_name))
save_html(fname, html)
html









    Out[6]:





  
    
      
      COAWST_4
      G1SST__SST
      HYCOM
      NECOFS_FVCOM
      NECOFS_GOM3_FVCOM
    
  
  
    
      Boston 16 Nm East Of Boston
      -0.31
      -0.03
      -0.42
      0.08
      -0.21
    
    
      Buoy A01
      -0.54
      -0.19
      -0.26
      -0.30
      -0.00
    
    
      Boston, MA
      0.05
      -0.11
      --
      0.05
      -0.04



In [7]:

    
fname = os.path.join(run_name, 'skill_score.pkl')
with open(fname,'wb') as f:
    pickle.dump(skill_score, f)

Normalized Taylor diagrams

The radius is model standard deviation error divided by observations deviation, azimuth is arc-cosine of cross correlation (R), and distance to point (1, 0) on the abscissa is Centered RMS.



In [8]:

    
%matplotlib inline
import matplotlib.pyplot as plt
from utilities.taylor_diagram import TaylorDiagram


def make_taylor(samples):
    fig = plt.figure(figsize=(9, 9))
    dia = TaylorDiagram(samples['std']['OBS_DATA'],
                        fig=fig,
                        label="Observation")
    colors = plt.matplotlib.cm.jet(np.linspace(0, 1, len(samples)))
    # Add samples to Taylor diagram.
    samples.drop('OBS_DATA', inplace=True)
    for model, row in samples.iterrows():
        dia.add_sample(row['std'], row['corr'], marker='s', ls='',
                       label=model)
    # Add RMS contours, and label them.
    contours = dia.add_contours(colors='0.5')
    plt.clabel(contours, inline=1, fontsize=10)
    # Add a figure legend.
    kw = dict(prop=dict(size='small'), loc='upper right')
    leg = fig.legend(dia.samplePoints,
                     [p.get_label() for p in dia.samplePoints],
                     numpoints=1, **kw)
    return fig



In [9]:

    
dfs = load_ncs(run_name)

# Bin and interpolate all series to 1 hour.
freq = '30min'
for station, df in list(dfs.iteritems()):
    df = df.resample(freq).interpolate().dropna(axis=1)
    if 'OBS_DATA' in df:
        samples = DataFrame.from_dict(dict(std=df.std(),
                                           corr=df.corr()['OBS_DATA']))
    else:
        continue
    samples[samples < 0] = np.NaN
    samples.dropna(inplace=True)
    if len(samples) <= 2:  # 1 obs 1 model.
        continue
    fig = make_taylor(samples)
    fig.savefig(os.path.join(run_name, '{}.png'.format(station)))
    plt.close(fig)









    



c:\programs\anaconda-64\envs\swim\lib\site-packages\matplotlib\text.py:52: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
  if rotation in ('horizontal', None):
c:\programs\anaconda-64\envs\swim\lib\site-packages\matplotlib\text.py:54: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal
  elif rotation == 'vertical':

	COAWST_4	G1SST__SST	HYCOM	NECOFS_FVCOM	NECOFS_GOM3_FVCOM
Boston 16 Nm East Of Boston	0.78	0.37	2.08	8.16	0.60
Buoy A01	0.75	0.39	0.98	9.14	0.78
Boston, MA	5.68	0.45	--	3.84	0.92

	COAWST_4	G1SST__SST	HYCOM	NECOFS_FVCOM	NECOFS_GOM3_FVCOM
Boston 16 Nm East Of Boston	0.70	0.34	0.72	0.58	0.73
Buoy A01	0.78	0.46	0.70	0.71	0.67
Boston, MA	0.43	0.55	--	0.43	0.40