Multitemporal masking testing

Goal:

Estimate time series of green and SWIR reflectances for multi-temporal cloud/shadow filtering

Problem:

Timeseries is irregularly spaced, especially before Landsat 7, with possibility of large gaps

Approaches:

  1. Iteratively reweighted least squares for modeling the training period
    • Only training period because we don't want to include changes in one regression model
  2. LOWESS
  3. STL - Seasonal-trend LOESS
  4. LOESS

In [2]:
from __future__ import print_function, division

%load_ext autoreload
%autoreload 2

from datetime import datetime as dt

import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
matplotlib.rcParams['savefig.dpi'] = 2 * matplotlib.rcParams['savefig.dpi']
import pandas as pd

green = 1
swir1 = 4

# Read in data
x = np.load('../data/sample_x.npy')
Y = np.load('../data/px12_py27_Y.npy')

mask = (Y[7, :] <= 1)

x = np.array([_x.toordinal() for _x in x[mask]])
Y = Y[:7, mask]

In [3]:
# Plotter
def plotter(x, y, predx=None, predy=None, crit=400, compare='gt'):
    plt.plot(x, y, 'ko')
    
    if predx is not None and predy is not None:
        plt.plot(predx, predy, 'b-')
        
        match = np.in1d(predx, x)
        
        match_x = predx[match]
        match_y = predy[match]
        
        assert all(match_x == x)
        
        resid = (y - match_y)
        #print('Residuals:')
        #print(resid)
        
        if compare == 'gt':
            masked = resid > crit
        elif compare == 'lt':
            masked = resid < crit
    
        if np.any(masked == True):
            plt.plot(x[masked], y[masked], 'ro')

In [4]:
index = np.arange(0, 30)

plt.subplot(211)
plotter(x[index], Y[green, index])
plt.ylabel('Green')

plt.subplot(212)
plotter(x[index], Y[swir1, index])
plt.ylabel('SWIR1')


Out[4]:
<matplotlib.text.Text at 0x7f17ac04abd0>

1. IRWLS


In [5]:
niter = 50

n_year = np.ceil((x[index[-1]] - x[index[0]]) / 365.25)

def get_predX(x, n_year):
    w = 2.0 * np.pi / 365.25

    X = np.array([
        np.ones_like(x),
        np.cos(w * x),
        np.sin(w * x),
        np.cos(w / n_year * x),
        np.sin(w / n_year * x)
    ])
    # X = sm.add_constant(X)
    return X

X = get_predX(x, n_year)

green_rlm = sm.RLM(Y[green, index], X[:, index].T,
                   M=sm.robust.norms.TukeyBiweight()).fit(maxiter=niter)
swir1_rlm = sm.RLM(Y[swir1, index], X[:, index].T,
                   M=sm.robust.norms.TukeyBiweight()).fit(maxiter=niter)

predx = np.linspace(x[index[0]], x[index[-1]], num=x[index[-1]] - x[index[0]] + 1).astype(np.int32)
predX = get_predX(predx, n_year)

plt.subplot(211)
plotter(x[index], Y[green, index],
        predx=predx, predy=green_rlm.predict(predX.T))
plt.ylabel('Green')

plt.subplot(212)
plotter(x[index], Y[swir1, index],
        predx=predx, predy=swir1_rlm.predict(predX.T),
        crit=-400, compare='lt')
plt.ylabel('SWIR1')


Out[5]:
<matplotlib.text.Text at 0x7f17abf342d0>

2. LOWESS

Subset


In [6]:
# Setup
span = 9

frac = span / x[index].shape[0]
delta = (x[index].max() - x[index].min()) * 0.01

green_lowess = sm.nonparametric.lowess(Y[green, index], x[index],
                                       frac=frac, delta=delta)
swir1_lowess = sm.nonparametric.lowess(Y[swir1, index], x[index],
                                       frac=frac, delta=delta)

plt.subplot(211)
plotter(x[index], Y[green, index],
        predx=green_lowess[:, 0], predy=green_lowess[:, 1])
plt.ylabel('Green')

plt.subplot(212)
plotter(x[index], Y[swir1, index],
        predx=swir1_lowess[:, 0], predy=swir1_lowess[:, 1],
        crit=-400, compare='lt')
plt.ylabel('SWIR1')


Out[6]:
<matplotlib.text.Text at 0x7f17abe10590>

Full time period


In [7]:
frac = span / x.shape[0]
delta = (x.max() - x.min()) * 0.01

green_lowess = sm.nonparametric.lowess(Y[green, :], x,
                                       frac=frac, delta=delta)
swir1_lowess = sm.nonparametric.lowess(Y[swir1, :], x,
                                       frac=frac, delta=delta)

plt.subplot(211)
plotter(x, Y[green, :],
        predx=green_lowess[:, 0], predy=green_lowess[:, 1])
plt.ylabel('Green')

plt.subplot(212)
plotter(x, Y[swir1, :],
        predx=swir1_lowess[:, 0], predy=swir1_lowess[:, 1],
        crit=-400, compare='lt')
plt.ylabel('SWIR1')


Out[7]:
<matplotlib.text.Text at 0x7f17aa2a6790>

3. STL Seasonal-trend LOESS

To perform STL we can use a wrapper for R's built-in stl written by Andreas Hilboll located here:

https://gist.github.com/andreas-h/7808564

In [6]:
# Import wrapper by "andreas-h" for R's STL
! git clone https://gist.github.com/7808564.git
! cp 7808564/*.py .
! sed -i 's/DateRange/date_range/g' r_stl.py


fatal: destination path '7808564' already exists and is not an empty directory.

In [7]:
from r_stl import stl

In [8]:
data = pd.Series(Y[green, index], 
                 index=np.array([dt.fromordinal(_x) for _x in x[index]]))

stl(data, 5)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-8-3368d730eae9> in <module>()
      2                  index=np.array([dt.fromordinal(_x) for _x in x[index]]))
      3 
----> 4 stl(data, 5)

/home/ceholden/Documents/yatsm/examples/r_stl.py in stl(data, ns, np, nt, nl, isdeg, itdeg, ildeg, nsjump, ntjump, nljump, ni, no, fulloutput)
     99     # TODO: account for non-monthly series
    100     _idx = pandas.date_range(start=_data.index[0], end=_data.index[-1],
--> 101                             offset=pandas.datetools.MonthBegin())
    102     data = pandas.Series(index=_idx)
    103     data[_data.index] = _data

TypeError: date_range() got an unexpected keyword argument 'offset'

4. LOESS