Change Detection using Sentinel-1 SAR data


In [1]:
algorithm = "deutscher"
version = 0.04

1. Query and Preprocess Data

Define area and product


In [2]:
product   = "s1g_gamma0_caqueta"
latitude_extents  = (1.487715, 1.540572)
longitude_extents = (-74.859247, -74.81)

Visualize area


In [3]:
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude  = latitude_extents,
            longitude = longitude_extents)


Out[3]:

Load Data


In [4]:
import datacube
dc = datacube.Datacube(app = "notebook: {}, version: {}".format(algorithm, version))


/home/localuser/Datacube/datacube_env/lib/python3.5/site-packages/psycopg2-2.7.5-py3.5-linux-x86_64.egg/psycopg2/__init__.py:144: UserWarning: The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use "pip install psycopg2-binary" instead. For details see: <http://initd.org/psycopg/docs/install.html#binary-install-from-pypi>.
  """)

In [5]:
raw_dataset = dc.load(product = product,
                   latitude  = latitude_extents, 
                   longitude = longitude_extents)

In [6]:
raw_dataset


Out[6]:
<xarray.Dataset>
Dimensions:    (latitude: 293, longitude: 276, time: 227)
Coordinates:
  * time       (time) datetime64[ns] 2015-05-12T23:20:45.500000 ... 2018-06-28T10:35:06.500000
  * latitude   (latitude) float64 1.541 1.54 1.54 1.54 ... 1.488 1.488 1.488
  * longitude  (longitude) float64 -74.86 -74.86 -74.86 ... -74.81 -74.81 -74.81
Data variables:
    vh         (time, latitude, longitude) float32 0.0 0.0 0.0 ... 0.0 0.0 0.0
    vv         (time, latitude, longitude) float32 0.0 0.0 0.0 ... 0.0 0.0 0.0
Attributes:
    crs:      EPSG:4326

Clean Data

As a result of an ingestion experiment, some datasets include time slices filled with zero-valued pixels. The code below removes them.


In [7]:
import xarray as xr  
import numpy as np
import matplotlib.pyplot as plt

def remove_all_zero(dataset):
    # Drop any time slice whose vv band contains only zero-valued pixels
    empty_times = [t.values for t in dataset.time
                   if np.count_nonzero(dataset.sel(time=t).vv) == 0]
    return dataset.drop(empty_times, dim='time')

In [8]:
from typing import Callable, List
import itertools

has_time_dimension = lambda x: "time" in dict(x.dims).keys()
get_first = lambda x: x[0]
get_last =  lambda x: x[-1]


def group_dates_by_day( dates: List[np.datetime64]) -> List[List[np.datetime64]]:
    generate_key = lambda b: ((b - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 'D')).astype(int)
    return [list(group) for key, group in itertools.groupby(dates, key=generate_key)]

def reduce_on_day(ds: xr.Dataset,
                  reduction_func: Callable = np.nanmean) -> xr.Dataset:
    #Group dates by day into date_groups
    day_groups = group_dates_by_day(ds.time.values)
    
    #slice large dataset into many smaller datasets by date_group
    group_chunks = (ds.sel(time = t) for t in day_groups)
    
    #reduce each dataset using something like "average" or "median" such that many values for a day become one value   
    group_slices = (_ds.reduce(reduction_func, dim = "time") for _ds in group_chunks if has_time_dimension(_ds))

    #recombine slices into larger dataset
    new_dataset  = xr.concat(group_slices, dim = "time") 
    
    #rename times values using the first time in each date_group  
    new_times = list(map(get_first, day_groups))    
    new_dataset = new_dataset.reindex(dict(time = np.array(new_times)))
    
    return new_dataset
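
A quick, illustrative sanity check of group_dates_by_day (toy dates, not drawn from the dataset above): acquisitions that fall on the same UTC day collapse into a single group.

toy_dates = [np.datetime64('2016-01-01T06:00'),
             np.datetime64('2016-01-01T18:00'),
             np.datetime64('2016-01-02T06:00')]
print(group_dates_by_day(toy_dates))
# -> [[2016-01-01T06:00, 2016-01-01T18:00], [2016-01-02T06:00]]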

In [9]:
cleaned_dataset = remove_all_zero(raw_dataset)

In [10]:
cleaned_dataset = reduce_on_day(cleaned_dataset)



In [11]:
cleaned_dataset


Out[11]:
<xarray.Dataset>
Dimensions:    (latitude: 293, longitude: 276, time: 99)
Coordinates:
  * time       (time) datetime64[ns] 2015-05-12T23:20:45.500000 ... 2018-06-19T23:20:47.500000
  * latitude   (latitude) float64 1.541 1.54 1.54 1.54 ... 1.488 1.488 1.488
  * longitude  (longitude) float64 -74.86 -74.86 -74.86 ... -74.81 -74.81 -74.81
Data variables:
    vh         (time, latitude, longitude) float32 -6.579403 ... -24.35921
    vv         (time, latitude, longitude) float32 -2.1306589 ... -18.185274

Log Normalization


In [12]:
%matplotlib inline
import matplotlib.patches as mpatches
plt.figure(figsize = (15,4))

color_patches = list(map(lambda color, label: mpatches.Patch(color=color, label=label), ["blue", "orange"], ["VV", "VH"])) 
plt.legend(handles=color_patches, loc='best')

_ = cleaned_dataset.vv.plot.hist(bins = 200, alpha = 0.5)
_ = cleaned_dataset.vh.plot.hist(bins = 200, alpha = 0.5)
plt.title("Histogram: VV, VH (Without log normalization)")


Out[12]:
Text(0.5, 1.0, 'Histogram: VV, VH (Without log normalization)')
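
The next cell maps the band values onto a decibel scale, dB = 10 * log10(x). A minimal sketch of the transform's behaviour on toy values: zeros become -inf and negative inputs become NaN, which is why the non-finite values are masked out two cells below.

x = np.array([0.0, 0.5, 1.0, 2.0])
print(10 * np.log10(x))   # -> [-inf, -3.0103, 0.0, 3.0103]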

In [13]:
normalized_dataset = xr.merge([
    10 * np.log10(-cleaned_dataset.vv),
    10 * np.log10(-cleaned_dataset.vh)])


/home/localuser/Datacube/datacube_env/lib/python3.5/site-packages/xarray-0.10.9-py3.5.egg/xarray/core/computation.py:561: RuntimeWarning: divide by zero encountered in log10
  result_data = func(*input_data)
/home/localuser/Datacube/datacube_env/lib/python3.5/site-packages/xarray-0.10.9-py3.5.egg/xarray/core/computation.py:561: RuntimeWarning: invalid value encountered in log10
  result_data = func(*input_data)

In [14]:
normalized_dataset = xr.merge([
    normalized_dataset.vv.where(np.isfinite(normalized_dataset.vv)),
    normalized_dataset.vh.where(np.isfinite(normalized_dataset.vh))
])

In [15]:
import numpy as np 
def finite_histogram(data_array, *args, **kwargs):
    x = data_array.values.copy()
    x = x[~np.isnan(x)]
    x = x[np.isfinite(x)]
    
    return plt.hist(x,*args, **kwargs)

In [16]:
plt.figure(figsize = (15,4))

color_patches = list(map(lambda color, label: mpatches.Patch(color=color, label=label), ["blue", "orange"], ["VV", "VH"])) 
plt.legend(handles=color_patches, loc='best')

_ = finite_histogram(normalized_dataset.vv, bins = 500, range=[0, 20], alpha = 0.5)
_ = finite_histogram(normalized_dataset.vh, bins = 500, range=[0, 20], alpha = 0.5)
plt.title("Histogram: VV, VH (Log Normalized)")


Out[16]:
Text(0.5, 1.0, 'Histogram: VV, VH (Log Normalized)')

2. Define Global Parameters for the Deutscher algorithm


In [17]:
n = window_size = 3
band = "vv"
time_range = ("2015-09-01", "2017-01-01")

3. Explore Sentinel-1 statistical composites


In [18]:
subject = normalized_dataset

In [19]:
# subject = normalized_dataset # or log_normalized_dataset
data_array = subject[band]
data_array = data_array.where(data_array != 0)

Coefficient of Variation


In [20]:
import xarray as xr
import numpy as np  

def np_cv(arr: np.ndarray, axis = None) -> np.ndarray:
    # Coefficient of variation: standard deviation normalized by the mean
    mu = np.nanmean(arr, axis = axis)
    std = np.nanstd(arr, axis = axis)
    return std / mu

def global_coefficient_of_variance(ds: xr.Dataset) -> xr.DataArray:
    arrays = [ds[variable].values for variable in ds.data_vars]
    concatted_array = np.concatenate(arrays)
    
    # Keep every coordinate except time, which the reduction collapses
    coords = list(ds.coords)[1:]
    
    return xr.DataArray(data = np_cv(concatted_array, axis = 0),
                        coords = [ds[coord] for coord in coords],
                        attrs  = ds.attrs)
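
As an illustrative check of np_cv, consider a toy stack of three "time" slices with two pixels each; the reduction runs over the first axis, leaving one value per pixel.

arr = np.array([[1.0, 2.0],
                [3.0, 4.0],
                [5.0, 6.0]])
print(np_cv(arr, axis = 0))   # per-pixel std/mean -> [0.5443, 0.4082]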

In [21]:
# dataset_cov = global_coefficient_of_variance(data_array.to_dataset(name = band))
dataset_cov = global_coefficient_of_variance(subject) # if you wish to include both VV and VH
dataset_cov = dataset_cov.to_dataset(name = "cov")

In [22]:
dataset_cov.cov.plot()


Out[22]:
<matplotlib.collections.QuadMesh at 0x7f16ab4b42e8>

In [23]:
plt.figure(figsize = (15,2))
_ = finite_histogram(dataset_cov.cov, bins = 500)


Mean


In [24]:
dataset_mean = data_array.reduce(np.nanmean, dim = "time").to_dataset(name = "mu")

In [25]:
dataset_mean.mu.plot(vmin=0,vmax=np.max(dataset_mean.mu))


Out[25]:
<matplotlib.collections.QuadMesh at 0x7f16a9fd1c50>

In [26]:
plt.figure(figsize = (15,2))
_ = finite_histogram(dataset_mean.mu, bins = 500)


Min


In [27]:
dataset_min = data_array.min(dim = "time").to_dataset(name = "ds_min")

In [28]:
dataset_min.ds_min.plot(vmin=0,vmax=np.max(dataset_mean.mu))


Out[28]:
<matplotlib.collections.QuadMesh at 0x7f16aa056630>

In [29]:
plt.figure(figsize = (15,2))
_ = finite_histogram(dataset_min.ds_min, bins = 500)


All at once


In [30]:
plt.figure(figsize = (15,2))


color_patches = list(map(lambda color, label: mpatches.Patch(color=color, label=label), ["skyblue", "red", "green"], ["min", "cov", "mean"])) 
plt.legend(handles=color_patches, loc='best')

_ = finite_histogram(dataset_min.ds_min, bins = 500, alpha = 0.5, range = [-5,25], color = "skyblue")
_ = finite_histogram(dataset_cov.cov, bins = 500, alpha = 0.5, range = [-5,25], color = "red")
_ = finite_histogram(dataset_mean.mu, bins = 500, alpha = 0.5, range = [-5,25], color = "green")


4. Plot RGB false color composites


In [31]:
from utils.data_cube_utilities.dc_rgb import rgb

In [32]:
import xarray as xr 
from functools import reduce  

stats_dataset = xr.merge([dataset_cov, dataset_mean, dataset_min])
stats_dataset = stats_dataset.where(reduce(np.logical_and, [np.isfinite(stats_dataset.ds_min.values),
                                                            np.isfinite(stats_dataset.mu.values),
                                                            np.isfinite(stats_dataset.cov.values)]))

RGB Composite 1

R : Coefficient of variation
G : Mean
B : Coefficient of variation


In [33]:
rgb(stats_dataset, bands= ["cov", "mu", "cov"])


RGB Composite 2

R : Coefficient of variation
G : Mean
B : Minimum


In [36]:
rgb(stats_dataset, bands= ["cov", "mu", "ds_min"])


5. Develop a Backscatter Trend Product


In [37]:
bt_source = data_array

In [38]:
n_earliest_times = bt_source.time.values[0:n] ##  Select First N dates in xarray
n_latest_times   = bt_source.time.values[-n:] ##  Select Last N dates in xarray

ds_before = bt_source.sel(time = n_earliest_times) ## Subset xarray datasets into before time frame
ds_after  = bt_source.sel(time = n_latest_times)   ## Subset xarray datasets into after  time frame

In [39]:
def delta(before, after):
    return after - before

In [40]:
ds_after_mean  = ds_after.mean(dim = 'time')
ds_before_mean = ds_before.mean(dim = 'time')

In [41]:
bt = delta(ds_before_mean, ds_after_mean).to_dataset(name = "backscatter_trend")
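
For reference, the same trend can be computed in a single expression; this sketch assumes only bt_source and n as defined above.

bt_alt = (bt_source.isel(time = slice(-n, None)).mean(dim = 'time')
          - bt_source.isel(time = slice(0, n)).mean(dim = 'time'))
# bt_alt matches bt.backscatter_trend up to floating point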


In [43]:
plt.figure(figsize = (15,2.5))
plt.title("Histogram: Backscatter {}".format(band))
_ = finite_histogram(bt.backscatter_trend, bins = 2000, range = [-5,5])


Plot Backscatter Trend


In [44]:
def aspect_ratio_helper(x, y, fixed_width = 20):
    # Scale a raster's (rows, cols) shape to a matplotlib figsize with a fixed width
    width = fixed_width
    height = y * (fixed_width / x)
    return (int(width), int(height))

In [45]:
aspect_ratio = aspect_ratio_helper(*bt.backscatter_trend.values.shape)

In [46]:
plt.figure(figsize = aspect_ratio)
bt.backscatter_trend\
    .isel(latitude  = slice(40,600),
          longitude = slice(0, 550))\
    .plot(cmap = "Greys", vmin = -10, vmax = 10)


Out[46]:
<matplotlib.collections.QuadMesh at 0x7f16a87647f0>

6. Calculate a Deutscher Product


In [47]:
nbt = bt.where(np.isfinite(bt.backscatter_trend.values))

In [48]:
plt.figure(figsize = (15,2.5))
_ = finite_histogram(nbt.backscatter_trend, bins = 1000)



In [49]:
deutscher_product = stats_dataset.cov * nbt.backscatter_trend
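
The Deutscher product is the pixel-wise product of the coefficient of variation and the backscatter trend, so temporally stable pixels score near zero while pixels that are both variable and trending stand out. A hypothetical follow-up (the cutoff below is illustrative, not from the source) would threshold the product into a candidate change mask:

threshold = 1.0   # hypothetical cutoff; tune per scene and band
change_mask = deutscher_product.where(np.abs(deutscher_product) > threshold)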

In [50]:
plt.figure(figsize = (15,2.5))
plt.title("Histogram: Deutscher")
_ = finite_histogram(deutscher_product, bins = 1000)


7. Plot Deutscher


In [51]:
# Custom function for a color mapping object
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.pyplot as plt

def custom_color_mapper(name = "custom", val_range = (-1.96, 1.96), colors = ["red", "yellow", "green"]):
    custom_cmap = LinearSegmentedColormap.from_list(name, colors = colors)
    
    vmin, vmax = val_range
    Z = [vmin, 0], [0, vmax]
    # Render a throwaway contour plot to register the colormap, then clear it
    cust_map = plt.contourf(Z, 100, cmap = custom_cmap)
    plt.clf()
    return cust_map.cmap

In [52]:
aspect_ratio = aspect_ratio_helper(*deutscher_product.values.shape)
plt.figure(figsize = aspect_ratio)

deutscher_colors = custom_color_mapper(name = "Deutscher", colors = ["red","yellow", "green"])

deutscher_product.plot(cmap = deutscher_colors)


Out[52]:
<matplotlib.collections.QuadMesh at 0x7f16a86202e8>

In [53]:
from utils.data_cube_utilities import dc_utilities
def export_slice_to_geotiff(ds, path):
    dc_utilities.write_geotiff_from_xr(path,
                                        ds.astype(np.float32),
                                        list(ds.data_vars.keys()),
                                        crs="EPSG:4326")

In [54]:
export_slice_to_geotiff(deutscher_product.to_dataset(name = 'deutscher'), 'algorithm[{}][{}][{}].tif'.format(algorithm, version, product))



In [ ]: