NDVI Anomaly

This notebook compares NDVI between two time periods to detect land change. In the case of deforestation, the NDVI values will reduce from (0.6 to 0.9 ... typical for forests) to lower values (<0.6). This change can be detected and used to investigate deforestation or monitor the extent of the land change.


In [1]:
name = "NDVI anomaly"
version = 0.01
%matplotlib inline

Choose Platform and Product


In [2]:
import warnings
# Supress Warning 
warnings.filterwarnings('ignore')

In [3]:
import utils.data_cube_utilities.data_access_api as dc_api  
api = dc_api.DataAccessApi()
dc = api.dc

In [4]:
# Get available products
products_info = dc.list_products()

In [5]:
# List LANDSAT 7 products
print("LANDSAT 7 Products:")
products_info[["platform", "name"]][products_info.platform == "LANDSAT_7"]


LANDSAT 7 Products:
Out[5]:
platform name
id
2 LANDSAT_7 ls7_usgs_sr_scene

In [6]:
# List LANDSAT 8 products
print("LANDSAT 8 Products:")
products_info[["platform", "name"]][products_info.platform == "LANDSAT_8"]


LANDSAT 8 Products:
Out[6]:
platform name
id
1 LANDSAT_8 ls8_usgs_sr_scene

In [7]:
# Select one of the data cubes

product = "ls7_usgs_sr_scene"
platform = "LANDSAT_7" 

# product = "ls8_usgs_sr_scene"
# platform = "LANDSAT_8"

In [8]:
from utils.data_cube_utilities.dc_load import get_product_extents
from utils.data_cube_utilities.dc_time import dt_to_str

full_lat, full_lon, min_max_dates = get_product_extents(api, platform, product)

# Print the extents of the combined data.
print("Latitude Extents:", full_lat)
print("Longitude Extents:", full_lon)
print("Time Extents:", list(map(dt_to_str, min_max_dates)))


Latitude Extents: (-12.57305555565614, 18.32305555570214)
Longitude Extents: (-25.66583333353866, 44.05861111146359)
Time Extents: ['1999-07-08', '2020-01-10']

In [9]:
# Select an analysis region and visualize the region

# Dodoma, Tanzania
latitude = (-6.27, -6.04) 
longitude = (35.66, 35.89)

In [10]:
## The code below renders a map that can be used to orient yourself with the region.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude = latitude, longitude = longitude)


Out[10]:

Define Analysis Parameters


In [11]:
from datetime import datetime

# To compare the NDVI values from two time periods (baseline and analysis), 
# it is best to create "Maximum NDVI" cloud-free mosaics and then compare the changes

# Select the baseline time period (start and end)
baseline_time_period = ("2000-01-01", "2001-12-31")

# Select the analysis time period (start and end)
analysis_time_period = ("2016-01-01", "2017-12-31") 

# Select the cloud-free mosaic type
# Options are: max_ndvi, median, most_recent_pixel
# It is most common to use the maximum NDVI mosaic for this analysis

baseline_mosaic_function = "max_ndvi" 
analysis_mosaic_function = "max_ndvi" 

# Select a baseline NDVI threshold range
# The analysis will only consider pixels in this range for change detection
# Example: use 0.6 to 0.9 for dense vegetation, grasslands are 0.2 to 0.6
ndvi_baseline_threshold_range = (0.6, 0.9)

Load and Clean Data

Initialize Connection to Datacube


In [12]:
import datacube
dc = datacube.Datacube(app = f"{name}_v{version}")

Load Data ( Baseline, Analysis)


In [13]:
baseline_ds = dc.load(
    latitude = latitude,
    longitude = longitude,
    time = baseline_time_period,
    measurements = ["red", "green", "blue", "nir", "swir1", "swir2", "pixel_qa"],
    product = product,
    platform = platform
)

In [14]:
analysis_ds = dc.load(
    latitude = latitude,
    longitude = longitude,
    time = analysis_time_period,
    measurements = ["red", "green", "blue", "nir", "swir1", "swir2", "pixel_qa"],
    product = product,
    platform = platform
)

Check if loads are valid


In [15]:
import xarray as xr 
def is_dataset_empty(ds:xr.Dataset) -> bool:
    checks_for_empty = [
                        lambda x: len(x.dims) == 0,      #Dataset has no dimensions
                        lambda x: len(x.data_vars) == 0  #Dataset no variables 
                       ]
    for f in checks_for_empty:
         if f(ds) == True:
                return True
    return False

In [16]:
if is_dataset_empty(baseline_ds): raise Exception("DataCube Load returned an empty Dataset." +  
                                               "Please check load parameters for Baseline Dataset!")

In [17]:
if is_dataset_empty(analysis_ds): raise Exception("DataCube Load returned an empty Dataset." +  
                                               "Please check load parameters for Analysis Dataset!")

Clean Data

Generating boolean masks that highlight valid pixels Pixels must be cloud-free over land or water to be considered


In [18]:
from utils.data_cube_utilities.dc_mosaic import ls8_unpack_qa, ls7_unpack_qa 

unpack_function = {"LANDSAT_7": ls7_unpack_qa,
                   "LANDSAT_8": ls8_unpack_qa}

unpack = unpack_function[platform]

In [19]:
import numpy as np
def clean_mask(ds, unpacking_func, bands):
    masks = [unpacking_func(ds, band) for band in bands]
    return np.logical_or(*masks).values

In [20]:
baseline_clean_mask = clean_mask(baseline_ds.pixel_qa,unpack, ["clear", "water"])
analysis_clean_mask = clean_mask(analysis_ds.pixel_qa, unpack, ["clear", "water"])

In [21]:
baseline_ds = baseline_ds.where(baseline_clean_mask)
analysis_ds = analysis_ds.where(analysis_clean_mask)

Mosaic

Use clean masks in a time series composite


In [22]:
from utils.data_cube_utilities.dc_mosaic import create_max_ndvi_mosaic, create_median_mosaic, create_mosaic

In [23]:
mosaic_function = {"median": create_median_mosaic,
                   "max_ndvi": create_max_ndvi_mosaic,
                   "most_recent_pixel": create_mosaic}

In [24]:
baseline_compositor = mosaic_function[baseline_mosaic_function]
analysis_compositor = mosaic_function[analysis_mosaic_function]

In [25]:
baseline_composite = baseline_compositor(baseline_ds, clean_mask = baseline_clean_mask)
analysis_composite = analysis_compositor(analysis_ds, clean_mask = analysis_clean_mask)

In [26]:
from utils.data_cube_utilities.dc_rgb import rgb

In [27]:
import matplotlib.pyplot as plt
plt.figure(figsize = (12,6))
rgb(baseline_composite)
plt.show()


<Figure size 864x432 with 0 Axes>

In [28]:
plt.figure(figsize = (12,6))
rgb(analysis_composite)
plt.show()


<Figure size 864x432 with 0 Axes>

Baseline Mosaic using the NDVI Threshold Range

The image below will mask clouds and only include pixels that fall within the threshold range


In [29]:
def NDVI(dataset):
    return (dataset.nir - dataset.red)/(dataset.nir + dataset.red)

In [30]:
def TCAPWET(dataset):
        return (0.1509*dataset.red + 0.1973*dataset.green + 0.3279*dataset.blue 
                + 0.3406*dataset.nir - 0.7112*dataset.swir1 - 0.4572*dataset.swir2)

In [31]:
_min, _max = ndvi_baseline_threshold_range  
baseline_ndvi_filter_mask = np.logical_and(NDVI(baseline_composite) > _min, NDVI(baseline_composite) < _max)

In [32]:
def aspect_ratio_helper(ds, fixed_width = 15):
        y,x = ds.values.shape
        width = fixed_width
        height = y * (fixed_width / x)
        return (width, height)

In [33]:
import matplotlib.pyplot as plt
from matplotlib.cm import RdYlGn, Greens
RdYlGn.set_bad('black',1.)
Greens.set_bad('black',1.)

In [34]:
# This is the baseline NDVI threshold plot that shows GREEN pixels in the threshold range
# plt.figure(figsize = aspect_ratio_helper(baseline_ndvi_filter_mask)) 
plt.figure(figsize = (8,8))
baseline_ndvi_filter_mask.plot(cmap = "Greens")


Out[34]:
<matplotlib.collections.QuadMesh at 0x7f8f274eb908>

In [35]:
baseline_composite = baseline_composite.where(baseline_ndvi_filter_mask)

Wetness Anomaly


In [36]:
tcap_wet_baseline = TCAPWET(baseline_composite) # Tasselled Cap Wetness

In [37]:
(tcap_wet_baseline).plot(figsize=(8,8), vmin=-1500, vmax=1500, cmap = "Blues")


Out[37]:
<matplotlib.collections.QuadMesh at 0x7f8f260c79b0>

In [38]:
analysis_composite_mask = analysis_composite.where(baseline_ndvi_filter_mask)

In [39]:
tcap_wet_analysis = TCAPWET(analysis_composite_mask) # Tasselled Cap Wetness

In [40]:
(tcap_wet_analysis).plot(figsize=(8,8), vmin=-1500, vmax=1500, cmap = "Blues")


Out[40]:
<matplotlib.collections.QuadMesh at 0x7f8f247ad0f0>

In [41]:
wetness_anomaly = tcap_wet_analysis - tcap_wet_baseline

In [42]:
(wetness_anomaly).plot(figsize=(8,8), vmin=-3500, vmax=0, cmap = "Reds_r")


Out[42]:
<matplotlib.collections.QuadMesh at 0x7f8f246ce358>

NDVI Anomaly


In [43]:
ndvi_baseline_composite = NDVI(baseline_composite)
ndvi_analysis_composite = NDVI(analysis_composite)

In [44]:
ndvi_anomaly = ndvi_analysis_composite - ndvi_baseline_composite

NDVI Anomaly Plot


In [45]:
plt.figure(figsize = (8,8))
ndvi_anomaly.plot(vmin=-0.5, vmax=0.5, cmap = RdYlGn)


Out[45]:
<matplotlib.collections.QuadMesh at 0x7f8f27523908>

Discretized/Binned plot


In [46]:
plt.figure(figsize = (8,8))
ndvi_anomaly.plot(levels = 8, vmin=-1, vmax=1, cmap = RdYlGn)


Out[46]:
<matplotlib.collections.QuadMesh at 0x7f8f1b85d630>

In [47]:
from utils.data_cube_utilities.plotter_utils import create_discrete_color_map
cmap = create_discrete_color_map(data_range=(-1,1), 
                                 th=[0.0], colors=[(240,93,94), (126,243,125)])
cmap.set_bad("black", 1.)

NDVI Anomaly

This product shows the following ...
BLACK = Cloud or Pixels NOT in the baseline threshold range
GREEN = Pixels with an increase in NDVI
RED = Pixels with a decrease in NDVI


In [48]:
plt.figure(figsize = (8,8))
plt.imshow(ndvi_anomaly.values, cmap=cmap, vmin=-1, vmax=1)
plt.show()


NDVI Anomaly Threshold Product


In [49]:
# Select NDVI Anomaly Threshold Range
# We are looking for pixels that have lost significant vegetation
# NDVI losses are typically 0.1 or more for deforestation

minimum_change = -0.6
maximum_change = -0.2

NDVI Change Distribution

Threshold range, highlighted in red


In [50]:
from matplotlib.ticker import FuncFormatter

def threshold_plot(da, min_threshold, max_threshold, mask = None, width = 10, *args, **kwargs): 
    color_in    = np.array([255,0,0])
    color_out   = np.array([0,0,0])
    color_cloud = np.array([255,255,255])
    
    array = np.zeros((*da.values.shape, 3)).astype(np.int16)
    
    inside  = np.logical_and(da.values > min_threshold, da.values < max_threshold)
    outside = np.invert(inside)
    masked  = np.zeros(da.values.shape).astype(bool) if mask is None else mask
    
    array[inside] =  color_in
    array[outside] = color_out
    array[masked] =  color_cloud

    def figure_ratio(ds, fixed_width = 10):
        width = fixed_width
        height = len(ds.latitude) * (fixed_width / len(ds.longitude))
        return (width, height)


    fig, ax = plt.subplots(figsize = figure_ratio(da,fixed_width = width))
    
    lat_formatter = FuncFormatter(lambda y_val, tick_pos: "{0:.3f}".format(da.latitude.values[tick_pos] ))
    lon_formatter = FuncFormatter(lambda x_val, tick_pos: "{0:.3f}".format(da.longitude.values[tick_pos]))

    ax.xaxis.set_major_formatter(lon_formatter)
    ax.yaxis.set_major_formatter(lat_formatter)
    
    plt.title("Threshold: {} < x < {}".format(min_threshold, max_threshold))
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    
    plt.imshow(array, *args, **kwargs)
    plt.show()

In [51]:
no_data_mask = np.logical_or(np.isnan(baseline_composite.red.values), np.isnan(analysis_composite.red.values))

In [52]:
threshold_plot(ndvi_anomaly, minimum_change, maximum_change, mask = no_data_mask, width  = 8)



In [53]:
def threshold_count(da, min_threshold, max_threshold, mask = None):
    def count_not_nans(arr):
        return np.count_nonzero(~np.isnan(arr))
    
    in_threshold = np.logical_and( da.values > min_threshold, da.values < max_threshold)
    
    total_non_cloudy = count_not_nans(da.values) if mask is None else np.sum(mask) 
    
    return dict(total = np.size(da.values),
                total_non_cloudy = total_non_cloudy,
                inside = np.nansum(in_threshold),
                outside = total_non_cloudy - np.nansum(in_threshold)
               )    
    
def threshold_percentage(da, min_threshold, max_threshold, mask = None):
    counts = threshold_count(da, min_threshold, max_threshold, mask = mask)
    return dict(percent_inside_threshold = (counts["inside"]   / counts["total"]) * 100.0,
                percent_outside_threshold = (counts["outside"] / counts["total"]) * 100.0,
                percent_clouds = ( 100.0-counts["total_non_cloudy"] / counts["total"] * 100.0))

In [54]:
threshold_count(ndvi_anomaly,minimum_change,maximum_change)


Out[54]:
{'total': 687241,
 'total_non_cloudy': 315569,
 'inside': 59766,
 'outside': 255803}

In [55]:
threshold_percentage(ndvi_anomaly,minimum_change,maximum_change)


Out[55]:
{'percent_inside_threshold': 8.696512577101775,
 'percent_outside_threshold': 37.221731532315445,
 'percent_clouds': 54.08175589058278}