This notebook uses changes in NDVI, EVI, or Fractional Cover to identify land change. The algorithm defines a "baseline" and an "analysis" time period and then compares the spectral parameters between those two periods. Significant reductions in vegetation often coincide with land change and, in some cases, deforestation. Users of this algorithm should not accept the results as accurate without conducting ground validation testing to assess their accuracy. In most cases, these algorithms can identify clusters of pixels that have experienced change, allowing local or regional governments to target those areas for investigation.
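The core computation is a per-pixel difference between two cloud-free composites; pixels with strongly negative differences are flagged as potential change. A minimal sketch of the idea, using small hypothetical arrays in place of real satellite composites:

import numpy as np
import xarray as xr

# Hypothetical 2x2 vegetation-index composites for two time periods.
baseline = xr.DataArray(np.array([[0.8, 0.7], [0.6, 0.9]]), dims=("y", "x"))
analysis = xr.DataArray(np.array([[0.2, 0.7], [0.6, 0.3]]), dims=("y", "x"))

# Negative anomalies mark pixels that lost vegetation between the two periods.
anomaly = analysis - baseline
vegetation_loss = anomaly < -0.1  # illustrative threshold only
print(anomaly.values)
print(vegetation_loss.values)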
In [1]:
# Enable importing of utilities.
import sys
sys.path.append('..')
# Suppress warnings
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
# Load Data Cube Configuration
import datacube
import utils.data_cube_utilities.data_access_api as dc_api
api = dc_api.DataAccessApi()
dc = api.dc
CHANGE INPUTS BELOW
In [2]:
# Select a Product and Platform
# Examples: ghana, kenya, tanzania, sierra_leone, senegal
product = "ls7_usgs_sr_scene"
platform = "LANDSAT_7"
# product = "ls8_usgs_sr_scene"
# platform = "LANDSAT_8"
In [3]:
from utils.data_cube_utilities.dc_load import get_product_extents
from utils.data_cube_utilities.dc_time import dt_to_str
full_lat, full_lon, min_max_dates = get_product_extents(api, platform, product)
# Print the extents of the combined data.
print("Latitude Extents:", full_lat)
print("Longitude Extents:", full_lon)
print("Time Extents:", list(map(dt_to_str, min_max_dates)))
Visualize the available area
In [4]:
from utils.data_cube_utilities.dc_display_map import display_map
display_map(full_lat, full_lon)
Out[4]:
CHANGE INPUTS BELOW
In [5]:
# Select an analysis region
# Mau Forest - Western Kenya
# latitude = (-0.13406, 0.21307)
# longitude = (35.28322, 35.56681)
# Nairobi, Kenya
latitude = (-1.4347, -1.151)
longitude = (36.6985, 36.9978)
# Dar es Salaam, Tanzania
# latitude = (-7.0, -6.7)
# longitude = (39.1, 39.4)
# Phu Thuy, Vietnam
# latitude = (10.8945, 10.9679)
# longitude = (108.0616, 108.1460)
Visualize the selected area
In [6]:
display_map(latitude = latitude, longitude = longitude)
Out[6]:
CHANGE INPUTS BELOW
In [7]:
# Select the start and end dates for the baseline and analysis periods
# Dates are specified as strings in "YYYY-MM-DD" format
# Each time window is used to build a mosaic, so typically select a period of one year or more
# Be sure to review the RGB mosaics to confirm they are not dominated by clouds
# Select the baseline time period (start and end)
baseline_time_period = ("2002-01-01", "2002-12-31")
# Select the analysis time period (start and end)
analysis_time_period = ("2015-01-01", "2015-12-31")
# Select the cloud-free mosaic type
# Options are: max_ndvi, min_ndvi, median, most_recent_pixel, geomedian
# If a geomedian is selected, it will take much longer to process
# It is most common to use the max_ndvi or median for these land change analyses
# HINT: Consider max_ndvi mosaics for NDVI analyses and median mosaics for EVI analyses
baseline_mosaic_function = "median"
analysis_mosaic_function = "median"
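For context, a median mosaic collapses the time dimension by taking the per-pixel median of the cloud-free observations, which suppresses residual clouds and shadows. A minimal sketch of the idea with a hypothetical time-stacked array (NaN marks cloudy pixels); the actual create_*_mosaic utilities used below also handle masks and data types:

import numpy as np
import xarray as xr

# Hypothetical stack of three partly cloudy NDVI scenes (NaN = cloud).
stack = xr.DataArray(
    np.array([[[0.6, np.nan], [0.5, 0.4]],
              [[0.7, 0.3], [np.nan, 0.5]],
              [[0.5, 0.2], [0.4, np.nan]]]),
    dims=("time", "y", "x"))

# The median mosaic collapses the time axis, ignoring NaNs per pixel.
median_mosaic = stack.median(dim="time", skipna=True)
print(median_mosaic.values)  # [[0.6, 0.25], [0.45, 0.45]]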
In [8]:
import datacube
name = "land change"
version = 1
dc = datacube.Datacube(app = f"{name}_v{version}")
common_load_params = dict(latitude = latitude, longitude = longitude,
measurements = ["red", "green", "blue", "nir", "swir1", "swir2", "pixel_qa"],
product = product, platform = platform,
group_by='solar_day',
dask_chunks={'time':1})
In [9]:
baseline_ds = dc.load(**common_load_params,
time = baseline_time_period).compute()
In [10]:
analysis_ds = dc.load(**common_load_params,
time = analysis_time_period).compute()
In [11]:
def is_dataset_empty(ds: xr.Dataset) -> bool:
    checks_for_empty = [
        lambda x: len(x.dims) == 0,      # Dataset has no dimensions
        lambda x: len(x.data_vars) == 0, # Dataset has no variables
        lambda x: list(x.data_vars.values())[0].count().values == 0 # Data variables are empty
    ]
    for f in checks_for_empty:
        if f(ds):
            return True
    return False
In [12]:
if is_dataset_empty(baseline_ds): raise Exception("DataCube Load returned an empty Dataset. " +
                                                  "Please check load parameters for Baseline Dataset!")
In [13]:
if is_dataset_empty(analysis_ds): raise Exception("DataCube Load returned an empty Dataset. " +
                                                  "Please check load parameters for Analysis Dataset!")
In [14]:
from utils.data_cube_utilities.dc_mosaic import ls8_unpack_qa, ls7_unpack_qa
unpack_functions = {"LANDSAT_7": ls7_unpack_qa,
"LANDSAT_8": ls8_unpack_qa}
unpack_function = unpack_functions[platform]
In [15]:
from functools import reduce

def clean_mask(ds, unpacking_func, cover_types):
    # Build one boolean mask per cover type, then OR them together so a
    # pixel is kept if it matches any of the requested cover types.
    masks = [unpacking_func(ds, cover_type) for cover_type in cover_types]
    return reduce(np.logical_or, masks).values
In [16]:
baseline_clean_mask = clean_mask(baseline_ds.pixel_qa, unpack_function, ["clear", "water"])
analysis_clean_mask = clean_mask(analysis_ds.pixel_qa, unpack_function, ["clear", "water"])
In [17]:
baseline_ds = baseline_ds.where(baseline_clean_mask)
analysis_ds = analysis_ds.where(analysis_clean_mask)
In [18]:
from utils.data_cube_utilities.dc_mosaic import create_max_ndvi_mosaic, create_min_ndvi_mosaic, create_median_mosaic, create_mosaic, create_hdmedians_multiple_band_mosaic
mosaic_function = {"median": create_median_mosaic,
"max_ndvi": create_max_ndvi_mosaic,
"min_ndvi": create_min_ndvi_mosaic,
"geomedian": create_hdmedians_multiple_band_mosaic,
"most_recent_pixel": create_mosaic}
In [19]:
baseline_compositor = mosaic_function[baseline_mosaic_function]
analysis_compositor = mosaic_function[analysis_mosaic_function]
In [20]:
baseline_composite = baseline_compositor(baseline_ds, clean_mask = baseline_clean_mask)
analysis_composite = analysis_compositor(analysis_ds, clean_mask = analysis_clean_mask)
In [21]:
from utils.data_cube_utilities.vegetation import NDVI, EVI
from utils.data_cube_utilities.dc_fractional_coverage_classifier import frac_coverage_classify
# Select these two lines for NDVI
parameter_baseline_composite = NDVI(baseline_composite)
parameter_analysis_composite = NDVI(analysis_composite)
# Select these two lines for EVI
# parameter_baseline_composite = EVI(baseline_composite)
# parameter_analysis_composite = EVI(analysis_composite)
# Select these two lines for Fractional Cover (FC)
# parameter_baseline_composite = frac_coverage_classify(baseline_composite).pv
# parameter_analysis_composite = frac_coverage_classify(analysis_composite).pv
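For reference, NDVI is the normalized difference of the near-infrared and red bands. A minimal sketch of computing it directly from a composite's bands, equivalent in spirit to the NDVI utility imported above:

# NDVI = (NIR - Red) / (NIR + Red), computed per pixel.
ndvi_manual = ((baseline_composite.nir - baseline_composite.red) /
               (baseline_composite.nir + baseline_composite.red))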
In [22]:
parameter_anomaly = parameter_analysis_composite - parameter_baseline_composite
In [23]:
import matplotlib.pyplot as plt
from utils.data_cube_utilities.dc_rgb import rgb
from matplotlib.cm import RdYlGn, Greens
RdYlGn.set_bad('black',1.)
Greens.set_bad('black',1.)
In [24]:
fig, ax = plt.subplots(2, 2, figsize=(12,12))
baseline_rgb = baseline_composite[['red', 'green', 'blue']].to_array()
analysis_rgb = analysis_composite[['red', 'green', 'blue']].to_array()
# Use the middle values of the data (2% to 98%) to brighten the image
rgb_vmin = min(baseline_rgb.quantile(0.02).values,analysis_rgb.quantile(0.02).values)
rgb_vmax = max(baseline_rgb.quantile(0.98).values,analysis_rgb.quantile(0.98).values)
# Plot the resulting 4 products: baseline RGB, analysis RGB, parameter anomaly, and the analysis SWIR1 band
baseline_rgb.plot.imshow(ax=ax[0,0], vmin=rgb_vmin, vmax=rgb_vmax)
analysis_rgb.plot.imshow(ax=ax[0,1], vmin=rgb_vmin, vmax=rgb_vmax)
parameter_anomaly.plot(ax=ax[1,0], vmin=-0.7, vmax=0.7, cmap = RdYlGn)
analysis_composite['swir1'].plot.imshow(ax=ax[1,1], add_colorbar=False)
# Titles
ax[0,0].set_title('Baseline composite')
ax[0,1].set_title('Analysis composite')
ax[1,0].set_title('Anomaly (analysis minus baseline)')
ax[1,1].set_title('Analysis composite (SWIR1 band)')
plt.show()
In [25]:
def threshold_count(da, min_threshold, max_threshold, mask=None):
    def count_not_nans(arr):
        return np.count_nonzero(~np.isnan(arr))
    in_threshold = np.logical_and(da.values > min_threshold, da.values < max_threshold)
    total_non_cloudy = count_not_nans(da.values) if mask is None else np.sum(mask)
    return dict(total=np.size(da.values),
                total_non_cloudy=total_non_cloudy,
                inside=np.nansum(in_threshold),
                outside=total_non_cloudy - np.nansum(in_threshold))

def threshold_percentage(da, min_threshold, max_threshold, mask=None):
    counts = threshold_count(da, min_threshold, max_threshold, mask=mask)
    return dict(percent_inside_threshold=(counts["inside"] / counts["total"]) * 100.0,
                percent_outside_threshold=(counts["outside"] / counts["total"]) * 100.0,
                percent_clouds=100.0 - (counts["total_non_cloudy"] / counts["total"]) * 100.0)
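As a quick sanity check, the counting logic can be exercised on a tiny hypothetical array, where NaN stands in for a cloudy pixel:

demo = xr.DataArray(np.array([[-0.5, np.nan], [0.2, -0.05]]))
# -0.5 lies inside (-0.7, -0.1); 0.2 and -0.05 lie outside; the NaN is cloudy.
print(threshold_count(demo, -0.7, -0.1))
# {'total': 4, 'total_non_cloudy': 3, 'inside': 1, 'outside': 2}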
In [26]:
# Select an Anomaly Threshold Range to calculate the results from the previous image
# Be sure to put the smallest value in the "minimum_change" location (be careful of negative values)
# Vegetation losses from deforestation typically fall in these ranges:
# NDVI: -0.7 to -0.1
# Fractional Cover PV: -70 to -20
minimum_change = -0.7
maximum_change = -0.1
In [27]:
# This output is a count of the pixels that fall within each threshold range
threshold_count(parameter_anomaly,minimum_change,maximum_change)
Out[27]:
In [28]:
# This output is a percentage of the pixels that fall within each threshold range
threshold_percentage(parameter_anomaly,minimum_change,maximum_change)
Out[28]:
CHANGE INPUTS BELOW
In [29]:
from utils.data_cube_utilities.dc_utilities import write_geotiff_from_xr
from pathlib import Path
# Remove the comment tag # to create a GeoTIFF output product
# Change the name of the output file, or it will be overwritten for each run
# There are 3 possible outputs ... baseline mosaic, analysis mosaic, or anomaly product
output_dir = "../output/geotiffs"
path = Path(output_dir)
path.mkdir(parents=True, exist_ok=True)
# Export baseline composite.
# write_geotiff_from_xr(output_dir + "/sample_baseline_01.tif", bands = ['red','green','blue','nir','swir1','swir2'], baseline_composite)
# Export analysis composite.
# write_geotiff_from_xr(output_dir + "/sample_analysis_01.tif", bands = ['red','green','blue','nir','swir1','swir2'], analysis_composite)
# Export anomaly product.
write_geotiff_from_xr(output_dir + "/sample_anomaly_01.tif", parameter_anomaly.to_dataset(name='pv'), bands=['pv'])
In [30]:
!ls -lah ../output/geotiffs/*.tif