In [2]:

    
# Enable importing of utilities.
from sys import path
path.append("..")

ARDC Training: Python Notebooks

Task-C: Fractional Cover (FC) and Spectral Indices (NDBI and NDVI)

Import the Datacube Configuration



In [5]:

    
import datacube
import utils.data_cube_utilities.data_access_api as dc_api  
api = dc_api.DataAccessApi()
dc = datacube.Datacube(app = 'ardc_task_c')
api.dc = dc

Browse the available Data Cubes



In [7]:

    
list_of_products = dc.list_products()
netCDF_products = list_of_products[list_of_products['format'] == 'NetCDF']
netCDF_products









    Out[7]:







  
    
      
      name
      description
      lat
      label
      lon
      product_type
      time
      platform
      format
      instrument
      creation_time
      crs
      resolution
      tile_size
      spatial_dimensions
    
    
      id
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      13
      ls7_ledaps_ghana
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      None
      None
      None
      LEDAPS
      None
      LANDSAT_7
      NetCDF
      ETM
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      17
      ls7_ledaps_kenya
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      None
      None
      None
      LEDAPS
      None
      LANDSAT_7
      NetCDF
      ETM
      None
      EPSG:4326
      (-0.000269493, 0.000269493)
      (0.99981903, 0.99981903)
      (latitude, longitude)
    
    
      18
      ls7_ledaps_senegal
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      None
      None
      None
      LEDAPS
      None
      LANDSAT_7
      NetCDF
      ETM
      None
      EPSG:4326
      (-0.000271152, 0.00027769)
      (0.813456, 0.83307)
      (latitude, longitude)
    
    
      16
      ls7_ledaps_sierra_leone
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      None
      None
      None
      LEDAPS
      None
      LANDSAT_7
      NetCDF
      ETM
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      19
      ls7_ledaps_tanzania
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      None
      None
      None
      LEDAPS
      None
      LANDSAT_7
      NetCDF
      ETM
      None
      EPSG:4326
      (-0.000271277688070265, 0.000271139577954979)
      (0.999929558226998, 0.999962763497961)
      (latitude, longitude)
    
    
      31
      ls7_ledaps_vietnam
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      None
      None
      None
      LEDAPS
      None
      LANDSAT_7
      NetCDF
      ETM
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      9
      ls8_lasrc_ghana
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      None
      None
      None
      LaSRC
      None
      LANDSAT_8
      NetCDF
      OLI_TIRS
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      10
      ls8_lasrc_kenya
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      None
      None
      None
      LaSRC
      None
      LANDSAT_8
      NetCDF
      OLI_TIRS
      None
      EPSG:4326
      (-0.000271309115317046, 0.00026957992707863)
      (0.999502780827996, 0.999602369607559)
      (latitude, longitude)
    
    
      11
      ls8_lasrc_senegal
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      None
      None
      None
      LaSRC
      None
      LANDSAT_8
      NetCDF
      OLI_TIRS
      None
      EPSG:4326
      (-0.000271152, 0.00027769)
      (0.813456, 0.83307)
      (latitude, longitude)
    
    
      8
      ls8_lasrc_sierra_leone
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      None
      None
      None
      LaSRC
      None
      LANDSAT_8
      NetCDF
      OLI_TIRS
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      15
      ls8_lasrc_tanzania
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      None
      None
      None
      LaSRC
      None
      LANDSAT_8
      NetCDF
      OLI_TIRS
      None
      EPSG:4326
      (-0.000271277688070265, 0.000271139577954979)
      (0.999929558226998, 0.999962763497961)
      (latitude, longitude)

Pick a product
Use the platform and product names from the previous block to select a Data Cube.



In [8]:

    
# Change the data platform and data cube here

platform = "LANDSAT_7"
# platform = "LANDSAT_8"

# product = "ls7_ledaps_ghana"
# product = "ls7_ledaps_kenya"
# product = "ls7_ledaps_senegal"
# product = "ls7_ledaps_sierra_leone"
# product = "ls7_ledaps_tanzania"
product = "ls7_ledaps_vietnam"

Display Latitude-Longitude and Time Bounds of the Data Cube



In [9]:

    
from utils.data_cube_utilities.dc_time import _n64_to_datetime, dt_to_str

extents = api.get_full_dataset_extent(platform = platform, product = product, measurements=[])

latitude_extents = (min(extents['latitude'].values),max(extents['latitude'].values))
longitude_extents = (min(extents['longitude'].values),max(extents['longitude'].values))
time_extents = (min(extents['time'].values),max(extents['time'].values))

print("Latitude Extents:", latitude_extents)
print("Longitude Extents:", longitude_extents)
print("Time Extents:", list(map(dt_to_str, map(_n64_to_datetime, time_extents))))









    



Latitude Extents: (9.176425374578418, 13.964805165051667)
Longitude Extents: (102.40430421277932, 108.93092407802477)
Time Extents: ['1999-09-08', '2016-12-29']

Visualize Data Cube Region



In [10]:

    
## The code below renders a map that can be used to orient yourself with the region.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude = latitude_extents, longitude = longitude_extents)









    Out[10]:

Pick a smaller analysis region and display that region
Try to keep your region to less than 0.2-deg x 0.2-deg for rapid processing. You can click on the map above to find the Lat-Lon coordinates of any location. You will want to identify a region with an urban aree or some known vegetation. Pick a time window of a few months to a year so we can pick out some clear pixels.



In [15]:

    
## Vietnam - Central Lam Dong Province ##
longitude_extents = (105.2, 105.3)
latitude_extents  = (11.25, 11.35)

time_extents = ('2015-01-01', '2015-12-31')



In [16]:

    
display_map(latitude = latitude_extents, longitude = longitude_extents)









    Out[16]:

Load the dataset and the required spectral bands or other parameters

After loading, you will view the Xarray dataset. Notice the dimensions represent the number of pixels in your latitude and longitude dimension as well as the number of time slices (time) in your time series.



In [17]:

    
landsat_dataset = dc.load(latitude = latitude_extents,
                          longitude = longitude_extents,
                          platform = platform,
                          time = time_extents,
                          product = product,
                          measurements = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2', 'pixel_qa'])



In [18]:

    
landsat_dataset
#view the dimensions and sample content from the cube









    Out[18]:





<xarray.Dataset>
Dimensions:    (latitude: 372, longitude: 372, time: 16)
Coordinates:
  * time       (time) datetime64[ns] 2015-01-07T03:18:35 ... 2015-12-25T03:21:19
  * latitude   (latitude) float64 11.35 11.35 11.35 11.35 ... 11.25 11.25 11.25
  * longitude  (longitude) float64 105.2 105.2 105.2 105.2 ... 105.3 105.3 105.3
Data variables:
    red        (time, latitude, longitude) int16 1185 1245 1266 ... 776 917 1120
    green      (time, latitude, longitude) int16 1295 1295 1316 ... 776 888 955
    blue       (time, latitude, longitude) int16 1277 1341 1298 ... 862 862 1034
    nir        (time, latitude, longitude) int16 2741 2605 2558 ... 1867 2001
    swir1      (time, latitude, longitude) int16 1627 1601 1574 ... 1307 1596
    swir2      (time, latitude, longitude) int16 978 978 1006 ... 478 754 1059
    pixel_qa   (time, latitude, longitude) int32 224 224 224 224 ... 66 66 66 96
Attributes:
    crs:      EPSG:4326

Display Example Images

Single band visualization
For a quick inspection, let's look at one image. The code will allow the selection of any band (red, blue, green, nir, swir1, swir2) to produce a grey-scale image. Select the desired acquisition (time slice) in the block below. You can select from 1 to #, where the max value is the number of time slices noted in the block above. Change the comment statements below to select the bands for the first image.



In [19]:

    
acquisition_number = 0
# select an acquisition number from 0 (first time layer) to "time" using the array limits above



In [20]:

    
%matplotlib inline
#landsat_dataset.red.isel(time = acquisition_number).plot(cmap = "Greys")
landsat_dataset.green.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.blue.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.nir.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.swir1.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.swir2.isel(time = acquisition_number).plot(cmap = "Greys")









    Out[20]:





<matplotlib.collections.QuadMesh at 0x7ff6b791bf60>

Define Cloud Masking Function
Removes clouds and cloud shadows based on the Landsat pixel QA information This is only for reference ... nothing to modify here



In [21]:

    
import numpy as np  

def generate_cloud_mask(dataset, include_shadows = False):
    #Create boolean Masks for clear and water pixels
    clear_pixels = dataset.pixel_qa.values == 2  + 64
    water_pixels = dataset.pixel_qa.values == 4  + 64
    shadow_pixels= dataset.pixel_qa.values == 8 + 64
    
    a_clean_mask = np.logical_or(clear_pixels, water_pixels)
    
    if include_shadows:
        a_clean_mask = np.logical_or(a_clean_mask, shadow_pixels)
        
    return np.invert(a_clean_mask)

def remove_clouds(dataset, include_shadows = False):
    #Create boolean Masks for clear and water pixels
    clear_pixels = dataset.pixel_qa.values == 2  + 64
    water_pixels = dataset.pixel_qa.values == 4  + 64
    shadow_pixels= dataset.pixel_qa.values == 8 + 64
    
    a_clean_mask = np.logical_or(clear_pixels, water_pixels)
    
    if include_shadows:
        a_clean_mask = np.logical_or(a_clean_mask, shadow_pixels)
        
    return dataset.where(a_clean_mask)



In [22]:

    
cloud_mask = generate_cloud_mask(landsat_dataset)
cloudless = remove_clouds(landsat_dataset) #landsat_dataset.where(image_is_clean)

Set up plotting function (to be used later) Nothing to modify here

Median Mosaic
Masks clouds from imagery using the median valued cloud-free pixels in the time series



In [23]:

    
from utils.data_cube_utilities.dc_mosaic import create_median_mosaic

def median_mosaic(dataset):
    # The mask here is based on pixel_qa products. It comes bundled in with most Landsat Products.
    cloud_free_boolean_mask = np.invert(generate_cloud_mask(dataset))
    return create_median_mosaic(dataset, clean_mask = cloud_free_boolean_mask)



In [24]:

    
median_composite = median_mosaic(landsat_dataset)



In [25]:

    
median_composite.nir.plot(cmap = "Greys")









    Out[25]:





<matplotlib.collections.QuadMesh at 0x7ff6b66b3d68>



In [26]:

    
from utils.data_cube_utilities.dc_rgb import rgb
rgb(median_composite)









    Out[26]:





(<Figure size 720x720 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7ff687763940>)

Fractional Cover

Fractional Cover (FC) is used for landcover type estimation (vegetation, non-green vegetation, bare soil) of each pixel. We use a model from CSIRO (Juan Gerschmann) and apply it to a median mosaic.



In [27]:

    
from utils.data_cube_utilities.dc_fractional_coverage_classifier import frac_coverage_classify 
frac_classes = frac_coverage_classify(median_composite, clean_mask = np.ones(median_composite.pixel_qa.shape).astype(np.bool))

Plotting Fractional Cover Results

Plot Bare Soil (bs), Photosynthetic Vegetation (pv) or Non Photosynthetic Vegetation (npv)
Plot a False Color RGB result where RGB = bs/pv/npv.



In [28]:

    
frac_classes.bs.plot(cmap = "Greys")
#frac_classes.pv.plot(cmap = "Greys")
#frac_classes.npv.plot(cmap = "Greys")









    Out[28]:





<matplotlib.collections.QuadMesh at 0x7ff6866450b8>



In [29]:

    
rgb(frac_classes, bands = ['bs', 'pv', 'npv'])









    Out[29]:





(<Figure size 720x720 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7ff6866095f8>)

Spectral Indices

NDVI (vegetation) and NDBI (urbanization)

NDVI = Normalized Difference Vegetation Index

A derived index that correlates well with the existance of vegetation.

$$ NDVI = \frac{(NIR - RED)}{(NIR + RED)}$$



In [30]:

    
def NDVI(dataset):
    return (dataset.nir - dataset.red)/(dataset.nir + dataset.red)

NDBI = Normalized Difference Build-Up Index

A derived index that correlates well with the existance of urbanization.

$$NDBI = \frac{(SWIR1 - NIR)}{(SWIR1 + NIR)}$$



In [31]:

    
def NDBI(dataset):
        return (dataset.swir1 - dataset.nir)/(dataset.swir1 + dataset.nir)



In [32]:

    
landsat_mosaic = median_mosaic(landsat_dataset)  

ndbi = NDBI(landsat_mosaic)  # Urbanization - Reds
ndvi_mosaic = NDVI(landsat_mosaic)  # Dense Vegetation - Greens



In [33]:

    
(ndbi).plot(cmap = "Reds")









    Out[33]:





<matplotlib.collections.QuadMesh at 0x7ff686593358>



In [34]:

    
(ndvi_mosaic).plot(cmap = "Greens")









    Out[34]:





<matplotlib.collections.QuadMesh at 0x7ff6864b6e10>

Create a threshold plot

First we will define a minimum threshold and a maximum threshold. Then you will create a plot that colors the region between the threshold a single color (e.g. red) and the region outside the threshold will be BLACK or WHITE. Also, we will calculate the % of pixels and the number of pixels in the threshold range.



In [35]:

    
# Select the time slice for the NVDI output (first slice=0)
t = 0
ndvi_dataset_at_time_t = NDVI(landsat_dataset).isel(time = t)  
mask_at_time_t = generate_cloud_mask(landsat_dataset.isel(time = t))



In [36]:

    
# Define the threshold region bounds
minimum_threshold = 0.6
maximum_threshold = 0.9



In [37]:

    
from matplotlib.ticker import FuncFormatter
import matplotlib.pyplot as plt

def threshold_plot(da, min_threshold, max_threshold, mask = None, width = 10, *args, **kwargs): 
    color_in    = np.array([255,0,0])
    color_out   = np.array([0,0,0])
    color_cloud = np.array([255,255,255])
    
    array = np.zeros((*da.values.shape, 3)).astype(np.int16)
    
    inside  = np.logical_and(da.values > min_threshold, da.values < max_threshold)
    outside = np.invert(inside)
    masked  = np.zeros(da.values.shape).astype(bool) if mask is None else mask
    
    array[inside] =  color_in
    array[outside] = color_out
    array[masked] =  color_cloud

    def figure_ratio(ds, fixed_width = 10):
        width = fixed_width
        height = len(ds.latitude) * (fixed_width / len(ds.longitude))
        return (width, height)


    fig, ax = plt.subplots(figsize = figure_ratio(da,fixed_width = width))
    
    lat_formatter = FuncFormatter(lambda y_val, tick_pos: "{0:.3f}".format(da.latitude.values[tick_pos] ))
    lon_formatter = FuncFormatter(lambda x_val, tick_pos: "{0:.3f}".format(da.longitude.values[tick_pos]))

    ax.xaxis.set_major_formatter(lon_formatter)
    ax.yaxis.set_major_formatter(lat_formatter)
    
    plt.title("Threshold: {} < x < {}".format(min_threshold, max_threshold))
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    
    plt.imshow(array, *args, **kwargs)
    plt.show()

Plot NDVI Mosaic



In [38]:

    
# Plot the NDVI threshold product using a cloud-filterd mosaic
threshold_plot(ndvi_mosaic, minimum_threshold, maximum_threshold, width = 10)

Plot NDVI at time t



In [39]:

    
# Plot the NDVI threshold product using a single time slice (one scene)
threshold_plot(ndvi_dataset_at_time_t, minimum_threshold, maximum_threshold, width = 10, mask = mask_at_time_t,)



In [40]:

    
def threshold_count(da, min_threshold, max_threshold, mask = None):
    def count_not_nans(arr):
        return np.count_nonzero(~np.isnan(arr))
    
    in_threshold = np.logical_and( da.values > min_threshold, da.values < max_threshold)
    
    total_non_cloudy = count_not_nans(da.values) if mask is None else np.sum(mask) 
    
    return dict(total = np.size(da.values),
                total_non_cloudy = total_non_cloudy,
                inside = np.nansum(in_threshold),
                outside = total_non_cloudy - np.nansum(in_threshold)
               )    
    
def threshold_percentage(da, min_threshold, max_threshold, mask = None):
    counts = threshold_count(da, min_threshold, max_threshold, mask = mask)
    return dict(percent_inside_threshold = (counts["inside"]   / counts["total"]) * 100.0,
                percent_outside_threshold = (counts["outside"] / counts["total"]) * 100.0,
                percent_clouds = ( 100.0-counts["total_non_cloudy"] / counts["total"] * 100.0))



In [41]:

    
threshold_count(ndvi_mosaic,
                minimum_threshold,
                maximum_threshold)









    Out[41]:





{'total': 138384,
 'total_non_cloudy': 138384,
 'inside': 44355,
 'outside': 94029}



In [42]:

    
threshold_percentage(ndvi_mosaic,
                     minimum_threshold,
                     maximum_threshold)









    Out[42]:





{'percent_inside_threshold': 32.05211585154353,
 'percent_outside_threshold': 67.94788414845647,
 'percent_clouds': 0.0}



In [43]:

    
threshold_count(ndvi_dataset_at_time_t,
                minimum_threshold,
                maximum_threshold,
                mask = mask_at_time_t)









    Out[43]:





{'total': 138384,
 'total_non_cloudy': 100741,
 'inside': 13591,
 'outside': 87150}



In [44]:

    
threshold_percentage(ndvi_dataset_at_time_t,
                     minimum_threshold,
                     maximum_threshold,
                     mask = mask_at_time_t )









    Out[44]:





{'percent_inside_threshold': 9.821222106601919,
 'percent_outside_threshold': 62.97693374956642,
 'percent_clouds': 27.20184414383165}

Select a single pixel and plot an index value over time



In [45]:

    
pixel_lat = 11.45
pixel_lon = 105.40



In [46]:

    
pixel = NDVI(remove_clouds(landsat_dataset)).sel(latitude  = pixel_lat,
                                                longitude = pixel_lon,
                                                method = 'nearest') # nearest neighbor selection



In [47]:

    
plt.figure(figsize = (20,5)) 
plt.scatter(pixel.time.values, pixel.values)









    Out[47]:





<matplotlib.collections.PathCollection at 0x7ff686381278>

	name	description	lat	label	lon	product_type	time	platform	format	instrument	creation_time	crs	resolution	tile_size	spatial_dimensions
id
13	ls7_ledaps_ghana	Landsat 7 USGS Collection 1 Higher Level SR sc...	None	None	None	LEDAPS	None	LANDSAT_7	NetCDF	ETM	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
17	ls7_ledaps_kenya	Landsat 7 USGS Collection 1 Higher Level SR sc...	None	None	None	LEDAPS	None	LANDSAT_7	NetCDF	ETM	None	EPSG:4326	(-0.000269493, 0.000269493)	(0.99981903, 0.99981903)	(latitude, longitude)
18	ls7_ledaps_senegal	Landsat 7 USGS Collection 1 Higher Level SR sc...	None	None	None	LEDAPS	None	LANDSAT_7	NetCDF	ETM	None	EPSG:4326	(-0.000271152, 0.00027769)	(0.813456, 0.83307)	(latitude, longitude)
16	ls7_ledaps_sierra_leone	Landsat 7 USGS Collection 1 Higher Level SR sc...	None	None	None	LEDAPS	None	LANDSAT_7	NetCDF	ETM	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
19	ls7_ledaps_tanzania	Landsat 7 USGS Collection 1 Higher Level SR sc...	None	None	None	LEDAPS	None	LANDSAT_7	NetCDF	ETM	None	EPSG:4326	(-0.000271277688070265, 0.000271139577954979)	(0.999929558226998, 0.999962763497961)	(latitude, longitude)
31	ls7_ledaps_vietnam	Landsat 7 USGS Collection 1 Higher Level SR sc...	None	None	None	LEDAPS	None	LANDSAT_7	NetCDF	ETM	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
9	ls8_lasrc_ghana	Landsat 8 USGS Collection 1 Higher Level SR sc...	None	None	None	LaSRC	None	LANDSAT_8	NetCDF	OLI_TIRS	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
10	ls8_lasrc_kenya	Landsat 8 USGS Collection 1 Higher Level SR sc...	None	None	None	LaSRC	None	LANDSAT_8	NetCDF	OLI_TIRS	None	EPSG:4326	(-0.000271309115317046, 0.00026957992707863)	(0.999502780827996, 0.999602369607559)	(latitude, longitude)
11	ls8_lasrc_senegal	Landsat 8 USGS Collection 1 Higher Level SR sc...	None	None	None	LaSRC	None	LANDSAT_8	NetCDF	OLI_TIRS	None	EPSG:4326	(-0.000271152, 0.00027769)	(0.813456, 0.83307)	(latitude, longitude)
8	ls8_lasrc_sierra_leone	Landsat 8 USGS Collection 1 Higher Level SR sc...	None	None	None	LaSRC	None	LANDSAT_8	NetCDF	OLI_TIRS	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
15	ls8_lasrc_tanzania	Landsat 8 USGS Collection 1 Higher Level SR sc...	None	None	None	LaSRC	None	LANDSAT_8	NetCDF	OLI_TIRS	None	EPSG:4326	(-0.000271277688070265, 0.000271139577954979)	(0.999929558226998, 0.999962763497961)	(latitude, longitude)