In [1]:

    
# Enable importing of utilities.
from sys import path
path.append("..")

ARDC Training: Python Notebooks

Task-B: Water Extent (WOFS) and Water Quality (TSM)

Import the Datacube Configuration



In [2]:

    
import datacube
import utils.data_cube_utilities.data_access_api as dc_api  
api = dc_api.DataAccessApi()
dc = datacube.Datacube(app = 'ardc_task_b')
api.dc = dc

Browse the available Data Cubes



In [3]:

    
list_of_products = dc.list_products()
netCDF_products = list_of_products[list_of_products['format'] == 'NetCDF']
netCDF_products









    Out[3]:







  
    
      
      name
      description
      platform
      instrument
      creation_time
      format
      lat
      label
      product_type
      lon
      time
      crs
      resolution
      tile_size
      spatial_dimensions
    
    
      id
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      13
      ls7_ledaps_ghana
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      LANDSAT_7
      ETM
      None
      NetCDF
      None
      None
      LEDAPS
      None
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      17
      ls7_ledaps_kenya
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      LANDSAT_7
      ETM
      None
      NetCDF
      None
      None
      LEDAPS
      None
      None
      EPSG:4326
      (-0.000269493, 0.000269493)
      (0.99981903, 0.99981903)
      (latitude, longitude)
    
    
      18
      ls7_ledaps_senegal
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      LANDSAT_7
      ETM
      None
      NetCDF
      None
      None
      LEDAPS
      None
      None
      EPSG:4326
      (-0.000271152, 0.00027769)
      (0.813456, 0.83307)
      (latitude, longitude)
    
    
      16
      ls7_ledaps_sierra_leone
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      LANDSAT_7
      ETM
      None
      NetCDF
      None
      None
      LEDAPS
      None
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      19
      ls7_ledaps_tanzania
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      LANDSAT_7
      ETM
      None
      NetCDF
      None
      None
      LEDAPS
      None
      None
      EPSG:4326
      (-0.000271277688070265, 0.000271139577954979)
      (0.999929558226998, 0.999962763497961)
      (latitude, longitude)
    
    
      31
      ls7_ledaps_vietnam
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      LANDSAT_7
      ETM
      None
      NetCDF
      None
      None
      LEDAPS
      None
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      9
      ls8_lasrc_ghana
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      LANDSAT_8
      OLI_TIRS
      None
      NetCDF
      None
      None
      LaSRC
      None
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      10
      ls8_lasrc_kenya
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      LANDSAT_8
      OLI_TIRS
      None
      NetCDF
      None
      None
      LaSRC
      None
      None
      EPSG:4326
      (-0.000271309115317046, 0.00026957992707863)
      (0.999502780827996, 0.999602369607559)
      (latitude, longitude)
    
    
      11
      ls8_lasrc_senegal
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      LANDSAT_8
      OLI_TIRS
      None
      NetCDF
      None
      None
      LaSRC
      None
      None
      EPSG:4326
      (-0.000271152, 0.00027769)
      (0.813456, 0.83307)
      (latitude, longitude)
    
    
      8
      ls8_lasrc_sierra_leone
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      LANDSAT_8
      OLI_TIRS
      None
      NetCDF
      None
      None
      LaSRC
      None
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      15
      ls8_lasrc_tanzania
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      LANDSAT_8
      OLI_TIRS
      None
      NetCDF
      None
      None
      LaSRC
      None
      None
      EPSG:4326
      (-0.000271277688070265, 0.000271139577954979)
      (0.999929558226998, 0.999962763497961)
      (latitude, longitude)

Pick a product
Use the platform and product names from the previous block to select a Data Cube.



In [4]:

    
# Change the data platform and data cube here

platform = "LANDSAT_7"
# platform = "LANDSAT_8"

# product = "ls7_ledaps_ghana"
# product = "ls7_ledaps_kenya"
# product = "ls7_ledaps_senegal"
# product = "ls7_ledaps_sierra_leone"
# product = "ls7_ledaps_tanzania"
product = "ls7_ledaps_vietnam"

Display Latitude-Longitude and Time Bounds of the Data Cube



In [5]:

    
from utils.data_cube_utilities.dc_time import _n64_to_datetime, dt_to_str

extents = api.get_full_dataset_extent(platform = platform, product = product, measurements=[])

latitude_extents = (min(extents['latitude'].values),max(extents['latitude'].values))
longitude_extents = (min(extents['longitude'].values),max(extents['longitude'].values))
time_extents = (min(extents['time'].values),max(extents['time'].values))

print("Latitude Extents:", latitude_extents)
print("Longitude Extents:", longitude_extents)
print("Time Extents:", list(map(dt_to_str, map(_n64_to_datetime, time_extents))))









    



Latitude Extents: (9.176425374578418, 13.964805165051667)
Longitude Extents: (102.40430421277932, 108.93092407802477)
Time Extents: ['1999-09-08', '2016-12-29']

Visualize Data Cube Region



In [6]:

    
## The code below renders a map that can be used to orient yourself with the region.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude = latitude_extents, longitude = longitude_extents)









    Out[6]:

Pick a smaller analysis region and display that region
Try to keep your region to less than 0.2-deg x 0.2-deg for rapid processing. You can click on the map above to find the Lat-Lon coordinates of any location. You will want to identify a region with an inland water body. Pick a time window of a few months so we can pick out some clear pixels and plot the water.



In [8]:

    
## Vietnam - Central Lam Dong Province ##
longitude_extents = (107.0, 107.2)
latitude_extents  = (11.7, 12.0)

time_extents = ('2010-01-01', '2010-12-31')



In [9]:

    
display_map(latitude = latitude_extents, longitude = longitude_extents)









    Out[9]:

Load the dataset and the required spectral bands or other parameters

After loading, you will view the Xarray dataset. Notice the dimensions represent the number of pixels in your latitude and longitude dimension as well as the number of time slices (time) in your time series.



In [10]:

    
landsat_dataset = dc.load(latitude = latitude_extents,
                          longitude = longitude_extents,
                          platform = platform,
                          time = time_extents,
                          product = product,
                          measurements = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2', 'pixel_qa'])



In [11]:

    
landsat_dataset
#view the dimensions and sample content from the cube









    Out[11]:





<xarray.Dataset>
Dimensions:    (latitude: 1114, longitude: 743, time: 12)
Coordinates:
  * time       (time) datetime64[ns] 2010-01-11T02:59:07 ... 2010-12-29T03:00:48
  * latitude   (latitude) float64 12.0 12.0 12.0 12.0 ... 11.7 11.7 11.7 11.7
  * longitude  (longitude) float64 107.0 107.0 107.0 107.0 ... 107.2 107.2 107.2
Data variables:
    red        (time, latitude, longitude) int16 -9999 -9999 -9999 ... 546 586
    green      (time, latitude, longitude) int16 -9999 -9999 -9999 ... 585 563
    blue       (time, latitude, longitude) int16 -9999 -9999 -9999 ... 487 465
    nir        (time, latitude, longitude) int16 -9999 -9999 -9999 ... 2905 2586
    swir1      (time, latitude, longitude) int16 -9999 -9999 -9999 ... 1710 1737
    swir2      (time, latitude, longitude) int16 -9999 -9999 -9999 ... 817 903
    pixel_qa   (time, latitude, longitude) int32 1 1 1 1 1 1 ... 66 66 66 66 66
Attributes:
    crs:      EPSG:4326

Display Example Images

Single band visualization
For a quick inspection, let's look at one image. The code will allow the selection of any band (red, blue, green, nir, swir1, swir2) to produce a grey-scale image. Select the desired acquisition (time slice) in the block below. You can select from 1 to #, where the max value is the number of time slices noted in the block above. Change the comment statements below to select the bands for the first image.



In [12]:

    
acquisition_number = 2
# select an acquisition number from 1 to "time" using the array limits above



In [13]:

    
%matplotlib inline
#landsat_dataset.red.isel(time = acquisition_number).plot(cmap = "Greys")
landsat_dataset.green.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.blue.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.nir.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.swir1.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.swir2.isel(time = acquisition_number).plot(cmap = "Greys")









    Out[13]:





<matplotlib.collections.QuadMesh at 0x7fae69fbe160>

Define Cloud Masking Function
Removes clouds and cloud shadows based on the Landsat pixel QA information This is only for reference ... nothing to modify here



In [14]:

    
import numpy as np  

def generate_cloud_mask(dataset, include_shadows = False):
    #Create boolean Masks for clear and water pixels
    clear_pixels = dataset.pixel_qa.values == 2  + 64
    water_pixels = dataset.pixel_qa.values == 4  + 64
    shadow_pixels= dataset.pixel_qa.values == 8 + 64
    
    a_clean_mask = np.logical_or(clear_pixels, water_pixels)
    
    if include_shadows:
        a_clean_mask = np.logical_or(a_clean_mask, shadow_pixels)
        
    return np.invert(a_clean_mask)

def remove_clouds(dataset, include_shadows = False):
    #Create boolean Masks for clear and water pixels
    clear_pixels = dataset.pixel_qa.values == 2  + 64
    water_pixels = dataset.pixel_qa.values == 4  + 64
    shadow_pixels= dataset.pixel_qa.values == 8 + 64
    
    a_clean_mask = np.logical_or(clear_pixels, water_pixels)
    
    if include_shadows:
        a_clean_mask = np.logical_or(a_clean_mask, shadow_pixels)
        
    return dataset.where(a_clean_mask)



In [15]:

    
cloud_mask = generate_cloud_mask(landsat_dataset)
cloudless = remove_clouds(landsat_dataset) #landsat_dataset.where(image_is_clean)

Set up plotting function (to be used later) Nothing to modify here



In [16]:

    
from utils.data_cube_utilities.dc_rgb import rgb

Most Recent Pixel Mosaic
Masks clouds from imagery and uses the most recent cloud-free pixels.



In [17]:

    
from utils.data_cube_utilities.dc_mosaic import create_mosaic

def mrf_mosaic(dataset):
    # The mask here is based on pixel_qa products. It comes bundled in with most Landsat Products.
    cloud_free_boolean_mask = np.invert(generate_cloud_mask(dataset))
    return create_mosaic(dataset, clean_mask = cloud_free_boolean_mask)



In [18]:

    
recent_composite = mrf_mosaic(landsat_dataset)



In [19]:

    
recent_composite.nir.plot(cmap = "Greys")









    Out[19]:





<matplotlib.collections.QuadMesh at 0x7fae26c5c5f8>



In [20]:

    
rgb(recent_composite, width = 20)









    Out[20]:





(<Figure size 1440x960.431 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7fae25295828>)

Plot WOFS water detection results

This example uses the Australian Water Detection from Space (WOFS) algorithm for water detection. The base image will use a most-recent pixel composite (from above). When reviewing the results, 1=water, 0=no water.



In [21]:

    
from utils.data_cube_utilities.dc_water_classifier import wofs_classify



In [22]:

    
water_classification = wofs_classify(recent_composite, clean_mask = np.ones(recent_composite.pixel_qa.shape).astype(np.bool),  mosaic = True)



In [23]:

    
water_classification.wofs.plot(cmap='Blues')









    Out[23]:





<matplotlib.collections.QuadMesh at 0x7fae24247630>

Plot NDWI water detection results

This example uses the Normalized Difference Water Index (NDWI) which is a spectral "index" that correlates well with the existance of water.
$$ NDWI = \frac{GREEN - NIR}{GREEN + NIR}$$



In [24]:

    
def NDWI(dataset):
    return (dataset.green - dataset.nir)/(dataset.green + dataset.nir)



In [25]:

    
ndwi = NDWI(recent_composite)  # High Concentrations of Water - Blues



In [26]:

    
(ndwi).plot(cmap = "Blues")









    Out[26]:





<matplotlib.collections.QuadMesh at 0x7fae24176860>

Plot TSM water quality results

This example uses the Australian Total Suspended Matter (TSM) algorithm. The TSM value is the mean over the entire time range. This parameter is a measure of the particulate matter in water and is often a proxy for water quality.



In [27]:

    
from utils.data_cube_utilities.dc_water_quality import tsm



In [28]:

    
mask_that_only_includes_water_pixels = water_classification.wofs == 1  
tsm_dataset = tsm(recent_composite, clean_mask = mask_that_only_includes_water_pixels )



In [29]:

    
tsm_dataset.tsm.plot(cmap = "jet")









    Out[29]:





<matplotlib.collections.QuadMesh at 0x7fae240a8d30>

Time Series Water Detection Analysis

Time series output of the Australian Water Detection from Space (WOFS) results. The results show the percent of time that a pixel is classified as water over the entire time series. BLUE = frequent water, RED = infrequent water.



In [30]:

    
ts_water_classification = wofs_classify(landsat_dataset,clean_mask = np.invert(cloud_mask))



In [31]:

    
# Apply nan to no_data values
ts_water_classification = ts_water_classification.where(ts_water_classification != -9999)

##Time series aggregation that ignores nan values.    
water_classification_percentages = (ts_water_classification.mean(dim = ['time']) * 100).wofs.rename('water_classification_percentages')









    



/home/localuser/miniconda3/envs/cubeenv/lib/python3.6/site-packages/xarray/core/nanops.py:140: RuntimeWarning: Mean of empty slice
  return np.nanmean(a, axis=axis, dtype=dtype)



In [32]:

    
## import color-scheme and set nans to black
from matplotlib.cm import jet_r as jet_r
jet_r.set_bad('black',1)

## apply nan to percentage values that aren't greater than 0, then plot
water_classification_percentages\
    .where(water_classification_percentages > 0)\
    .plot(cmap = jet_r)









    Out[32]:





<matplotlib.collections.QuadMesh at 0x7fae6ce150f0>

Create a WOFS plot for a single pixel

First select the Lat-Lon position. Then the code will find the closest pixel in the dataset using a "nearest neighbor" selection.



In [33]:

    
pixel_lat = 11.84
pixel_lon = 107.09



In [34]:

    
pixel = ts_water_classification.sel( latitude  = pixel_lat,
                                     longitude = pixel_lon,
                                     method = 'nearest') # nearest neighbor selection



In [35]:

    
import matplotlib.pyplot as plt 
plt.figure(figsize = (20,5)) 
plt.scatter(pixel.time.values, pixel.wofs.values)









    Out[35]:





<matplotlib.collections.PathCollection at 0x7fae6cdb94a8>

	name	description	platform	instrument	creation_time	format	lat	label	product_type	lon	time	crs	resolution	tile_size	spatial_dimensions
id
13	ls7_ledaps_ghana	Landsat 7 USGS Collection 1 Higher Level SR sc...	LANDSAT_7	ETM	None	NetCDF	None	None	LEDAPS	None	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
17	ls7_ledaps_kenya	Landsat 7 USGS Collection 1 Higher Level SR sc...	LANDSAT_7	ETM	None	NetCDF	None	None	LEDAPS	None	None	EPSG:4326	(-0.000269493, 0.000269493)	(0.99981903, 0.99981903)	(latitude, longitude)
18	ls7_ledaps_senegal	Landsat 7 USGS Collection 1 Higher Level SR sc...	LANDSAT_7	ETM	None	NetCDF	None	None	LEDAPS	None	None	EPSG:4326	(-0.000271152, 0.00027769)	(0.813456, 0.83307)	(latitude, longitude)
16	ls7_ledaps_sierra_leone	Landsat 7 USGS Collection 1 Higher Level SR sc...	LANDSAT_7	ETM	None	NetCDF	None	None	LEDAPS	None	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
19	ls7_ledaps_tanzania	Landsat 7 USGS Collection 1 Higher Level SR sc...	LANDSAT_7	ETM	None	NetCDF	None	None	LEDAPS	None	None	EPSG:4326	(-0.000271277688070265, 0.000271139577954979)	(0.999929558226998, 0.999962763497961)	(latitude, longitude)
31	ls7_ledaps_vietnam	Landsat 7 USGS Collection 1 Higher Level SR sc...	LANDSAT_7	ETM	None	NetCDF	None	None	LEDAPS	None	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
9	ls8_lasrc_ghana	Landsat 8 USGS Collection 1 Higher Level SR sc...	LANDSAT_8	OLI_TIRS	None	NetCDF	None	None	LaSRC	None	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
10	ls8_lasrc_kenya	Landsat 8 USGS Collection 1 Higher Level SR sc...	LANDSAT_8	OLI_TIRS	None	NetCDF	None	None	LaSRC	None	None	EPSG:4326	(-0.000271309115317046, 0.00026957992707863)	(0.999502780827996, 0.999602369607559)	(latitude, longitude)
11	ls8_lasrc_senegal	Landsat 8 USGS Collection 1 Higher Level SR sc...	LANDSAT_8	OLI_TIRS	None	NetCDF	None	None	LaSRC	None	None	EPSG:4326	(-0.000271152, 0.00027769)	(0.813456, 0.83307)	(latitude, longitude)
8	ls8_lasrc_sierra_leone	Landsat 8 USGS Collection 1 Higher Level SR sc...	LANDSAT_8	OLI_TIRS	None	NetCDF	None	None	LaSRC	None	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
15	ls8_lasrc_tanzania	Landsat 8 USGS Collection 1 Higher Level SR sc...	LANDSAT_8	OLI_TIRS	None	NetCDF	None	None	LaSRC	None	None	EPSG:4326	(-0.000271277688070265, 0.000271139577954979)	(0.999929558226998, 0.999962763497961)	(latitude, longitude)