In [1]:

    
# Enable importing of utilities.
from sys import path
path.append("..")

ARDC Training: Python Notebooks

Task-A: Cloud-free Mosaics and K-means Clustering

Import the Datacube Configuration



In [2]:

    
import datacube
import utils.data_cube_utilities.data_access_api as dc_api  
api = dc_api.DataAccessApi()
dc = datacube.Datacube(app = 'ardc_task_a')
api.dc = dc

Browse the available Data Cubes



In [3]:

    
list_of_products = dc.list_products()
netCDF_products = list_of_products[list_of_products['format'] == 'NetCDF']
netCDF_products









    Out[3]:







  
    
      
      name
      description
      format
      label
      platform
      product_type
      lat
      lon
      instrument
      time
      creation_time
      crs
      resolution
      tile_size
      spatial_dimensions
    
    
      id
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      13
      ls7_ledaps_ghana
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      NetCDF
      None
      LANDSAT_7
      LEDAPS
      None
      None
      ETM
      None
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      17
      ls7_ledaps_kenya
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      NetCDF
      None
      LANDSAT_7
      LEDAPS
      None
      None
      ETM
      None
      None
      EPSG:4326
      (-0.000269493, 0.000269493)
      (0.99981903, 0.99981903)
      (latitude, longitude)
    
    
      18
      ls7_ledaps_senegal
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      NetCDF
      None
      LANDSAT_7
      LEDAPS
      None
      None
      ETM
      None
      None
      EPSG:4326
      (-0.000271152, 0.00027769)
      (0.813456, 0.83307)
      (latitude, longitude)
    
    
      16
      ls7_ledaps_sierra_leone
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      NetCDF
      None
      LANDSAT_7
      LEDAPS
      None
      None
      ETM
      None
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      19
      ls7_ledaps_tanzania
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      NetCDF
      None
      LANDSAT_7
      LEDAPS
      None
      None
      ETM
      None
      None
      EPSG:4326
      (-0.000271277688070265, 0.000271139577954979)
      (0.999929558226998, 0.999962763497961)
      (latitude, longitude)
    
    
      31
      ls7_ledaps_vietnam
      Landsat 7 USGS Collection 1 Higher Level SR sc...
      NetCDF
      None
      LANDSAT_7
      LEDAPS
      None
      None
      ETM
      None
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      9
      ls8_lasrc_ghana
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      NetCDF
      None
      LANDSAT_8
      LaSRC
      None
      None
      OLI_TIRS
      None
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      10
      ls8_lasrc_kenya
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      NetCDF
      None
      LANDSAT_8
      LaSRC
      None
      None
      OLI_TIRS
      None
      None
      EPSG:4326
      (-0.000271309115317046, 0.00026957992707863)
      (0.999502780827996, 0.999602369607559)
      (latitude, longitude)
    
    
      11
      ls8_lasrc_senegal
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      NetCDF
      None
      LANDSAT_8
      LaSRC
      None
      None
      OLI_TIRS
      None
      None
      EPSG:4326
      (-0.000271152, 0.00027769)
      (0.813456, 0.83307)
      (latitude, longitude)
    
    
      8
      ls8_lasrc_sierra_leone
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      NetCDF
      None
      LANDSAT_8
      LaSRC
      None
      None
      OLI_TIRS
      None
      None
      EPSG:4326
      (-0.000269494585236, 0.000269494585236)
      (0.943231048326, 0.943231048326)
      (latitude, longitude)
    
    
      15
      ls8_lasrc_tanzania
      Landsat 8 USGS Collection 1 Higher Level SR sc...
      NetCDF
      None
      LANDSAT_8
      LaSRC
      None
      None
      OLI_TIRS
      None
      None
      EPSG:4326
      (-0.000271277688070265, 0.000271139577954979)
      (0.999929558226998, 0.999962763497961)
      (latitude, longitude)

Pick a product
Use the platform and product names from the previous block to select a Data Cube.



In [4]:

    
# Change the data platform and data cube here

platform = "LANDSAT_7"
# platform = "LANDSAT_8"

# product = "ls7_ledaps_ghana"
# product = "ls7_ledaps_kenya"
# product = "ls7_ledaps_senegal"
# product = "ls7_ledaps_sierra_leone"
# product = "ls7_ledaps_tanzania"
product = "ls7_ledaps_vietnam"

Display Latitude-Longitude and Time Bounds of the Data Cube



In [5]:

    
from utils.data_cube_utilities.dc_time import _n64_to_datetime, dt_to_str

extents = api.get_full_dataset_extent(platform = platform, product = product, measurements=[])

latitude_extents = (min(extents['latitude'].values),max(extents['latitude'].values))
longitude_extents = (min(extents['longitude'].values),max(extents['longitude'].values))
time_extents = (min(extents['time'].values),max(extents['time'].values))

print("Latitude Extents:", latitude_extents)
print("Longitude Extents:", longitude_extents)
print("Time Extents:", list(map(dt_to_str, map(_n64_to_datetime, time_extents))))









    



Latitude Extents: (9.176425374578418, 13.964805165051667)
Longitude Extents: (102.40430421277932, 108.93092407802477)
Time Extents: ['1999-09-08', '2016-12-29']

Visualize Data Cube Region



In [6]:

    
## The code below renders a map that can be used to orient yourself with the region.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude = latitude_extents, longitude = longitude_extents)









    Out[6]:

Pick a smaller analysis region and display that region
Try to keep your region to less than 0.2-deg x 0.2-deg for rapid processing Pick the time extents for your mosaic product (keep to 1 year or less)



In [7]:

    
## Vietnam - Central Lam Dong Province ##
longitude_extents = (107.80, 108.00)
latitude_extents  = (11.70, 11.90)

time_extents = ('2015-01-01', '2015-12-31')



In [8]:

    
display_map(latitude = latitude_extents, longitude = longitude_extents)









    Out[8]:

Load the dataset and the required spectral bands or other parameters

After loading, you will view the Xarray dataset. Notice the dimensions represent the number of pixels in your latitude and longitude dimension as well as the number of time slices (time) in your time series.



In [9]:

    
landsat_dataset = dc.load(latitude = latitude_extents,
                          longitude = longitude_extents,
                          platform = platform,
                          time = time_extents,
                          product = product,
                          measurements = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2', 'pixel_qa'])



In [10]:

    
landsat_dataset
#view the dimensions and sample content from the cube









    Out[10]:





<xarray.Dataset>
Dimensions:    (latitude: 743, longitude: 743, time: 19)
Coordinates:
  * time       (time) datetime64[ns] 2015-01-09T03:06:13 ... 2015-12-27T03:08:59
  * latitude   (latitude) float64 11.9 11.9 11.9 11.9 ... 11.7 11.7 11.7 11.7
  * longitude  (longitude) float64 107.8 107.8 107.8 107.8 ... 108.0 108.0 108.0
Data variables:
    red        (time, latitude, longitude) int16 514 457 382 419 ... 442 406 332
    green      (time, latitude, longitude) int16 482 421 381 401 ... 451 431 332
    blue       (time, latitude, longitude) int16 296 257 238 276 ... 282 263 262
    nir        (time, latitude, longitude) int16 2457 2331 2288 ... 2784 2329
    swir1      (time, latitude, longitude) int16 1774 1826 1568 ... 1873 1341
    swir2      (time, latitude, longitude) int16 1003 1057 867 ... 1063 1117 664
    pixel_qa   (time, latitude, longitude) int32 66 66 66 66 66 ... 66 66 66 66
Attributes:
    crs:      EPSG:4326

Display Example Images

Single band visualization
For a quick inspection, let's look at two images. The first image will allow the selection of any band (red, blue, green, nir, swir1, swir2) to produce a grey-scale image of any band. The second image will mask clouds with bright red on an RGB image. Select the desired acquisition (time slice) in the block below. You can select from 1 to #, where the max value is the number of time slices noted in the block above. Change the comment statements below to select the bands for the first image.



In [11]:

    
acquisition_number = 10
# select an acquisition number from 1 to "time" using the array limits above



In [12]:

    
%matplotlib inline
# landsat_dataset.red.isel(time = acquisition_number).plot(cmap = "Greys")
landsat_dataset.green.isel(time = acquisition_number).plot(cmap = "Greys")
# landsat_dataset.blue.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.nir.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.swir1.isel(time = acquisition_number).plot(cmap = "Greys")
#landsat_dataset.swir2.isel(time = acquisition_number).plot(cmap = "Greys")









    Out[12]:





<matplotlib.collections.QuadMesh at 0x7fbe0abb4e80>

Define Cloud Masking Function
Removes clouds and cloud shadows based on the Landsat pixel QA information This is only for reference ... nothing to modify here



In [13]:

    
import numpy as np  

def generate_cloud_mask(dataset, include_shadows = False):
    #Create boolean Masks for clear and water pixels
    clear_pixels = dataset.pixel_qa.values == 2  + 64
    water_pixels = dataset.pixel_qa.values == 4  + 64
    shadow_pixels= dataset.pixel_qa.values == 8 + 64
    
    a_clean_mask = np.logical_or(clear_pixels, water_pixels)
    
    if include_shadows:
        a_clean_mask = np.logical_or(a_clean_mask, shadow_pixels)
        
    return np.invert(a_clean_mask)

def remove_clouds(dataset, include_shadows = False):
    #Create boolean Masks for clear and water pixels
    clear_pixels = dataset.pixel_qa.values == 2  + 64
    water_pixels = dataset.pixel_qa.values == 4  + 64
    shadow_pixels= dataset.pixel_qa.values == 8 + 64
    
    a_clean_mask = np.logical_or(clear_pixels, water_pixels)
    
    if include_shadows:
        a_clean_mask = np.logical_or(a_clean_mask, shadow_pixels)
        
    return dataset.where(a_clean_mask)

Mask clouds from your selected acquisition and visualize the scene and mask
Now we will look at two RGB images where the second image includes the cloud, cloud shadow and no data mask in RED. Also, the scene is the same as the acquistion selected above.



In [14]:

    
cloud_mask = generate_cloud_mask(landsat_dataset)
cloudless = remove_clouds(landsat_dataset)



In [15]:

    
import matplotlib.pyplot as plt
from utils.data_cube_utilities.dc_rgb import rgb

rgb(landsat_dataset,at_index = acquisition_number)
plt.show()



In [16]:

    
red = [255,0,0]



In [17]:

    
rgb(landsat_dataset,at_index = acquisition_number,paint_on_mask = [(cloud_mask, red)])









    Out[17]:





(<Figure size 720x28155.8 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7fbdb12f5828>)

Cleaning up the clouds and creating a cloud-free mosaic

Remember that this process will filter clouds from the entire time series stack

Most Recent Pixel Mosaic
Masks clouds from imagery and uses the most recent cloud-free pixels.



In [18]:

    
from utils.data_cube_utilities.dc_mosaic import create_mosaic

def mrf_mosaic(dataset):
    # The mask here is based on pixel_qa products. It comes bundled in with most Landsat Products.
    cloud_free_boolean_mask = np.invert(generate_cloud_mask(dataset))
    return create_mosaic(dataset, clean_mask = cloud_free_boolean_mask)



In [19]:

    
recent_composite = mrf_mosaic(landsat_dataset)



In [20]:

    
recent_composite.nir.plot(cmap = "Greys")









    Out[20]:





<matplotlib.collections.QuadMesh at 0x7fbdb1321748>



In [21]:

    
rgb(recent_composite)









    Out[21]:





(<Figure size 720x720 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7fbdc03abe80>)

Median Mosaic
Masks clouds from imagery using the median valued cloud-free pixels in the time series



In [22]:

    
from utils.data_cube_utilities.dc_mosaic import create_median_mosaic

def median_mosaic(dataset):
    # The mask here is based on pixel_qa products. It comes bundled in with most Landsat Products.
    cloud_free_boolean_mask = np.invert(generate_cloud_mask(dataset))
    return create_median_mosaic(dataset, clean_mask = cloud_free_boolean_mask)



In [23]:

    
median_composite = median_mosaic(landsat_dataset)



In [24]:

    
median_composite.nir.plot(cmap = "Greys")









    Out[24]:





<matplotlib.collections.QuadMesh at 0x7fbdc032e1d0>



In [25]:

    
rgb(median_composite)









    Out[25]:





(<Figure size 720x720 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7fbdc02b4828>)

Select bands used for clustering



In [26]:

    
cluster_bands = ['red', 'green', 'blue', 'swir1']

Perform K-Means clustering and view the output



In [27]:

    
def figure_ratio(ds, fixed_width = 10):
    width = fixed_width
    height = len(ds.latitude) * (fixed_width / len(ds.longitude))
    return (width, height)



In [28]:

    
from utils.data_cube_utilities.dc_clustering import kmeans_cluster_dataset

# change the number of clusters in the line below, as desired
# this example uses the "median composite" image from above
classification_x =  kmeans_cluster_dataset(median_composite, cluster_bands, n_clusters=8)



In [29]:

    
# plot the k-mean classification result 
classification_x.plot(figsize = figure_ratio(classification_x))









    Out[29]:





<matplotlib.collections.QuadMesh at 0x7fbdb9f73e10>

	name	description	format	label	platform	product_type	lat	lon	instrument	time	creation_time	crs	resolution	tile_size	spatial_dimensions
id
13	ls7_ledaps_ghana	Landsat 7 USGS Collection 1 Higher Level SR sc...	NetCDF	None	LANDSAT_7	LEDAPS	None	None	ETM	None	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
17	ls7_ledaps_kenya	Landsat 7 USGS Collection 1 Higher Level SR sc...	NetCDF	None	LANDSAT_7	LEDAPS	None	None	ETM	None	None	EPSG:4326	(-0.000269493, 0.000269493)	(0.99981903, 0.99981903)	(latitude, longitude)
18	ls7_ledaps_senegal	Landsat 7 USGS Collection 1 Higher Level SR sc...	NetCDF	None	LANDSAT_7	LEDAPS	None	None	ETM	None	None	EPSG:4326	(-0.000271152, 0.00027769)	(0.813456, 0.83307)	(latitude, longitude)
16	ls7_ledaps_sierra_leone	Landsat 7 USGS Collection 1 Higher Level SR sc...	NetCDF	None	LANDSAT_7	LEDAPS	None	None	ETM	None	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
19	ls7_ledaps_tanzania	Landsat 7 USGS Collection 1 Higher Level SR sc...	NetCDF	None	LANDSAT_7	LEDAPS	None	None	ETM	None	None	EPSG:4326	(-0.000271277688070265, 0.000271139577954979)	(0.999929558226998, 0.999962763497961)	(latitude, longitude)
31	ls7_ledaps_vietnam	Landsat 7 USGS Collection 1 Higher Level SR sc...	NetCDF	None	LANDSAT_7	LEDAPS	None	None	ETM	None	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
9	ls8_lasrc_ghana	Landsat 8 USGS Collection 1 Higher Level SR sc...	NetCDF	None	LANDSAT_8	LaSRC	None	None	OLI_TIRS	None	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
10	ls8_lasrc_kenya	Landsat 8 USGS Collection 1 Higher Level SR sc...	NetCDF	None	LANDSAT_8	LaSRC	None	None	OLI_TIRS	None	None	EPSG:4326	(-0.000271309115317046, 0.00026957992707863)	(0.999502780827996, 0.999602369607559)	(latitude, longitude)
11	ls8_lasrc_senegal	Landsat 8 USGS Collection 1 Higher Level SR sc...	NetCDF	None	LANDSAT_8	LaSRC	None	None	OLI_TIRS	None	None	EPSG:4326	(-0.000271152, 0.00027769)	(0.813456, 0.83307)	(latitude, longitude)
8	ls8_lasrc_sierra_leone	Landsat 8 USGS Collection 1 Higher Level SR sc...	NetCDF	None	LANDSAT_8	LaSRC	None	None	OLI_TIRS	None	None	EPSG:4326	(-0.000269494585236, 0.000269494585236)	(0.943231048326, 0.943231048326)	(latitude, longitude)
15	ls8_lasrc_tanzania	Landsat 8 USGS Collection 1 Higher Level SR sc...	NetCDF	None	LANDSAT_8	LaSRC	None	None	OLI_TIRS	None	None	EPSG:4326	(-0.000271277688070265, 0.000271139577954979)	(0.999929558226998, 0.999962763497961)	(latitude, longitude)