In [1]:
# Enable importing of utilities.
from sys import path
path.append("..")

ARDC Training: Python Notebooks

Task-E: This notebook will demonstrate 2D transect analyses and 3D Hovmoller plots. We will run these for NDVI (land) and TSM (water quality) to show the spatial and temporal variation of data along a line (transect) for a given time slice and for the entire time series.

Import the Datacube Configuration


In [2]:
import datacube
import utils.data_cube_utilities.data_access_api as dc_api  
api = dc_api.DataAccessApi()
dc = datacube.Datacube(app = 'ardc_task_e')
api.dc = dc

In [3]:
# # Supress Warning 
# import warnings
# warnings.filterwarnings('ignore')

Browse the available Data Cubes


In [4]:
list_of_products = dc.list_products()
netCDF_products = list_of_products[list_of_products['format'] == 'NetCDF']
netCDF_products


Out[4]:
name description product_type instrument format platform creation_time label time lat lon crs resolution tile_size spatial_dimensions
id
13 ls7_ledaps_ghana Landsat 7 USGS Collection 1 Higher Level SR sc... LEDAPS ETM NetCDF LANDSAT_7 None None None None None EPSG:4326 (-0.000269494585236, 0.000269494585236) (0.943231048326, 0.943231048326) (latitude, longitude)
17 ls7_ledaps_kenya Landsat 7 USGS Collection 1 Higher Level SR sc... LEDAPS ETM NetCDF LANDSAT_7 None None None None None EPSG:4326 (-0.000269493, 0.000269493) (0.99981903, 0.99981903) (latitude, longitude)
18 ls7_ledaps_senegal Landsat 7 USGS Collection 1 Higher Level SR sc... LEDAPS ETM NetCDF LANDSAT_7 None None None None None EPSG:4326 (-0.000271152, 0.00027769) (0.813456, 0.83307) (latitude, longitude)
16 ls7_ledaps_sierra_leone Landsat 7 USGS Collection 1 Higher Level SR sc... LEDAPS ETM NetCDF LANDSAT_7 None None None None None EPSG:4326 (-0.000269494585236, 0.000269494585236) (0.943231048326, 0.943231048326) (latitude, longitude)
19 ls7_ledaps_tanzania Landsat 7 USGS Collection 1 Higher Level SR sc... LEDAPS ETM NetCDF LANDSAT_7 None None None None None EPSG:4326 (-0.000271277688070265, 0.000271139577954979) (0.999929558226998, 0.999962763497961) (latitude, longitude)
31 ls7_ledaps_vietnam Landsat 7 USGS Collection 1 Higher Level SR sc... LEDAPS ETM NetCDF LANDSAT_7 None None None None None EPSG:4326 (-0.000269494585236, 0.000269494585236) (0.943231048326, 0.943231048326) (latitude, longitude)
9 ls8_lasrc_ghana Landsat 8 USGS Collection 1 Higher Level SR sc... LaSRC OLI_TIRS NetCDF LANDSAT_8 None None None None None EPSG:4326 (-0.000269494585236, 0.000269494585236) (0.943231048326, 0.943231048326) (latitude, longitude)
10 ls8_lasrc_kenya Landsat 8 USGS Collection 1 Higher Level SR sc... LaSRC OLI_TIRS NetCDF LANDSAT_8 None None None None None EPSG:4326 (-0.000271309115317046, 0.00026957992707863) (0.999502780827996, 0.999602369607559) (latitude, longitude)
11 ls8_lasrc_senegal Landsat 8 USGS Collection 1 Higher Level SR sc... LaSRC OLI_TIRS NetCDF LANDSAT_8 None None None None None EPSG:4326 (-0.000271152, 0.00027769) (0.813456, 0.83307) (latitude, longitude)
8 ls8_lasrc_sierra_leone Landsat 8 USGS Collection 1 Higher Level SR sc... LaSRC OLI_TIRS NetCDF LANDSAT_8 None None None None None EPSG:4326 (-0.000269494585236, 0.000269494585236) (0.943231048326, 0.943231048326) (latitude, longitude)
15 ls8_lasrc_tanzania Landsat 8 USGS Collection 1 Higher Level SR sc... LaSRC OLI_TIRS NetCDF LANDSAT_8 None None None None None EPSG:4326 (-0.000271277688070265, 0.000271139577954979) (0.999929558226998, 0.999962763497961) (latitude, longitude)

Pick a product

Use the platform and product names from the previous block to select a Data Cube.


In [5]:
import utils.data_cube_utilities.data_access_api as dc_api  
api = dc_api.DataAccessApi(config = '/home/localuser/.datacube.conf')

# Change the data platform and data cube here

platform = "LANDSAT_7"
# platform = "LANDSAT_8"

# product = "ls7_ledaps_ghana"
# product = "ls7_ledaps_kenya"
# product = "ls7_ledaps_senegal"
# product = "ls7_ledaps_sierra_leone"
# product = "ls7_ledaps_tanzania"
product = "ls7_ledaps_vietnam"

Display Latitude-Longitude and Time Bounds of the Data Cube


In [6]:
from utils.data_cube_utilities.dc_time import _n64_to_datetime, dt_to_str

extents = api.get_full_dataset_extent(platform = platform, product = product, measurements=[])

latitude_extents = (min(extents['latitude'].values),max(extents['latitude'].values))
longitude_extents = (min(extents['longitude'].values),max(extents['longitude'].values))
time_extents = (min(extents['time'].values),max(extents['time'].values))

print("Latitude Extents:", latitude_extents)
print("Longitude Extents:", longitude_extents)
print("Time Extents:", list(map(dt_to_str, map(_n64_to_datetime, time_extents))))


Latitude Extents: (9.176425374578418, 13.964805165051667)
Longitude Extents: (102.40430421277932, 108.93092407802477)
Time Extents: ['1999-09-08', '2016-12-29']

Visualize Data Cube Region


In [7]:
## The code below renders a map that can be used to orient yourself with the region.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude = latitude_extents, longitude = longitude_extents)


Out[7]:

Pick a smaller analysis region and display that region

Try to keep your region to less than 0.2-deg x 0.2-deg for rapid processing. You can click on the map above to find the Lat-Lon coordinates of any location. You will want to identify a region with an inland water body and some vegetation. Pick a time window of several years.


In [8]:
## Vietnam - Central Lam Dong Province ##
longitude_extents = (107.0, 107.2)
latitude_extents  = (11.7, 12.0)

## Vietnam Ho Tri An Lake
# longitude_extents = (107.0, 107.2)
# latitude_extents  = (11.1, 11.3)

time_extents = ('2005-01-01', '2005-12-31')

In [9]:
display_map(latitude = latitude_extents, longitude = longitude_extents)


Out[9]:

Load the dataset and the required spectral bands or other parameters

After loading, you will view the Xarray dataset. Notice the dimensions represent the number of pixels in your latitude and longitude dimension as well as the number of time slices (time) in your time series.


In [10]:
landsat_dataset = dc.load(latitude = latitude_extents,
                          longitude = longitude_extents,
                          platform = platform,
                          time = time_extents,
                          product = product,
                          measurements = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2', 'pixel_qa'])

In [11]:
landsat_dataset
#view the dimensions and sample content from the cube


Out[11]:
<xarray.Dataset>
Dimensions:    (latitude: 1114, longitude: 743, time: 13)
Coordinates:
  * time       (time) datetime64[ns] 2005-01-13T02:57:03 ... 2005-12-31T02:57:25
  * latitude   (latitude) float64 12.0 12.0 12.0 12.0 ... 11.7 11.7 11.7 11.7
  * longitude  (longitude) float64 107.0 107.0 107.0 107.0 ... 107.2 107.2 107.2
Data variables:
    red        (time, latitude, longitude) int16 -9999 -9999 -9999 ... 457 517
    green      (time, latitude, longitude) int16 -9999 -9999 -9999 ... 500 500
    blue       (time, latitude, longitude) int16 -9999 -9999 -9999 ... 296 316
    nir        (time, latitude, longitude) int16 -9999 -9999 -9999 ... 2713 2356
    swir1      (time, latitude, longitude) int16 -9999 -9999 -9999 ... 1632 1552
    swir2      (time, latitude, longitude) int16 -9999 -9999 -9999 ... 796 738
    pixel_qa   (time, latitude, longitude) int32 1 1 1 1 1 1 ... 66 66 66 66 66
Attributes:
    crs:      EPSG:4326

Preparing the data

We will filter out the clouds and the water using the Landsat pixel_qa information. Next, we will calculate the values of NDVI (vegetation index) and TSM (water quality).


In [12]:
import xarray as xr  
import numpy as np
def ls7_unpack_qa( data_array , cover_type):  
    
    land_cover_endcoding = dict( fill     =  [1], 
                                 clear    =  [66,  130], 
                                 water    =  [68,  132],
                                 shadow   =  [72,  136],
                                 snow     =  [80,  112, 144, 176],
                                 cloud    =  [96,  112, 160, 176, 224],
                                 low_conf =  [66,  68,  72,  80,  96,  112],
                                 med_conf =  [130, 132, 136, 144, 160, 176],
                                 high_conf=  [224]
                               ) 
    boolean_mask = np.isin(data_array.values, land_cover_endcoding[cover_type]) 
    return xr.DataArray(boolean_mask.astype(int),
                        coords = data_array.coords,
                        dims = data_array.dims,
                        name = cover_type + "_mask",
                        attrs = data_array.attrs)

In [13]:
clear_xarray  = ls7_unpack_qa(landsat_dataset.pixel_qa, "clear")  
water_xarray  = ls7_unpack_qa(landsat_dataset.pixel_qa, "water")
shadow_xarray = ls7_unpack_qa(landsat_dataset.pixel_qa, "shadow") 
cloud_xarray = ls7_unpack_qa(landsat_dataset.pixel_qa, "cloud")

In [14]:
clean_xarray = xr.ufuncs.logical_or(clear_xarray , water_xarray).astype(np.int8).rename("clean_mask")

clean_mask = np.logical_or(clear_xarray.values.astype(bool),
                           water_xarray.values.astype(bool))

In [15]:
def NDVI(dataset):
    return ((dataset.nir - dataset.red)/(dataset.nir + dataset.red)).rename("NDVI")

In [16]:
ndvi_xarray = NDVI(landsat_dataset)  # Vegetation Index

In [17]:
from utils.data_cube_utilities.dc_water_quality import tsm

tsm_xarray = tsm(landsat_dataset, clean_mask = water_xarray.values.astype(bool) ).tsm

Combine everything into one XARRAY for further analysis


In [18]:
combined_dataset = xr.merge([landsat_dataset,
          clean_xarray,
          clear_xarray,
          water_xarray,
          shadow_xarray,
          cloud_xarray,                  
          ndvi_xarray,
          tsm_xarray])

# Copy original crs to merged dataset 
combined_dataset = combined_dataset.assign_attrs(landsat_dataset.attrs)

Define a path for a transect

A transect is just a line that will run across our region of interest. Use the display map above to find the end points of your desired line. If you click on the map it will give you precise Lat-Lon positions for a point.

Start with a line across a mix of water and land


In [19]:
# Water and Land Mixed Examples

# North-South Path
start = ( 11.8428, 107.0734 )
end   = ( 11.7306, 107.0734 )

# East-West Path
# start = ( 11.9167, 107.0554 )
# end   = ( 11.9167, 107.1719 )

# East-West Path for Lake Ho Tri An
# start = ( 11.25, 107.02 )
# end   = ( 11.25, 107.18 )

Plot the transect line


In [20]:
import folium
import numpy as np  
from folium.features import CustomIcon

def plot_a_path(points , zoom = 15):
    xs,ys = zip(*points)
    
    map_center_point = (np.mean(xs), np.mean(ys))
    the_map = folium.Map(location=[map_center_point[0], map_center_point[1]], zoom_start = zoom, tiles='http://mt1.google.com/vt/lyrs=y&z={z}&x={x}&y={y}', attr = "Google Attribution")
    path = folium.PolyLine(locations=points, weight=5, color = 'orange')
    the_map.add_child(path)
    
    start = ( xs[0] ,ys[0] )
    end   = ( xs[-1],ys[-1])
    
    return the_map  

plot_a_path([start,end])


Out[20]:

Find the nearest pixels along the transect path


In [21]:
from utils.data_cube_utilities.transect import line_scan

import numpy as np

def get_index_at(coords, ds):
    '''Returns an integer index pair.'''
    lat = coords[0]
    lon = coords[1]
    
    nearest_lat = ds.sel(latitude = lat, method = 'nearest').latitude.values
    nearest_lon = ds.sel(longitude = lon, method = 'nearest').longitude.values
    
    lat_index = np.where(ds.latitude.values == nearest_lat)[0]
    lon_index = np.where(ds.longitude.values == nearest_lon)[0]
    
    return (int(lat_index), int(lon_index))

def create_pixel_trail(start, end, ds):
    a = get_index_at(start, ds)
    b = get_index_at(end, ds)
    
    indices = line_scan.line_scan(a, b)

    pixels = [ ds.isel(latitude = x, longitude = y) for x, y in indices]
    return pixels

In [22]:
list_of_pixels_along_segment = create_pixel_trail(start, end, landsat_dataset)

Groundwork for Transect (2-D) and Hovmöller (3-D) Plots


In [23]:
import xarray
import matplotlib.pyplot as plt  
from matplotlib.ticker import FuncFormatter  
from datetime import datetime  
import time

def plot_list_of_pixels(list_of_pixels, band_name, y = None): 
    start = (
             "{0:.2f}".format(float(list_of_pixels[0].latitude.values )),
             "{0:.2f}".format(float(list_of_pixels[0].longitude.values))
            )  
    end = (
             "{0:.2f}".format(float(list_of_pixels[-1].latitude.values)),
             "{0:.2f}".format(float(list_of_pixels[-1].longitude.values))
            )
    
    def reformat_n64(t):
        return time.strftime("%Y.%m.%d", time.gmtime(t.astype(int)/1000000000))   
    
    def pixel_to_array(pixel):
        return(pixel.values)
    
    def figure_ratio(x,y, fixed_width = 10):
        width = fixed_width
        height = y * (fixed_width / x)
        return (width, height)
    
    pixel_array = np.transpose([pixel_to_array(pix) for pix in list_of_pixels])
    
    #If the data has one acquisition, then plot transect (2-D), else Hovmöller (3-D) 
    if y.size == 1:
        plt.figure(figsize = (15,5))
        plt.scatter(np.arange(pixel_array.size), pixel_array)
        plt.title("Transect (2-D) \n Acquisition date: {}".format(reformat_n64(y)))
        plt.xlabel("Pixels along the transect \n {} -  {} \n ".format(start,end))
        plt.ylabel(band_name)

    else:
        m = FuncFormatter(lambda x :x )
        figure = plt.figure(figsize = figure_ratio(len(list_of_pixels),
                                                   len(list_of_pixels[0].values),
                                                   fixed_width = 15))
        number_of_y_ticks = 5 

        ax = plt.gca()
        cax = ax.imshow(pixel_array, interpolation='none')
        figure.colorbar(cax,fraction=0.110, pad=0.04)

        ax.set_title("Hovmöller (3-D) \n Acquisition range: {} -  {} \n ".format(reformat_n64(y[0]),reformat_n64(y[-1])))
        plt.xlabel("Pixels along the transect \n {} -  {} \n ".format(start,end))
        ax.get_yaxis().set_major_formatter( FuncFormatter(lambda x, p: reformat_n64(list_of_pixels[0].time.values[int(x)]) if int(x) < len(list_of_pixels[0].time) else ""))    
        plt.ylabel("Time")
    plt.show()

In [24]:
def transect_plot(start,
                  end,
                  da):
    if type(da) is not xarray.DataArray and (type(da) is xarray.Dataset)  :
        raise Exception('You should be passing in a data-array, not a Dataset')

    pixels = create_pixel_trail(start, end,da)
    dates = da.time.values  

    lats = [x.latitude.values for x in pixels]
    lons = [x.longitude.values for x in pixels]
    
    plot_list_of_pixels(pixels, da.name, y = dates)

In [25]:
pixels = create_pixel_trail(start, end, landsat_dataset)

In [26]:
t = 2
subset = list( map(lambda x: x.isel(time = t), pixels))

Mask Clouds


In [27]:
def land_and_water_masking_ls7(dataset):    
    #Create boolean Masks for clear and water pixels
    clear_pixels = landsat_dataset.pixel_qa.values == 2 + 64
    water_pixels = landsat_dataset.pixel_qa.values == 4 + 64

    a_clean_mask = np.logical_or(clear_pixels, water_pixels)
    return a_clean_mask

In [28]:
cloudless_dataset = landsat_dataset.where(land_and_water_masking_ls7(landsat_dataset))

Select an acquisition date and then plot a 2D transect without clouds


In [31]:
# select an acquisition number from the start (t=0) to "time" using the array limits above
acquisition_number = 10

In [32]:
#If plotted will create the 2-D transect
cloudless_dataset_for_acq_no = cloudless_dataset.isel(time = acquisition_number)

In [33]:
#If Plotted will create the 3-D Hovmoller plot for a portion of the time series (min to max)
min_acq = 1
max_acq = 4

cloudless_dataset_from_1_to_acq_no = cloudless_dataset.isel(time = slice(min_acq, max_acq))

Select one of the XARRAY parameters for analysis


In [34]:
band = 'green'

Create a 2D Transect plot of the "band" for one date


In [35]:
transect_plot(start, end, cloudless_dataset_for_acq_no[band])


Create a 2D Transect plot of NDVI for one date


In [36]:
transect_plot(start, end, NDVI(cloudless_dataset_for_acq_no))


Create a 3D Hovmoller plot of NDVI for the entire time series


In [37]:
transect_plot(start, end, NDVI(cloudless_dataset))


Create a 2D Transect plot of water existence for one date


In [38]:
transect_plot(start, end, water_xarray.isel(time = acquisition_number))


Create a 3D Hovmoller plot of water extent for the entire time series


In [39]:
transect_plot(start, end, water_xarray)


Create a 2D Transect plot of water quality (TSM) for one date


In [40]:
transect_plot(start, end, tsm_xarray.isel(time = acquisition_number))


Create a 3D Hovmoller plot of water quality (TSM) for one date


In [41]:
transect_plot(start, end, tsm_xarray)