TSM Demo


Notebook Summary

TSM stands for "Total Suspended Matter" - also called TSS which stands for "Total Suspended Solids". It is the dry-weight of particles suspended (not dissolved) in a body of water. It is a proxy of water quality.


Index

- Import Dependencies and Connect to the Data Cube
- Choose Platforms and Products
- Get the Extents of the Cube
- Define the Extents of the Analysis
- Load Data from the Data Cube
- Obtain TSM
- Create GeoTIFF Output Products

Import Dependencies and Connect to the Data Cube [▴](#top)


In [1]:
# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np  
import xarray as xr  

import utils.data_cube_utilities.data_access_api as dc_api  
api = dc_api.DataAccessApi()
dc = api.dc

Choose Platforms and Products [▴](#top)

List available products for each platform


In [2]:
# Get available products
products_info = dc.list_products()

# List Landsat 7 products
print("Landsat 7 Products:")
products_info[["platform", "name"]][products_info.platform == "LANDSAT_7"]


Landsat 7 Products:
Out[2]:
     platform               name
id
2   LANDSAT_7  ls7_usgs_sr_scene

In [3]:
# List Landsat 8 products
print("Landsat 8 Products:")
products_info[["platform", "name"]][products_info.platform == "LANDSAT_8"]


Landsat 8 Products:
Out[3]:
     platform               name
id
1   LANDSAT_8  ls8_usgs_sr_scene

Choose products


In [4]:
# Select Products and Platforms
# It is possible to select multiple datasets (L7, L8)
# True = SELECT
# False = DO NOT SELECT

use_Landsat7 = False
use_Landsat8 = True
platforms, products = [], []
if use_Landsat7:
    platforms.append('LANDSAT_7')
    products.append('ls7_usgs_sr_scene')
if use_Landsat8:
    platforms.append('LANDSAT_8')
    products.append('ls8_usgs_sr_scene')
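
A quick guard (a sketch, not part of the original cell) can catch the case where neither platform is selected before any further queries run:

# Sketch: make sure at least one platform/product pair was selected above.
assert len(products) > 0, "Select at least one platform (set use_Landsat7 or use_Landsat8 to True)."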

Get the Extents of the Cube [▴](#top)


In [5]:
from utils.data_cube_utilities.dc_load import get_overlapping_area
from utils.data_cube_utilities.dc_time import dt_to_str

full_lat, full_lon, min_max_dates = get_overlapping_area(api, platforms, products)

# Print the extents of each product.
str_min_max_dates = np.vectorize(dt_to_str)(min_max_dates)
for i, (platform, product) in enumerate(zip(platforms, products)):
    print("For platform {} and product {}:".format(platform, product))
    print("Time Extents:", str_min_max_dates[i])
    print()

# Print the extents of the combined data.
min_start_date_mutual = np.max(min_max_dates[:,0])
max_end_date_mutual = np.min(min_max_dates[:,1])
print("Overlapping Extents:")
print("Latitude Extents:", full_lat)
print("Longitude Extents:", full_lon)
print("Time Extents:", list(map(dt_to_str, (min_start_date_mutual, max_end_date_mutual))))


For platform LANDSAT_8 and product ls8_usgs_sr_scene:
Time Extents: ['2013-03-21' '2020-01-27']

Overlapping Extents:
Latitude Extents: (-12.63361111121218, 18.40166666681388)
Longitude Extents: (-25.47250000020378, 44.01000000035208)
Time Extents: ['2013-03-21', '2020-01-27']

Visualize the available area


In [6]:
# The code below renders a map that can be used to view the region.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(full_lat, full_lon)


Out[6]:

Define the Extents of the Analysis [▴](#top)


In [7]:
# Select an analysis region (Lat-Lon) within the extents listed above. 
# Select a time period (Min-Max) within the extents listed above (Year-Month-Day)

# Lake Guiers - Senegal
# lat = (15.8293, 16.4319) 
# lon = (-15.9782, -15.7592)

# Lake Retba - Senegal
# lat = (14.8307, 14.8579) 
# lon = (-17.2518, -17.2039)

# Delta du Saloum - Senegal
lat = (13.65, 13.7550)
lon = (-16.70, -16.65)

# Time Period
# time_extents = ('2016-01-01', '2019-01-01')
time_extents = ('2016-01-01', '2016-12-31')
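
As an optional sanity check (not in the original notebook), the chosen region and time period can be compared against the overlapping extents computed earlier; this assumes `full_lat` and `full_lon` are (min, max) tuples, as the printed output above suggests.

# Sketch: verify the analysis extents fall inside the cube's overlapping extents.
assert full_lat[0] <= lat[0] <= lat[1] <= full_lat[1], "Latitude range is outside the cube extents."
assert full_lon[0] <= lon[0] <= lon[1] <= full_lon[1], "Longitude range is outside the cube extents."
# Date strings in 'YYYY-MM-DD' format compare correctly as strings.
assert dt_to_str(min_start_date_mutual) <= time_extents[0] <= time_extents[1] <= dt_to_str(max_end_date_mutual), \
    "Time period is outside the cube extents."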

Visualize the selected area


In [8]:
display_map(lat, lon)


Out[8]:

Load Data from the Data Cube [▴](#top)

Mask out clouds and cloud shadows (and water, if desired) and create a median mosaic


In [9]:
from utils.data_cube_utilities.dc_load import match_dim_sizes
from utils.data_cube_utilities.clean_mask import \
    landsat_qa_clean_mask, landsat_clean_mask_invalid
from utils.data_cube_utilities.sort import xarray_sortby_coord
from utils.data_cube_utilities.dc_load import is_dataset_empty

datasets, clean_masks = {}, {}
matching_abs_res, same_dim_sizes = match_dim_sizes(dc, products, lon, lat)
for platform, product in zip(platforms, products):
    # Load the data.
    # We need the bands ['red', 'green', 'blue', 'nir', 'swir1', 'swir2'] because WOFS uses them.
    # We need the 'pixel_qa' band to mask out undesired pixels.
    measurements = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2', 'pixel_qa']
    dataset = dc.load(platform=platform, product=product, 
                      lat=lat, lon=lon, 
                      time=time_extents, measurements=measurements)
    if len(dataset.dims) == 0: # The dataset is empty.
        continue
    # Get the clean masks.
    clean_mask = (landsat_qa_clean_mask(dataset, platform) & 
                  (dataset[measurements[0]] != -9999) & 
                  landsat_clean_mask_invalid(dataset))
    # Remove the 'pixel_qa' band since we have the clean mask.
    dataset = dataset.drop('pixel_qa')
    # If needed, scale the datasets and clean masks to the same size in the x and y dimensions.
    # Note: this branch calls xr_scale_res, which is not imported above; import it from the
    # data cube utilities if the selected products have differing resolutions.
    if not same_dim_sizes:
        dataset = xr_scale_res(dataset, abs_res=matching_abs_res)
        clean_mask = xr_scale_res(clean_mask.astype(np.uint8), 
                                  abs_res=matching_abs_res).astype(np.bool)
    # Clean the data.
    dataset = dataset.astype(np.float32).where(clean_mask)
    # Collect the dataset and clean mask.
    datasets[platform] = dataset
    clean_masks[platform] = clean_mask
# Combine everything.
if len(datasets) > 0:
    dataset = xarray_sortby_coord(
        xr.concat(list(datasets.values()), dim='time'), coord='time')
    clean_mask = xarray_sortby_coord(
        xr.concat(list(clean_masks.values()), dim='time'), coord='time')
else:
    dataset = xr.Dataset()
    clean_mask = xr.DataArray(np.empty((0,), dtype=np.bool))
del datasets, clean_masks

assert not is_dataset_empty(dataset), "The dataset is empty."
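
For context, the 'pixel_qa' band consumed above is bit-packed. The sketch below shows the kind of bit test landsat_qa_clean_mask roughly performs for Landsat Collection 1 surface reflectance; the bit positions and the utility's internals are assumptions here, not taken from its source.

# Illustrative sketch only -- the cell above uses landsat_qa_clean_mask instead.
# For Landsat Collection 1 pixel_qa, bit 1 marks "clear" and bit 2 marks "water"
# (bit positions assumed for illustration).
def simple_qa_mask(pixel_qa, keep_water=True):
    clear = (pixel_qa & (1 << 1)) > 0
    water = (pixel_qa & (1 << 2)) > 0
    return (clear | water) if keep_water else clear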

In [10]:
from utils.data_cube_utilities.dc_mosaic import create_median_mosaic

# Create a median mosaic.
land_and_water_composite = create_median_mosaic(dataset, clean_mask)

In [11]:
# Show the land and water composite
from utils.data_cube_utilities.dc_rgb import rgb
from utils.data_cube_utilities.plotter_utils import figure_ratio

# RGB image options
# Standard RGB = 321 = Red, Green, Blue
# False Color = 543 = SWIR1, NIR, Red
# False Color (Landsat Mosaic) = 742 = SWIR2, NIR, Green

fig, ax = plt.subplots(figsize=figure_ratio(land_and_water_composite, fixed_width=12))

rgb(land_and_water_composite, bands=['swir2', 'nir', 'green'], 
    min_possible=0, max_possible=5000, fig=fig, ax=ax)
plt.show()


Obtain TSM [▴](#top)

Mask Out Everything but Water


In [12]:
from utils.data_cube_utilities.dc_water_classifier import wofs_classify

water_mask = wofs_classify(dataset, clean_mask=clean_mask.values, 
                           no_data=0.0).wofs.astype(np.bool)
water_dataset = dataset.where(water_mask)
water_composite = create_median_mosaic(water_dataset)
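
If the WOFS classifier is not available, a simple spectral index can stand in as a rough water mask. The sketch below uses MNDWI with the common 0 threshold; it is an illustrative alternative, not what this notebook uses.

# Sketch: MNDWI-based water mask as a rough alternative to wofs_classify.
# MNDWI = (green - swir1) / (green + swir1); values above 0 are commonly treated as water.
mndwi = (dataset.green - dataset.swir1) / (dataset.green + dataset.swir1)
mndwi_water_mask = (mndwi > 0) & clean_mask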

In [13]:
fig, ax = plt.subplots(figsize=figure_ratio(land_and_water_composite, fixed_width=12))

rgb(water_composite, bands=['swir2', 'nir', 'green'], fig=fig, ax=ax)
plt.show()


Compute TSM


In [14]:
from utils.data_cube_utilities.dc_water_quality import tsm

tsm_dataset = tsm(water_dataset)
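
For reference, tsm() returns an empirical estimate regressed on surface reflectance. A sketch of a typical red/green power-law formulation is shown below; the scaling and coefficients are assumptions for illustration, not values read from the utility's source.

# Sketch of a typical empirical TSM regression (coefficients are illustrative assumptions).
# Reflectance is scaled to [0, 1] and the mean of the red and green bands drives a power law.
tsmi = (water_dataset.red + water_dataset.green) * 0.0001 / 2
tsm_sketch = 3983 * tsmi**1.6246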

Compute Mean TSM Over Time


In [15]:
mean_tsm = tsm_dataset.mean(dim=['time'])
mean_tsm.tsm.values[np.isnan(mean_tsm.tsm.values)] = 0

In [16]:
mean_tsm_plot = mean_tsm.tsm.plot(figsize=(10,15), cmap = "hot", 
                                  vmin=mean_tsm.tsm.min(), 
                                  vmax=mean_tsm.tsm.max())
plt.title('Mean Total Suspended Matter (TSM)')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
mean_tsm_plot.colorbar.set_label('g / L')
plt.show()


Compute Maximum TSM Over Time


In [17]:
max_tsm = tsm_dataset.max(dim=['time'])
max_tsm.tsm.values[np.isnan(max_tsm.tsm.values)] = 0

In [18]:
max_tsm_plot = max_tsm.tsm.plot(figsize=(10,15), cmap = "hot", 
                                vmin=max_tsm.tsm.min(), 
                                vmax=max_tsm.tsm.max())
plt.title('Maximum Total Suspended Matter (TSM)')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
max_tsm_plot.colorbar.set_label('g / L')
plt.show()


Compute Minimum TSM Over Time


In [19]:
minimum_tsm = tsm_dataset.min(dim=['time'])
minimum_tsm.tsm.values[np.isnan(minimum_tsm.tsm.values)] = 0

In [20]:
minimum_tsm_plot = minimum_tsm.tsm.plot(figsize=(10,15), cmap = "hot", 
                                        vmin=minimum_tsm.tsm.min(), 
                                        vmax=minimum_tsm.tsm.max())
plt.title('Minimum Total Suspended Matter (TSM)')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
minimum_tsm_plot.colorbar.set_label('g / L')
plt.show()


Descriptive Statistics


In [21]:
import pandas as pd

tsm_dataframe = tsm_dataset.tsm.to_dataframe()
tsm_statistics = tsm_dataframe.describe()
tsm_statistics = tsm_statistics.append(pd.Series(tsm_dataframe.max() - tsm_dataframe.min(), name='Range'))
first_quartile = tsm_dataframe.quantile(0.25)
third_quartile = tsm_dataframe.quantile(0.75)
tsm_statistics = tsm_statistics.append(pd.Series(third_quartile - first_quartile, name='Interquartile Range'))
tsm_statistics = tsm_statistics.append(pd.Series(tsm_dataframe.var(axis=0), name='Variance'))
tsm_statistics.rename(index={'count' : 'Samples', 'mean' : 'Mean', 'std' : 'Standard Deviation', 'min' : 'Minimum', 
                             '25%' : 'First Quartile', '50%' : 'Second Quartile', '75%' : 'Third Quartile', 'max' : 'Max'}, 
                      columns={'tsm':'TSM'}, inplace=True)
tsm_statistics


Out[21]:
                              TSM
Samples              1.739581e+06
Mean                 5.481635e+01
Standard Deviation   2.710368e+01
Minimum              6.069431e+00
First Quartile       3.130906e+01
Second Quartile      5.240904e+01
Third Quartile       7.624344e+01
Max                  4.335540e+02
Range                4.274846e+02
Interquartile Range  4.493438e+01
Variance             7.346094e+02
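
Beyond per-pixel composites, a per-acquisition summary can be useful. The sketch below (not part of the original notebook) averages TSM over the spatial dimensions to give a simple time series for the region:

# Sketch: spatially averaged TSM for each acquisition time.
spatial_dims = [d for d in tsm_dataset.tsm.dims if d != 'time']
tsm_time_series = tsm_dataset.tsm.mean(dim=spatial_dims)
tsm_time_series.plot.line(figsize=(12, 4), marker='o')
plt.title('Spatially Averaged TSM Over Time')
plt.show()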

Create GeoTIFF Output Products [▴](#top)

<p style="color:red";>CHANGE INPUTS BELOW


In [26]:
from utils.data_cube_utilities.dc_utilities import write_geotiff_from_xr
import os
import pathlib

# Comment out any undesired lines that export GeoTIFFs.
# Change the output file paths, or the files will be overwritten for each run.

output_dir = 'output/geotiffs'
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)

# Mean TSM
write_geotiff_from_xr(output_dir + '/mean_tsm.tif', mean_tsm, ['tsm'])

# Max TSM
write_geotiff_from_xr(output_dir + '/maximum_tsm.tif', max_tsm, ['tsm'])

# Minimum TSM
write_geotiff_from_xr(output_dir + '/minimum_tsm.tif', minimum_tsm, ['tsm'])

In [27]:
!ls -lah output/geotiffs/*.tif


-rw-r--r-- 1 jovyan users  19K May 21 16:21 output/geotiffs/Chunking_Demo_WOFS.tif
-rw-r--r-- 1 jovyan users 533K May 21 17:22 output/geotiffs/maximum_tsm.tif
-rw-r--r-- 1 jovyan users 533K May 21 17:22 output/geotiffs/mean_tsm.tif
-rw-r--r-- 1 jovyan users 533K May 21 17:22 output/geotiffs/minimum_tsm.tif
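
As an optional check, the exported GeoTIFFs can be read back to confirm their size and CRS. The sketch below assumes rasterio is installed in this environment, which the notebook does not state:

# Sketch: read one exported GeoTIFF back to verify it was written as expected
# (assumes rasterio is available).
import rasterio

with rasterio.open(output_dir + '/mean_tsm.tif') as src:
    print("Bands:", src.count)
    print("Size:", src.width, "x", src.height)
    print("CRS:", src.crs)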