The code in this notebook subsets a data cube, selects a specific set of variables, generates some additional data from those and then outputs that data into a GeoTIFF file. The goal is to be able to do external analyses of this data using other data analysis tools or GIS tools. The files would be reasonable in size, since we would restrict the region and parameters in the output.
In [1]:
import xarray as xr
import numpy as np
import datacube
from utils.data_cube_utilities.data_access_api import DataAccessApi
In [2]:
# Open a handle to the Data Cube via the project's convenience wrapper;
# `dc` is the underlying datacube.Datacube instance used for loads below.
api = DataAccessApi()
dc = api.dc
List available products for each platform
In [3]:
# Query the Data Cube index for every ingested product.
# The bare expression on the last line renders the table in the notebook.
list_of_products = dc.list_products()
list_of_products
Out[3]:
Choose product
In [4]:
# Select the satellite platform and its surface-reflectance product.
# To switch to Landsat 8, comment these two lines and uncomment the pair below.
platform = "LANDSAT_7"
product = "ls7_usgs_sr_scene"
# platform = "LANDSAT_8"
# product = "ls8_usgs_sr_scene"
In [5]:
from utils.data_cube_utilities.dc_load import get_product_extents
from utils.data_cube_utilities.dc_time import dt_to_str
# Look up the full spatial and temporal extents of the chosen product,
# so the analysis region/time below can be chosen within them.
full_lat, full_lon, min_max_dates = get_product_extents(api, platform, product)
# Print the extents of the combined data.
print("Latitude Extents:", full_lat)
print("Longitude Extents:", full_lon)
print("Time Extents:", list(map(dt_to_str, min_max_dates)))
In [6]:
## The code below renders a map that can be used to orient yourself with the region.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(full_lat, full_lon)
Out[6]:
In [7]:
######### Ghana - Pambros Salt Ponds ##################
lon = (-0.3013, -0.2671)
lat = (5.5155, 5.5617)
time_extents = ('2015-01-01', '2015-12-31')
In [8]:
from utils.data_cube_utilities.dc_display_map import display_map
# Render the selected sub-region to visually confirm the bounds above.
display_map(lat, lon)
Out[8]:
In [9]:
# Load the optical bands plus pixel_qa (used below for cloud/water masking)
# for the selected region and time range.
landsat_dataset = dc.load(latitude = lat,
                          longitude = lon,
                          platform = platform,
                          time = time_extents,
                          product = product,
                          measurements = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2', 'pixel_qa'])
In [10]:
landsat_dataset
Out[10]:
In [11]:
from utils.data_cube_utilities.clean_mask import landsat_qa_clean_mask
# Build per-pixel boolean masks from the Landsat pixel_qa band.
clear_xarray = landsat_qa_clean_mask(landsat_dataset, platform, cover_types=['clear'])
water_xarray = landsat_qa_clean_mask(landsat_dataset, platform, cover_types=['water'])
shadow_xarray = landsat_qa_clean_mask(landsat_dataset, platform, cover_types=['shadow'])
# A pixel is "clean" if it is either clear land or open water.
# FIX: `xr.ufuncs` was deprecated and removed in xarray 2022.06; `np.logical_or`
# dispatches through xarray's ufunc machinery and returns the same DataArray.
clean_xarray = np.logical_or(clear_xarray, water_xarray).rename("clean_mask")
In [12]:
from utils.data_cube_utilities.dc_water_classifier import wofs_classify
# Classify each clean pixel as water / not-water with the WOFS classifier.
# mosaic=False keeps one classification per acquisition (time slice).
water_classification = wofs_classify(landsat_dataset,
                                     clean_mask = clean_xarray.values,
                                     mosaic = False)
In [13]:
wofs_xarray = water_classification.wofs
In [14]:
def NDVI(dataset):
    """Normalized Difference Vegetation Index: (NIR - red) / (NIR + red)."""
    numerator = dataset.nir - dataset.red
    denominator = dataset.nir + dataset.red
    return (numerator / denominator).rename("NDVI")
In [15]:
def NDWI(dataset):
    """Normalized Difference Water Index: (green - NIR) / (green + NIR)."""
    numerator = dataset.green - dataset.nir
    denominator = dataset.green + dataset.nir
    return (numerator / denominator).rename("NDWI")
In [16]:
def NDBI(dataset):
    """Normalized Difference Built-up Index: (SWIR2 - NIR) / (SWIR2 + NIR).

    NOTE(review): NDBI is conventionally computed with the first SWIR band
    (swir1); this implementation uses swir2 — confirm this is intended.
    """
    numerator = dataset.swir2 - dataset.nir
    denominator = dataset.swir2 + dataset.nir
    return (numerator / denominator).rename("NDBI")
In [17]:
# Compute each spectral index over the full dataset (per pixel, per time slice).
ndbi_xarray = NDBI(landsat_dataset) # Urbanization - Reds
ndvi_xarray = NDVI(landsat_dataset) # Dense Vegetation - Greens
ndwi_xarray = NDWI(landsat_dataset) # High Concentrations of Water - Blues
In [18]:
from utils.data_cube_utilities.dc_water_quality import tsm
# Total Suspended Matter — meaningful only over water, so the WOFS water
# classification (cast to bool) is passed as the clean mask.
tsm_xarray = tsm(landsat_dataset, clean_mask = wofs_xarray.values.astype(bool) ).tsm
In [19]:
def EVI(dataset, c1 = 6.0, c2 = 7.5, L = 1.0):
    """Enhanced Vegetation Index: (NIR - red) / (NIR + c1*red - c2*blue + L).

    Parameters
    ----------
    dataset: object with 'nir', 'red', and 'blue' data variables.
    c1, c2: aerosol-resistance coefficients; defaults are the standard
        values (6 and 7.5) used by the call in this notebook.
    L: canopy background adjustment; the standard value is 1.

    FIX: the previous defaults were None, so calling EVI without explicit
    coefficients raised a TypeError from None arithmetic. The standard
    coefficient values are now the defaults; existing calls that pass the
    coefficients explicitly are unaffected.

    NOTE(review): the canonical EVI formula includes a gain factor G = 2.5
    applied to the ratio; this implementation omits it, so values are scaled
    by 1/2.5 relative to the standard definition.
    """
    return ((dataset.nir - dataset.red)/((dataset.nir + (c1 * dataset.red) - (c2 *dataset.blue) + L))).rename("EVI")
In [20]:
evi_xarray = EVI(landsat_dataset, c1 = 6, c2 = 7.5, L = 1 )
In [21]:
# Merge the raw bands, masks, indices, and derived products into one Dataset
# so everything exports together.
# FIX: the clean_xarray entry had been clobbered by a stray markdown heading
# fragment (leaving it commented out); it is restored to the merge list here.
combined_dataset = xr.merge([landsat_dataset,
                             clean_xarray,
                             clear_xarray,
                             water_xarray,
                             shadow_xarray,
                             evi_xarray,
                             ndbi_xarray,
                             ndvi_xarray,
                             ndwi_xarray,
                             wofs_xarray,
                             tsm_xarray])
# Copy original crs to merged dataset
combined_dataset = combined_dataset.assign_attrs(landsat_dataset.attrs)
combined_dataset
Out[21]:
Export each acquisition as a GeoTIFF.
In [22]:
from utils.data_cube_utilities.import_export import export_xarray_to_multiple_geotiffs
import pathlib

# Ensure the output directory exists before writing to it.
# Using pathlib instead of a shelled-out `!mkdir` keeps this cell consistent
# with the NetCDF export cell at the bottom of the notebook and avoids
# platform-specific shell behavior.
ls_num = 7 if platform == 'LANDSAT_7' else 8
geotiff_dir = f"output/geotiffs/landsat{ls_num}"
pathlib.Path(geotiff_dir).mkdir(parents=True, exist_ok=True)

# One GeoTIFF is written per acquisition, prefixed "landsat<N>".
output_path = f"{geotiff_dir}/landsat{ls_num}"
export_xarray_to_multiple_geotiffs(combined_dataset, output_path)
Check to see what files were exported. The size of these files is also shown.
In [23]:
if platform == 'LANDSAT_7':
!ls -lah output/geotiffs/landsat7/*.tif
else:
!ls -lah output/geotiffs/landsat8/*.tif
Sanity check using `gdalinfo` to make sure that all of our bands exist.
In [24]:
# Spot-check one exported GeoTIFF with gdalinfo to confirm every band exists.
# NOTE(review): these filenames embed acquisition timestamps from a particular
# run — update them to match a file listed by the previous cell.
if platform == 'LANDSAT_7':
    !gdalinfo output/geotiffs/landsat7/landsat7_2015_01_09_03_06_13.tif
else:
    !gdalinfo output/geotiffs/landsat8/landsat8_2015_01_01_03_07_41.tif
Zip all GeoTIFFs.
In [25]:
if platform == 'LANDSAT_7':
!tar -cvzf output/geotiffs/landsat7/landsat_7.tar.gz output/geotiffs/landsat7/*.tif
else:
!tar -cvzf output/geotiffs/landsat8/landsat_8.tar.gz output/geotiffs/landsat8/*.tif
Export all acquisitions together as a single NetCDF. Note that, as an example, only the red band is exported here.
In [26]:
combined_dataset
Out[26]:
In [27]:
import os
import pathlib
from utils.data_cube_utilities.import_export import export_xarray_to_netcdf

# Ensure the output directory exists before writing to it.
ls_num = 7 if platform == 'LANDSAT_7' else 8
netcdf_dir = pathlib.Path(f"output/netcdfs/landsat{ls_num}")
netcdf_dir.mkdir(parents=True, exist_ok=True)

# NOTE(review): only the red band is exported here — pass `combined_dataset`
# instead of `combined_dataset.red` to export every variable.
output_file_path = str(netcdf_dir / f"ls{ls_num}_netcdf_example.nc")
export_xarray_to_netcdf(combined_dataset.red, output_file_path)