In [1]:
# Enable importing of utilities.
from sys import path
path.append("..")
Task-F: Data Export
The code in this notebook subsets a data cube, selects a specific set of variables, creates a new xarray dataset, and then writes that data to a GeoTIFF file and a CSV file. These output files can be used in other software programs (e.g. QGIS, ArcGIS, Excel) for more specific analyses. We will keep the region small so that we can control file sizes.
In [2]:
import datacube
import utils.data_cube_utilities.data_access_api as dc_api
api = dc_api.DataAccessApi()
dc = datacube.Datacube(app = 'ardc_task_f')
api.dc = dc
In [3]:
list_of_products = dc.list_products()
netCDF_products = list_of_products[list_of_products['format'] == 'NetCDF']
netCDF_products
Out[3]:
In [5]:
# Change the data platform and data cube here
# This data export notebook will only work for Landsat-7 datasets
platform = "LANDSAT_7"
# product = "ls7_ledaps_ghana"
# product = "ls7_ledaps_kenya"
# product = "ls7_ledaps_senegal"
# product = "ls7_ledaps_sierra_leone"
# product = "ls7_ledaps_tanzania"
product = "ls7_ledaps_vietnam"
In [6]:
from utils.data_cube_utilities.dc_time import _n64_to_datetime, dt_to_str
extents = api.get_full_dataset_extent(platform = platform, product = product, measurements=[])
latitude_extents = (min(extents['latitude'].values), max(extents['latitude'].values))
longitude_extents = (min(extents['longitude'].values), max(extents['longitude'].values))
time_extents = (min(extents['time'].values), max(extents['time'].values))
print("Latitude Extents:", latitude_extents)
print("Longitude Extents:", longitude_extents)
print("Time Extents:", list(map(dt_to_str, map(_n64_to_datetime, time_extents))))
In [7]:
## The code below renders a map that can be used to orient yourself with the region.
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude = latitude_extents, longitude = longitude_extents)
Out[7]:
In [10]:
## Vietnam - Central Lam Dong Province ##
longitude_extents = (105.2, 105.5)
latitude_extents = (11.25, 11.55)
time_extents = ('2010-01-01', '2010-12-31')
In [11]:
from utils.data_cube_utilities.dc_display_map import display_map
display_map(latitude = latitude_extents, longitude = longitude_extents)
Out[11]:
In [12]:
landsat_dataset = dc.load(latitude = latitude_extents,
                          longitude = longitude_extents,
                          platform = platform,
                          time = time_extents,
                          product = product,
                          measurements = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2', 'pixel_qa'])
In [13]:
landsat_dataset # display a summary of the xarray dataset: its dimensions, coordinates, and data variables
Out[13]:
In [14]:
import xarray as xr
import numpy as np
def ls7_unpack_qa(data_array, cover_type):
    # Landsat 7 pixel_qa values associated with each cover type
    land_cover_encoding = dict(fill          =[1],
                               clear         =[322, 386, 834, 898, 1346],
                               water         =[324, 388, 836, 900, 1348],
                               shadow        =[328, 392, 840, 904, 1350],
                               snow          =[336, 368, 400, 432, 848, 880, 812, 944, 1352],
                               cloud         =[352, 368, 416, 432, 848, 880, 912, 944, 1352],
                               low_conf_cl   =[322, 324, 328, 336, 352, 368, 834, 836, 840, 848, 864, 880],
                               med_conf_cl   =[386, 388, 392, 400, 416, 432, 898, 900, 904, 928, 944],
                               high_conf_cl  =[480, 992],
                               low_conf_cir  =[322, 324, 328, 336, 352, 368, 386, 388, 392, 400, 416, 432, 480],
                               high_conf_cir =[834, 836, 840, 848, 864, 880, 898, 900, 904, 912, 928, 944],
                               terrain_occ   =[1346, 1348, 1350, 1352]
                               )
    # Boolean mask of pixels whose pixel_qa value matches the requested cover type
    boolean_mask = np.isin(data_array, land_cover_encoding[cover_type])
    return xr.DataArray(boolean_mask.astype(np.int8),
                        coords = data_array.coords,
                        dims   = data_array.dims,
                        name   = cover_type + "_mask",
                        attrs  = data_array.attrs)
In [15]:
clear_xarray = ls7_unpack_qa(landsat_dataset.pixel_qa, "clear")
water_xarray = ls7_unpack_qa(landsat_dataset.pixel_qa, "water")
shadow_xarray = ls7_unpack_qa(landsat_dataset.pixel_qa, "shadow")
cloud_xarray = ls7_unpack_qa(landsat_dataset.pixel_qa, "cloud")
In [16]:
clean_xarray = xr.ufuncs.logical_or(clear_xarray, water_xarray).astype(np.int8).rename("clean_mask")
clean_mask = np.logical_or(clear_xarray.values.astype(bool),
                           water_xarray.values.astype(bool))
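As an optional sanity check (a sketch, not part of the original workflow), the fraction of pixels flagged as clean across all acquisitions can be computed directly from the boolean mask:
# Fraction of pixels (over all time slices) classified as clear land or open water
print("Fraction of clean pixels:", clean_mask.mean())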
In [17]:
def NDVI(dataset):
    return ((dataset.nir - dataset.red)/(dataset.nir + dataset.red)).rename("NDVI")
In [18]:
def NDWI(dataset):
    return ((dataset.green - dataset.nir)/(dataset.green + dataset.nir)).rename("NDWI")
In [19]:
def NDBI(dataset):
    return ((dataset.swir2 - dataset.nir)/(dataset.swir2 + dataset.nir)).rename("NDBI")
In [20]:
def EVI(dataset, c1 = None, c2 = None, L = None):
    # Note: the standard EVI formula also includes a 2.5 gain factor, which is omitted here
    return ((dataset.nir - dataset.red)/(dataset.nir + (c1 * dataset.red) - (c2 * dataset.blue) + L)).rename("EVI")
In [21]:
ndbi_xarray = NDBI(landsat_dataset) # Urbanization - Reds
ndvi_xarray = NDVI(landsat_dataset) # Dense Vegetation - Greens
ndwi_xarray = NDWI(landsat_dataset) # High Concentrations of Water - Blues
evi_xarray = EVI(landsat_dataset, c1 = 6, c2 = 7.5, L = 1 ) # Enhanced Vegetation Index
In [22]:
from utils.data_cube_utilities.dc_water_quality import tsm
tsm_xarray = tsm(landsat_dataset, clean_mask = water_xarray.values.astype(bool) ).tsm
In [23]:
combined_dataset = xr.merge([landsat_dataset,
                             clean_xarray,
                             clear_xarray,
                             water_xarray,
                             shadow_xarray,
                             cloud_xarray,
                             evi_xarray,
                             ndbi_xarray,
                             ndvi_xarray,
                             ndwi_xarray,
                             tsm_xarray])
# Copy original crs to merged dataset
combined_dataset = combined_dataset.assign_attrs(landsat_dataset.attrs)
combined_dataset # display a summary of the merged xarray dataset: its dimensions, coordinates, and data variables
Out[23]:
This section will be used to create a CSV export file for a given pixel. You will identify the pixel by selecting a specific lat-lon position, and the code will then find the closest pixel to that point (nearest neighbor). Use the map at the top of this notebook to view your region and pick a lat-lon location; you can find an exact location by clicking on the map. The CSV file will contain the time series data for each variable in the xarray dataset.
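Below is a minimal sketch (with made-up coordinate values, not taken from the data cube) of how the method='nearest' option snaps a requested coordinate to the closest grid point:
import numpy as np
import xarray as xr
demo = xr.DataArray(np.arange(4), dims="latitude",
                    coords={"latitude": [11.30, 11.40, 11.50, 11.60]})
# Requesting 11.3972 returns the value stored at the closest grid latitude, 11.40
print(demo.sel(latitude=11.3972, method="nearest").latitude.values)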
In [24]:
# Lat and Lon coordinates extracted from the map above
pixel_lat = 11.3972
pixel_lon = 105.3528
In [25]:
pixel = combined_dataset.sel(latitude = pixel_lat,
                             longitude = pixel_lon,
                             method = 'nearest') # nearest neighbor selection
In [26]:
import xarray as xr
import csv
def ts_pixel_to_csv(pixel: xr.Dataset,
                    csv_file_name: str):
    # Yield the pixel's values one time step at a time
    def __yield_from_time_axis(px):
        for i in range(len(px.time)):
            yield px.isel(time = i)
    # Placeholder for optional timestamp formatting
    def __format_time(t):
        return t
    with open(csv_file_name, 'w') as out:
        csv_out = csv.writer(out)
        # Header row: the time stamp followed by every data variable in the pixel
        column_names = ['time'] + list(pixel.data_vars)
        csv_out.writerow(column_names)
        # One row per acquisition
        for row in __yield_from_time_axis(pixel):
            csv_out.writerow([__format_time(row.time.values)] + [row[key].values for key in list(pixel.data_vars)])
In [27]:
csv_name = 'test.csv'
In [28]:
ts_pixel_to_csv(pixel, csv_name)
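To confirm the export, the CSV can be read back (assuming pandas is available in the environment); it contains one row per acquisition and one column per data variable, plus the time stamp:
import pandas as pd
pd.read_csv(csv_name).head()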
File formatting
In [29]:
import time
def time_to_string(t):
    # Convert a nanosecond-precision numpy datetime64 to a string suitable for use in file names
    return time.strftime("%Y_%m_%d_%H_%M_%S", time.gmtime(t.astype(int)/1000000000))
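For example (an illustrative timestamp, not taken from the dataset), a nanosecond-precision datetime64 is converted to a string that can be embedded in a file name:
import numpy as np
t_example = np.datetime64('2010-01-05T03:12:44', 'ns')  # hypothetical acquisition time
print(time_to_string(t_example))                        # prints '2010_01_05_03_12_44'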
This function can be used to write a single time slice from an xarray dataset to a GeoTIFF file
In [30]:
from utils.data_cube_utilities import dc_utilities
def export_slice_to_geotiff(ds, path):
    dc_utilities.write_geotiff_from_xr(path,
                                       ds.astype(np.float32),
                                       list(combined_dataset.data_vars.keys()),
                                       crs="EPSG:4326")
For each time slice in the dataset we call export_slice_to_geotiff
In [31]:
def export_xarray_to_geotiff(ds, path):
    for t in ds.time:
        time_slice_xarray = ds.sel(time = t)
        export_slice_to_geotiff(time_slice_xarray,
                                path + "_" + time_to_string(t) + ".tif")
In [32]:
# export_xarray_to_geotiff(combined_dataset, "geotiffs/landsat7")
Check to see what files exist in the geotiffs directory
In [33]:
# !ls -lah geotiffs/*.tif