To detect changes in plant life, we use a measure called NDVI (the Normalized Difference Vegetation Index).
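NDVI compares near-infrared (NIR) and red reflectance on a per-pixel basis:

NDVI = (NIR - Red) / (NIR + Red)

Values range from -1 to 1: healthy vegetation reflects strongly in NIR and absorbs red light, producing values near 1, while water and bare soil produce values near or below 0.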
In [1]:
%matplotlib inline
# Import the datacube and the API
import datacube
from utils.data_cube_utilities.data_access_api import DataAccessApi
from dc_notebook_utilities import generate_metadata_report
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
# Create an instance of the datacube and API
api = DataAccessApi()
dc = api.dc
In [2]:
# Get available products
products = dc.list_products()
# List LANDSAT 8 products (the platform used in this demonstration).
print(products[["platform", "name"]][products.platform == "LANDSAT_8"])
In [3]:
# These are the platform (satellite) and product (datacube set)
# used for this demonstration.
platform = "LANDSAT_8"
product = "ls8_usgs_sr_scene"
The magnitudes of the different wavelengths of light are quantized and stored on a per-pixel basis. NDVI requires only the NIR and red bands, but many more wavelengths and several additional measurements are available. One such additional measurement is called pixel_qa, which encodes the quality of each pixel for analysis. A full breakdown of the values stored in pixel_qa is beyond the scope of this notebook, but we encourage you to check our GitHub for more information on the meaning behind the values stored within.
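As an illustration of how a bit-packed quality band like pixel_qa can be decoded, the sketch below tests individual bit flags with NumPy. The bit positions shown follow the Landsat Collection 1 surface-reflectance convention and are an assumption here, and qa_flag_set is a hypothetical helper, not part of the utilities used in this notebook; verify the layout against the documentation for the product you are using.

import numpy as np

# Assumed bit layout (Landsat Collection 1 surface reflectance);
# confirm against the product documentation before relying on it.
PIXEL_QA_BITS = {'fill': 0, 'clear': 1, 'water': 2,
                 'cloud_shadow': 3, 'snow': 4, 'cloud': 5}

def qa_flag_set(pixel_qa, flag):
    # True wherever the requested flag's bit is set.
    return (pixel_qa & (1 << PIXEL_QA_BITS[flag])) != 0

# Example: a boolean cloud mask from a loaded dataset.
# cloud_mask = qa_flag_set(landsat_dataset.pixel_qa.values, 'cloud')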
In [4]:
# Get the extents of the cube.
descriptor = api.get_query_metadata(platform=platform, product=product, measurements=[])
# Store the latitudinal and longitudinal resolution of the product.
lat, lon = products.resolution[products.platform == platform].iloc[0]
In [5]:
from utils.data_cube_utilities.dc_display_map import display_map
# Save extents.
min_date, max_date = descriptor['time_extents']
min_lat, max_lat = descriptor['lat_extents']
min_lon, max_lon = descriptor['lon_extents']
# Format the date strings.
min_date_str = min_date.strftime('%Y-%m-%d')
max_date_str = max_date.strftime('%Y-%m-%d')
# Round GPS coordinates to 3 decimal places.
min_lat_rounded, max_lat_rounded = (round(min_lat, 3), round(max_lat, 3))
min_lon_rounded, max_lon_rounded = (round(min_lon, 3), round(max_lon, 3))
# Display the total area available in this datacube product for analysis.
display_map(latitude = (min_lat_rounded, max_lat_rounded), longitude = (min_lon_rounded, max_lon_rounded))
Out[5]:
In [6]:
# Display metadata.
generate_metadata_report(min_date_str, max_date_str,
                         min_lon_rounded, max_lon_rounded, lon,
                         min_lat_rounded, max_lat_rounded, lat)
In [7]:
# Specify latitude and longitude bounds within the full extents
# shown in the metadata report above (reduce area for faster processing times).
# Kenya - Lake Kamnarok (large)
lon_small = (35.61, 35.6325)
lat_small = (0.6, 0.6375)
# Display the subset for analysis.
display_map(latitude = lat_small, longitude = lon_small)
Out[7]:
In [8]:
from datetime import datetime as dt
# Select a date range to retrieve data for.
start_date = dt.strptime('2018-01-01', '%Y-%m-%d') # Kenya - Lake Kamnarok (large)
end_date = dt.strptime('2018-12-31', '%Y-%m-%d') # Kenya - Lake Kamnarok (large)
time_extents = (start_date, end_date)
landsat_dataset = dc.load(lat = lat_small,
                          lon = lon_small,
                          platform = platform,
                          time = time_extents,
                          product = product,
                          measurements = ['red', 'green', 'blue', 'swir1', 'swir2', 'nir', 'pixel_qa'],
                          group_by='solar_day')
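Before masking, it can help to confirm what dc.load returned: an xarray.Dataset with one (time, latitude, longitude) array per requested measurement. A quick inspection:

# Inspect the dimensions and data variables of the loaded dataset.
print(landsat_dataset.dims)
print(list(landsat_dataset.data_vars))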
In [9]:
from utils.data_cube_utilities.clean_mask import landsat_qa_clean_mask
# Get the clean mask for the LANDSAT satellite platform
clean_mask = landsat_qa_clean_mask(landsat_dataset, platform)
Before masking clouds and scan lines
In [10]:
plt.imshow(landsat_dataset.isel(time=0).red)
plt.title("Red band before masking ({})".format(str(landsat_dataset.time.values[0])))
plt.show()
After masking clouds and scan lines
In [11]:
cleaned_landsat_dataset = landsat_dataset.where(clean_mask)
plt.imshow(cleaned_landsat_dataset.isel(time=0).red)
plt.title("Red band after masking ({})".format(str(landsat_dataset.time.values[0])))
plt.show()
In [12]:
from utils.data_cube_utilities.dc_ndvi_anomaly import NDVI
ndvi_data_arr = NDVI(cleaned_landsat_dataset)
ndvi = ndvi_data_arr.to_dataset(name='ndvi')
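The imported NDVI utility computes the standard normalized difference shown at the start of this notebook. If the utility is unavailable, an equivalent computation directly on the bands (a minimal sketch assuming the dataset keeps its 'nir' and 'red' variables) is:

# (NIR - Red) / (NIR + Red), computed per pixel and per time step.
ndvi_manual = ((cleaned_landsat_dataset.nir - cleaned_landsat_dataset.red) /
               (cleaned_landsat_dataset.nir + cleaned_landsat_dataset.red))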
In [13]:
acquisition_dates = np.array([pd.to_datetime(str(time)).strftime('%Y-%m-%d') for time in cleaned_landsat_dataset.time.values])
print("Acquisition Dates:")
print(acquisition_dates)
In [14]:
acquisition_date = '2018-04-26'
# `np.argmax` returns the index of the first match, or 0 when there is no
# match at all, so verify that a result of 0 is a genuine match.
acquisition_index = np.argmax(acquisition_dates == acquisition_date)
if acquisition_index == 0 and acquisition_date != acquisition_dates[0]:
    raise ValueError("Acquisition date '{}' is not in the list of acquisition dates above.".format(acquisition_date))
In [15]:
# Optional check: which time slices contain any valid (non-nodata) pixels.
# (cleaned_landsat_dataset.red != -9999).any(['latitude', 'longitude'])
In [16]:
# Optional check: list acquisitions with any non-NaN red-band pixels after masking.
# Note: `xarray.ufuncs` is deprecated in recent xarray; `numpy.isnan` can be
# applied to DataArrays directly instead.
# from xarray.ufuncs import isnan as xr_nan
# cleaned_landsat_dataset.time[(~xr_nan(cleaned_landsat_dataset.red)).any(['latitude', 'longitude'])]
In [17]:
# True color plot
from utils.data_cube_utilities.dc_rgb import rgb
rgb(cleaned_landsat_dataset, at_index=acquisition_index,
    bands=['red', 'green', 'blue'])
plt.show()
In [18]:
# False color plot
rgb(cleaned_landsat_dataset, at_index=acquisition_index, bands=['green', 'swir1', 'nir'])
plt.show()
In [19]:
ndvi_plotting_data = ndvi.isel(time=acquisition_index)
In [20]:
from utils.data_cube_utilities.plotter_utils import create_discrete_color_map
th = [-0.2, 0.2] # Color thresholds: three bins - below -0.2, from -0.2 to 0.2, and above 0.2.
cmap = create_discrete_color_map(th=th, colors=['green', 'blue', 'red'], data_range=[-1,1])
plt.figure(figsize=(10,10))
plt.imshow(ndvi_plotting_data.ndvi.values, cmap=cmap)
plt.show()
In [21]:
ndvi_dataset = ndvi_data_arr.to_dataset(name='NDVI')
In [22]:
from utils.data_cube_utilities.plotter_utils import xarray_time_series_plot
xarray_time_series_plot(ndvi_dataset, {'NDVI':{'mean':[{'line':{}}]}},
                        show_legend=False)
plt.title("Mean NDVI")
plt.show()
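The utility call above aggregates and plots in one step. The same mean-NDVI time series can also be produced with plain xarray and matplotlib (a sketch assuming the spatial dimensions are named 'latitude' and 'longitude'):

# Spatial mean of NDVI for each acquisition, plotted as a line.
mean_ndvi = ndvi_dataset.NDVI.mean(dim=['latitude', 'longitude'])
mean_ndvi.plot(marker='o')
plt.title("Mean NDVI")
plt.show()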
In [23]:
landsat_dataset = dc.load(lat = lat_small,
                          lon = lon_small,
                          platform = platform,
                          time = time_extents,
                          product = product,
                          measurements = ['red', 'nir', 'pixel_qa'],
                          group_by='solar_day')
In [24]:
from utils.data_cube_utilities.clean_mask import landsat_qa_clean_mask
# Get the clean mask for the LANDSAT satellite platform.
clean_mask = landsat_qa_clean_mask(landsat_dataset, platform)
In [25]:
from utils.data_cube_utilities.dc_mosaic import create_mosaic
cleaned_landsat_dataset_mosaic = \
create_mosaic(landsat_dataset, reverse_time=False, clean_mask=clean_mask)
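A mosaic collapses the time stack into a single composite by choosing, for each pixel, one value from its clean observations. The sketch below illustrates that idea with plain xarray by keeping each pixel's first clean observation; it is an illustration of the compositing concept, not the exact implementation of create_mosaic (note that xarray's bfill requires the bottleneck package).

# Illustrative composite: NaN-out unclean pixels, backfill along time,
# then take the first slice so each pixel holds its earliest clean value.
masked = landsat_dataset.where(clean_mask)
first_clean = masked.bfill(dim='time').isel(time=0)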
In [26]:
# Get a list of available image acquisition dates.
acquisitions_list = landsat_dataset.time.values
print("There are {} acquisitions to choose from:".format(len(acquisitions_list)))
for index, acquisition in enumerate(acquisitions_list):
print("Index: {}, Acquisition: {}".format(index, pd.to_datetime(str(acquisition)).strftime('%Y-%m-%d')))
In [27]:
from utils.data_cube_utilities.dc_mosaic import create_mosaic, create_median_mosaic, \
create_max_ndvi_mosaic, create_min_ndvi_mosaic
# Select a scene to check for anomalies (one of the acquisition dates shown above).
acquisition_index = 6 # Check for anomalies on 2018-02-21. (Kenya - Lake Kamnarok)
scene_sel_datetime = acquisitions_list[acquisition_index]
print("Date selected to check for anomalies: ", scene_sel_datetime)
# Select scenes to form a baseline (a list of indices into the acquisitions above).
baseline_date_indices = list(range(0,len(acquisitions_list))) # Use the full time range as the baseline (Kenya - Lake Kamnarok)
baseline_datetimes = np.array([pd.to_datetime(str(date)) for date in acquisitions_list[baseline_date_indices]])
print("First and last dates selected for baseline: ", [np64.strftime('%Y-%m-%d') for np64 in baseline_datetimes[[0,-1]]])
# Create a dictionary of the different mosaic method options.
mosaic_methods = {'Most Recent': create_mosaic, 'Least Recent': create_mosaic,
                  'Median': create_median_mosaic, 'Max NDVI': create_max_ndvi_mosaic,
                  'Min NDVI': create_min_ndvi_mosaic}
# Note: 'Most Recent' and 'Least Recent' both map to create_mosaic; selecting
# between them would require passing a different reverse_time argument,
# which the call below does not do.
# Select a mosaic method for the baseline.
# (One of ['Most Recent','Least Recent','Median','Max NDVI','Min NDVI']).
mosaic_method_label = 'Max NDVI'
mosaic_method = mosaic_methods[mosaic_method_label]
print("Selected mosaic method: ", mosaic_method_label)
# Select a percentage threshold for anomalies.
threshold_percent = 0.60
print("Threshold percentage selected for anomalies: {}%".format(threshold_percent*100))
In [28]:
baseline_mosaic = mosaic_method(landsat_dataset.sel(time=baseline_datetimes),
                                clean_mask=clean_mask.sel(time=baseline_datetimes))
In [29]:
import sys # Required for machine epsilon (in case the baseline NDVI is zero)
# Calculate the NDVI baseline values
from utils.data_cube_utilities.dc_ndvi_anomaly import NDVI
ndvi_baseline = NDVI(baseline_mosaic)
# Calculate the NDVI values in the target scene
ndvi_scene = NDVI(landsat_dataset.sel(time=scene_sel_datetime))
# Determine the percentage change
percentage_change = abs((ndvi_baseline - ndvi_scene) / (ndvi_baseline+sys.float_info.epsilon))
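For example, if a pixel's baseline NDVI is 0.5 and its NDVI in the target scene is 0.2, the percentage change is |0.5 - 0.2| / 0.5 = 0.6, i.e. 60%, which meets the 0.60 threshold chosen above.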
In [30]:
import matplotlib.pyplot as plt
# Use a cutoff value
anom = percentage_change > threshold_percent
# Set plot size
plt.figure(figsize = (15,12))
anom.plot(cmap='seismic')
plt.show()
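To quantify the result rather than judge it visually, the fraction of flagged pixels can be computed from the boolean array (a minimal sketch; NaN comparisons evaluate to False, so pixels with no clean data count as non-anomalous here):

# Fraction of pixels whose NDVI change exceeds the threshold.
anomalous_fraction = float(anom.mean())
print("Anomalous fraction: {:.1%}".format(anomalous_fraction))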
If you used the original values for everything up to this point, a clear NDVI anomaly is visible. This indicates a measurable difference in the amount of photosynthesizing vegetation in the lake basin compared to the standard defined by the baseline composite image.
In [31]:
display_map(latitude = lat_small, longitude = lon_small)
Out[31]: