by Brian Killough (NASA SEO)
In [1]:
import numpy as np
import datacube
import utils.data_cube_utilities.data_access_api as dc_api
# Open a Data Cube access-API session; `api.dc` exposes the underlying
# datacube.Datacube handle used for listing and loading products below.
api = dc_api.DataAccessApi()
dc = api.dc
You might want to learn more about what data is available and how it is stored.
In [2]:
# List every product indexed in this Data Cube; the bare expression on the
# last line renders the table as the cell output.
list_of_products = dc.list_products()
list_of_products
Out[2]:
In [3]:
# Platform / product pair to analyse; must match a row of the product
# listing above.
platform = "LANDSAT_7"
product = "ls7_usgs_sr_scene"
In [4]:
# Get product extents
# Query the full spatial and temporal extents of the chosen product
# (empty `measurements` list: metadata only, no pixel data is loaded).
prod_extents = api.get_query_metadata(platform=platform, product=product, measurements=[])
full_lat = prod_extents['lat_extents']
print("Lat bounds:", full_lat)
full_lon = prod_extents['lon_extents']
print("Lon bounds:", full_lon)
# Format the (start, end) datetimes as YYYY-MM-DD strings for printing.
time_extents = list(map(lambda time: time.strftime('%Y-%m-%d'), prod_extents['time_extents']))
print("Time bounds:", time_extents)
Visualize the full area
In [5]:
from utils.data_cube_utilities.dc_display_map import display_map
# Render an interactive map of the product's full footprint.
display_map(full_lat, full_lon)
Out[5]:
In [6]:
# ######### Colombia - Cartagena ##################
#lon = (-74.863, -74.823)
#lat = (1.326, 1.357)
######### Vietnam - Buan Tua Srah Lake ##################
#lon = (108.02, 108.15)
#lat = (12.18 , 12.30)
######## Kenya - Lake Nakuru ##################
lon = (36.02, 36.13)
lat = (-0.42, -0.28)
# Analysis window (one calendar year).
# NOTE(review): this rebinds `time_extents`, shadowing the product-wide
# date strings computed above -- intentional, but easy to miss.
time_extents = ('2015-01-01', '2015-12-31')
Visualize the full area
In [7]:
display_map(lat, lon)
Out[7]:
In [8]:
# Load the surface-reflectance bands plus the pixel_qa QA band for the
# selected region and time window into an in-memory dataset.
dataset = dc.load(latitude = lat,
longitude = lon,
platform = platform,
time = time_extents,
product = product,
measurements = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2', 'pixel_qa'])
Single band visualization
For a quick inspection, let's look at two images. The first image will allow the selection of any band (red, blue, green, nir, swir1, swir2). The second image will mask clouds with bright red on a NIR image.
Select the desired acquisition in the block below. You can select a value from 1 up to the number of time slices noted in the block above. Change the comment statements below to select the bands for the first image.
In [9]:
acquisition_number = 20
# select an acquisition number from 1 to "time" using the array limits above
# NOTE(review): this value is passed to .isel(time=...), which is 0-based,
# so valid values are 0 .. len(dataset.time) - 1, not 1 .. "time".
In [10]:
%matplotlib inline
# Grey-scale plot of the NIR band for the chosen time slice.
dataset.nir.isel(time = acquisition_number).plot(cmap = "Greys")
Out[10]:
In [11]:
from utils.data_cube_utilities.clean_mask import \
landsat_qa_clean_mask, landsat_clean_mask_invalid
# Combine three masks: the QA-based cloud mask, a no-data (-9999) screen
# across all bands, and the library's invalid-value mask. `persist()`
# keeps the (presumably dask-backed) result in memory for reuse below.
clean_mask = (landsat_qa_clean_mask(dataset, platform) &
((dataset != -9999).to_array().all('variable')) &
landsat_clean_mask_invalid(dataset)).persist()
# Pixels failing the combined mask become NaN.
cleaned_dataset = dataset.where(clean_mask)
In [12]:
from matplotlib.cm import Greys as red_on_grey
# Mark masked (NaN) pixels in semi-transparent red on the grey colormap.
# NOTE(review): set_bad mutates the shared matplotlib `Greys` colormap
# object, which can affect other plots using it -- confirm acceptable.
red_on_grey.set_bad('red',.5)
cleaned_dataset.isel(time = acquisition_number).nir.plot(cmap = red_on_grey)
Out[12]:
In [13]:
from utils.data_cube_utilities.dc_rgb import rgb
rgb_da = dataset[['red', 'green', 'blue']].to_array()
# Substitute black pixels for ones with no_data (-9999) values.
rgb_da = rgb_da.where(rgb_da!=-9999, 0)
# Use values fairly close but not equal to the minimum and maximum
# values as the values of minimum and maximum intensity in the image.
# The goal is to provide fairly consistent image brightness.
vmin = np.quantile(rgb_da.values, 0.10)
vmax = np.quantile(rgb_da.values, 0.90)
In [14]:
# True-colour RGB of the chosen time slice, stretched to the 10th/90th
# percentile intensities computed above.
rgb(dataset.isel(time=acquisition_number),
min_possible=vmin, max_possible=vmax)
Out[14]:
In [15]:
red = [255,0,0]
In [16]:
# Same RGB image, with masked (non-clean) pixels painted red.
rgb(dataset.isel(time=acquisition_number),
# index value is the time slice number
paint_on_mask = [(~clean_mask[acquisition_number], red)],
min_possible=vmin, max_possible=vmax)
Out[16]:
Most Recent Pixel Mosaic
Masks clouds from imagery and uses the most recent cloud-free pixels.
In [17]:
from utils.data_cube_utilities.dc_mosaic import create_mosaic
from utils.data_cube_utilities.dc_utilities import ignore_warnings
# Most-recent-pixel composite: each output pixel takes its latest
# cloud-free observation. ignore_warnings suppresses library warnings.
most_recent_composite = ignore_warnings(create_mosaic, cleaned_dataset, clean_mask)
In [18]:
most_recent_composite.nir.plot(cmap = "Greys")
Out[18]:
In [19]:
rgb(most_recent_composite, min_possible=vmin, max_possible=vmax)
Out[19]:
Median Mosaic
Masks clouds from imagery using the median valued cloud-free pixels in the time series
In [20]:
from utils.data_cube_utilities.dc_mosaic import create_median_mosaic
# Median composite: each pixel is the per-band median of its cloud-free
# observations over the time series.
median_composite = ignore_warnings(create_median_mosaic, cleaned_dataset, clean_mask)
In [21]:
median_composite.nir.plot(cmap = "Greys")
Out[21]:
In [22]:
rgb(median_composite, min_possible=vmin, max_possible=vmax)
Out[22]:
In [23]:
from utils.data_cube_utilities.dc_water_classifier import wofs_classify
# WOfS water classification of the median composite; mosaic=True
# presumably tells the classifier the input has no time dimension --
# confirm against wofs_classify's signature.
water_classification = wofs_classify(median_composite, mosaic = True)
In [24]:
water_classification.wofs.plot()
Out[24]:
Fractional Cover (FC) is used for landcover type estimation of vegetation, non-green vegetation, and bare soil of each pixel. We use a model from CSIRO (Juan Gerschmann) and apply it to a median mosaic. See the CSIRO Fractional Cover documentation for details.
The PV component is the estimated probability of a pixel being photosynthetic vegetation. The BS component is the estimated probability of a pixel being bare soil. The NPV component is the estimated probability of a pixel being non-photosynthetic vegetation.
In [25]:
from utils.data_cube_utilities.dc_fractional_coverage_classifier import frac_coverage_classify
# Fractional cover on the median composite: per-pixel estimates for
# photosynthetic vegetation (pv), non-photosynthetic vegetation (npv)
# and bare soil (bs).
frac_classes = frac_coverage_classify(median_composite)
Show the Probabilities of Pixels Being Photosynthetic Vegetation
In [26]:
frac_classes.pv.plot()
Out[26]:
Show the Probabilities of Pixels Being Bare Soil
In [27]:
frac_classes.bs.plot(cmap = "Greys")
Out[27]:
Plot a False Color RGB result where RGB = bs/pv/npv.
In [28]:
# False-colour composite: R = bare soil, G = photosynthetic vegetation,
# B = non-photosynthetic vegetation, each stretched over [0, 100].
rgb(frac_classes, bands = ['bs', 'pv', 'npv'],
min_possible=0, max_possible=100)
Out[28]:
$$ NDVI = \frac{NIR - RED}{NIR + RED} $$
NDVI (Normalized Difference Vegetation Index)
A derived index that correlates well with the existence of vegetation.
In [29]:
def NDVI(dataset):
    """Normalized Difference Vegetation Index: (NIR - red) / (NIR + red).

    `dataset` must expose `nir` and `red` band attributes (e.g. an
    xarray Dataset); the result has the same shape as the inputs.
    """
    nir_band = dataset.nir
    red_band = dataset.red
    return (nir_band - red_band) / (nir_band + red_band)
$$ NDWI = \frac{GREEN - NIR}{GREEN + NIR} $$
NDWI (Normalized Difference Water Index)
A derived index that correlates well with the existence of water.
In [30]:
def NDWI(dataset):
    """Normalized Difference Water Index: (green - NIR) / (green + NIR).

    `dataset` must expose `green` and `nir` band attributes; high values
    correlate with open water.
    """
    green_band = dataset.green
    nir_band = dataset.nir
    return (green_band - nir_band) / (green_band + nir_band)
$$ NDBI = \frac{SWIR - NIR}{SWIR + NIR} $$
NDBI (Normalized Difference Built-up Index)
A derived index that correlates well with the existence of urbanization.
In [31]:
def NDBI(dataset):
    """Normalized Difference Built-up Index: (SWIR2 - NIR) / (SWIR2 + NIR).

    `dataset` must expose `swir2` and `nir` band attributes; high values
    correlate with built-up / urban surfaces.
    """
    swir_band = dataset.swir2
    nir_band = dataset.nir
    return (swir_band - nir_band) / (swir_band + nir_band)
In [32]:
# Evaluate all three indices on the cloud-free median composite.
ndbi = NDBI(median_composite) # Urbanization - Reds
ndvi = NDVI(median_composite) # Dense Vegetation - Greens
ndwi = NDWI(median_composite) # High Concentrations of Water - Blues
In [33]:
(ndbi).plot(cmap = "Reds", vmin=-1, vmax=1)
Out[33]:
In [34]:
(ndvi).plot(cmap = "Greens", vmin=-1, vmax=1)
Out[34]:
In [35]:
(ndwi).plot(cmap = "Blues", vmin=-1, vmax=1)
Out[35]:
Merge into one large Dataset
If your data-arrays share the same set of coordinates, or if you feel that you'll be using these values together in the future, you should consider merging them into a dataset
In [36]:
# Promote each index DataArray to a single-variable Dataset so they can
# be merged on their shared coordinates into one Dataset.
ds_ndvi = ndvi.to_dataset(name = "NDVI")
ds_ndwi = ndwi.to_dataset(name= "NDWI")
ds_ndbi = ndbi.to_dataset(name = "NDBI")
urbanization_dataset = ds_ndvi.merge(ds_ndwi).merge(ds_ndbi)
In [37]:
# urbanization_dataset.NDBI.values *= 1 # Scale NDBI for visual purposes
# False-colour composite: R = NDBI (urban), G = NDVI (vegetation),
# B = NDWI (water), each stretched over [-1, 1].
rgb(urbanization_dataset, bands = ['NDBI', 'NDVI', 'NDWI'],
min_possible=-1, max_possible=1)
Out[37]:
In [38]:
from utils.data_cube_utilities.dc_water_quality import tsm
In [39]:
# Restrict TSM (total suspended matter) to pixels classified as water
# (wofs == 1) by the earlier classification.
mask_that_only_includes_water_pixels = water_classification.wofs == 1
tsm_dataset = tsm(median_composite, clean_mask = mask_that_only_includes_water_pixels )
In [40]:
tsm_dataset.tsm.plot(cmap = "jet")
Out[40]:
In [41]:
import numpy as np
def dataset_cloud_percentage_time_series(dataset):
    """Return, for each time slice, the percentage of pixels whose
    pixel_qa value is NOT one of the accepted codes (64+2, 64+4, 64+8).

    NOTE(review): these presumably correspond to Landsat pixel_qa
    "clear"-type codes -- confirm against the pixel_qa attribute table.
    The result is a 1-D series over time named
    "cloud_coverage_percentage".
    """
    qa = dataset.pixel_qa
    # True where the pixel carries one of the accepted QA values.
    accepted = (qa == 64 + 2) | (qa == 64 + 4) | (qa == 64 + 8)
    # Flip it: 1 marks an obscured pixel, 0 an accepted one.
    obscured = np.invert(accepted).astype(np.int16)
    # Spatial mean per time slice = fraction of obscured pixels (0..1).
    fraction_per_slice = obscured.mean(dim = ['latitude', 'longitude'])
    return (fraction_per_slice * 100).rename("cloud_coverage_percentage")
In [42]:
time_series = dataset_cloud_percentage_time_series(dataset)
In [43]:
time_series.plot.hist()
Out[43]:
In [44]:
import matplotlib.pyplot as plt
# Scatter cloud-coverage percentage against acquisition date.
times = time_series.time.values
values = time_series.values
plt.title('Cloud Coverage')
plt.scatter(times, values)
Out[44]:
In [45]:
ts_water_classification = wofs_classify(dataset, clean_mask)
In [46]:
# Apply nan to no_data values
ts_water_classification = ts_water_classification.where(ts_water_classification != -9999)
##Time series aggregation that ignores nan values.
# Mean of the 0/1 wofs flag over time, times 100 = percentage of valid
# observations in which each pixel was classified as water.
water_classification_percentages = (ts_water_classification.mean(dim = ['time']) * 100).wofs.rename('water_classification_percentages')
In [47]:
## import color-scheme and set nans to black
from matplotlib.cm import jet_r as jet_r
# NOTE(review): set_bad mutates the shared matplotlib `jet_r` colormap
# object -- confirm no other plot depends on its default bad-color.
jet_r.set_bad('black',1)
## apply nan to percentage values that aren't greater than 0, then plot
water_classification_percentages\
.where(water_classification_percentages > 0)\
.plot(cmap = jet_r)
Out[47]: