In [1]:
## Setting up environment: python modules etc
# * Optional!
# * Skip this section if you use other means to ensure correct env setup,
# * such as module load before starting jupyter notebook.
# Make my in-development source trees importable in this notebook.
import sys

# Guard against duplicates so re-running this cell stays idempotent
# (the original unconditionally inserted/appended on every run).
_AGDC_PATH = '/g/data1/u46/fxz547/Githubz/agdc-v2'
_WOFS_PATH = '/g/data1/u46/fxz547/Githubz/wofs'
if _AGDC_PATH not in sys.path:
    sys.path.insert(0, _AGDC_PATH)  # prepend so it shadows any installed agdc-v2
if _WOFS_PATH not in sys.path:
    sys.path.append(_WOFS_PATH)
In [2]:
from IPython.display import display
from pprint import pprint
from __future__ import print_function
from collections import defaultdict
import xarray as xr
import xarray.ufuncs
import datacube
from datacube.api import GridWorkflow
from datacube.storage import masking
import numpy as np
import math
#from pyproj import Proj, transform
from osgeo import gdal, osr, ogr
from rasterio.warp import reproject, RESAMPLING
import rasterio
# Plotting configuration: render matplotlib figures inline in the notebook.
%matplotlib inline
import matplotlib.pyplot as plt
from pylab import rcParams
rcParams['figure.figsize'] = 10, 10 # increase plot size a bit...
rcParams['axes.formatter.useoffset'] = False # disable scalar formatter / offset in axes labels
Python environment and modules are ready to go
In [3]:
# Open a connection to the datacube index; the app name identifies this client.
dc = datacube.Datacube( app='wofs-dev')
#or to use a specific config file: dc = datacube.Datacube(config='/home/547/adh547/unification.datacube.conf', app='wofs-dev')
In [4]:
# where is this function dc.grid_spec_for_product( product='ls5_nbar_albers')
# dc.grid_spec_for_product( product='ls5_nbar_albers')
In [ ]:
In [5]:
# use ? or ?? to check the function
#GridWorkflow?
In [4]:
#gw = GridWorkflow(dc, product='ls5_nbar_albers')
#OR
# NOTE(review): this API takes the index object (dc.index), not the Datacube
# itself (see the commented alternative above) — confirm against the datacube
# version in use.
gw = GridWorkflow(dc.index , product='ls5_nbar_albers' )
In [7]:
#gw.list_cells?
In [5]:
# product_type = nbar | pqa
# Query footprint: roughly the ACT region, all three Landsat platforms,
# 1990 through March 2016.
qdict = {
    'latitude': (-36.0, -35.0),
    'longitude': (149.01, 150.1),
    'platform': ['LANDSAT_5', 'LANDSAT_7', 'LANDSAT_8'],
    'time': ('1990-01-01', '2016-03-31'),
}
# alternative, smaller footprint:
# longitude=(149.06,149.18), latitude=(-35.27, -35.33), time=('1996-01-01', '2016-03-20')
cells = gw.list_cells(product_type='nbar', product='ls5_nbar_albers', **qdict)
len(cells)
Out[5]:
In [9]:
# cells is now a huge dict, like the following
# defaultdict(dict,
# {(15, -41): {'geobox': <datacube.model.GeoBox at 0x7fc05f3e3390>,
# 'sources': <xarray.DataArray (time: 111)>
# array([ (Dataset <id=6ce1955f-5ef9-4876-8bb8-69638f45efe8 type=ls5_nbar_albers location=/g/data/u46/users/gxr547/unicube/LS5_TM_NBAR/LS5_TM_NBAR_3577_15_-41_19900302231139000000.nc>,),
# (Dataset <id=94efe23b-f699-4a16-a244-ab0fc6b58461 type=ls5_nbar_albers location=/g/data/u46/users/gxr547/unicube/LS5_TM_NBAR/LS5_TM_NBAR_3577_15_-41_19900403231042000000.nc>,),
# (Dataset <id=e0f376e7-a7db-4cbb-b137-b9a2865c2b02 type=ls5_nbar_albers location=/g/data/u46/users/gxr547/unicube/LS5_TM_NBAR/LS5_TM_NBAR_3577_15_-41_19900708231050000000.nc>,),
# (Dataset <id=8eb3a1af-e906-419e-a22e-d523a65fa830 type=ls5_nbar_albers location=/g/data/u46/users/gxr547/unicube/LS5_TM_NBAR/LS5_TM_NBAR_3577_15_-41_19900724231046000000.nc>,),
# (Dataset <id=7c317cb9-b8de-48b8-92a5
In [6]:
# To get the cell indexes (NOTE: on Python 3 this is a dict view, not a list)
cells.keys()
Out[6]:
In [7]:
# list_products() returns a pandas DataFrame describing every registered product.
prodlist = dc.list_products()
# Show just the row for the product we are working with.
prodlist.loc[prodlist['name'] == 'ls5_nbar_albers']
Out[7]:
In [8]:
# Extract platform/instrument for this product as a numpy ndarray.
# Presumably shape (1, 2): [[platform, instrument]] — the [0, j] indexing
# below relies on that; TODO confirm.
pppl=prodlist[prodlist.name=='ls5_nbar_albers'][['platform','instrument']].values
In [9]:
type(pppl)
Out[9]:
In [10]:
pppl.shape
Out[10]:
In [11]:
# Pull out the satellite name and sensor strings from the 2-D array.
satname=pppl[0,0]
satsensor=pppl[0,1]
print(satname,satsensor)
In [12]:
# List every NBAR tile intersecting the (small) query footprint and time span.
nbar_tiles = gw.list_tiles(
    product_type='nbar',
    product='ls5_nbar_albers',
    platform=['LANDSAT_5', 'LANDSAT_7', 'LANDSAT_8'],
    longitude=(149.06, 149.18),
    latitude=(-35.27, -35.33),
    time=('1996-01-01', '2016-03-20'),
)
sorted(nbar_tiles.keys())
Out[12]:
In [13]:
# This nbar_tiles includes scenes as well as tiles — a very long list.
# nbar_tiles has 18 cells as keys
len(nbar_tiles)
Out[13]:
When using the search terms, there is not currently a way to filter out non-tile-based data, such as scenes, which extend far beyond the requested area. For now we must use product='ls5_nbar_albers'.
In [14]:
# NOTE(review): a bug was suspected when only 2 platforms were listed — verify
#qdict={'latitude': (-36.0, -35.0), 'longitude': (149.01, 150.1),'platform':['LANDSAT_5', 'LANDSAT_7'], 'time': ('1990-01-01', '2016-03-31')}
qdict = {
    'latitude': (-36.0, -35.0),
    'longitude': (149.01, 150.1),
    'platform': ['LANDSAT_5', 'LANDSAT_7', 'LANDSAT_8'],
    'time': ('1990-01-01', '2016-03-31'),
}
print(qdict)
nbar_tiles = gw.list_tiles(product='ls5_nbar_albers', **qdict)
# alternative footprint:
# longitude=(149.06, 149.18), latitude=(-35.27, -35.33), time=('1996-01-01', '2016-03-20')
sorted_keylist = sorted(nbar_tiles.keys())
print(len(sorted_keylist))
sorted_keylist[:5]
Out[14]:
In [19]:
#nbar_tiles # only the nbar_albers tiles
In [ ]:
In [ ]:
In [15]:
# Pixel Quality Tiles — same query dict as the NBAR listing above, so the
# two tile sets should cover the same cells/times.
pq_tiles = gw.list_tiles(product='ls5_pq_albers',**qdict)
# ,longitude=(149.06, 149.18), latitude=(-35.27, -35.33), time=('1996-01-01', '2016-03-20'))
len(sorted(pq_tiles.keys()))
Out[15]:
In [16]:
# after unification:
import logging
def get_nbarpqa_tiles_by_cell( acell, qdict):
    """
    Return paired NBAR/PQA tiles for one grid cell.

    :param acell: a cell index tuple, e.g. (15, -40)
    :param qdict: extra query keywords forwarded to GridWorkflow.list_tiles
                  (e.g. platform, time)
    :return: dict keyed by ((x, y), time) -> {'nbar': tile, 'pqa': tile};
             entries without both products are removed
    """
    # gw.list_tiles((15,-40), product='ls5_nbar_albers')
    nbar_tiles = gw.list_tiles(acell, product='ls5_nbar_albers',
                               **qdict)  # , platform='LANDSAT_8') # ,time=('2000', '2007'))
    pq_tiles = gw.list_tiles(acell, product='ls5_pq_albers',
                             **qdict)  # , platform='LANDSAT_8') # , time=('2000', '2007'))
    if len(pq_tiles) == len(nbar_tiles):
        # bug fix: the original passed %s-args straight to print(), which
        # printed the raw format string and the counts as separate values
        print("The cells have %s nbar and %s pq tiles" % (len(nbar_tiles), len(pq_tiles)))
    else:
        # logging.warn is deprecated; warning() is the supported spelling
        logging.warning("Mismatched NBAR-PQA tiles: The cells have %s nbar and %s pq tiles",
                        len(nbar_tiles), len(pq_tiles))

    # Cell, Time -> Product -> TileDef
    tile_def = defaultdict(dict)
    for index, tile in nbar_tiles.items():
        tile_def[index[:2], index[2]]['nbar'] = tile
    for index, tile in pq_tiles.items():
        tile_def[index[:2], index[2]]['pqa'] = tile

    # bug fix: popping while iterating over tile_def.items() raises
    # RuntimeError on Python 3 (dict changed size during iteration);
    # iterate over a snapshot instead.
    for index, products in list(tile_def.items()):
        if len(products) < 2:
            logging.warning('un-paired nbar-pqa product for cell %s', str(index))
            logging.warning("remove this un-paired tile from the dict")
            tile_def.pop(index)
        else:
            logging.debug('%s,%s', index, len(products))
    return tile_def
Get the key for the first tile:
In [19]:
#Get the key for the tile:
# Narrow the query to a single platform and one quarter for a quick run.
# NOTE(review): `acell` is defined but the literal (15,-40) is passed below —
# presumably intentional, but consider passing acell instead.
acell=(15,-40)
#qdict = {'platform': ['LANDSAT_5'], 'time': ('1990-01-01', '1990-03-31')}
qdict = {'platform': ['LANDSAT_5'], 'time': ('2011-01-01', '2011-03-31')}
tile_def=get_nbarpqa_tiles_by_cell( (15,-40), qdict )
#keys = list(tile_def)
In [20]:
# Materialize the keys: on Python 3, dict.keys() is a view that supports
# neither slicing nor indexing (the original `tile_keys[:10]` would raise).
tile_keys = list(tile_def.keys())
print(len(tile_keys))
for key in tile_keys[:10]:
    print(key)
    cell = key[0]       # (x, y) cell index
    dt_stamp = key[1]   # time stamp component of the key
    print(dt_stamp)
    # ISO-like timestamp string with ':' replaced so it is filename-safe
    isostr = str(dt_stamp)[:19].replace(':', '-')
    print(isostr)
In [21]:
# list(...) so indexing also works when tile_keys is a Python 3 dict view
key = list(tile_keys)[0]
tile_def[key]
Out[21]:
In [22]:
# show the selected ((x, y), time) key
key
Out[22]:
In [23]:
# Pick one time slice of the cell and load its NBAR data.
time_slice = 1
cell = (15, -40)
# list(...) so positional indexing also works on Python 3 dict views
key = list(tile_keys)[time_slice]
print(key)
tile = tile_def[key]['nbar']
print(cell)
print(tile)
nbar_data = gw.load(tile)
nbar_data
Out[23]:
In [24]:
# nbar_data.blue appears to be a 3-D array (time, y, x), but indexing as
# nbar_data.blue[1, :, :] reportedly fails here — TODO confirm why.
type(nbar_data.blue)
nbar_data.blue
Out[24]:
In [25]:
# Stack the six NBAR bands into one (band, y, x) int16 array for the classifier.
band_names = ('blue', 'green', 'red', 'nir', 'swir1', 'swir2')
# Derive the tile size from the data instead of hard-coding 4000x4000,
# so this also works for differently-sized tiles.
y_size, x_size = nbar_data.blue.shape[-2], nbar_data.blue.shape[-1]
#del raw_image
raw_image = np.zeros((len(band_names), y_size, x_size), dtype='int16')  # 'float32')
for i, band in enumerate(band_names):
    raw_image[i, :, :] = nbar_data[band][:, :]
In [26]:
raw_image.shape
Out[26]:
In [27]:
type(raw_image)
Out[27]:
In [28]:
# Run the WOfS decision-tree water classifier over the 6-band stack.
from wofs.waters.detree.classifier import WaterClassifier
classifier = WaterClassifier()
# TODO: water classification using the input data tiles
water_classified_img = classifier.classify(raw_image)
In [29]:
# display all or part of the classified image
plt.imshow(water_classified_img ) #show Lake BurleyGriffin clfimg[2200:2600,1500:2500]) if cell=(15,-40)
plt.colorbar(orientation='vertical', shrink=0.8, label='water=128 or no water=0');
In [33]:
# The first step produces a very rough water tile as shown above. It even has water in the no-data region.
# This is why it must be filtered by no_data, cloud, etc. below
In [ ]:
In [30]:
# Mask of pixels whose green band equals that band's registered nodata value.
green_band = nbar_data.green
no_data = green_band == green_band.nodata
no_data.plot()
Out[30]:
In [31]:
type(no_data)
Out[31]:
In [32]:
# a boolean mask with a leading time dim of 1 ("1+2D"), matching the data
no_data
Out[32]:
In [33]:
# keep only valid pixels: nodata pixels become NaN before plotting
nbar_data.green.where(~no_data).plot()
Out[33]:
In [34]:
# same, but with the time dimension dropped via no_data[0]
nbar_data.green.where(~ no_data[0]).plot()
Out[34]:
In [35]:
# Per-band nodata values, gathered into a DataArray (bands can differ).
no_data_values = nbar_data.apply(lambda band: band.nodata).to_array(dim='band')
# View the whole Dataset as one (band, ...) DataArray so every band is checked.
stack = nbar_data.to_array(dim='band')
# A pixel is "no data" if ANY band holds its nodata value there.
no_data_mask = (stack == no_data_values).any(dim='band')
no_data_mask.plot()
Out[35]:
In [40]:
no_data_mask
Out[40]:
In [41]:
no_data_mask.squeeze()
Out[41]:
In [36]:
# cell = key[0]
# Load the pixel-quality tile paired with the NBAR tile selected earlier.
tile = tile_def[key]['pqa']
pq_data = gw.load( tile)
We can look at the bits available to us:
In [37]:
# Raw dict of the PQ bit-flag definitions available in this product.
masking.get_flags_def(pq_data)
Out[37]:
In [38]:
# better display using pandas
import pandas
pandas.DataFrame.from_dict(masking.get_flags_def(pq_data), orient='index')
Out[38]:
We can make a mask from a combination of bits. Here we will use the cloud masks and the contiguous bit.
In [39]:
# Mask of pixels where all bands are contiguous (contiguity bit set).
contiguous = masking.make_mask(pq_data, contiguous=1).pixelquality
contiguous.plot()
Out[39]:
In [40]:
# Complement: pixels where the contiguity bit is NOT set.
no_contiguous = masking.make_mask(pq_data, contiguous=0).pixelquality
no_contiguous.plot()
Out[40]:
In [41]:
# sanity check: count pixels where the two masks differ — presumably they
# are exact complements, so this should equal the total pixel count
np.sum (contiguous != no_contiguous)
Out[41]:
Out[41]:
In [42]:
# "Good" pixels: cloud-free under both ACCA and Fmask tests, and contiguous.
good_data = masking.make_mask(pq_data, cloud_acca='no_cloud', cloud_fmask='no_cloud', contiguous=True).pixelquality
good_data.plot()
Out[42]:
In [43]:
good_data # is a mask image True/false
Out[43]:
In [44]:
# Apply the mask: bad pixels become NaN in every NBAR band.
cloud_free_nbar = nbar_data.where(good_data)
cloud_free_nbar.red.plot()
Out[44]:
In [45]:
cloud_free_nbar.red
Out[45]:
In [ ]:
In [ ]:
In [46]:
# inspect the PQ dataset (pixelquality variable plus its flag metadata)
pq_data
Out[46]:
In [47]:
# The 'ga_good_pixel' flag has a single defined value (expected: 16383, i.e.
# every PQ test passed). next(iter(...)) replaces .keys()[0], which fails on
# Python 3 because dict views are not indexable.
perfect_flag = int(next(iter(masking.get_flags_def(pq_data)['ga_good_pixel']['values'].keys())))
# perfect_flag = 16383
print ('perfect_flag', perfect_flag)
# Pixels whose PQ word equals the all-tests-passed value.
perfect_pixel_mask = (pq_data.pixelquality == perfect_flag)
perfect_pixel_mask.plot()
Out[47]:
In [48]:
# NBAR with everything except "perfect" (all-PQ-tests-passed) pixels set to NaN
perfect_nbar_data = nbar_data.where(perfect_pixel_mask)
In [49]:
# Re-run the water classifier, this time on the "perfect pixel" NBAR data.
band_names = ('blue', 'green', 'red', 'nir', 'swir1', 'swir2')
# Derive the tile size from the data instead of hard-coding 4000x4000.
y_size, x_size = perfect_nbar_data.blue.shape[-2], perfect_nbar_data.blue.shape[-1]
#del raw_image
raw_image = np.zeros((len(band_names), y_size, x_size), dtype='int16')  # 'float32')
for i, band in enumerate(band_names):
    raw_image[i, :, :] = perfect_nbar_data[band][:, :]
water_img = classifier.classify(raw_image)
In [50]:
# display all or part of the classified image
#plt.imshow(water_img)
plt.imshow(water_img[2200:2600,1500:2500]) #show Lake BurleyGriffin clfimg[2200:2600,1500:2500]) if cell=(15,-40)
plt.colorbar(orientation='vertical', shrink=0.8, label='water=128 or no water=0');
In [51]:
water_img
Out[51]:
In [52]:
pq_data
Out[52]:
In [53]:
# NOTE(review): the next two cells each rebuild perfect_pixel_mask with
# slightly different make_mask calls; the second silently overwrites the
# first. Keep only whichever form is correct for this datacube version.
perfect_pixel_mask = masking.make_mask(pq_data, ga_good_pixel=True).pixelquality
In [54]:
perfect_pixel_mask = masking.make_mask(pq_data.pixelquality, ga_good_pixel=True)
In [ ]:
perfect_pixel_mask.plot()
Out[ ]:
In [62]:
# List the DSM (digital surface model) tiles for this cell.
# NOTE(review): list_tiles is called but the result is named dsm_cells —
# consider renaming for clarity.
acellindex=(15,-40)
dsm_cells = gw.list_tiles(acellindex, product='dsm1sv10')
print (dsm_cells)
In [63]:
# ----------------------------------------------------------------
def get_dsm_data(acellindex, qdict=None):
    """
    Load DSM data for a cell, for the sia / terrain-shadow / high-slope filters.

    :param acellindex: cell index tuple, e.g. (15, -40)
    :param qdict: optional query dict; 'time' and 'platform' keys are dropped
                  before querying (the DSM product is not filtered by them)
    :return: dict mapping cell index -> loaded dataset with the time dim removed
    """
    # Copy so the caller's dict is never modified in place, and avoid the
    # mutable-default-argument pitfall of the original `qdict={}` signature.
    query = dict(qdict) if qdict else {}
    query.pop('time', None)
    query.pop('platform', None)
    dsm_cells = gw.list_cells(acellindex, product='dsm1sv10', **query)

    def load_dsm(cell):
        # DSM carries a single (dummy) time slice; squeeze it away
        data = gw.load(cell)
        return data.squeeze('time').drop('time')

    return {index: load_dsm(cell) for index, cell in dsm_cells.items()}
In [64]:
# Load the DSM for the chosen cell (no extra query constraints).
dsmtile= get_dsm_data(acellindex, qdict={})
In [65]:
print(dsmtile)
In [66]:
# Plot the elevation band for cell (15, -40).
dsmtile[(15,-40)]['elevation'].plot()
Out[66]:
In [ ]: