In [1]:
import datacube.api
import numpy
import fiona
import geopandas
import rasterio
from affine import Affine
from rasterio.crs import from_string
import osr
from skimage import exposure
from image_processing.segmentation import Segments, rasterise_vector
import matplotlib.pyplot as plt
%matplotlib inline

Query the datacube


In [2]:
dc = datacube.api.API()

Open the vector file and get the bounding box


In [3]:
vfname = '/g/data2/v10/public/water-example/sample-water-bodies.shp'
src = fiona.open(vfname, 'r')
xidx = (src.bounds[0], src.bounds[2])
yidx = (src.bounds[-1], src.bounds[1])
gdf = geopandas.read_file(vfname)
gdf.plot()


Out[3]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe2c66da550>

Using the co-ordinates of the vector file's bunding box, query the datacube.


In [4]:
nbar = dc.get_dataset(product='nbar', platform='LANDSAT_8', 
                      y=yidx, x=xidx)
nbar


Out[4]:
<xarray.Dataset>
Dimensions:  (time: 159, x: 4226, y: 4888)
Coordinates:
  * time     (time) datetime64[ns] 2013-03-26T00:09:17.140271 ...
  * y        (y) float64 -3.66e+06 -3.66e+06 -3.66e+06 -3.66e+06 -3.66e+06 ...
  * x        (x) float64 1.192e+06 1.192e+06 1.192e+06 1.192e+06 1.192e+06 ...
Data variables:
    crs      int64 0
    band_6   (time, y, x) int16 -999 -999 -999 -999 -999 -999 -999 -999 -999 ...
    band_7   (time, y, x) int16 -999 -999 -999 -999 -999 -999 -999 -999 -999 ...
    band_4   (time, y, x) int16 -999 -999 -999 -999 -999 -999 -999 -999 -999 ...
    band_5   (time, y, x) int16 -999 -999 -999 -999 -999 -999 -999 -999 -999 ...
    band_2   (time, y, x) int16 -999 -999 -999 -999 -999 -999 -999 -999 -999 ...
    band_3   (time, y, x) int16 -999 -999 -999 -999 -999 -999 -999 -999 -999 ...
    band_1   (time, y, x) int16 -999 -999 -999 -999 -999 -999 -999 -999 -999 ...
Attributes:
    source: This data is a reprojection and retile of Landsat surface reflectance scene data.
    summary: These files are experimental, short lived, and the format will change.
    product_version: 0.0.0
    license: Creative Commons Attribution 4.0 International (CC-BY 4.0)
    title: Experimental Data files From the Australian Geoscience Data Cube - DO NOT USE

Extract and convert the co-ordinate reference system into rasterio's preferred format


In [5]:
sr = osr.SpatialReference()
sr.ImportFromWkt(nbar.crs.spatial_ref)
crs = from_string(sr.ExportToProj4())
print crs


{'lon_0': 132, 'ellps': 'GRS80', 'y_0': 0, 'no_defs': True, 'proj': 'aea', 'x_0': 0, 'units': 'm', 'towgs84': '0,0,0,0,0,0,0', 'lat_2': -36, 'lat_1': -18, 'lat_0': 0}

Create an affine/geotransformtion matrix.

We make an assumption that the spatial array is uniformally spaced in order for this to work.


In [6]:
pix_x = nbar.x.values[1] - nbar.x.values[0]
pix_y = nbar.y.values[1] - nbar.y.values[0]
ulx = nbar.x.values[0] - pix_x / 2.0
uly = nbar.y.values[0] - pix_y  /2.0
transform = Affine.from_gdal(*[ulx, pix_x, 0, uly, 0, pix_y])
print transform


| 25.00, 0.00, 1192000.00|
| 0.00,-25.00,-3659925.00|
| 0.00, 0.00, 1.00|

Determine the pixel size in hectares. We'll use this later on.


In [7]:
ha = pix_x **2 / 10000.0
print "Pixel area in hectares: {}".format(ha)


Pixel area in hectares: 0.0625

In [8]:
dims = nbar.band_4.shape

In [9]:
img = numpy.zeros((3, dims[1], dims[2]), dtype='float32')
img[0] = nbar.band_5[28]
img[1] = nbar.band_4[28]
img[2] = nbar.band_3[28]
img[img == -999] = numpy.nan
img /= 10000

In [10]:
scl = exposure.equalize_hist(img, mask=numpy.isfinite(img))

In [11]:
plt.imshow(scl.transpose(1, 2, 0))


Out[11]:
<matplotlib.image.AxesImage at 0x7fe2993640d0>

Segmentation

Rasterise all the geometry contained within the vector file, and create a Segments object.


In [12]:
ras = rasterise_vector(vfname, shape=dims[1:], crs=crs, transform=transform)
seg = Segments(ras)
print "Number of segments: {}".format(seg.n_segments)


Number of segments: 181

For the NIR, Green, & Red bands, calculate statistics for every segment.


In [13]:
dat = nbar.band_5[28].values.astype('float32')
dat[dat == -999] = numpy.nan
nir_stats = seg.basic_statistics(dat, nan=True, scale_factor=ha)
dat = nbar.band_4[28].values.astype('float32')
dat[dat == -999] = numpy.nan
red_stats = seg.basic_statistics(dat, nan=True, scale_factor=ha)
dat = nbar.band_3[28].values.astype('float32')
dat[dat == -999] = numpy.nan
green_stats = seg.basic_statistics(dat, nan=True, scale_factor=ha)


/g/data/v10/public/modules/agdc-py2-env/anaconda2-2.5.0/envs/agdc/lib/python2.7/site-packages/numpy/lib/nanfunctions.py:675: RuntimeWarning: Mean of empty slice
  warnings.warn("Mean of empty slice", RuntimeWarning)
/g/data/v10/public/modules/agdc-py2-env/anaconda2-2.5.0/envs/agdc/lib/python2.7/site-packages/numpy/lib/nanfunctions.py:326: RuntimeWarning: All-NaN slice encountered
  warnings.warn("All-NaN slice encountered", RuntimeWarning)
/g/data/v10/public/modules/agdc-py2-env/anaconda2-2.5.0/envs/agdc/lib/python2.7/site-packages/numpy/lib/nanfunctions.py:227: RuntimeWarning: All-NaN axis encountered
  warnings.warn("All-NaN axis encountered", RuntimeWarning)
/g/data/v10/public/modules/agdc-py2-env/anaconda2-2.5.0/envs/agdc/lib/python2.7/site-packages/numpy/lib/nanfunctions.py:1136: RuntimeWarning: Degrees of freedom <= 0 for slice.
  warnings.warn("Degrees of freedom <= 0 for slice.", RuntimeWarning)

In [14]:
nir_stats.head(10)


Out[14]:
Segment_IDs Mean Max Min StdDev Total Area
0 182 2759.640381 4058.0 1861.0 558.137939 314599.0 37.8750
1 183 1947.141724 2425.0 1504.0 151.191681 467314.0 15.0000
2 184 1646.594849 2214.0 1389.0 226.984085 191005.0 7.2500
3 185 2011.195923 2867.0 1422.0 174.165222 5882748.0 182.8125
4 186 2148.417480 2517.0 1835.0 137.978973 1014053.0 29.5000
5 187 1993.644165 2976.0 900.0 272.751160 7149208.0 224.1250
6 188 NaN NaN NaN NaN 0.0 22.8125
7 189 NaN NaN NaN NaN 0.0 11.6250
8 190 1945.478638 2528.0 1405.0 159.013412 1317089.0 42.3125
9 191 1760.558472 2596.0 1040.0 283.184509 9343284.0 331.6875

Viewing segments

Get the bounding boxes for every segment


In [15]:
bboxes = seg.bounding_box()

In [16]:
sid = 185
window = bboxes[sid]
ys, ye = window[0]
xs, xe = window[1]
print window


((3582, 3641), (88, 171))

In [17]:
subs = img[:, ys:ye, xs:xe]
scl_subs = exposure.equalize_hist(subs, mask=numpy.isfinite(subs))
plt.title(src[sid - 1]['properties']['FEATURETYP'] +  ' (???)')
plt.imshow(scl_subs.transpose(1, 2, 0))


Out[17]:
<matplotlib.image.AxesImage at 0x7fe3102fbe50>

Find a segment with a low mean value.


In [18]:
nir_stats[nir_stats['Mean'] < 500]


Out[18]:
Segment_IDs Mean Max Min StdDev Total Area
175 357 387.886719 3297.0 10.0 583.76947 2653921.0 427.625

In [19]:
sid = 357
window = bboxes[sid]
ys, ye = window[0]
xs, xe = window[1]
print window


((1963, 2049), (2055, 2168))

In [20]:
subs = img[:, ys:ye, xs:xe]
scl_subs = exposure.equalize_hist(subs, mask=numpy.isfinite(subs))
plt.title(src[sid - 1]['properties']['FEATURETYP'])
plt.imshow(scl_subs.transpose(1, 2, 0))


Out[20]:
<matplotlib.image.AxesImage at 0x7fe299364350>

In [21]:
sid = 262
window = bboxes[sid]
ys, ye = window[0]
xs, xe = window[1]
print window


((2269, 2821), (3670, 3973))

In [22]:
subs = img[:, ys:ye, xs:xe]
scl_subs = exposure.equalize_hist(subs, mask=numpy.isfinite(subs))
plt.title(src[sid - 1]['properties']['NAME'])
plt.imshow(scl_subs.transpose(1, 2, 0))


Out[22]:
<matplotlib.image.AxesImage at 0x7fe2c583d750>

Timeseries of the SWIR band for every segment


In [23]:
dat = nbar.band_6[0].values.astype('float32')
dat[dat == -999] = numpy.nan
swir_stats = seg.basic_statistics(dat, nan=True, scale_factor=ha)
swir_stats['timestamp'] = nbar.time[0].values
for i in range(1, dims[0]):
    dat = nbar.band_6[i].values.astype('float32')
    dat[dat == -999] = numpy.nan
    stats = seg.basic_statistics(dat, nan=True, scale_factor=ha)
    stats['timestamp'] = nbar.time[i].values
    swir_stats = swir_stats.append(stats)

In [24]:
swir_stats.set_index('timestamp', inplace=True)

In [25]:
print "Number of records: {}".format(swir_stats.shape[0])
swir_stats.head(10)


Number of records: 28779
Out[25]:
Segment_IDs Mean Max Min StdDev Total Area
timestamp
2013-03-26 00:09:17.140271 182 NaN NaN NaN NaN 0.0 37.8750
2013-03-26 00:09:17.140271 183 NaN NaN NaN NaN 0.0 15.0000
2013-03-26 00:09:17.140271 184 NaN NaN NaN NaN 0.0 7.2500
2013-03-26 00:09:17.140271 185 NaN NaN NaN NaN 0.0 182.8125
2013-03-26 00:09:17.140271 186 NaN NaN NaN NaN 0.0 29.5000
2013-03-26 00:09:17.140271 187 NaN NaN NaN NaN 0.0 224.1250
2013-03-26 00:09:17.140271 188 NaN NaN NaN NaN 0.0 22.8125
2013-03-26 00:09:17.140271 189 NaN NaN NaN NaN 0.0 11.6250
2013-03-26 00:09:17.140271 190 NaN NaN NaN NaN 0.0 42.3125
2013-03-26 00:09:17.140271 191 NaN NaN NaN NaN 0.0 331.6875

Segment (polygon) timeseries profiles


In [26]:
sid = 262
roi = swir_stats[swir_stats['Segment_IDs'] == sid]
roi = roi[numpy.isfinite(roi['Mean'])]
roi['Mean'].plot(title=src[sid - 1]['properties']['NAME'])


Out[26]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe2c4154d10>

In [27]:
sid = 357
roi = swir_stats[swir_stats['Segment_IDs'] == sid]
roi = roi[numpy.isfinite(roi['Mean'])]
roi['Mean'].plot(title=src[sid - 1]['properties']['FEATURETYP'])


Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe29928d350>

In [28]:
sid = 185
roi = swir_stats[swir_stats['Segment_IDs'] == sid]
roi = roi[numpy.isfinite(roi['Mean'])]
roi['Mean'].plot(title=src[sid - 1]['properties']['FEATURETYP'])


Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe2991f1e50>

Investigating a segment/polygon


In [29]:
sid = 262
window = bboxes[sid]
ys, ye = window[0]
xs, xe = window[1]
roi = swir_stats[swir_stats['Segment_IDs'] == sid]
roi = roi[numpy.isfinite(roi['Mean'])]

In [30]:
wh1 = (roi['Mean'] < 1100) & (roi['Total'] > 110000000)
roi[wh1]


Out[30]:
Segment_IDs Mean Max Min StdDev Total Area
timestamp
2013-05-26 00:10:29.522857 262 1068.986938 3572.0 10.0 1121.092529 111261232.0 6505.0625
2013-08-30 00:10:29.317821 262 1091.199707 4498.0 10.0 1098.150024 113573152.0 6505.0625

In [31]:
wh2 = (roi['Mean'] > 2900) & (roi['StdDev'] < 400)
roi[wh2]


Out[31]:
Segment_IDs Mean Max Min StdDev Total Area
timestamp
2013-12-20 00:09:54.117418 262 2929.727051 4735.0 152.0 399.086304 304928928.0 6505.0625
2013-12-29 00:03:40.964257 262 3121.633301 4408.0 933.0 348.412964 64780136.0 6505.0625

In [32]:
fig, axes = plt.subplots(ncols=2)
timestamp = roi[wh1].index[0]
subs[0] = nbar.band_5.loc[timestamp][ys:ye, xs:xe]
subs[1] = nbar.band_4.loc[timestamp][ys:ye, xs:xe]
subs[1] = nbar.band_3.loc[timestamp][ys:ye, xs:xe]
scl_subs = exposure.equalize_hist(subs, mask=subs != -999)
axes[0].set_title(timestamp)
axes[0].imshow(scl_subs.transpose(1, 2, 0))
timestamp = roi[wh2].index[0]
subs[0] = nbar.band_5.loc[timestamp][ys:ye, xs:xe]
subs[1] = nbar.band_4.loc[timestamp][ys:ye, xs:xe]
subs[1] = nbar.band_3.loc[timestamp][ys:ye, xs:xe]
scl_subs = exposure.equalize_hist(subs, mask=subs != -999)
axes[1].set_title(timestamp)
axes[1].imshow(scl_subs.transpose(1, 2, 0))


Out[32]:
<matplotlib.image.AxesImage at 0x7fe298fb3750>

In [ ]: