In this notebook we focus on identification of change through classification of a temporal stack of images covering the Area of Interest (AOI). We utilize the algorithm for classifying imagery into forest / non-forest developed in drc_roads_classification to classify the imagery into forest/non-forest. Then, we utilize random forests to identify and classify change in the temporal stack.
Keywords: temporal analysis, forest monitoring, classification
In [1]:
import datetime
from functools import reduce
import json
import os
import numpy as np
import pandas as pd
from planet import api
import rasterio
from sklearn.cluster import MiniBatchKMeans
from sklearn.ensemble import RandomForestClassifier
In [1]:
from utils import Timer
import visual
In [3]:
# Import functionality from local notebooks
from ipynb.fs.defs.drc_roads_download import get_overlapping_scenes, StripDownloader, StripSearcher
from ipynb.fs.defs.drc_roads_classification \
import load_training_data, classify_forest, classified_band_to_rgb
In this section, we use the ipynb module to import StripDownloader
from the drc_roads_download notebook. We then use StripDownloader
to download the portion of OrthoTile strips that overlap the AOI.
In [4]:
# load aoi. Saved in drc_roads_download notebook
aoi_geojson_filename = os.path.join('pre-data', 'aoi.geojson')
In [5]:
# create planet api client
planet_api_key = os.environ['PL_API_KEY']
# quick check that key is defined
assert planet_api_key, "PL_API_KEY not defined."
client = api.ClientV1(api_key=planet_api_key)
In [6]:
# load aoi json
with open(aoi_geojson_filename) as json_data:
aoi = json.load(json_data)
In [7]:
# get good overlapping scenes from to July 2017 to February 2018
begin=datetime.datetime(year=2017,month=7,day=1)
end=datetime.datetime(year=2018,month=3,day=1)
strip_searcher = StripSearcher(aoi, begin, end, client)
good_scenes = strip_searcher.search(aoi_geojson_filename)
In [8]:
asset_type = 'analytic'
strip_downloader = StripDownloader(good_scenes, aoi_geojson_filename, client)
strip_downloader.run(asset_type, overwrite=False, verbose=True)
In [9]:
# sort strip ids as integers. Strip id order is the same as strip temporal order
strip_ids = [int(s) for s in good_scenes.strip_id.unique().tolist()]
strip_ids.sort()
strip_ids = [str(s) for s in strip_ids]
In [10]:
strip_mosaics = [strip_downloader.get_filename(i, asset_type) for i in strip_ids]
In [22]:
# strip_mosaics
In this section, we use the ipynb module to import classification functionality from the drc_roads_classification notebook.
In [12]:
classified_bands_file = os.path.join('data', 'classified_bands.npz')
def save_to_cache(classified_bands, strip_ids):
save_bands = dict((s, classified_bands[s])
for s in strip_ids)
# masked arrays are saved as just arrays, so save mask for later
save_bands.update(dict((s+'_msk', classified_bands[s].mask)
for s in strip_ids))
np.savez_compressed(classified_bands_file, **save_bands)
def load_from_cache():
tmp_cls = np.load(classified_bands_file)
sids = [k for k in tmp_cls.keys() if not k.endswith('_msk')]
# reform masked array from saved array and saved mask
classified_bands = dict((s, np.ma.array(tmp_cls[s], mask=tmp_cls[s+'_msk']))
for s in sids)
return classified_bands
In [13]:
use_cache = True
if use_cache and os.path.isfile(classified_bands_file):
print('using cached classified bands')
classified_bands = load_from_cache()
else:
X_training, y_training = load_training_data()
with Timer():
def classify(sid):
img = strip_downloader.get_filename(sid, asset_type)
# we only have two values, 0 and 1. Convert to uint8 for memory
band = (classify_forest(img, X_training, y_training)).astype(np.uint8)
return band
classified_bands = dict((s, classify(s)) for s in strip_ids)
# save to cache
save_to_cache(classified_bands, strip_ids)
In [14]:
# Decimate classified arrays for memory conservation
def decimate(arry, num=8):
return arry[::num, ::num].copy()
do_visualize = True # set to True to view images
if do_visualize:
for strip_id, classified_band in classified_bands.items():
visual.plot_image(classified_band_to_rgb(decimate(classified_band)),
title='Classified Image ({})'.format(strip_id),
figsize=(8, 8))
The results of classification in these images is mixed. There are a few images where the UDM fails to identify clouds (e.g. 1114901) and few images where the results actually look pretty good but classification missfires and classifies everything as non-forest (e.g. 1091588). There may be uniform in the imagery that makes it brighter than usual, which causes the classifier to fail. Ultimately, visual inspection finds that about 8 of the 21 classified images looks informative.
In [15]:
def get_bands(classified_bands):
return [classified_bands[k] for k in classified_bands.keys()]
def perc_masked(mask):
return 100*(np.count_nonzero(mask))/mask.size
def num_valid(mask):
return (mask.size - np.count_nonzero(mask))
def combine_masks(classified_bands):
masks = [b.mask for b in get_bands(classified_bands)]
total_mask = reduce(np.logical_or, masks)
print('{} valid pixels ({}% masked)'.format(num_valid(total_mask),
round(perc_masked(total_mask), 2)))
return total_mask
def valid_pixels(band, mask):
masked_band = np.ma.array(band, mask=mask)
return masked_band.compressed()
def bands_to_X(classified_bands, mask):
# X shape: (num_pixels, num_bands)
all_valid_pixels = [valid_pixels(b, mask) for b in get_bands(classified_bands)]
X = np.swapaxes(np.stack(all_valid_pixels), 0, 1)
return X
# lets just work with pixels that have a data point in every image
total_mask = combine_masks(classified_bands)
X = bands_to_X(classified_bands, total_mask)
In [16]:
def classify_kmeans(X):
mbk = MiniBatchKMeans(n_clusters=2)
with Timer():
mbk.fit(X)
with Timer():
y_pred = mbk.predict(X)
return y_pred
y_pred = classify_kmeans(X)
In [17]:
def y_to_band(y, mask):
band = np.ma.array(np.zeros(mask.shape), mask=mask)
band[~band.mask] = y
return band
y_band = y_to_band(y_pred, total_mask)
print(y_band.shape)
print(y_band.size - np.count_nonzero(y_band.mask))
print(np.count_nonzero(y_band))
In [18]:
# separate the class numbers into rgb bands
def classified_band_to_rgb(classified_band):
return [classified_band == n for n in [0,1,2]]
visual.plot_image(classified_band_to_rgb(y_band), title='KMeans Classified Image', figsize=(15, 15))
Ok. We can kind of see roads here, but the new roads (aka the change) isn't clearly identified. Let's try supervised classification.
In [19]:
label_band = abs(classified_bands['1130991'] - classified_bands['739199'])
In [20]:
visual.plot_image(classified_band_to_rgb(decimate(label_band)),
title='Classified Difference Image',
figsize=(8, 8))
In [21]:
# set nodata to zero, move class values to 1 and 2
label_band_array = (label_band + 1).filled(0)
Save label image with metadata from the original image
In [41]:
# get source crs
with rasterio.open(strip_mosaics[0], 'r') as src:
src_crs = src.crs
src_transform = src.transform
src_crs
src_transform
Out[41]:
In [44]:
label_img = os.path.join('pre-data', 'difference.tif')
(height, width) = label_band_array.shape
profile = {
'width': width,
'height': height,
'driver': 'GTiff',
'count': 1,
'compress': 'lzw',
'dtype': rasterio.uint8,
'crs': src_crs,
'transform': src_transform
}
with rasterio.open(label_img, 'w', **profile) as dst:
# multiply by 100 for visibility
dst.write(100 * label_band_array.astype(rasterio.uint8), 1)
In [2]:
# uncomment to get metadata for label_img
# !gdalinfo $label_img
The labeled images were created in GIMP using manual labeling of change (roads being built) in the difference image.
In [24]:
change_img = os.path.join('pre-data', 'difference_change.tif')
nochange_img = os.path.join('pre-data', 'difference_nochange.tif')
In [25]:
def get_label_mask(image_filename):
with rasterio.open(image_filename, 'r') as src:
band = src.read(1)
label_data = band == 255 # valid data in white regions
label_mask = ~label_data # mask True (masked) for not valid data
return label_mask
def get_unmasked_count(mask):
return np.size(mask) - np.count_nonzero(mask)
change_mask = get_label_mask(change_img)
print(get_unmasked_count(change_mask))
nochange_mask = get_label_mask(nochange_img)
print(get_unmasked_count(nochange_mask))
In [26]:
# combine the label masks with the valid data mask and then create X dataset for each label
total_mask = combine_masks(classified_bands)
total_change_mask = np.logical_or(total_mask, change_mask)
print('{} valid pixels ({}% masked)'.format(num_valid(total_change_mask),
round(perc_masked(total_change_mask), 2)))
X_change = bands_to_X(classified_bands, total_change_mask)
total_nochange_mask = np.logical_or(total_mask, nochange_mask)
print('{} valid pixels ({}% masked)'.format(num_valid(total_nochange_mask),
round(perc_masked(total_nochange_mask), 2)))
X_nochange = bands_to_X(classified_bands, total_nochange_mask)
In [27]:
def sample_pixels(pixels, sample_size):
if pixels.shape[0] > sample_size:
pixel_sample = pixels.copy()
np.random.shuffle(pixel_sample)
pixel_sample = pixel_sample[:sample_size]
else:
pixel_sample = pixels
return pixel_sample
def make_same_size_samples(list_of_X, size_percent=100):
# X: array of dim (num_pixels, num_classified_bands)
sample_len = min([p.shape[0] for p in list_of_X])
# only return size_percent number of pixels in the sample
if size_percent < 100 and size_percent > 0:
sample_len = round(sample_len * size_percent/100.0)
return [sample_pixels(p, sample_len) for p in list_of_X]
[X_change_sample, X_nochange_sample] = \
make_same_size_samples([X_change, X_nochange], size_percent=10)
print(X_change_sample.shape)
print(X_nochange_sample.shape)
In [28]:
change_label_value = 0
nochange_label_value = 1
X_rf = np.concatenate((X_change_sample, X_nochange_sample), axis=0)
y_rf = np.array(X_change_sample.shape[0] * [change_label_value] + \
X_nochange_sample.shape[0] * [nochange_label_value])
print(X_rf.shape)
print(y_rf.shape)
In [29]:
def classify_change(classified_bands, mask, X_training, y_training):
clf = RandomForestClassifier()
with Timer():
clf.fit(X_training, y_training)
X = bands_to_X(classified_bands, total_mask)
with Timer():
y_pred = clf.predict(X)
y_band = y_to_band(y_pred, total_mask)
return y_band
with Timer():
y_band_rf = classify_change(classified_bands, total_mask, X_rf, y_rf)
In [30]:
visual.plot_image(classified_band_to_rgb(y_band_rf), title='RF Classified Image', figsize=(25, 25))
The results look pretty good! With a little clean up, we could have a pretty cut and dry change classification. To achieve these results, we used only 10% of the pixels manually labeled as 'change' as the training dataset.