Once the datasets are obtained, they must be aligned and cropped to the same region. In this notebook, we crop the Planet scene and ground truth data to the aoi.
The sections are:
Note: there are quite a few cells that are dedicated to defining utility functions that have broad applicability and can be lifted from this notebook. They are indicated by a line at the top of the cell that starts with
# Utility functions:
In [1]:
from collections import namedtuple
import copy
import json
import os
import pathlib
import shutil
import subprocess
import tempfile
import ipyleaflet as ipyl
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from shapely.geometry import shape, mapping
%matplotlib inline
In [2]:
train_scene_id = '20160831_180231_0e0e'
In [3]:
# define and, if necessary, create train data directory
train_dir = os.path.join('data', 'train')
pathlib.Path(train_dir).mkdir(parents=True, exist_ok=True)
# define train scene
train_scene = os.path.join(train_dir, train_scene_id + '_3B_AnalyticMS.tif')
train_scene_metadata = os.path.join(train_dir,
train_scene_id + '_3B_AnalyticMS_metadata.xml')
In [4]:
# First test if scene file exists, if not, use the Planet commandline tool to download the image, metadata, and udm.
# This command assumes a bash shell, available in Unix-based operating systems.
!test -f $train_scene || \
planet data download \
--item-type PSOrthoTile \
--dest $train_dir \
--asset-type analytic,analytic_xml \
--string-in id $train_scene_id
In [5]:
test_scene_id = '20160831_180257_0e26'
In [6]:
# define and, if necessary, create test data directory
test_dir = os.path.join('data', 'test')
pathlib.Path(test_dir).mkdir(parents=True, exist_ok=True)
# define test scene
test_scene = os.path.join(test_dir, test_scene_id + '_3B_AnalyticMS.tif')
test_scene_metadata = os.path.join(test_dir,
test_scene_id + '_3B_AnalyticMS_metadata.xml')
In [7]:
# First test if scene file exists, if not, use the Planet commandline tool to download the image, metadata, and udm.
# This command assumes a bash shell, available in Unix-based operating systems.
!test -f $test_scene || \
planet data download \
--item-type PSOrthoTile \
--dest $test_dir \
--asset-type analytic,analytic_xml \
--string-in id $test_scene_id
In [8]:
predata_dir = 'pre-data'
test_aoi_filename = os.path.join(predata_dir, 'aoi-test.geojson')
assert os.path.isfile(test_aoi_filename)
train_aoi_filename = os.path.join(predata_dir, 'aoi-train.geojson')
assert os.path.isfile(train_aoi_filename)
ground_truth_filename = os.path.join(predata_dir, 'ground-truth.geojson')
assert os.path.isfile(ground_truth_filename)
In [9]:
# Utility functions: cropping polygons
# Uses shapely for geospatial operations
def crop_polygons_to_aoi(polygons, aoi):
"""Crops polygons to the aoi.
Polygons within aoi are copied. For Polygons that intersect aoi boundary, the
intersection geometry is saved. If the intersection is a MultiPolygon, it is
stored as multiple Polygons.
:param dict aoi: geojson polygon describing crop feature
:param list features: geojson polygons to be cropped
"""
aoi_shp = shape(aoi['geometry'])
cropped_features = []
for f in polygons:
shp = shape(f['geometry'])
assert shp.type == 'Polygon'
if shp.within(aoi_shp):
cropped_features.append(copy.deepcopy(f))
elif shp.intersects(aoi_shp):
# 'cut' features at the aoi boundary by the aoi
cropped_shp = shp.intersection(aoi_shp)
try:
# try to iterate, which only works for MultiPolygon
for s in cropped_shp:
new_f = copy.deepcopy(f)
new_f['geometry'] = mapping(s)
cropped_features.append(new_f)
except TypeError:
# Polygon is not iterable
new_f = copy.deepcopy(f)
new_f['geometry'] = mapping(cropped_shp)
cropped_features.append(new_f)
return cropped_features
In [10]:
# Utility functions: loading and saving geojson
def save_geojson(features, filename):
with open(filename, 'w') as f:
f.write(json.dumps(features))
def load_geojson(filename):
with open(filename, 'r') as f:
return json.load(f)
In [11]:
ground_truth_data = load_geojson(ground_truth_filename)
Train Ground Truth Data
In [12]:
train_aoi = load_geojson(train_aoi_filename)
train_ground_truth_data = crop_polygons_to_aoi(ground_truth_data, train_aoi)
print(len(train_ground_truth_data))
In [15]:
train_ground_truth_filename = os.path.join(predata_dir, 'ground-truth-train.geojson')
save_geojson(train_ground_truth_data, train_ground_truth_filename)
Test Ground Truth Data
In [17]:
test_aoi = load_geojson(test_aoi_filename)
test_ground_truth_data = crop_polygons_to_aoi(ground_truth_data, test_aoi)
print(len(test_ground_truth_data))
In [18]:
test_ground_truth_filename = os.path.join(predata_dir, 'ground-truth-test.geojson')
save_geojson(test_ground_truth_data, test_ground_truth_filename)
In [19]:
# Utility functions: crop and project an image
def _gdalwarp_crop_options(crop_filename):
return ['-cutline', crop_filename, '-crop_to_cutline']
def _gdalwarp_project_options(src_proj, dst_proj):
return ['-s_srs', src_proj, '-t_srs', dst_proj]
def _gdalwarp(input_filename, output_filename, options):
commands = ['gdalwarp'] + options + \
['-overwrite',
input_filename,
output_filename]
print(' '.join(commands))
subprocess.check_call(commands)
# lossless compression of an image
def _compress(input_filename, output_filename):
commands = ['gdal_translate',
'-co', 'compress=LZW',
'-co', 'predictor=2',
input_filename,
output_filename]
print(' '.join(commands))
subprocess.check_call(commands)
# uses Rasterio to get image srs if dst_srs is specified
def warp(input_filename,
output_filename,
crop_filename=None,
dst_srs=None,
overwrite=True,
compress=False):
options = []
if crop_filename is not None:
options += _gdalwarp_crop_options(crop_filename)
if dst_srs is not None:
src_srs = rasterio.open(input_filename).crs['init']
options += _gdalwarp_project_options(src_srs, dst_srs)
# check to see if output file exists, if it does, do not warp
if os.path.isfile(output_filename) and not overwrite:
print('{} already exists. Aborting warp of {}.'.format(output_filename, input_filename))
elif compress:
with tempfile.NamedTemporaryFile(suffix='.vrt') as vrt_file:
options += ['-of', 'vrt']
_gdalwarp(input_filename, vrt_file.name, options)
_compress(vrt_file.name, output_filename)
else:
_gdalwarp(input_filename, output_filename, options)
In [20]:
train_scene_cropped = os.path.join(predata_dir, 'train_scene_cropped.tif')
warp(train_scene, train_scene_cropped, crop_filename=train_aoi_filename, overwrite=False, compress=True)
Copy over the image metadata
In [21]:
train_scene_cropped_metadata = os.path.join(predata_dir, 'train_scene_cropped_metadata.xml')
shutil.copyfile(train_scene_metadata, train_scene_cropped_metadata)
Out[21]:
In [22]:
# Utility functions: loading an image
NamedBands = namedtuple('NamedBands', 'b, g, r, nir')
def load_masked_bands(filename):
"""Loads a 4-band BGRNir Planet Image file as a list of masked bands.
The masked bands share the same mask, so editing one band mask will
edit them all."""
with rasterio.open(filename) as src:
b, g, r, nir = src.read()
mask = src.read_masks(1) == 0 # 0 value means the pixel is masked
bands = NamedBands(b=b, g=g, r=r, nir=nir)
return NamedBands(*[np.ma.array(b, mask=mask)
for b in bands])
print(load_masked_bands(train_scene_cropped).b.shape)
In [23]:
# Utility functions: displaying an image
def _linear_scale(ndarray, old_min, old_max, new_min, new_max):
"""Linear scale from old_min to new_min, old_max to new_max.
Values below min/max are allowed in input and output.
Min/Max values are two data points that are used in the linear scaling.
"""
#https://en.wikipedia.org/wiki/Normalization_(image_processing)
return (ndarray - old_min)*(new_max - new_min)/(old_max - old_min) + new_min
# print(linear_scale(np.array([1,2,10,100,256,2560, 2660]), 2, 2560, 0, 256))
def _mask_to_alpha(bands):
band = np.atleast_3d(bands)[...,0]
alpha = np.zeros_like(band)
alpha[~band.mask] = 1
return alpha
def _add_alpha_mask(bands):
return np.dstack([bands, _mask_to_alpha(bands)])
def bands_to_display(bands, alpha=True):
"""Converts a list of bands to a 3-band rgb, normalized array for display."""
rgb_bands = np.dstack(bands[:3])
old_min = np.percentile(rgb_bands, 2)
old_max = np.percentile(rgb_bands, 98)
new_min = 0
new_max = 1
scaled = _linear_scale(rgb_bands.astype(np.double),
old_min, old_max, new_min, new_max)
bands = np.clip(scaled, new_min, new_max)
if alpha is True:
bands = _add_alpha_mask(bands)
return bands
In [24]:
plt.figure()
bands = load_masked_bands(train_scene_cropped)
plt.imshow(bands_to_display([bands.r, bands.g, bands.b]))
Out[24]:
Define Layer for cropped Planet scene
First we project the cropped Planet scene to WGS84 for showing on the map. Then we adjust the scene for display and save as an 8-bit jpeg. Finally, we define the image layer using the projected image bounds.
Leaflet appears to support local files if they are jpg (src)
In [25]:
# Utility functions: creating an image layer for display on a map
def _save_display_image(src_filename, dst_filename):
# convert to rgb and scale to 8-bit
bands = load_masked_bands(src_filename)
img = bands_to_display([bands.r, bands.g, bands.b])
# save as jpeg
if(os.path.isfile(dst_filename)): os.remove(dst_filename)
matplotlib.image.imsave(dst_filename, img)
def create_image_layer(filename):
with tempfile.NamedTemporaryFile(suffix='.tif') as temp_file:
projected_filename = temp_file.name
# project to wgs84
dst_srs = 'epsg:4326' #WGS84
warp(filename, projected_filename, dst_srs=dst_srs)
# save as jpeg
display_image = os.path.join('data', 'display.jpg')
_save_display_image(projected_filename, display_image)
# determine image layer bounds
(minx, miny, maxx, maxy) = rasterio.open(projected_filename).bounds
sw = [miny, minx]
ne = [maxy, maxx]
# Create image layer
return ipyl.ImageOverlay(url=display_image, bounds=[sw, ne])
Define layer for ground truth data
In [26]:
def create_feature_layer(features):
# Assign colors to classes
# Class descriptions can be found in datasets-identify notebook
agg_classes = ['G', 'R', 'F', 'P', 'T', 'D', 'C', 'V']
# colors determined using [colorbrewer2.org](http://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3)
colors = ['#ffffd9','#edf8b1','#c7e9b4','#7fcdbb','#41b6c4','#1d91c0','#225ea8','#0c2c84']
class_colors = dict((a,c) for a,c in zip(agg_classes, colors))
def get_color(cls):
return class_colors[cls]
feature_collection = {
"type": "FeatureCollection",
"features": features
}
for f in feature_collection['features']:
feature_color = get_color(f['properties']['CLASS1'])
f['properties']['style'] = {
'color': feature_color,
'weight': 1,
'fillColor': feature_color,
'fillOpacity': 0.1}
return ipyl.GeoJSON(data=feature_collection)
In [27]:
zoom = 13
center = [38.30933576918588, -121.55410766601564] # lat/lon
map_tiles = ipyl.TileLayer(url='http://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png')
data_map = ipyl.Map(
center=center,
zoom=zoom,
default_tiles = map_tiles
)
data_map.add_layer(create_image_layer(train_scene_cropped))
data_map.add_layer(create_feature_layer(train_ground_truth_data))
# display
data_map
Awesome! The data looks nicely registered to the imagery and the crop outlines don't appear to have changed much over the years.
In [28]:
test_scene_cropped = os.path.join(predata_dir, 'test_scene_cropped.tif')
warp(test_scene, test_scene_cropped, crop_filename=test_aoi_filename, overwrite=False, compress=True)
In [29]:
test_scene_cropped_metadata = os.path.join(predata_dir, 'test_scene_cropped_metadata.xml')
shutil.copyfile(test_scene_metadata, test_scene_cropped_metadata)
Out[29]:
In [30]:
plt.figure()
bands = load_masked_bands(test_scene_cropped)
plt.imshow(bands_to_display([bands.r, bands.g, bands.b]))
Out[30]:
In [ ]: