Prepare Datasets

Once the datasets are obtained, they must be aligned and cropped to the same region. In this notebook, we crop the train and test Planet scenes and the ground truth data to their respective AOIs (areas of interest).

The sections are:

  • Crop Ground Truth Data to AOI
  • Crop Train Image to AOI
  • Visualize Ground Truth Data over Image
  • Crop Test Image to AOI

Note: quite a few cells are dedicated to defining utility functions that have broad applicability and can be lifted from this notebook. They are indicated by a comment at the top of the cell that starts with

# Utility functions:

In [1]:
from collections import namedtuple
import copy
import json
import os
import pathlib
import shutil
import subprocess
import tempfile

import ipyleaflet as ipyl

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import rasterio
from shapely.geometry import shape, mapping

%matplotlib inline

Datasets

Train Scene


In [2]:
train_scene_id = '20160831_180231_0e0e'

In [3]:
# define and, if necessary, create train data directory
train_dir = os.path.join('data', 'train')
pathlib.Path(train_dir).mkdir(parents=True, exist_ok=True)

# define train scene
train_scene = os.path.join(train_dir, train_scene_id + '_3B_AnalyticMS.tif')
train_scene_metadata = os.path.join(train_dir,
                                    train_scene_id + '_3B_AnalyticMS_metadata.xml')

In [4]:
# First check whether the scene file exists; if not, use the Planet command-line tool to download the image and its metadata.
# This command assumes a bash shell, which is available on Unix-based operating systems.
!test -f $train_scene || \
    planet data download \
        --item-type PSOrthoTile \
        --dest $train_dir \
        --asset-type analytic,analytic_xml \
        --string-in id $train_scene_id

Test Scene


In [5]:
test_scene_id = '20160831_180257_0e26'

In [6]:
# define and, if necessary, create test data directory
test_dir = os.path.join('data', 'test')
pathlib.Path(test_dir).mkdir(parents=True, exist_ok=True)

# define test scene
test_scene = os.path.join(test_dir, test_scene_id + '_3B_AnalyticMS.tif')
test_scene_metadata = os.path.join(test_dir,
                                   test_scene_id + '_3B_AnalyticMS_metadata.xml')

In [7]:
# First check whether the scene file exists; if not, use the Planet command-line tool to download the image and its metadata.
# This command assumes a bash shell, which is available on Unix-based operating systems.
!test -f $test_scene || \
    planet data download \
        --item-type PSOrthoTile \
        --dest $test_dir \
        --asset-type analytic,analytic_xml \
        --string-in id $test_scene_id

AOI and Ground Truth

These datasets were created in the identify-datasets notebook.


In [8]:
predata_dir = 'pre-data'

test_aoi_filename = os.path.join(predata_dir, 'aoi-test.geojson')
assert os.path.isfile(test_aoi_filename)

train_aoi_filename = os.path.join(predata_dir, 'aoi-train.geojson')
assert os.path.isfile(train_aoi_filename)

ground_truth_filename = os.path.join(predata_dir, 'ground-truth.geojson')
assert os.path.isfile(ground_truth_filename)

Crop Ground Truth Data to AOI


In [9]:
# Utility functions: cropping polygons

# Uses shapely for geospatial operations
def crop_polygons_to_aoi(polygons, aoi):
    """Crops polygons to the aoi.
    
    Polygons within aoi are copied. For Polygons that intersect aoi boundary, the 
    intersection geometry is saved. If the intersection is a MultiPolygon, it is
    stored as multiple Polygons.
    
    :param dict aoi: geojson polygon describing crop feature
    :param list features: geojson polygons to be cropped
    """
    aoi_shp = shape(aoi['geometry'])
    cropped_features = []
    for f in polygons:
        shp = shape(f['geometry'])
        assert shp.type == 'Polygon'
        
        if shp.within(aoi_shp):
            cropped_features.append(copy.deepcopy(f))
        elif shp.intersects(aoi_shp):
            # 'cut' features at the aoi boundary by the aoi
            cropped_shp = shp.intersection(aoi_shp)
            
            try:
                # try to iterate, which only works for MultiPolygon
                for s in cropped_shp:
                    new_f = copy.deepcopy(f)
                    new_f['geometry'] = mapping(s)
                    cropped_features.append(new_f)
            except TypeError:
                # Polygon is not iterable
                new_f = copy.deepcopy(f)
                new_f['geometry'] = mapping(cropped_shp)
                cropped_features.append(new_f)
    return cropped_features
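
To make the cropping behavior concrete, here is a minimal, hedged sketch (not part of the original workflow) that crops two made-up square features to a made-up AOI. The geometries and the 'id' property are purely illustrative.


In [ ]:
from shapely.geometry import box

# hypothetical AOI and features, for illustration only
aoi = {'type': 'Feature', 'geometry': mapping(box(0, 0, 10, 10)), 'properties': {}}
inside = {'type': 'Feature', 'geometry': mapping(box(1, 1, 2, 2)),
          'properties': {'id': 'inside'}}
straddling = {'type': 'Feature', 'geometry': mapping(box(8, 8, 12, 12)),
              'properties': {'id': 'straddling'}}

cropped = crop_polygons_to_aoi([inside, straddling], aoi)
# 'inside' is copied unchanged; 'straddling' is clipped at the AOI boundary
print([f['properties']['id'] for f in cropped])
print(shape(cropped[1]['geometry']).bounds)  # expect (8.0, 8.0, 10.0, 10.0)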

In [10]:
# Utility functions: loading and saving geojson

def save_geojson(features, filename):
    with open(filename, 'w') as f:
        f.write(json.dumps(features))

def load_geojson(filename):
    with open(filename, 'r') as f:
        return json.load(f)

In [11]:
ground_truth_data = load_geojson(ground_truth_filename)

Train Ground Truth Data


In [12]:
train_aoi = load_geojson(train_aoi_filename)
train_ground_truth_data = crop_polygons_to_aoi(ground_truth_data, train_aoi)
print(len(train_ground_truth_data))


223

In [15]:
train_ground_truth_filename = os.path.join(predata_dir, 'ground-truth-train.geojson')
save_geojson(train_ground_truth_data,  train_ground_truth_filename)

Test Ground Truth Data


In [17]:
test_aoi = load_geojson(test_aoi_filename)
test_ground_truth_data = crop_polygons_to_aoi(ground_truth_data, test_aoi)
print(len(test_ground_truth_data))


340

In [18]:
test_ground_truth_filename = os.path.join(predata_dir, 'ground-truth-test.geojson')
save_geojson(test_ground_truth_data, test_ground_truth_filename)

Crop Train Image to AOI


In [19]:
# Utility functions: crop and project an image

def _gdalwarp_crop_options(crop_filename):
    return ['-cutline', crop_filename, '-crop_to_cutline']
    
def _gdalwarp_project_options(src_proj, dst_proj):
    return ['-s_srs', src_proj, '-t_srs', dst_proj]

def _gdalwarp(input_filename, output_filename, options):
    commands = ['gdalwarp'] + options + \
               ['-overwrite',
                input_filename,
                output_filename]
    print(' '.join(commands))
    subprocess.check_call(commands)

# lossless compression of an image
def _compress(input_filename, output_filename):
    commands = ['gdal_translate',
                '-co', 'compress=LZW',
                '-co', 'predictor=2',
                input_filename,
                output_filename]
    print(' '.join(commands))
    subprocess.check_call(commands)
    
# uses Rasterio to get image srs if dst_srs is specified
def warp(input_filename,
         output_filename,
         crop_filename=None,
         dst_srs=None,
         overwrite=True,
         compress=False):
    options = []

    if crop_filename is not None:
        options += _gdalwarp_crop_options(crop_filename)

    if dst_srs is not None:
        # open the dataset in a context manager so it is closed after reading the crs
        with rasterio.open(input_filename) as src:
            src_srs = src.crs['init']
        options += _gdalwarp_project_options(src_srs, dst_srs)
        
    # check to see if output file exists, if it does, do not warp
    if os.path.isfile(output_filename) and not overwrite:
        print('{} already exists. Aborting warp of {}.'.format(output_filename, input_filename))
    elif compress:
        with tempfile.NamedTemporaryFile(suffix='.vrt') as vrt_file:
            options += ['-of', 'vrt']
            _gdalwarp(input_filename, vrt_file.name, options)
            _compress(vrt_file.name, output_filename)
    else:
        _gdalwarp(input_filename, output_filename, options)

In [20]:
train_scene_cropped = os.path.join(predata_dir, 'train_scene_cropped.tif')
warp(train_scene, train_scene_cropped, crop_filename=train_aoi_filename, overwrite=False, compress=True)


pre-data/train_scene_cropped.tif already exists. Aborting warp of data/train/20160831_180231_0e0e_3B_AnalyticMS.tif.

Copy over the image metadata


In [21]:
train_scene_cropped_metadata = os.path.join(predata_dir, 'train_scene_cropped_metadata.xml')
shutil.copyfile(train_scene_metadata, train_scene_cropped_metadata)


Out[21]:
'pre-data/train_scene_cropped_metadata.xml'

Visualize Cropped Image


In [22]:
# Utility functions: loading an image

NamedBands = namedtuple('NamedBands', 'b, g, r, nir')

def load_masked_bands(filename):
    """Loads a 4-band BGRNir Planet Image file as a list of masked bands.
    
    The masked bands share the same mask, so editing one band mask will
    edit them all."""
    with rasterio.open(filename) as src:
        b, g, r, nir = src.read()
        mask = src.read_masks(1) == 0 # 0 value means the pixel is masked
    
    bands = NamedBands(b=b, g=g, r=r, nir=nir)
    return NamedBands(*[np.ma.array(b, mask=mask)
                        for b in bands])

print(load_masked_bands(train_scene_cropped).b.shape)


(1343, 1762)
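
As a quick, hedged illustration (reloading the cropped train scene; not part of the original workflow), the sketch below shows the shared-mask behavior described in the docstring: masking a pixel through one band masks it in every band.


In [ ]:
# illustrative only: all four masked bands reference the same mask array
bands = load_masked_bands(train_scene_cropped)
print(bands.r.mask[0, 0])   # current mask value for the top-left pixel of the red band
bands.g.mask[0, 0] = True   # mask that pixel via the green band...
print(bands.r.mask[0, 0])   # ...and the red band reports it as masked too (True)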

In [23]:
# Utility functions: displaying an image

def _linear_scale(ndarray, old_min, old_max, new_min, new_max):
    """Linear scale from old_min to new_min, old_max to new_max.
    
    Values below min/max are allowed in input and output.
    Min/Max values are two data points that are used in the linear scaling.
    """
    #https://en.wikipedia.org/wiki/Normalization_(image_processing)
    return (ndarray - old_min)*(new_max - new_min)/(old_max - old_min) + new_min
# example: _linear_scale(np.array([1, 2, 10, 100, 256, 2560, 2660]), 2, 2560, 0, 256)

def _mask_to_alpha(bands):
    band = np.atleast_3d(bands)[...,0]
    alpha = np.zeros_like(band)
    alpha[~band.mask] = 1
    return alpha

def _add_alpha_mask(bands):
    return np.dstack([bands, _mask_to_alpha(bands)])

def bands_to_display(bands, alpha=True):
    """Converts a list of bands to a 3-band rgb, normalized array for display."""
    rgb_bands = np.dstack(bands[:3])

    old_min = np.percentile(rgb_bands, 2)
    old_max = np.percentile(rgb_bands, 98)
    new_min = 0
    new_max = 1
    scaled = _linear_scale(rgb_bands.astype(np.double),
                           old_min, old_max, new_min, new_max)
    bands = np.clip(scaled, new_min, new_max)
    if alpha is True:
        bands = _add_alpha_mask(bands)
    return bands
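
Before applying it to the scene, here is a hedged sketch on synthetic values (not scene data) of what bands_to_display does: the 2nd/98th percentile stretch maps most values into [0, 1] and clips the tails.


In [ ]:
# synthetic band for illustration only
fake_band = np.linspace(0, 4000, 100).reshape(10, 10)
rgb = bands_to_display([fake_band, fake_band, fake_band], alpha=False)
print(rgb.shape, rgb.min(), rgb.max())  # expect (10, 10, 3) 0.0 1.0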

In [24]:
plt.figure()
bands = load_masked_bands(train_scene_cropped)
plt.imshow(bands_to_display([bands.r, bands.g, bands.b]))


Out[24]:
<matplotlib.image.AxesImage at 0x7f9375d54940>

Visualize Ground Truth Data over Image

To ensure accurate alignment between the Planet scene and the ground truth data, we visualize them overlaid in a geographic reference system.

Define Layer for cropped Planet scene

First we project the cropped Planet scene to WGS84 for display on the map. Then we scale the scene for display and save it as an 8-bit jpeg. Finally, we define the image layer using the projected image bounds.

Leaflet appears to support local image files when they are jpegs.


In [25]:
# Utility functions: creating an image layer for display on a map

def _save_display_image(src_filename, dst_filename):
    # convert to rgb and scale to 8-bit
    bands = load_masked_bands(src_filename)
    img = bands_to_display([bands.r, bands.g, bands.b])

    # save as jpeg
    if os.path.isfile(dst_filename):
        os.remove(dst_filename)
    matplotlib.image.imsave(dst_filename, img)

def create_image_layer(filename):
    with tempfile.NamedTemporaryFile(suffix='.tif') as temp_file:
        projected_filename = temp_file.name

        # project to wgs84
        dst_srs = 'epsg:4326' #WGS84
        warp(filename, projected_filename, dst_srs=dst_srs)
        
        # save as jpeg
        display_image = os.path.join('data', 'display.jpg')
        _save_display_image(projected_filename, display_image)
        
        # determine image layer bounds
        (minx, miny, maxx, maxy) = rasterio.open(projected_filename).bounds
        sw = [miny, minx]
        ne = [maxy, maxx]

        # Create image layer
        return ipyl.ImageOverlay(url=display_image, bounds=[sw, ne])

Define layer for ground truth data


In [26]:
def create_feature_layer(features):
    # Assign colors to classes

    # Class descriptions can be found in datasets-identify notebook
    agg_classes = ['G', 'R', 'F', 'P', 'T', 'D', 'C', 'V']

    # colors determined using [colorbrewer2.org](http://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3)
    colors = ['#ffffd9','#edf8b1','#c7e9b4','#7fcdbb','#41b6c4','#1d91c0','#225ea8','#0c2c84']

    class_colors = dict((a,c) for a,c in zip(agg_classes, colors))

    def get_color(cls):
        return class_colors[cls]

    feature_collection = {
        "type": "FeatureCollection",
        "features": features
    }

    for f in feature_collection['features']:
        feature_color = get_color(f['properties']['CLASS1'])
        f['properties']['style'] = {
            'color': feature_color,
            'weight': 1,
            'fillColor': feature_color,
            'fillOpacity': 0.1}

    return ipyl.GeoJSON(data=feature_collection)

In [27]:
zoom = 13
center = [38.30933576918588, -121.55410766601564] # lat/lon
map_tiles = ipyl.TileLayer(url='http://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}.png')

data_map = ipyl.Map(
    center=center, 
    zoom=zoom,
    default_tiles = map_tiles
)

data_map.add_layer(create_image_layer(train_scene_cropped))
data_map.add_layer(create_feature_layer(train_ground_truth_data))

# display
data_map


gdalwarp -s_srs epsg:32610 -t_srs epsg:4326 -overwrite pre-data/train_scene_cropped.tif /tmp/tmpc904cotg.tif

Awesome! The data looks nicely registered to the imagery and the crop outlines don't appear to have changed much over the years.

Crop Test Image to AOI

Repeat the above procedures for the test image.


In [28]:
test_scene_cropped = os.path.join(predata_dir, 'test_scene_cropped.tif')
warp(test_scene, test_scene_cropped, crop_filename=test_aoi_filename, overwrite=False, compress=True)


pre-data/test_scene_cropped.tif already exists. Aborting warp of data/test/20160831_180257_0e26_3B_AnalyticMS.tif.

In [29]:
test_scene_cropped_metadata = os.path.join(predata_dir, 'test_scene_cropped_metadata.xml')
shutil.copyfile(test_scene_metadata, test_scene_cropped_metadata)


Out[29]:
'pre-data/test_scene_cropped_metadata.xml'

Visualize cropped image


In [30]:
plt.figure()
bands = load_masked_bands(test_scene_cropped)
plt.imshow(bands_to_display([bands.r, bands.g, bands.b]))


Out[30]:
<matplotlib.image.AxesImage at 0x7f936cb1a2e8>

In [ ]: