In this notebook we focus on a region in the Dominican Republic of Congo where roads have been built into the forest. In the area of interest (AOI) we are using, the roads were built between September and November 2017.
Because the AOI overlaps the OrthoTile grid cells, but only covers a portion of each OrthoTile, this notebook demonstrates working with OrthoTile strips to identify significantly overlapping strips, accessing OrthoTiles as Cloud-Optimized Geotiffs (COGs), and mosaicing multipls COGs on download. Additionally, this notebook demonstrates use of the planet client downloader to manage activation, download, and mosaicing of multiple scenes across multiple strips.
This notebook does the following:
Keywords: Cloud-optimized geotiff, COG, orthotiles, orthotile strips, mosaicing, search, download, downloader
In [1]:
import datetime
import json
import os
import subprocess
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from planet import api
from planet.api import downloader, filters
import rasterio
from shapely import geometry as sgeom
from shapely.ops import cascaded_union
The AOI is a region in the Democratic Republic of Congo that experiences road development between September and November 2017. It is a rectangle that overlaps orthotile grid cell boundaries. Usually, we would redefine the AOI to be within an Orthotile, but we would lose a lot of context if we limited this AOI to only one Orthotile grid cell.
In [2]:
aoi = {"geometry": {
"type":"Polygon",
"coordinates":
[[
[25.42429478260258,1.0255377823058893],
[25.592960813580472,1.0255377823058893],
[25.592960813580472,1.1196578801254304],
[25.42429478260258,1.1196578801254304],
[25.42429478260258,1.0255377823058893]
]]}}
item_type = 'PSOrthoTile'
In [3]:
aoi_shape = sgeom.shape(aoi['geometry'])
aoi_shape
Out[3]:
In [4]:
planet_api_key = os.environ['PL_API_KEY']
# quick check that key is defined
assert planet_api_key, "PL_API_KEY not defined."
client = api.ClientV1(api_key=planet_api_key)
In [5]:
# utility functions for searching for scenes that cover the aoi between given dates
def build_ps_request(aoi, item_type, begin, end):
search_aoi = aoi['geometry']
query = filters.and_filter(
filters.geom_filter(search_aoi),
filters.range_filter('cloud_cover', lt=5),
filters.date_range('acquired', gte=begin),
filters.date_range('acquired', lt=end)
)
# build a request for only PlanetScope imagery
request = filters.build_search_request(
query, item_types=[item_type]
)
return request
def get_monthly_stats(client, request):
stats_request = request.copy()
stats_request['interval'] = 'month'
return client.stats(stats_request).get()
begin=datetime.datetime(year=2017,month=9,day=1)
end=datetime.datetime(year=2017,month=12,day=1)
print(json.dumps(get_monthly_stats(client, build_ps_request(aoi, item_type, begin, end)),
indent=4))
In [6]:
def search_pl_api(client, request, limit=500):
result = client.quick_search(request)
# note that this returns a generator
return result.items_iter(limit=limit)
items = list(search_pl_api(client, build_ps_request(aoi, item_type, begin, end)))
print(len(items))
# uncomment below to see entire metadata for a landsat scene
# print(json.dumps(items[0], indent=4))
del items
In [7]:
def items_to_scenes(items):
item_types = []
def _get_props(item):
props = item['properties']
# add data not in properties list
props.update({
'thumbnail': item['_links']['thumbnail'],
'id': item['id'],
'footprint': item['geometry'],
})
return props
scenes = pd.DataFrame(data=[_get_props(i) for i in items])
# convert acquired from string to datetime for processing
scenes['acquired'] = pd.to_datetime(scenes['acquired'])
return scenes
scenes = items_to_scenes(search_pl_api(client, build_ps_request(aoi, item_type, begin, end)))
scenes.head()
Out[7]:
When the API searches for a scene that overlaps a given AOI, it uses the scene extent. However, we are interested in the scene footprint. That is, we don't care if a portion of a scene with no data overlaps the AOI. We want to filter those scenes out.
In [8]:
def aoi_intersection(footprint, aoi):
aoi_shape = sgeom.shape(aoi['geometry'])
footprint_shape = sgeom.shape(footprint)
intersection_shape = aoi_shape.intersection(footprint_shape)
try:
intersection_percent = 100 * footprint_shape.area / intersection_shape.area
except ZeroDivisionError:
intersection_percent = 0
data = {'intersection_shape': intersection_shape,
'intersection_fp_perc': intersection_percent}
return pd.Series(data=data)
intersections = scenes.footprint.apply(aoi_intersection, args=(aoi,))
scenes_inter = pd.concat([scenes, intersections], axis=1, sort=False)
In [9]:
# filter out scenes with no intersection
scenes_inter = scenes_inter[scenes_inter.intersection_fp_perc > 0]
len(scenes_inter)
Out[9]:
In [10]:
scenes_sid = scenes_inter.groupby(['strip_id'])
print('{} intersecting strips'.format(scenes_sid.ngroups))
In [11]:
def get_date(group):
dates = set([a.date() for a in group['acquired']])
assert len(dates) == 1
return min(dates)
strip_date = scenes_sid.apply(get_date)
strip_date.head()
Out[11]:
In [12]:
def get_strip_aoi_inter(group, aoi):
'''group: data frame with strip id as index'''
intersections = group['intersection_shape'].tolist()
intersection_shape = cascaded_union(intersections)
aoi_shape = sgeom.shape(aoi['geometry'])
try:
intersection_percent = 100 * intersection_shape.area / aoi_shape.area
except ZeroDivisionError:
intersection_percent = 0
data = {'strip_intersection_shape': intersection_shape,
'strip_intersection_aoi_perc': intersection_percent}
return pd.Series(data=data)
# with help from: https://stackoverflow.com/a/43616001/2344416
strip_aoi_inter = scenes_sid.apply(get_strip_aoi_inter, aoi=aoi)
strip_aoi_inter.head()
Out[12]:
In [13]:
# what does the distribution of intersection percent of aoi look like?
strip_aoi_inter.hist(bins=10)
Out[13]:
In [14]:
# add acquisition date information before filtering
strips = strip_aoi_inter.assign(acquisition_date=strip_date)
In [15]:
# filter to strips that have significant overlap
strips_filt = strips[strips.strip_intersection_aoi_perc > 80]
print('{} strips with significant overlap'.format(len(strips_filt)))
In [16]:
# what are the collection dates of strips with significant overlap?
strips_filt.acquisition_date
Out[16]:
In [17]:
overlapping_strip_ids = strips_filt.index.tolist()
overlapping_strip_ids
Out[17]:
In [18]:
# filter to scenes that are in the resulting strips
overlapping_scenes = scenes[scenes['strip_id'].isin(overlapping_strip_ids)]
In [19]:
print('There are {} OrthoTiles in {} strips that significantly overlap the aoi.'.format(
len(overlapping_scenes), len(overlapping_strip_ids)))
In [20]:
# save overlapping scenes dataframe for use in other notebooks
# uncomment to save
# overlapping_scenes_filename = os.path.join('pre-data', 'overlapping-scenes')
# overlapping_scenes.to_pickle(overlapping_scenes_filename)
In [21]:
def get_overlapping_scenes(aoi, begin, end, client, item_type, overlap_perc=80):
# get all scenes with overlapping bounds
request = build_ps_request(aoi, item_type, begin, end)
items = search_pl_api(client, request)
scenes = items_to_scenes(items)
print('{} scenes with intersecting borders'.format(len(scenes)))
# filter to scenes where the footprint overlaps
intersections = scenes.footprint.apply(aoi_intersection, args=(aoi,))
scenes_inter = pd.concat([scenes, intersections], axis=1, sort=False)
scenes_inter = scenes_inter[scenes_inter.intersection_fp_perc > 0]
print('{} scenes with intersecting footprints'.format(len(scenes_inter)))
# filter to strips with significant overlap
scenes_sid = scenes_inter.groupby(['strip_id'])
strip_aoi_inter = scenes_sid.apply(get_strip_aoi_inter, aoi=aoi)
print('{} intersecting strips'.format(scenes_sid.ngroups))
strip_date = scenes_sid.apply(get_date)
strips = strip_aoi_inter.assign(acquisition_date=strip_date)
strips_filt = strips[strips.strip_intersection_aoi_perc > overlap_perc]
print('{} strips with significant overlap'.format(len(strips_filt)))
# filter to scenes that are in resulting strips
overlapping_strip_ids = strips_filt.index.tolist()
overlapping_scenes = scenes[scenes['strip_id'].isin(overlapping_strip_ids)]
print('There are {} OrthoTiles in {} strips that significantly overlap the aoi'.format(
len(overlapping_scenes), len(overlapping_strip_ids)))
return overlapping_scenes
# run again with the same inputs to make sure we get the same results
_ = get_overlapping_scenes(aoi, begin, end, client, item_type)
Because the AOI crosses over Orthotile grid lines, we need multiple orthotiles from one strip to obtain the image that overlaps the AOI. But the portion of each Orthotile that overlaps the AOI is small relative to the orthotile. Therefore, we only want to download the pixels in the orthotile that overlap the AOI. We will accomplish this by accessing the Orthotiles as Cloud-Optimized Geotiffs (COGs).
This is a variation of the COG activation and download performed in temporal-analysis/crop-temporal.ipynb
. For this notebook, we wait until all orthotiles in a strip are activated then we download the COGs together, using gdalwarp
to perform the download as well as the mosaicing.
First, we will go through this exercise with one strip, then we will move onto multiple strips.
In [22]:
asset_type = 'analytic'
In [23]:
strip_id = overlapping_strip_ids[0]
print(strip_id)
strip_scenes = scenes[scenes['strip_id'] == strip_id]
strip_scenes
Out[23]:
We use the planet api client downloader to handle activation of the scenes. The downloader handles activation, polling activation status, and (if desired), downloading. Because we are using remote COGs, we do not want to download the scene. However, the downloader can still help us out. It has a cool feature where you can provide it with a function to call when a scene is activated.
In this section, we will provide it with a function that records the scene id and download url and checks to see if all scenes in the strip are activated. The function is actually just a method of a class (Tracker
), which maintains a dataset of scene ids and download urls and a list of scene ids in the strip. The method updates the scene id list when it is called by the downloader. Also, it checks to see if all scenes in the strip have been activated. In the future, we will update this part so that when all scenes in a strip are activated, a download and mosaic is triggered.
In [24]:
# create a downloader that will handle scene activation
dl = downloader.create(client)
In [25]:
# This class keeps track of activated scene download urls and the strip scene id's
# It also creates the `on_complete` partial function, which can be called
# by the downloader to update the list of scene download urls and check to see if all
# scenes in the strip are activated
class Tracker(object):
def __init__(self, strip_scenes):
self.urls = dict()
self.strip_scenes = set(strip_scenes)
def get_on_complete(self):
def on_complete(item, asset):
self.urls[item['id']] = asset['location']
print('{}:{}'.format(item['id'], asset['location']))
if self._got_all_strip_scenes():
print('strip complete')
return on_complete
def _got_all_strip_scenes(self):
return self.strip_scenes.intersection(set(self.urls)) == self.strip_scenes
# create the function that keeps track of the download urls and checks to see if all
# scenes in the strip are activated
strip_scene_ids = strip_scenes.id.tolist()
tracker = Tracker(strip_scene_ids)
dl.on_complete = tracker.get_on_complete()
In [26]:
# the downloader works with items, so get item object for each scene id
strip_scene_items = (client.get_item(item_type, sid).get()
for sid in strip_scenes.id.tolist())
In [27]:
# use the downloader to activate the scenes and get the download urls
dl.shutdown()
dl.activate(strip_scene_items, [asset_type])
Out[27]:
Ok! All scenes in the strip are activated. Now it's time to download them. We are using gdalwarp
to download the scenes, and it turns out gdalwarp
is also used for mosaicing scenes, so we are going to download and mosaic the scenes all in one step.
gdalwarp
requires the aoi used to crop the COG be saved to disk. We want the AOI to persist in git for use elsewhere so we save it in the pre-data
folder.
In [28]:
def create_save_dir(root_dir='data'):
save_dir = root_dir
if not os.path.isdir(save_dir):
os.makedirs(save_dir)
return save_dir
In [29]:
def save_geojson_file(aoi_geojson, save_dir):
filename = os.path.join(save_dir, 'aoi.geojson')
with open(filename, "w") as f:
f.write(json.dumps(aoi_geojson))
return filename
geojson_filename = save_geojson_file(aoi, create_save_dir('pre-data'))
print('wrote AOI to {}'.format(geojson_filename))
In [36]:
def create_output_filename(strip_id, save_dir):
create_save_dir(save_dir)
filename = os.path.join(save_dir, strip_id + '_analytic_mosaic.tif')
return filename
output_file = create_output_filename(strip_id, 'data')
output_file
Out[36]:
In [37]:
(item_ids, download_urls) = zip(*tracker.urls.items())
item_ids
Out[37]:
In [38]:
import tempfile
In [39]:
# we use gdalwarp to only download the aoi portion of the COGs and mosaic them in one step
def _gdalwarp(input_filenames, output_filename, options, verbose=False):
commands = ['gdalwarp'] + options + \
['-overwrite'] + \
input_filenames + \
[output_filename]
if verbose: print(' '.join(commands))
subprocess.check_call(commands)
# lossless compression of an image
def _compress(input_filename, output_filename, verbose=False):
commands = ['gdal_translate',
'-co', 'compress=LZW',
'-co', 'predictor=2',
input_filename,
output_filename]
if verbose: print(' '.join(commands))
subprocess.check_call(commands)
def download_strip_aoi(download_urls, output_filename, geojson_filename, compress=True, verbose=False):
vsicurl_urls = ['/vsicurl/' + d for d in download_urls]
options = [
'-cutline', geojson_filename,
'-crop_to_cutline',
]
if compress:
with tempfile.NamedTemporaryFile(suffix='.vrt') as vrt_file:
options += ['-of', 'vrt']
_gdalwarp(vsicurl_urls, vrt_file.name, options, verbose=verbose)
_compress(vrt_file.name, output_filename, verbose=verbose)
else:
_gdalwarp(vsicurl_urls, output_filename, options, verbose=verbose)
download_strip_aoi(download_urls, 'data/739199_analytic_mosaic_comp.tif', geojson_filename, verbose=True)
download_strip_aoi(download_urls, output_file, geojson_filename, compress=False, verbose=True)
In [34]:
# load local visual module
import visual
In [35]:
def load_4band(filename):
with rasterio.open(filename, 'r') as src:
# visual band ordering: red, green, blue, alpha
b, g, r, n = src.read()
# NoData value is 0
mask = b == 0
return [np.ma.array(band, mask=mask) for band in [b, g, r, n]]
def visualize_4band(bgrn_bands, title='Orthotile Strip Mosaic', figdim=15):
rgb_bands = [bgrn_bands[i] for i in [2, 1, 0]]
visual.plot_image(rgb_bands, title=title, figsize=(figdim, figdim))
print(output_file)
visualize_4band(load_4band(output_file))