In this notebook, we prepare the 2016 from the USDA 2016 Crop Data Layer (CDL) for Iowa for use in the crop classification notebooks. This involves taking the CDL data for Iowa, projecting, cropping, and resampling it to match the Planet scene. We perform these steps for the crop classification test and train Planet scenes.
The end results will be saved in pre-data for use in those notebooks. The initial data is too large to be managed gracefully in git.
In [63]:
import os
import pathlib
from subprocess import check_output, STDOUT, CalledProcessError
import tempfile
import rasterio
In this section, we download the 2016 Iowa CDL.
2016 CDL for Iowa is obtained through the CropScape site. On that site, ensure the layer is 'CropLand Data Layers -> 2016', then click the icon for 'Define Area of Interest by State...' (looks like an outline of the US with the US flag design). Under 'Select a State', select Iowa and click 'Submit.' Next, click the icon for 'Download Defined Area of Interest Data' (looks like a postal letter with an arrow). In the popup, ensure the 'CDL' tab is open and '2016' is selected, then click 'Select.' Another popup should appear with 'Please Wait...' then after a while, the popup should be replaced by a popup that says 'Download files from server'. Click 'Download' and after a bit the download should begin.
Unzip the downloaded folder and move the tif, CDL_2016_19.tif to the data folder.
In [20]:
# ensure the tif file is present
cdl_full = os.path.join('data', 'CDL_2016_19.tif')
print(cdl_full)
assert os.path.isfile(cdl_full)
The first step in preparing the train CDL dataset is downloading the train scene. We also download the metadata and udm because those are used in another notebook.
The train scene is 210879_1558814_2016-07-25_0e16.
In [21]:
# define and, if necessary, create data directory
train_folder = os.path.join('data', 'cart', '210879_1558814_2016-07-25_0e16')
pathlib.Path(train_folder).mkdir(parents=True, exist_ok=True)
train_scene = os.path.join(train_folder, '210879_1558814_2016-07-25_0e16_BGRN_Analytic.tif')
In [22]:
# First test if scene file exists, if not, use the Planet commandline tool to download the image, metadata, and udm.
# This command assumes a bash shell, available in Unix-based operating systems.
!test -f $train_scene || \
planet data download \
--item-type PSOrthoTile \
--dest $train_folder \
--asset-type analytic,analytic_xml,udm \
--string-in id 210879_1558814_2016-07-25_0e16
In [27]:
# Utility functions: crop, resample, and project an image
# These use gdalwarp. for a description of gdalwarp command line options, see:
# http://www.gdal.org/gdalwarp.html
def gdalwarp_project_options(src_crs, dst_crs):
return ['-s_srs', src_crs.to_string(), '-t_srs', dst_crs.to_string()]
def gdalwarp_crop_options(bounds, crs):
xmin, ymin, xmax, ymax = [str(b) for b in bounds]
# -te xmin ymin xmax ymax
return ['-te', xmin, ymin, xmax, ymax]
def gdalwarp_resample_options(width, height, technique='near'):
# for technique options, see: http://www.gdal.org/gdalwarp.html
return ['-ts', width, height, '-r', technique]
def gdalwarp(input_filename, output_filename, options):
commands = _gdalwarp_commands(input_filename, output_filename, options)
# print error if one is encountered
# https://stackoverflow.com/questions/29580663/save-error-message-of-subprocess-command
try:
output = check_output(commands, stderr=STDOUT)
except CalledProcessError as exc:
print(exc.output)
def _gdalwarp_commands(input_filename, output_filename, options):
commands = ['gdalwarp'] + options + \
['-overwrite',
input_filename,
output_filename]
print(' '.join(commands))
return commands
def _test():
TEST_DST_SCENE = train_scene
TEST_SRC_SCENE = cdl_full
with rasterio.open(TEST_DST_SCENE, 'r') as dst:
with rasterio.open(TEST_SRC_SCENE, 'r') as src:
print(gdalwarp_project_options(src.crs, dst.crs))
print(gdalwarp_crop_options(dst.bounds, dst.crs))
print(gdalwarp_resample_options(dst.width, dst.height))
# _test()
In [54]:
# lossless compression of an image
def _compress(input_filename, output_filename):
commands = ['gdal_translate',
'-co', 'compress=LZW',
'-co', 'predictor=2',
input_filename,
output_filename]
print(' '.join(commands))
# subprocess.check_call(commands)
# print error if one is encountered
# https://stackoverflow.com/questions/29580663/save-error-message-of-subprocess-command
try:
output = check_output(commands, stderr=STDOUT)
except CalledProcessError as exc:
print(exc.output)
In [55]:
def prepare_cdl_image(cdl_filename, dst_filename, out_filename, compress=False, overwrite=True):
'''Project, crop, and resample cdl image to match dst_filename image.'''
with rasterio.open(cdl_filename, 'r') as src:
with rasterio.open(dst_filename, 'r') as dst:
# project
src_crs = _add_nad_datum(src.crs) # Manually add NAD83 datum
proj_options = gdalwarp_project_options(src_crs, dst.crs)
# crop
crop_options = gdalwarp_crop_options(dst.bounds, dst.crs)
# resample
width, height = dst.shape
resample_options = gdalwarp_resample_options(str(width), str(height), 'near')
options = proj_options + crop_options + resample_options
# check to see if output file exists, if it does, do not warp
if os.path.isfile(dst_filename) and not overwrite:
print('{} already exists. Aborting warp of {}'.format(dst_filename, cdl_filename))
elif compress:
with tempfile.NamedTemporaryFile(suffix='.vrt') as vrt_file:
options += ['-of', 'vrt']
gdalwarp(cdl_filename, vrt_file.name, options)
_compress(vrt_file.name, out_filename)
else:
print(options)
gdalwarp(cdl_filename, out_filename, options)
def _add_nad_datum(crs):
'''Rasterio is not reading the datum for the CDL image so add it manually'''
crs.update({'datum': 'NAD83'})
return crs
def _test(delete=True):
TEST_DST_SCENE = train_scene
TEST_SRC_SCENE = cdl_full
with tempfile.NamedTemporaryFile(suffix='.tif', delete=delete, dir='.') as out_file:
# create output
prepare_cdl_image(TEST_SRC_SCENE, TEST_DST_SCENE, out_file.name)
# check output
with rasterio.open(TEST_DST_SCENE, 'r') as dst:
with rasterio.open(out_file.name, 'r') as src:
assert dst.crs == src.crs, '{} != {}'.format(src.crs, dst.crs)
assert dst.bounds == src.bounds
assert dst.shape == src.shape
# _test()
In [56]:
# define and, if necessary, create pre-data directory
predata_folder = 'pre-data'
pathlib.Path(predata_folder).mkdir(parents=True, exist_ok=True)
In [57]:
# create train dataset gold image from CDL image
train_CDL_filename = os.path.join(predata_folder, 'CDL_2016_19_train.tif')
prepare_cdl_image(cdl_full, train_scene, train_CDL_filename, overwrite=False, compress=True)
Now we prepare the test CDL dataset.
The test scene is 210863_1559015_2016-07-25_0e0f.
In [59]:
# define and, if necessary, create data directory
test_folder = os.path.join('data', 'cart', '210863_1559015_2016-07-25_0e0f')
pathlib.Path(test_folder).mkdir(parents=True, exist_ok=True)
test_scene = os.path.join(test_folder, '210863_1559015_2016-07-25_0e0f_BGRN_Analytic.tif')
In [64]:
# First test if scene file exists, if not, use the Planet commandline tool to download the image, metadata, and udm.
# This command assumes a bash shell, available in Unix-based operating systems.
!test -f $test_scene || \
planet data download \
--item-type PSOrthoTile \
--dest $test_folder \
--asset-type analytic,analytic_xml,udm \
--string-in id 210863_1559015_2016-07-25_0e0f
In [67]:
# create train dataset gold image from CDL image
test_CDL_filename = os.path.join(predata_folder, 'CDL_2016_19_test.tif')
prepare_cdl_image(cdl_full, test_scene, test_CDL_filename, overwrite=False, compress=True)
In [ ]: