Automated ordering, download, indexing of Landsat USGS data for AGDCv2

Import the required libraries

In [1]:
from __future__ import print_function, division

import os, json, requests, time, getpass
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import gzip
    from urllib.parse import urlparse, urljoin
except ImportError:
    from urlparse import urlparse, urljoin

Register with ESPA and enter your credentials below

In [3]:
working_dir = os.path.abspath("/g/data/u46/users/gxr547")
data_dir = os.path.abspath('/g/data/u46/users/gxr547/espa_test')

In [4]:
from requests.auth import HTTPBasicAuth, HTTPDigestAuth
#password = getpass.getpass(prompt='Enter password for %s: '%username)
auth = HTTPBasicAuth(getpass.getpass(prompt=' username:'),
                      getpass.getpass(prompt=' password:')) username:········ password:········

Download auxilary data

In [5]:
def download_file(url, output_dir, overwrite=False):
    local_filename = os.path.join(output_dir,url.split('/')[-1])
    if not overwrite and os.path.exists(local_filename):
        print(local_filename, 'already exists')
    print('downloading', url, '->', local_filename)
    r = requests.get(url, stream=True)
    with open(local_filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024): 
            if chunk: # filter out keep-alive new chunks
    return local_filename

inventory data from USGS

In [36]:
landsat_csv_list = ["LANDSAT_8.csv.gz",
metadata_file_url = ""

for csv in landsat_csv_list:
    download_file(urljoin(metadata_file_url, csv), working_dir)

/g/data/u46/users/gxr547/LANDSAT_8.csv.gz already exists
/g/data/u46/users/gxr547/LANDSAT_ETM.csv.gz already exists
/g/data/u46/users/gxr547/LANDSAT_ETM_SLC_OFF.csv.gz already exists
/g/data/u46/users/gxr547/LANDSAT_TM-1980-1989.csv.gz already exists
/g/data/u46/users/gxr547/LANDSAT_TM-1990-1999.csv.gz already exists
/g/data/u46/users/gxr547/LANDSAT_TM-2000-2009.csv.gz already exists
/g/data/u46/users/gxr547/LANDSAT_TM-2010-2012.csv.gz already exists

product definitions and prep scripts

In [37]:
files = ['',
for url in files:
    download_file(url, working_dir, True)

downloading -> /g/data/u46/users/gxr547/ls5_scenes.yaml
downloading -> /g/data/u46/users/gxr547/ls7_scenes.yaml
downloading -> /g/data/u46/users/gxr547/ls8_scenes.yaml
downloading -> /g/data/u46/users/gxr547/
downloading -> /g/data/u46/users/gxr547/

Determine which WRS path/rows intersect the area of interest

Define the AOI

In [9]:
ul_lon, ul_lat = 77.83, 17.84 # upper left longitude, latitude
lr_lon, lr_lat = 78.00, 17.67 # lower right longitude, latitude
date_start, date_end = "2015-01-11"  , "2016-12-12" # start date and end date for time range selection  
sensor_list=["olitirs8", "tm5", "etm7"] # list of sensors to use

polygon_list = [[ul_lon, ul_lat], [lr_lon, ul_lat],[lr_lon, lr_lat],[ul_lon, lr_lat],[ul_lon, ul_lat]]

wrs_query = ''+str(polygon_list)

Determine which WRS2 path/rows cover the AOI

In [10]:
post_query = requests.get(wrs_query)
wrs_search_result = json.loads(post_query.text)

path_row = []
for item in wrs_search_result['results']:


Find scenes matching the criteria

In [16]:
scene_list = []

for csv in landsat_csv_list:
    collection =, csv), 'rb')

    data_inventory = pd.read_csv(collection , usecols=['acquisitionDate', "sceneID", "path", "row"]) # limit the columns to only the ones we need
    data_inventory["path_row"] = data_inventory["path"].map(str) + "_" + data_inventory["row"].map(str)
    data_inventory['acquisitionDate'] = data_inventory['acquisitionDate'].apply(pd.to_datetime)
    data_inventory = data_inventory.loc[(data_inventory['acquisitionDate'] >= pd.to_datetime(date_start)) &
                                        (data_inventory['acquisitionDate'] <= pd.to_datetime(date_end)) &

Submit an ESPA order for the SR product for the scene list

Filter unavailable scenes out

In [22]:
for sensor, scenes in available_scenes.items():
            "products": ["sr"],
            "inputs": scenes,
        "format": "gtiff",
        "resize": {"pixel_size": 30, "pixel_size_units": "meters"}}

Submit the orders

In [28]:
for json_item in json_order:
    place_order ='', json=json_item, auth=auth)

  "orderid": "",
  "status": 200
  "orderid": "",
  "status": 200

Get current orders

In [12]:
current_orders = requests.get('', auth=auth).json()['orders']
for order in current_orders:
    order_status = requests.get(''+order, auth=auth)
    status = order_status.json()['status']
    print(order, status) complete complete complete complete

download the data as it becomes available

In [ ]:
downloaded = set()

while current_orders:
    completed = set()
    for order in current_orders.copy():
        status = requests.get(''%order, auth=auth).json()
        completed.update(item['product_dload_url'] for item in status['orderid'][order] if item['status'] == 'complete')
        pending = [item for item in status['orderid'][order] if item['status'] != 'complete']
        if not pending:

    for url in completed - downloaded:
        download_file(url, data_dir)