Automated ordering, download and indexing of USGS Landsat data for AGDCv2

Import the required libraries


In [1]:
from __future__ import print_function, division

import os, json, requests, time, getpass
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import gzip
try:
    from urllib.parse import urlparse, urljoin
except ImportError:
    from urlparse import urlparse, urljoin

Register with ESPA at https://espa.cr.usgs.gov and enter your credentials below.


In [3]:
working_dir = os.path.abspath("/g/data/u46/users/gxr547")
data_dir = os.path.abspath('/g/data/u46/users/gxr547/espa_test')

In [4]:
from requests.auth import HTTPBasicAuth
auth = HTTPBasicAuth(getpass.getpass(prompt='https://espa.cr.usgs.gov username:'),
                     getpass.getpass(prompt='https://espa.cr.usgs.gov password:'))


https://espa.cr.usgs.gov username:········
https://espa.cr.usgs.gov password:········

Download auxiliary data


In [5]:
def download_file(url, output_dir, overwrite=False):
    local_filename = os.path.join(output_dir, url.split('/')[-1])
    if not overwrite and os.path.exists(local_filename):
        print(local_filename, 'already exists')
        return
    print('downloading', url, '->', local_filename)
    r = requests.get(url, stream=True)
    with open(local_filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
    return local_filename

Inventory data from USGS


In [36]:
landsat_csv_list = ["LANDSAT_8.csv.gz",
                    "LANDSAT_ETM.csv.gz",
                    "LANDSAT_ETM_SLC_OFF.csv.gz",
                    "LANDSAT_TM-1980-1989.csv.gz",
                    "LANDSAT_TM-1990-1999.csv.gz",
                    "LANDSAT_TM-2000-2009.csv.gz",
                    "LANDSAT_TM-2010-2012.csv.gz"
                   ]
metadata_file_url = "https://landsat.usgs.gov/landsat/metadata_service/bulk_metadata_files/"

for csv in landsat_csv_list:
    download_file(urljoin(metadata_file_url, csv), working_dir)


/g/data/u46/users/gxr547/LANDSAT_8.csv.gz already exists
/g/data/u46/users/gxr547/LANDSAT_ETM.csv.gz already exists
/g/data/u46/users/gxr547/LANDSAT_ETM_SLC_OFF.csv.gz already exists
/g/data/u46/users/gxr547/LANDSAT_TM-1980-1989.csv.gz already exists
/g/data/u46/users/gxr547/LANDSAT_TM-1990-1999.csv.gz already exists
/g/data/u46/users/gxr547/LANDSAT_TM-2000-2009.csv.gz already exists
/g/data/u46/users/gxr547/LANDSAT_TM-2010-2012.csv.gz already exists

Product definitions and prep scripts


In [37]:
files = ['https://raw.githubusercontent.com/data-cube/agdc-v2/develop/docs/config_samples/dataset_types/ls5_scenes.yaml',
         'https://raw.githubusercontent.com/data-cube/agdc-v2/develop/docs/config_samples/dataset_types/ls7_scenes.yaml',
         'https://raw.githubusercontent.com/data-cube/agdc-v2/develop/docs/config_samples/dataset_types/ls8_scenes.yaml',
         'https://raw.githubusercontent.com/data-cube/agdc-v2/develop/utils/usgslsprepare.py',
         'https://raw.githubusercontent.com/USGS-EROS/espa-bulk-downloader/master/download_espa_order.py',
        ]
for url in files:
    download_file(url, working_dir, True)


downloading https://raw.githubusercontent.com/data-cube/agdc-v2/develop/docs/config_samples/dataset_types/ls5_scenes.yaml -> /g/data/u46/users/gxr547/ls5_scenes.yaml
downloading https://raw.githubusercontent.com/data-cube/agdc-v2/develop/docs/config_samples/dataset_types/ls7_scenes.yaml -> /g/data/u46/users/gxr547/ls7_scenes.yaml
downloading https://raw.githubusercontent.com/data-cube/agdc-v2/develop/docs/config_samples/dataset_types/ls8_scenes.yaml -> /g/data/u46/users/gxr547/ls8_scenes.yaml
downloading https://raw.githubusercontent.com/data-cube/agdc-v2/develop/utils/usgslsprepare.py -> /g/data/u46/users/gxr547/usgslsprepare.py
downloading https://raw.githubusercontent.com/USGS-EROS/espa-bulk-downloader/master/download_espa_order.py -> /g/data/u46/users/gxr547/download_espa_order.py

Determine which WRS path/rows intersect the area of interest

Define the AOI


In [9]:
ul_lon, ul_lat = 77.83, 17.84  # upper left longitude, latitude
lr_lon, lr_lat = 78.00, 17.67  # lower right longitude, latitude
date_start, date_end = "2015-01-11", "2016-12-12"  # start and end dates for the time range selection
sensor_list = ["olitirs8", "tm5", "etm7"]  # ESPA sensor identifiers to order from

polygon_list = [[ul_lon, ul_lat], [lr_lon, ul_lat], [lr_lon, lr_lat], [ul_lon, lr_lat], [ul_lon, ul_lat]]

wrs_query = 'http://api.remotepixel.ca/wrs2tiles?search=poly:' + str(polygon_list)

Query the remotepixel.ca service for the WRS-2 path/rows covering the AOI


In [10]:
wrs_response = requests.get(wrs_query)
wrs_search_result = wrs_response.json()

path_row = []
for item in wrs_search_result['results']:
    path_row.append(str(item['path']) + "_" + str(item['row']))
path_row


Out[10]:
['144_48']

Find scenes matching the criteria


In [16]:
scene_list = []

for csv in landsat_csv_list:
    with gzip.open(os.path.join(working_dir, csv), 'rb') as collection:
        # limit the columns to only the ones we need
        data_inventory = pd.read_csv(collection, usecols=['acquisitionDate', 'sceneID', 'path', 'row'])
    data_inventory['path_row'] = data_inventory['path'].map(str) + '_' + data_inventory['row'].map(str)
    data_inventory['acquisitionDate'] = pd.to_datetime(data_inventory['acquisitionDate'])
    data_inventory = data_inventory.loc[(data_inventory['acquisitionDate'] >= pd.to_datetime(date_start)) &
                                        (data_inventory['acquisitionDate'] <= pd.to_datetime(date_end)) &
                                        data_inventory['path_row'].isin(path_row)]
    scene_list.extend(data_inventory['sceneID'].tolist())

Submit an ESPA order for the SR product for the scene list

Filter unavailable scenes out
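
The order cell further below expects a dictionary, available_scenes, mapping each ESPA sensor name to the scene IDs that can actually be ordered. A minimal sketch of one way to build it, assuming the ESPA available-products endpoint and the sensor_list defined above (verify the grouping keys in the response before ordering), is:


In [ ]:
resp = requests.post('https://espa.cr.usgs.gov/api/v0/available-products',
                     json={'inputs': scene_list}, auth=auth)
available = resp.json()

# keep only the sensors requested above; non-orderable groups such as
# 'not_implemented' come back as plain lists rather than dicts, so skip them
available_scenes = {sensor: details['inputs']
                    for sensor, details in available.items()
                    if sensor in sensor_list and isinstance(details, dict)}
available_scenes

Build one order payload per sensor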


In [22]:
json_order = []
for sensor, scenes in available_scenes.items():
    order_details = {
        sensor: {
            "products": ["sr"],
            "inputs": scenes,
        },
        "format": "gtiff",
        "resize": {"pixel_size": 30, "pixel_size_units": "meters"},
    }
    json_order.append(order_details)

Submit the orders


In [28]:
for json_item in json_order:
    place_order = requests.post('https://espa.cr.usgs.gov/api/v0/order', json=json_item, auth=auth)
    print(place_order.text)


{
  "orderid": "gregory.raevski@ga.gov.au-01092017-175003-675",
  "status": 200
}
{
  "orderid": "gregory.raevski@ga.gov.au-01092017-175005-695",
  "status": 200
}

Get current orders


In [12]:
current_orders = requests.get('https://espa.cr.usgs.gov/api/v0/list-orders/', auth=auth).json()['orders']
for order in current_orders:
    order_status = requests.get('https://espa.cr.usgs.gov/api/v0/order/'+order, auth=auth)
    status = order_status.json()['status']
    print(order, status)


gregory.raevski@ga.gov.au-01092017-175003-675 complete
gregory.raevski@ga.gov.au-01092017-175005-695 complete
gregory.raevski@ga.gov.au-01032017-175950-043 complete
gregory.raevski@ga.gov.au-01032017-180639-765 complete

Download the data as it becomes available


In [ ]:
downloaded = set()

while current_orders:
    completed = set()
    for order in list(current_orders):
        status = requests.get('https://espa.cr.usgs.gov/api/v0/item-status/%s' % order, auth=auth).json()
        completed.update(item['product_dload_url'] for item in status['orderid'][order]
                         if item['status'] == 'complete')
        pending = [item for item in status['orderid'][order] if item['status'] != 'complete']
        if not pending:
            current_orders.remove(order)

    for url in completed - downloaded:
        download_file(url, data_dir)
        downloaded.add(url)

    time.sleep(30)
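
Index the data into AGDCv2

The product definitions and usgslsprepare.py downloaded earlier are used for this step. A rough sketch, assuming the downloaded ESPA archives are extracted into per-scene directories under data_dir and a datacube database is already configured (command names follow the agdc-v2 documentation; check them against your installation):


In [ ]:
import glob, subprocess, tarfile

# extract each downloaded ESPA archive into its own directory under data_dir
for archive in glob.glob(os.path.join(data_dir, '*.tar.gz')):
    scene_dir = archive[:-len('.tar.gz')]
    if not os.path.exists(scene_dir):
        os.makedirs(scene_dir)
        with tarfile.open(archive) as tar:
            tar.extractall(scene_dir)

scene_dirs = [d for d in glob.glob(os.path.join(data_dir, '*')) if os.path.isdir(d)]

# register the product definitions with the Data Cube
for product_yaml in ['ls5_scenes.yaml', 'ls7_scenes.yaml', 'ls8_scenes.yaml']:
    subprocess.check_call(['datacube', 'product', 'add', os.path.join(working_dir, product_yaml)])

# generate agdc-metadata.yaml for each scene, then index the datasets
subprocess.check_call(['python', os.path.join(working_dir, 'usgslsprepare.py')] + scene_dirs)
for scene_dir in scene_dirs:
    subprocess.check_call(['datacube', 'dataset', 'add', scene_dir])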