In [1]:
from __future__ import print_function, division
import os, json, requests, time, getpass
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import gzip
try:
from urllib.parse import urlparse, urljoin
except ImportError:
from urlparse import urlparse, urljoin
In [3]:
working_dir = os.path.abspath("/g/data/u46/users/gxr547")
data_dir = os.path.abspath('/g/data/u46/users/gxr547/espa_test')
In [4]:
from requests.auth import HTTPBasicAuth, HTTPDigestAuth
#password = getpass.getpass(prompt='Enter password for %s: '%username)
auth = HTTPBasicAuth(getpass.getpass(prompt='https://espa.cr.usgs.gov username:'),
getpass.getpass(prompt='https://espa.cr.usgs.gov password:'))
In [5]:
def download_file(url, output_dir, overwrite=False):
local_filename = os.path.join(output_dir,url.split('/')[-1])
if not overwrite and os.path.exists(local_filename):
print(local_filename, 'already exists')
return
print('downloading', url, '->', local_filename)
r = requests.get(url, stream=True)
with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
return local_filename
In [36]:
landsat_csv_list = ["LANDSAT_8.csv.gz",
"LANDSAT_ETM.csv.gz",
"LANDSAT_ETM_SLC_OFF.csv.gz",
"LANDSAT_TM-1980-1989.csv.gz",
"LANDSAT_TM-1990-1999.csv.gz",
"LANDSAT_TM-2000-2009.csv.gz",
"LANDSAT_TM-2010-2012.csv.gz"
]
metadata_file_url = "https://landsat.usgs.gov/landsat/metadata_service/bulk_metadata_files/"
for csv in landsat_csv_list:
download_file(urljoin(metadata_file_url, csv), working_dir)
In [37]:
files = ['https://raw.githubusercontent.com/data-cube/agdc-v2/develop/docs/config_samples/dataset_types/ls5_scenes.yaml',
'https://raw.githubusercontent.com/data-cube/agdc-v2/develop/docs/config_samples/dataset_types/ls7_scenes.yaml',
'https://raw.githubusercontent.com/data-cube/agdc-v2/develop/docs/config_samples/dataset_types/ls8_scenes.yaml',
'https://raw.githubusercontent.com/data-cube/agdc-v2/develop/utils/usgslsprepare.py',
'https://raw.githubusercontent.com/USGS-EROS/espa-bulk-downloader/master/download_espa_order.py',
]
for url in files:
download_file(url, working_dir, True)
In [9]:
ul_lon, ul_lat = 77.83, 17.84 # upper left longitude, latitude
lr_lon, lr_lat = 78.00, 17.67 # lower right longitude, latitude
date_start, date_end = "2015-01-11" , "2016-12-12" # start date and end date for time range selection
sensor_list=["olitirs8", "tm5", "etm7"] # list of sensors to use
polygon_list = [[ul_lon, ul_lat], [lr_lon, ul_lat],[lr_lon, lr_lat],[ul_lon, lr_lat],[ul_lon, ul_lat]]
wrs_query = 'http://api.remotepixel.ca/wrs2tiles?search=poly:'+str(polygon_list)
In [10]:
post_query = requests.get(wrs_query)
wrs_search_result = json.loads(post_query.text)
path_row = []
for item in wrs_search_result['results']:
path_row.append(str(item['path'])+"_"+str(item['row']))
path_row
Out[10]:
In [16]:
scene_list = []
for csv in landsat_csv_list:
collection = gzip.open(os.path.join(working_dir, csv), 'rb')
data_inventory = pd.read_csv(collection , usecols=['acquisitionDate', "sceneID", "path", "row"]) # limit the columns to only the ones we need
data_inventory["path_row"] = data_inventory["path"].map(str) + "_" + data_inventory["row"].map(str)
data_inventory['acquisitionDate'] = data_inventory['acquisitionDate'].apply(pd.to_datetime)
data_inventory = data_inventory.loc[(data_inventory['acquisitionDate'] >= pd.to_datetime(date_start)) &
(data_inventory['acquisitionDate'] <= pd.to_datetime(date_end)) &
data_inventory['path_row'].isin(path_row)]
scene_list.extend(data_inventory['sceneID'].tolist())
In [22]:
json_order=[]
for sensor, scenes in available_scenes.items():
order_details={
sensor:
{
"products": ["sr"],
"inputs": scenes,
},
"format": "gtiff",
"resize": {"pixel_size": 30, "pixel_size_units": "meters"}}
json_order.append(order_details)
In [28]:
for json_item in json_order:
place_order = requests.post('https://espa.cr.usgs.gov/api/v0/order', json=json_item, auth=auth)
print(place_order.text)
In [12]:
current_orders = requests.get('https://espa.cr.usgs.gov/api/v0/list-orders/', auth=auth).json()['orders']
for order in current_orders:
order_status = requests.get('https://espa.cr.usgs.gov/api/v0/order/'+order, auth=auth)
status = order_status.json()['status']
print(order, status)
In [ ]:
downloaded = set()
while current_orders:
completed = set()
for order in current_orders.copy():
status = requests.get('https://espa.cr.usgs.gov/api/v0/item-status/%s'%order, auth=auth).json()
completed.update(item['product_dload_url'] for item in status['orderid'][order] if item['status'] == 'complete')
pending = [item for item in status['orderid'][order] if item['status'] != 'complete']
if not pending:
current_orders.remove(order)
for url in completed - downloaded:
download_file(url, data_dir)
downloaded.add(url)
time.sleep(30)