In [ ]:
# You'll probably want to raise the Jupyter data rate limit for this notebook.
# See: http://stackoverflow.com/questions/43288550/iopub-data-rate-exceeded-when-viewing-image-in-jupyter-notebook

Setup

Let's set up our environment. We'll pull in the usual GIS suspects, set up a Leaflet map, read our API key from a JSON file, and set up our Planet client.


In [ ]:
# See requirements.txt to set up your dev environment.
import sys
import os
import json
import scipy
import urllib
import datetime 
import urllib3
import rasterio
import subprocess
import numpy as np
import pandas as pd
import seaborn as sns
from osgeo import gdal
from planet import api
from planet.api import filters
from traitlets import link
from shapely.geometry import mapping, shape
from IPython.display import display, Image, HTML
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
urllib3.disable_warnings()
from ipyleaflet import (
    Map,
    Marker,
    TileLayer, ImageOverlay,
    Polyline, Polygon, Rectangle, Circle, CircleMarker,
    GeoJSON,
    DrawControl
)

%matplotlib inline

# The Planet client can pick up an API key from the PL_API_KEY environment
# variable, but here we read it from a local apikeys.json file instead.
api_keys = json.load(open("apikeys.json",'r'))
client = api.ClientV1(api_key=api_keys["PLANET_API_KEY"])

Make a slippy map to get GeoJSON

  • The Planet API lets you query using GeoJSON, a geospatial flavor of JSON.
  • We use GeoJSON to define Areas of Interest, or AOIs in satellite speak.
  • We are going to create a slippy map using Leaflet and apply the Planet 2017 Q1 mosaic as the basemap. This requires our API key.
  • We are going to add a draw handler that stores each drawn region in an object so we can grab its GeoJSON.
  • If you don't want to do this, or need a fixed query, try geojson.io (see the hard-coded AOI sketch below).
  • To install and run:
    $ pip install ipyleaflet
    $ jupyter nbextension enable --py --sys-prefix ipyleaflet
    $ jupyter nbextension enable --py --sys-prefix widgetsnbextension
  • More information is available in the ipyleaflet documentation.
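
If the draw control gives you trouble, or you want a repeatable query, you can skip drawing and hard-code an AOI instead. Here is a minimal sketch in the same shape that the draw handler below produces; the coordinates are an illustrative rectangle over San Francisco, not taken from the original notebook.

In [ ]:
# Hand-built GeoJSON feature, matching what handle_draw() stores in AOIs.
# Run this after the map cell below (which re-initializes AOIs), or skip the map entirely.
# Swap in your own coordinates, e.g. copied from geojson.io.
AOIs = {
    1: {
        "type": "Feature",
        "properties": {},
        "geometry": {
            "type": "Polygon",
            "coordinates": [[
                [-122.47, 37.74],
                [-122.38, 37.74],
                [-122.38, 37.81],
                [-122.47, 37.81],
                [-122.47, 37.74]
            ]]
        }
    }
}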

In [ ]:
# Basemap Mosaic (v1 API)
mosaicsSeries = 'global_quarterly_2017q1_mosaic'
# Planet tile server base URL (Planet Explorer Mosaics Tiles)
mosaicsTilesURL_base = 'https://tiles0.planet.com/experimental/mosaics/planet-tiles/' + mosaicsSeries + '/gmap/{z}/{x}/{y}.png'
# Planet tile server url
mosaicsTilesURL = mosaicsTilesURL_base + '?api_key=' + api_keys["PLANET_API_KEY"]
# Map Settings 
# Define colors
colors = {'blue': "#009da5"}

center = [37.774929,-122.419416]
# Define initial map zoom level
zoom = 11
# Set Map Tiles URL
planetMapTiles = TileLayer(url= mosaicsTilesURL)
# Create the map
m = Map(
    center=center, 
    zoom=zoom,
    default_tiles = planetMapTiles # use the Planet mosaic as the basemap
)
# Define the draw tool type options
polygon = {'shapeOptions': {'color': colors['blue']}}
rectangle = {'shapeOptions': {'color': colors['blue']}} 

# Create the draw controls
# @see https://github.com/ellisonbg/ipyleaflet/blob/master/ipyleaflet/leaflet.py#L293
dc = DrawControl(
    polygon = polygon,
    rectangle = rectangle
)
# Initialize an action counter variable
actionCount = 0
AOIs = {}

# Register the draw controls handler
def handle_draw(self, action, geo_json):
    # Increment the action counter
    global actionCount
    actionCount += 1
    # Drop the style properties the draw tool adds to the GeoJSON
    geo_json['properties'] = {}
    # Round-trip through a JSON string to store a clean copy of the feature
    geojsonStr = json.dumps(geo_json, indent=2)
    AOIs[actionCount] = json.loads(geojsonStr)
    
# Attach the draw handler to the draw controls `on_draw` event
dc.on_draw(handle_draw)
m.add_control(dc)
m

Querying the Planet API.

  • First we'll grab our GeoJSON area of interest (AOI) and use it to construct a query.
  • We'll then build a search request for that area, looking for PSScene3Band items.
  • We have lots of products: RapidEye, PlanetScope (PS) 3- and 4-band, Landsat, and Sentinel are all possible.
  • Once we have our query, we'll run the search, iterate over the results, slurp up the data, and put it in a pandas DataFrame for easy sorting.
  • We'll print the first few so we're sure it works.

In [ ]:
print(AOIs[1])
myAOI = AOIs[1]["geometry"]

# build a query using the AOI and
# a cloud_cover filter to exclude overly cloudy scenes

old = datetime.datetime(year=2017,month=1,day=1)

query = filters.and_filter(
    filters.geom_filter(myAOI),
    filters.range_filter('cloud_cover', lt=30),
    filters.date_range('acquired', gt=old)
)

# build a request for only PlanetScope imagery
request = filters.build_search_request(
    query, item_types=['PSScene3Band'] #"REOrthoTile"
)

# if you don't have an API key configured, this will raise an exception
result = client.quick_search(request)
scenes = []
planet_map = {}
for item in result.items_iter(limit=500):
    planet_map[item['id']]=item
    props = item['properties']
    props["id"] = item['id']
    props["geometry"] = item["geometry"]
    props["thumbnail"] = item["_links"]["thumbnail"]
    scenes.append(props)
scenes = pd.DataFrame(data=scenes)
display(scenes)
print(scenes['satellite_id'])
print(len(scenes))

Cleanup

  • The data we got back is good, but we need some more information.
  • We got back big scenes, but we only care about our area of interest. A scene may not cover the whole AOI.
  • We can use the Shapely library to quickly figure out how much each scene overlaps our AOI.
  • We will convert our AOI and each scene's geometry to Shapely shapes and compute the overlap with an intersection call.
  • The returned acquisition, publish, and update times are strings; we'll convert them to datetime objects so we can search and sort on them.

In [ ]:
# now let's clean up the datetime stuff
# make a shapely shape from our aoi
sf = shape(myAOI)
footprints = []
overlaps = []
# go through the geometry from our api call, convert to a shape and calculate overlap area.
# also save the shape for safe keeping
for footprint in scenes["geometry"].tolist():
    s = shape(footprint)
    footprints.append(s)
    overlap = 100.0*(sf.intersection(s).area / sf.area)
    overlaps.append(overlap)
# take our lists and add them back to our dataframe
scenes['overlap'] = pd.Series(overlaps, index=scenes.index)
scenes['footprint'] = pd.Series(footprints, index=scenes.index)
# now make sure pandas knows about our date/time columns.
scenes["acquired"] = pd.to_datetime(scenes["acquired"])
scenes["published"] = pd.to_datetime(scenes["published"])
scenes["updated"] = pd.to_datetime(scenes["updated"])
scenes.head()

Filtering our search using pandas.

  • Using our DataFrame we will filter the scenes down to just what we want.
  • First we want scenes with less than 5% cloud cover.
  • Second we want standard quality images. Test images may not be high quality.
  • Third we'll only look at scenes acquired since January 2017.
  • Finally we will create a new DataFrame from the filtered results and print it.

In [ ]:
# Now let's get it down to just good, recent, clear scenes
clear = scenes['cloud_cover']<0.05
good = scenes['quality_category']=="standard"
recent = scenes["acquired"] > datetime.datetime(year=2017,month=1,day=1)
partial_coverage = scenes["overlap"] > 60
good_scenes = scenes[(good&clear&recent&partial_coverage)]
display(good_scenes)
print(len(good_scenes))

# Build a second, larger set of clear scenes over our AOI; we'll use these
# for the time-series animation at the end of the notebook.
clear = scenes['cloud_cover']<0.05
good = scenes['quality_category']=="standard"
all_time = scenes["acquired"] > datetime.datetime(year=2017,month=1,day=1)
full_coverage = scenes["overlap"] >= 60
all_scenes = scenes[(clear&all_time&full_coverage)]
display(all_scenes)
print(all_scenes['satellite_id'])
print(len(all_scenes))

Visualizing scene footprint overlap with our AOI

  • We know these scenes intersect with our AOI, but we aren't quite sure about the geometry.
  • We are going to plot our scene footprints and original AOI on our slippy map.
  • To do this we create GeoJson objects with properties.

In [ ]:
# first create a list of colors
colors = ["#ff0000","#00ff00","#0000ff","#ffff00","#ff00ff","#00ffff"]
# grab our scenes from the geometry/footprint geojson
footprints = good_scenes["geometry"].tolist()
# for each footprint/color combo
for footprint,color in zip(footprints,colors):
    # create the leaflet object
    feat = {'geometry':footprint,"properties":{
            'style':{'color': color,'fillColor': color,'fillOpacity': 0.2,'weight': 1}},
            'type':u"Feature"}
    # convert to geojson
    gjson = GeoJSON(data=feat)
    # add it to our map
    m.add_layer(gjson)
# now we will draw our original AOI on top 
feat = {'geometry':myAOI,"properties":{
            'style':{'color': "#FFFFFF",'fillColor': "#FFFFFF",'fillOpacity': 0.5,'weight': 1}},
            'type':u"Feature"}
gjson = GeoJSON(data=feat)
m.add_layer(gjson)   
m

Let's see what we got.

  • The API returns a handy thumbnail link.
  • Let's tell Jupyter to show it.
  • You may need to be logged in to Planet Explorer for the thumbnail URLs to authenticate.
    • If that's the case, just print the URLs and paste them into your browser, or fetch them with your API key (see the sketch below).
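
If the inline thumbnails come back broken, you can also fetch them yourself with your API key. Below is a rough sketch using the requests library (an extra dependency, not imported above) and HTTP basic auth with the key as the username, which is how Planet's API typically authenticates; treat the details as an assumption to check against the API docs.

In [ ]:
import requests  # extra dependency, only needed for this sketch

# Fetch the first thumbnail with the API key as the basic-auth username
# (empty password) and hand the raw bytes to Jupyter for display.
thumb_url = good_scenes["thumbnail"].tolist()[0]
resp = requests.get(thumb_url, auth=(api_keys["PLANET_API_KEY"], ""))
display(Image(data=resp.content))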

In [ ]:
imgs = []
# loop through our thumbnails and display them
for img in good_scenes["thumbnail"].tolist():
    imgs.append(Image(url=img))
    print(img)
display(*imgs)

Product Activation and Downloading

  • There are two things we need to know: the item type we searched (e.g. PSScene3Band) and the asset, or product, we want to download (e.g. visual or analytic).
  • Full resolution uncompressed satellite images are big, and there are lots of ways to view them.
  • For this reason Planet generally keeps images in their native format and only processes them on customer request. There is some caching of processed scenes, but this is the exception, not the rule.
  • All images must be activated prior to downloading, and this can take some time depending on demand.
  • Additionally we need to determine what sort of product we want to download. Generally speaking there are three kinds of scenes:
    • Analytic - multi-band, full resolution images that have not been processed. These are like RAW files from a DSLR camera.
    • Visual - color-corrected, orthorectified TIFFs. If you are just starting out, this is your best bet.
    • UDM - usable data mask. This mask can be used to find bad pixels and columns and to mask out areas with clouds.

In [ ]:
def get_products(client, scene_id, asset_type='PSScene3Band'):    
    """
    Ask the client to return the available products for a 
    given scene and asset type. Returns a list of product 
    strings
    """
    out = client.get_assets_by_id(asset_type,scene_id)
    temp = out.get()
    return list(temp.keys())

def activate_product(client, scene_id, asset_type="PSScene3Band",product="analytic"):
    """
    Activate a product given a scene, an asset type, and a product.
    
    On success return the return value of the API call and an activation object
    """
    temp = client.get_assets_by_id(asset_type,scene_id)
    products = temp.get()
    if product in products:
        return client.activate(products[product]),products[product]
    else:
        return None, None

def download_and_save(client,product):
    """
    Given a client and a product activation object download the asset. 
    This will save the tiff file in the local directory and return its 
    file name. 
    """
    out = client.download(product)
    fp = out.get_body()
    fp.write()
    return fp.name

def scenes_are_active(scene_list):
    """
    Check whether all of the resources in a given list of
    scene activation objects are ready for downloading.
    """
    for scene in scene_list:
        if scene["status"] != "active":
            print("{} is not ready.".format(scene))
            return False
    return True

Scenes ACTIVATE!

  • Given our good scenes list, we will convert the DataFrame "id" column into a list and activate every item in that list.
  • For this example we are going to default to the 3-band visual product, but some four-band helper methods are included further down to help you out.
  • Activation usually takes about 5-15 minutes, so get some coffee (a polling sketch follows the next cell).

In [ ]:
to_get = good_scenes["id"].tolist()
activated = []
# for each scene to get
for scene in to_get:
    # get the product 
    product_types = get_products(client,scene)
    for p in product_types:
        # if there is a visual product
        if p == "visual": # p == "basic_analytic_dn"
            print "Activating {0} for scene {1}".format(p,scene)
            # activate the product
            _,product = activate_product(client,scene,product=p)
            activated.append(product)
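
Rather than guessing when activation has finished, you can poll the API until every requested asset reports "active". Here is a rough sketch that re-queries the assets via client.get_assets_by_id(); the helper name and sleep interval are just illustrative.

In [ ]:
import time

def wait_for_active(client, scene_ids, asset_type="PSScene3Band", product="visual", sleep_s=60):
    """Poll the API until `product` is active for every scene in scene_ids."""
    while True:
        statuses = []
        for scene_id in scene_ids:
            assets = client.get_assets_by_id(asset_type, scene_id).get()
            statuses.append(assets.get(product, {}).get("status"))
        if all(status == "active" for status in statuses):
            print("All {} assets are active.".format(len(scene_ids)))
            return
        print("Still activating, statuses so far: {}".format(statuses))
        time.sleep(sleep_s)

# wait_for_active(client, to_get)  # uncomment to block until everything is ready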

Download Scenes

  • In this section we will check whether our scenes have been activated.
  • Once an asset is activated, its status flag is set to "active".
  • Once that is done we will save the scenes to the local directory.
  • A smart engineer would set a path variable for storing these files and check whether an asset has already been downloaded before downloading it again (a small sketch follows this list).
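
One minimal version of that bookkeeping: create a download directory once and move each finished file into it. The cells in this notebook keep everything in the working directory, so treat this as an optional sketch (the directory name is arbitrary).

In [ ]:
import shutil

# Optional: keep downloaded scenes out of the notebook's working directory.
download_dir = os.path.abspath("planet_scenes")  # arbitrary folder name
if not os.path.isdir(download_dir):
    os.makedirs(download_dir)

# After download_and_save() returns a file name, you could tuck the file away with:
# shutil.move(fname, os.path.join(download_dir, os.path.basename(fname)))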

In [ ]:
tiff_files = []
asset_type = "_3B_Visual"
# check if our scenes have been activated
if True: # swap in scenes_are_active(activated) to actually gate on activation
    for to_download,name in zip(activated,to_get):
        # create the product name
        name = name + asset_type + ".tif"
        # if the product exists locally, skip the download
        if os.path.isfile(name):
            print("We have scene {0} already, skipping...".format(name))
            tiff_files.append(name)
        elif to_download["status"] == "active":
            # otherwise download the product
            print("Downloading {0}....".format(name))
            fname = download_and_save(client,to_download)
            tiff_files.append(fname)
            print("Download done.")
        else:
            print("Could not download, still activating")
else:
    print("Scenes aren't ready yet")

print(tiff_files)

Loading Images

  • There are a variety of ways to load TIFF data, including Rasterio, GDAL, OpenCV, and SKImage.
  • Today we are going to use Rasterio and load each channel into a numpy array.
  • Since the visual 3-band products are rotated inside their bounding box, we can also open the mask band to ignore the empty collar during processing.

In [ ]:
def load_image4(filename):
    """Return a 4D (r, g, b, nir) numpy array with the data in the specified TIFF filename."""
    path = os.path.abspath(os.path.join('./', filename))
    if os.path.exists(path):
        with rasterio.open(path) as src:
            b, g, r, nir = src.read()
            return np.dstack([r, g, b, nir])
        
def load_image3(filename):
    """Return a 3D (r, g, b) numpy array with the data in the specified TIFF filename."""
    path = os.path.abspath(os.path.join('./', filename))
    if os.path.exists(path):
        with rasterio.open(path) as src:
            b,g,r,mask = src.read()
            return np.dstack([b, g, r])
        
def get_mask(filename):
    """Return a 1D mask numpy array with the data in the specified TIFF filename."""
    path = os.path.abspath(os.path.join('./', filename))
    if os.path.exists(path):
        with rasterio.open(path) as src:
            b,g,r,mask = src.read()
            return np.dstack([mask])

def rgbir_to_rgb(img_4band):
    """Convert an RGBIR image to RGB"""
    return img_4band[:,:,:3]

Read the images and use Matplotlib to show them.


In [ ]:
img_files = []
masks = []
# load the images and masks
for fname in tiff_files[0:2]:
    img_files.append(load_image3(fname))
    masks.append(get_mask(fname))
i = 0
# use matplotlib to display the scenes
for img,name in zip(img_files,tiff_files):
    plt.figure(i,figsize=(18,36))
    plt.imshow(img)
    plt.title(name)
    i+=1

Quick Histogram

  • Next up we'll plot the histogram of the image.
  • A histogram is just a plot of the number of pixels with a specific intensity for a given color.

In [ ]:
import numpy.ma as ma
def plot_hist4(img_4band,title=""):
    # Plot a four band histogram
    r, g, b, nir = img_4band[:, :, 0], img_4band[:, :, 1], img_4band[:, :, 2], img_4band[:, :, 3]
    for slice_, name, color in ((r,'r', 'red'),(g,'g', 'green'),(b,'b', 'blue'), (nir, 'nir', 'magenta')):
        plt.hist(slice_.ravel(), bins=100, 
                 range=[0,img_4band.max()], 
                 label=name, color=color, histtype='step')
    plt.title(title)
    plt.legend()
    
def plot_hist3(img_3band,mask,title=""):
    # Plot a three band histogram
    r, g, b = img_3band[:, :, 0], img_3band[:, :, 1], img_3band[:, :, 2]
    r = ma.masked_array(r,mask=mask)
    g = ma.masked_array(g,mask=mask)
    b = ma.masked_array(b,mask=mask)
    for slice_, name, color in ((r,'r', 'red'),(g,'g', 'green'),(b,'b', 'blue')):
        plt.hist(slice_.ravel(), bins=25, 
                 range=[0,img_3band.max()], 
                 label=name, color=color, histtype='step')
    plt.title(title)
    plt.legend()

In [ ]:
i = 0
for img,name,mask in zip(img_files,tiff_files,masks):
    plt.figure(i,figsize=(18,6))
    plot_hist3(img,mask=mask,title=name)
    i+=1

Decomposing Channels

  • We can also decompose the channels of the image.
  • Sometimes it is useful to work just in a single channel.
  • Other times channels can be used to do useful things, like filter out clouds.

In [ ]:
def plot_bands4(img,title="",i=0):
    fig = plt.figure(i)
    fig.set_size_inches(24, 3)
    r, g, b, nir = img[:, :, 0], img[:, :, 1], img[:, :, 2], img[:, :, 3]
    fig.suptitle(title)
    for i, (x, c) in enumerate(((r, 'r'), (g, 'g'), (b, 'b'), (nir, 'near-ir'))):
        a = fig.add_subplot(1, 4, i+1)
        a.set_title(c)
        plt.imshow(x)
        
def plot_bands3(img,title="",i=0):
    fig = plt.figure(i)
    fig.set_size_inches(24, 5)
    r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
    fig.suptitle(title)
    for i, (x, c) in enumerate(((r, 'r'), (g, 'g'), (b, 'b'))):
        a = fig.add_subplot(1, 3, i+1)
        a.set_title(c)
        plt.imshow(x)

In [ ]:
plot_bands3(img_files[0],title=tiff_files[0],i=0)

But all of these scenes are big, and we want downtown San Francisco

  • We can clip all of the scenes to the AOI we selected at the start of the notebook.
  • First we'll dump the GeoJSON to a file.
  • Since geospatial data is "big", we often work with files and get things out of memory as soon as possible.
  • For each of our scenes we'll create a "clip" file.
  • We will use a tool called GDAL to clip each scene to our AOI.
  • GDAL stands for Geospatial Data Abstraction Library.
  • GDAL is a C++ library that is often run from the command line, but it also has Python (SWIG) bindings (see the sketch below).
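
As an aside, the same clip can be done through GDAL's Python bindings instead of shelling out. Here is a minimal sketch using gdal.Warp, whose keyword arguments mirror the gdalwarp flags used in the next cell; the file names in the commented call are placeholders.

In [ ]:
from osgeo import gdal

def clip_to_aoi(in_file, out_file, aoi_file):
    """Clip a scene to an AOI, mirroring the gdalwarp call below."""
    gdal.Warp(out_file, in_file,
              dstSRS="EPSG:3857",          # -t_srs
              cutlineDSName=aoi_file,      # -cutline
              cropToCutline=True,          # -crop_to_cutline
              targetAlignedPixels=True,    # -tap
              xRes=3, yRes=3)              # -tr 3 3

# clip_to_aoi("scene.tif", "scene_clip.tif", "sanfrancisco.geojson")  # placeholder names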

In [ ]:
aoi_file ="sanfrancisco.geojson" 
# write our input AOI to a geojson file.
with open(aoi_file,"w") as f:
    f.write(json.dumps(myAOI))
    

# create our full input and output names
clip_names = [os.path.abspath(tiff[:-4]+"_clip"+".tif") for tiff in tiff_files]
full_tif_files = [os.path.abspath("./"+tiff) for tiff in tiff_files]

for in_file,out_file in zip(tiff_files,clip_names):
    commands = ["gdalwarp", # command-line gdalwarp call
           "-t_srs","EPSG:3857",
           "-cutline",aoi_file,
           "-crop_to_cutline",
           "-tap",
           "-tr", "3", "3",
           "-overwrite"]
    subprocess.call(["rm",out_file])
    commands.append(in_file)
    commands.append(out_file)
    print(" ".join(commands))
    subprocess.call(commands)

Awesome! Let's take a look at what we got.


In [ ]:
clip_img_files = [load_image3(fname) for fname in clip_names]
    
i = 0
for img,name in list(zip(clip_img_files,clip_names))[0:2]:
    plt.figure(i,figsize=(6,12))
    plt.imshow(img)
    plt.title(name)
    i+=1

Hrm... that's not right.

  • You'll notice that a lot of these scenes don't fill our AOI.
  • A lot of these images were taken at roughly the same time.
  • We should try to merge these scenes together to make one big scene.
  • This process is called mosaicking, and GDAL can help.
  • We will call GDAL from the command line using subprocess to do this for us.

In [ ]:
subprocess.call(["rm","merged.tif"])
commands = ["gdalwarp", # t
           "-t_srs","EPSG:3857",
           "-cutline",aoi_file,
           "-crop_to_cutline",
           "-tap",
            "-tr", "3", "3"
           "-overwrite"]
output_mosaic = "merged.tif"
for tiff in tiff_files[0:2]:
    commands.append(tiff)
commands.append(output_mosaic)
print " ".join(commands)
subprocess.call(commands)

Let's take a look.... looks much better


In [ ]:
merged = load_image3("./merged.tif")
plt.figure(i,figsize=(6,12))
plt.imshow(merged)
plt.title("merged")

Now let's pull it all together to do something interesting.

  • First we'll activate and download all of our target scenes since the start of 2017.
  • Then we'll clip them with GDAL to the small AOI we selected above.
  • Finally we'll export them and use that data to make a mosaic.
  • We'll use ImageMagick to convert the clipped TIFFs to PNG frames and then combine the frames into an animated GIF.

In [ ]:
# Activate
to_get = all_scenes["id"].tolist()
activated = []
for scene in to_get:
    product_types = get_products(client,scene)
    for p in product_types:
        if p == "visual": # p == "basic_analytic_dn"
            print "Activating {0} for scene {1}".format(p,scene)
            _,product = activate_product(client,scene,product=p)
            activated.append(product)
            
# Download  
tiff_files = []
asset_type = "_3B_Visual" 

if True: # swap in scenes_are_active(activated) to actually gate on activation
    for to_download,name in zip(activated,to_get):
        name = name + asset_type + ".tif"
        if os.path.isfile(name):
            print("We have scene {0} already, skipping...".format(name))
            tiff_files.append(name)
        elif to_download["status"] == "active":
            print("Downloading {0}....".format(name))
            fname = download_and_save(client,to_download)
            tiff_files.append(fname)
            print("Download done.")
        else:
            print("Could not download, still activating")
else:
    print("Scenes aren't ready yet")

In [ ]:
tiff_files = sorted(tiff_files)
clip_names = []
# Create a list of tif file names. 
for tiff in tiff_files:
    clip_names.append(os.path.abspath(tiff[:-4]+"_clip"+".tif"))

full_tif_files = []
for tiff in tiff_files:
    full_tif_files.append(os.path.abspath("./"+tiff))

# Run GDAL to crop our files down.
for in_file,out_file in zip(tiff_files,clip_names):
    commands = ["gdalwarp", # command-line gdalwarp call
           "-t_srs","EPSG:3857",
           "-cutline",aoi_file,
           "-crop_to_cutline",
           "-tap",
           "-tr", "3", "3",
           "-overwrite"]
    subprocess.call(["rm",out_file])
    commands.append(in_file)
    commands.append(out_file)
    print(" ".join(commands))
    subprocess.call(commands)

temp_names = []
i = 0 
# use ImageMagick convert to downsample each clip to a PNG frame
for in_file in clip_names:
    temp_name = "img{num:04d}.png".format(num=i) 
    command = ["convert", in_file, "-sample", "30x30%", temp_name]
    temp_names.append(temp_name)
    i += 1 
    subprocess.call(command)

# stitch the frames into an animated gif; pass the file list explicitly,
# since subprocess does not do shell wildcard expansion
magic = "SanFrancisco.gif"
last_call = ["convert","-delay", "10","-loop","0"] + sorted(temp_names) + [magic]
subprocess.call(last_call)
print("done!")
