Mask a GeoTiff

In this notebook the user can load two GeoTiffs, extract a Tile from the first GeoTiff and mask it with second GeoTiff.

Dependencies


In [1]:
import sys.process._
import geotrellis.proj4.CRS
import geotrellis.raster.io.geotiff.writer.GeoTiffWriter
import geotrellis.raster.io.geotiff.{SinglebandGeoTiff, _}
import geotrellis.raster.{CellType, DoubleArrayTile}
import geotrellis.spark.io.hadoop._
import geotrellis.vector.{Extent, ProjectedExtent}
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

//Spire is a numeric library for Scala which is intended to be generic, fast, and precise.
import spire.syntax.cfor._

In [2]:
sc.getConf.getAll


Waiting for a Spark session to start...
Out[2]:
[(spark.eventLog.enabled,true), (spark.hadoop.fs.s3a.connection.ssl.enabled,false), (spark.jars.ivy,), (spark.eventLog.dir,/data/local/spark/spark-events), (spark.hadoop.fs.s3a.fs.s3a.fast.upload,true), (spark.app.id,app-20170718202052-0004), (spark.hadoop.fs.s3a.endpoint,http://145.100.59.64:9091), (spark.jars.packages,), (spark.app.name,Apache Toree), (spark.hadoop.fs.s3a.access.key,A24H1RIGV4RKFGXJTEMS), (spark.serializer,org.apache.spark.serializer.KryoSerializer), (spark.local.dir,/data/local/spark/tmp/), (spark.memory.storageFraction,0.5), (spark.daemon.memory,3g), (spark.kryoserializer.buffer,512m), (spark.repl.class.outputDir,/data/local/spark/tmp/spark-b1ab533c-784b-41dc-9252-676994667033/repl-8286075f-fa24-4bd0-a9ad-d769bd89bc94), (spark.worker.memory,14g), (spark.driver.memory,8g), (spark.default.parallelism,32), (spark.dynamicAllocation.initialExecutors,1), (spark.submit.deployMode,client), (spark.network.timeout,360s), (spark.repl.class.uri,spark://145.100.59.64:39510/classes), (spark.broadcast.blockSize,20m), (spark.hadoop.fs.s3a.secret.key,5jd7ARCOi/XVjLzXqT5wA1NSgjmUo9mYJBgyGyIh), (spark.sql.warehouse.dir,file:///data/local/spark/), (spark.shuffle.service.enabled,true), (spark.master,spark://pheno0.phenovari-utwente.surf-hosted.nl:7077), (spark.driver.allowMultipleContexts,true), (spark.scheduler.mode,FAIR), (spark.executor.id,driver), (spark.executor.cores,2), (spark.driver.extraClassPath,/usr/lib/spark/jars/aws-java-sdk-s3-1.10.6.jar:/usr/lib/spark/jars/hadoop-aws-2.8.0.jar:/usr/lib/spark/jars/joda-time-2.9.4.jar), (spark.history.fs.cleaner.enabled,true), (spark.executor.memory,12g), (spark.shuffle.service.port,7338), (spark.driver.port,39510), (spark.hadoop.fs.s3a.impl,org.apache.hadoop.fs.s3a.S3AFileSystem), (spark.hadoop.fs.s3a.buffer.dir,/root/spark/work,/tmp), (spark.kryoserializer.buffer.max,1g), (spark.jars,file:/usr/local/share/jupyter/kernels/apache_toree_scala/lib/toree-assembly-0.2.0.dev1-incubating-SNAPSHOT.jar), (spark.executor.extraClassPath,/usr/lib/spark/jars/aws-java-sdk-s3-1.10.6.jar:/usr/lib/spark/jars/hadoop-aws-2.8.0.jar:/usr/lib/spark/jars/joda-time-2.9.4.jar), (spark.driver.host,145.100.59.64), (spark.memory.fraction,0.6), (spark.driver.maxResultSize,2g), (spark.dynamicAllocation.maxExecutors,8), (spark.dynamicAllocation.enabled,true)]

Read a GeoTiff file


In [3]:
var geo_projected_extent = new ProjectedExtent(new Extent(0,0,0,0), CRS.fromName("EPSG:3857"))
var geo_num_cols_rows :(Int, Int) = (0, 0)
val geo_path = "hdfs:///user/pheno/modis/usa_mask.tif"
val geo_tiles_RDD = sc.hadoopGeoTiffRDD(geo_path).values

val geo_extents_withIndex = sc.hadoopMultibandGeoTiffRDD(geo_path).keys.zipWithIndex().map{case (e,v) => (v,e)}
geo_projected_extent = (geo_extents_withIndex.filter(m => m._1 == 0).values.collect())(0)

val geo_tiles_withIndex = geo_tiles_RDD.zipWithIndex().map{case (e,v) => (v,e)}
val geo_tile0 = (geo_tiles_withIndex.filter(m => m._1==0).values.collect())(0)
geo_num_cols_rows = (geo_tile0.cols, geo_tile0.rows)
val geo_cellT = geo_tile0.cellType


geo_projected_extent = ProjectedExtent(Extent(-171.1296209227216, 19.996701428244357, -42.56469205974807, 49.99999999547987),geotrellis.proj4.CRS$$anon$3@41d0d1b7)
geo_num_cols_rows = (17800,4154)
geo_path = hdfs:///user/pheno/modis/usa_mask.tif
geo_tiles_RDD = MapPartitionsRDD[2] at values at <console>:49
geo_extents_withIndex = MapPartitionsRDD[7] at map at <console>:51
geo_projected_extent = ProjectedExtent(Extent(-171.1296209227216, 19.996701428244357, -42.56469205974807, 49.99999999547987),geotrellis.proj4.CRS$$anon$3@41d0d1b7)
geo_tiles_withIndex: org.apache.spark.rdd.RDD[(Lon...
Out[3]:
ProjectedExtent(Extent(-171.1296209227216, 19.996701428244357, -42.56469205974807, 49.99999999547987),geotrellis.proj4.CRS$$anon$3@41d0d1b7)

Read Mask


In [4]:
val mask_path = "hdfs:///user/hadoop/modis/usa_mask.tif"
    val mask_tiles_RDD = sc.hadoopGeoTiffRDD(mask_path).values
    val mask_tiles_withIndex = mask_tiles_RDD.zipWithIndex().map{case (e,v) => (v,e)}
    val mask_tile0 = (mask_tiles_withIndex.filter(m => m._1==0).values.collect())(0)


mask_path = hdfs:///user/hadoop/modis/usa_mask.tif
mask_tiles_RDD = MapPartitionsRDD[16] at values at <console>:47
mask_tiles_withIndex = MapPartitionsRDD[18] at map at <console>:48
mask_tile0 = FloatRawArrayTile([F@7119d8d1,15616,7784)
Out[4]:
FloatRawArrayTile([F@7119d8d1,15616,7784)

Mask GeoTiff


In [5]:
val res_tile = geo_tile0.localInverseMask(mask_tile0, 1, 0).toArrayDouble()


Out[5]:
Name: geotrellis.raster.GeoAttrsError
Message: Cannot combine rasters with different dimensions.(17800,4154) does not match (15616,7784)
StackTrace:   at geotrellis.raster.package$TileTupleExtensions.assertEqualDimensions(package.scala:141)
  at geotrellis.raster.ArrayTile$class.combineDouble(ArrayTile.scala:248)
  at geotrellis.raster.DoubleArrayTile.combineDouble(DoubleArrayTile.scala:24)
  at geotrellis.raster.ArrayTile$class.combineDouble(ArrayTile.scala:273)
  at geotrellis.raster.DoubleArrayTile.combineDouble(DoubleArrayTile.scala:24)
  at geotrellis.raster.Tile$class.dualCombine(Tile.scala:101)
  at geotrellis.raster.DoubleArrayTile.dualCombine(DoubleArrayTile.scala:24)
  at geotrellis.raster.mapalgebra.local.InverseMask$.apply(InverseMask.scala:32)
  at geotrellis.raster.mask.SinglebandTileMaskMethods$class.localInverseMask(SinglebandTileMaskMethods.scala:54)
  at geotrellis.raster.package$withTileMethods.localInverseMask(package.scala:53)

Save the new GeoTiff file


In [6]:
val clone_tile = DoubleArrayTile(res_tile, geo_num_cols_rows._1, geo_num_cols_rows._2)

val cloned = geotrellis.raster.DoubleArrayTile.empty(geo_num_cols_rows._1, geo_num_cols_rows._2)
cfor(0)(_ < geo_num_cols_rows._1, _ + 1) { col =>
    cfor(0)(_ < geo_num_cols_rows._2, _ + 1) { row =>
        val v = clone_tile.getDouble(col, row)
        cloned.setDouble(col, row, v)
    }
}

val geoTif = new SinglebandGeoTiff(cloned, geo_projected_extent.extent, geo_projected_extent.crs, Tags.empty, GeoTiffOptions(compression.DeflateCompression))

//Save GeoTiff to /tmp
val output = "/user/pheno/modis/modis_usa_mask.tif"
val tmp_output = "/tmp/modis_usa_mask.tif"
GeoTiffWriter.write(geoTif, tmp_output)

//Upload to HDFS
var cmd = "hadoop dfs -copyFromLocal -f " + tmp_output + " " + output
Process(cmd)!

cmd = "rm -fr " + tmp_output
Process(cmd)!


Out[6]:
Name: Unknown Error
Message: lastException: Throwable = null
<console>:47: error: not found: value res_tile
       val clone_tile = DoubleArrayTile(res_tile, geo_num_cols_rows._1, geo_num_cols_rows._2)
                                        ^

StackTrace: 

In [ ]: