Plot Kmeans clusters stored in a GeoTiff

This notebook plots the GeoTiffs created by kmeans. These GeoTiffs contain the Kmeans cluster IDs.

Dependencies


In [6]:
import sys
# Extend the module search path with the Spark Python directory, the py4j
# source archive shipped alongside it, and the system-wide Python 3 packages,
# so that the pyspark imports further down in this cell can be resolved.
sys.path.append("/usr/lib/spark/python")
sys.path.append("/usr/lib/spark/python/lib/py4j-0.10.4-src.zip")
sys.path.append("/usr/lib/python3/dist-packages")

import os
# Location of the Hadoop configuration files; set before any Spark context is created.
os.environ["HADOOP_CONF_DIR"] = "/etc/hadoop/conf"

# NOTE(review): os is already imported a few lines above; this second import is redundant.
import os
# Python executables PySpark should launch (general interpreter and driver-side interpreter).
os.environ["PYSPARK_PYTHON"] = "python3"
os.environ["PYSPARK_DRIVER_PYTHON"] = "ipython"

from pyspark.mllib.clustering import KMeans, KMeansModel 
from pyspark import SparkConf, SparkContext

from osgeo import gdal
from io import BytesIO

import matplotlib.pyplot as plt
import rasterio
from rasterio import plot
from rasterio.io import MemoryFile

Spark Context


In [7]:
appName = "plot_kmeans_clusters"
masterURL = "spark://pheno0.phenovari-utwente.surf-hosted.nl:7077"

# When this cell is re-run, `sc` already exists and must be stopped before a
# new context is created. On a fresh kernel `sc` is undefined, which surfaces
# as a NameError and simply means there is nothing to stop.
try:
    sc.stop()
except NameError:
    print("A  new Spark Context will be created.")

# Build the configuration first, then hand it to the context.
spark_conf = SparkConf().setAppName(appName).setMaster(masterURL)
sc = SparkContext(conf=spark_conf)

Mode of Operation setup

The user should modify the following variables to define which GeoTiffs should be loaded. If (s)he wants to visualize the results of the latest kmeans execution, (s)he can simply copy the values set in that notebook's Mode of Operation setup.


In [8]:
#Base HDFS directories and the sub-directory that holds the GeoTiffs.
#NOTE(review): the path-building cell of this notebook composes its paths from
#offline_dir_path + geoTiff_dir; dir_path is not referenced by any cell shown here.
dir_path = "hdfs:///user/hadoop/spring-index/"
offline_dir_path = "hdfs:///user/pheno/spring-index/"
geoTiff_dir = "BloomFinal"

#Kmeans number of iterations and clusters.
#These values select the files to load: one file named
#clusters_<numClusters>_<numIterations>.tif is expected for every cluster count
#from minClusters to maxClusters (inclusive), advancing by stepClusters.
numIterations = 35
minClusters = 2
maxClusters = 15
stepClusters = 1

Mode of Operation verification


In [9]:
# HDFS path of every cluster GeoTiff to load, ordered by increasing cluster count.
geotiff_hdfs_paths = []

# An inverted range collapses to the single cluster count minClusters.
if maxClusters < minClusters:
    maxClusters, stepClusters = minClusters, 1

# A step below 1 is replaced by 1 so the loop below always advances.
if 1 > stepClusters:
    stepClusters = 1

# One path per cluster count in [minClusters, maxClusters], advancing by stepClusters.
numClusters_id, numClusters = 0, minClusters
while maxClusters >= numClusters:
    path = "{}{}/clusters_{}_{}.tif".format(offline_dir_path, geoTiff_dir, numClusters, numIterations)
    geotiff_hdfs_paths += [path]
    numClusters += stepClusters
    numClusters_id += 1

Load GeoTiffs

Load the GeoTiffs from HDFS into in-memory byte arrays. The following cells wrap these byte arrays in MemoryFiles.


In [10]:
# Raw GeoTiff contents, one bytearray per entry of geotiff_hdfs_paths (same order).
clusters_dataByteArrays = []

# Walk the same cluster-count range that was used to build geotiff_hdfs_paths,
# so numClusters_id indexes the path that belongs to numClusters.
numClusters_id, numClusters = 0, minClusters
while maxClusters >= numClusters:
    # Only the first record read for this path is fetched; its second field
    # is copied into a bytearray and kept.
    clusters_data = sc.binaryFiles(geotiff_hdfs_paths[numClusters_id]).take(1)
    clusters_dataByteArrays += [bytearray(clusters_data[0][1])]
    numClusters += stepClusters
    numClusters_id += 1

Check GeoTiffs metadata


In [11]:
# Print the raster profile (driver, dtype, size, CRS, transform, ...) of every loaded GeoTiff.
for val in clusters_dataByteArrays:
    # Wrap the raw bytes in a MemoryFile and open a dataset on top of it.
    # Both are context managers: leaving the block closes the dataset AND the
    # MemoryFile, even if reading the profile fails. Closing only the dataset
    # would leave the MemoryFile itself open.
    with MemoryFile(val) as memory_file, memory_file.open() as memFile:
        print(memFile.profile)


{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}
{'count': 1, 'compress': 'deflate', 'interleave': 'band', 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008983152841195215, 0.0, -126.30312894720473,
       0.0, -0.008983152841195215, 49.25462702827337), 'height': 3892, 'driver': 'GTiff', 'width': 7808, 'nodata': nan, 'dtype': 'float64', 'tiled': False}

Plot GeoTiffs


In [15]:
%matplotlib inline

numClusters_id = 0
numClusters = minClusters
while numClusters <= maxClusters :
    print ("Plot for " + str(numClusters) + " clusters!!!")
    memFile = MemoryFile(clusters_dataByteArrays[numClusters_id]).open()
    plot.show((memFile,1))
    if (numClusters < maxClusters) :
        _ = input("Press [enter] to continue.")
    memFile.close()
    numClusters_id += 1
    numClusters += stepClusters


Plot for 2 clusters!!!
Press [enter] to continue.
Plot for 3 clusters!!!
Press [enter] to continue.
Plot for 4 clusters!!!
Press [enter] to continue.
Plot for 5 clusters!!!
Press [enter] to continue.
Plot for 6 clusters!!!
Press [enter] to continue.
Plot for 7 clusters!!!
Press [enter] to continue.
Plot for 8 clusters!!!
Press [enter] to continue.
Plot for 9 clusters!!!
Press [enter] to continue.
Plot for 10 clusters!!!
Press [enter] to continue.
Plot for 11 clusters!!!
Press [enter] to continue.
Plot for 12 clusters!!!
Press [enter] to continue.
Plot for 13 clusters!!!
Press [enter] to continue.
Plot for 14 clusters!!!
Press [enter] to continue.
Plot for 15 clusters!!!

In [ ]: