Plot Kmeans clusters stored in a GeoTiff

This notebook plots the GeoTiffs created by kmeans. These GeoTiffs contain the Kmeans cluster IDs.

Dependencies


In [1]:
import sys
sys.path.append("/usr/lib/spark/python")
sys.path.append("/usr/lib/spark/python/lib/py4j-0.10.4-src.zip")
sys.path.append("/usr/lib/python3/dist-packages")

import os
os.environ["HADOOP_CONF_DIR"] = "/etc/hadoop/conf"

import os
os.environ["PYSPARK_PYTHON"] = "python3"
os.environ["PYSPARK_DRIVER_PYTHON"] = "ipython"

from pyspark.mllib.clustering import KMeans, KMeansModel 
from pyspark import SparkConf, SparkContext

from osgeo import gdal
from io import BytesIO

import scipy.misc

import rasterio
from rasterio import plot
from rasterio.io import MemoryFile

%pylab inline
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import matplotlib.image as mpimg

from PIL import Image
from array import array


Populating the interactive namespace from numpy and matplotlib
/usr/local/lib/python3.5/dist-packages/IPython/core/magics/pylab.py:160: UserWarning: pylab import has clobbered these variables: ['plot']
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"

Spark Context


In [2]:
# Identification of this application and the Spark master it connects to.
appName = "plot_kmeans_clusters"
masterURL = "spark://pheno0.phenovari-utwente.surf-hosted.nl:7077"

# If this cell is re-run, stop the context left over from the previous run.
# On a fresh kernel `sc` does not exist yet, which surfaces as a NameError.
try:
    sc.stop()
except NameError:
    print("A  new Spark Context will be created.")

sc = SparkContext(
    conf=SparkConf().setMaster(masterURL).setAppName(appName)
)


A  new Spark Context will be created.

Mode of Operation setup

The user should modify the following variables to define which GeoTiffs should be loaded. To visualize the results of the latest kmeans execution, copy the values set in that notebook's Mode of Operation setup.


In [3]:
# Root HDFS directory under which the GeoTiff folders are looked up.
offline_dir_path = "hdfs:///user/pheno/spring-index/"

# Folder (relative to offline_dir_path) holding the cluster GeoTiffs, and the
# band identifier that appears in their file names.
geoTiff_dir, band_num = "kmeans_BloomFinal_LeafFinal", 3

# Year range of the kmeans run.
#   Satellite years: between 1989 and 2014 (inclusive)
#   Model years:     between 1980 and 2015 (inclusive)
first_year, last_year = 1980, 2015

# Kmeans settings of the run (number of clusters / iterations); both appear
# in the GeoTiff file names.
numClusters, numIterations = 60, 75

Mode of Operation verification


In [4]:
# Build the HDFS path of every per-year cluster GeoTiff selected by the
# Mode of Operation variables.

# Guard against an inverted range by collapsing it onto first_year.
if first_year > last_year:
    last_year = first_year

# Years to load, derived from the configured range. The previous version
# indexed a hard-coded range(1980, 2015), so the year in the file name was
# wrong whenever first_year was not 1980.
# NOTE(review): the upper bound is exclusive (last_year itself is not loaded),
# as in the original loop -- confirm whether a GeoTiff for last_year exists
# before making this inclusive.
years = list(range(first_year, last_year))

# Everything in the file name that does not change from one year to the next:
# <dir>/clusters_<band>_<clusters>_<iterations>_<first_year>_<last_year>_
file_prefix = (offline_dir_path + geoTiff_dir
               + '/clusters_' + str(band_num)
               + '_' + str(numClusters)
               + '_' + str(numIterations)
               + '_' + str(first_year)
               + '_' + str(last_year)
               + '_')

geotiff_hdfs_paths = [file_prefix + str(year) + '.tif' for year in years]

Load GeoTiffs

Load the GeoTiffs into MemoryFiles.


In [5]:
# Fetch every cluster GeoTiff from HDFS through Spark and decode its first band.
clusters_dataByteArrays = []  # raw file content, one bytearray per GeoTiff
imgs = []                     # first band of each GeoTiff, in geotiff_hdfs_paths order

for path in geotiff_hdfs_paths:
    print(path)
    clusters_data = sc.binaryFiles(path).take(1)
    if not clusters_data:
        raise IOError("No GeoTiff could be read from " + path)

    # Each record is a (path, content) pair; only the content is needed.
    raw_bytes = bytearray(clusters_data[0][1])
    clusters_dataByteArrays.append(raw_bytes)

    # Context managers close both the in-memory file and the dataset opened
    # on it; previously only the dataset was closed and the MemoryFile leaked.
    with MemoryFile(raw_bytes) as memfile:
        with memfile.open() as dataset:
            imgs.append(dataset.read(1))


hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1980.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1981.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1982.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1983.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1984.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1985.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1986.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1987.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1988.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1989.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1990.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1991.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1992.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1993.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1994.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1995.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1996.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1997.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1998.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_1999.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2000.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2001.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2002.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2003.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2004.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2005.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2006.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2007.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2008.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2009.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2010.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2011.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2012.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2013.tif
hdfs:///user/pheno/spring-index/kmeans_BloomFinal_LeafFinal/clusters_3_60_75_1980_2015_2014.tif

Resize Images


In [36]:
# Downsample every cluster image so that it can be plotted quickly.

# `zoom` was never imported, so this cell failed with a NameError on a fresh
# kernel. It is imported here to keep the cell self-contained.
from scipy.ndimage import zoom

# NOTE(review): i_width / i_height are not used in this cell; kept in case
# another cell reads them.
i_width = 7808
i_height = 3892

# Fraction of the original resolution kept along each axis.
zoom_factor = 0.1

imgs_5 = []
for img in imgs:
    # Replace NaNs by 0 before casting the cluster IDs to 8-bit integers.
    cluster_ids = np.uint8(np.nan_to_num(img))
    # Cluster IDs are categorical labels: nearest-neighbour resampling
    # (order=0) keeps only IDs that exist in the source image, whereas the
    # default cubic spline would blend neighbouring IDs into meaningless values.
    imgs_5.append(zoom(cluster_ids, zoom_factor, order=0))


/usr/local/lib/python3.5/dist-packages/scipy/ndimage/interpolation.py:600: UserWarning: From scipy 0.13.0, the output shape of zoom() is calculated with round() instead of int() - for these inputs the size of the returned array has changed.
  "the returned array has changed.", UserWarning)

Plot GeoTiffs


In [39]:
def showImageForYear(year = 1980):
    """Plot the downsampled cluster image of the given year.

    Parameters
    ----------
    year : int
        Year to display. It must be one of the years loaded into ``imgs_5``,
        whose first entry corresponds to ``first_year``.

    Raises
    ------
    ValueError
        If no image was loaded for ``year``.
    """
    # Offset from the configured first year rather than a hard-coded 1980, so
    # the lookup stays correct when a different range is configured.
    year_id = year - first_year
    # Reject out-of-range years explicitly: a negative index would otherwise
    # silently wrap around and show the image of a different year.
    if not 0 <= year_id < len(imgs_5):
        raise ValueError("No image loaded for year " + str(year))

    plt.figure(figsize=(20,20))
    plt.imshow(imgs_5[year_id])
    plt.axis('off')
    plt.show()

In [40]:
# Year range offered by the widgets, derived from what was actually loaded
# instead of hard-coded 1980/2014: imgs_5[0] is first_year and each following
# entry is the next year.
first_plot_year = first_year
last_plot_year = first_year + len(imgs_5) - 1

# Play button that steps through the years; interval is in milliseconds.
play = widgets.Play(
    interval=1000,
    value=first_plot_year,
    min=first_plot_year,
    max=last_plot_year,
    step=1,
    description="Press play",
    disabled=False
)
slider = widgets.IntSlider(min=first_plot_year, max=last_plot_year, step=1, value=first_plot_year)
# Keep the slider in sync with the play button on the browser side.
widgets.jslink((play, 'value'), (slider, 'value'))

# Redraw the image whenever the slider value changes.
interact(showImageForYear, year=slider)
widgets.HBox([play])



In [ ]: