This notebook plots the GeoTiffs created by kmeans. These GeoTiffs contain the kmeans cluster IDs.
In [1]:
# Extend the module search path with the Spark Python sources, the py4j
# archive shipped with Spark, and the system-wide Python 3 packages. This has
# to happen before the pyspark imports further down.
import sys
sys.path.append("/usr/lib/spark/python")
sys.path.append("/usr/lib/spark/python/lib/py4j-0.10.4-src.zip")
sys.path.append("/usr/lib/python3/dist-packages")
import os
# Location of the Hadoop configuration directory.
# NOTE(review): presumably needed so Spark can resolve the hdfs:// paths used
# later in this notebook - confirm.
os.environ["HADOOP_CONF_DIR"] = "/etc/hadoop/conf"
# NOTE(review): second import of os is redundant (harmless).
import os
# Python executables PySpark should use (python3, and ipython for the driver).
os.environ["PYSPARK_PYTHON"] = "python3"
os.environ["PYSPARK_DRIVER_PYTHON"] = "ipython"
from pyspark.mllib.clustering import KMeans, KMeansModel
from pyspark import SparkConf, SparkContext
from osgeo import gdal
from io import BytesIO
import scipy.misc
from scipy.ndimage import zoom
import rasterio
from rasterio import plot
from rasterio.io import MemoryFile
%pylab inline
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import matplotlib.image as mpimg
from PIL import Image
from array import array
In [2]:
# Application name and Spark master this notebook connects to.
appName = "plot_kmeans_clusters"
masterURL = "spark://pheno0.phenovari-utwente.surf-hosted.nl:7077"

# When the cell is re-run, shut down the context left over from the previous
# run. On a fresh kernel `sc` is not defined yet, which shows up as NameError.
try:
    sc.stop()
except NameError:
    print("A new Spark Context will be created.")

sc = SparkContext(
    conf=SparkConf().setAppName(appName).setMaster(masterURL)
)
The user should modify the following variables to define which GeoTiffs should be loaded. To visualize the results of the latest kmeans execution, simply copy the values set in its Mode of Operation Setup.
In [3]:
# Base HDFS directory and sub-directory holding the GeoTiffs to load.
# NOTE(review): this comment used to name "hdfs:///user/hadoop/modis/"; the
# directory actually configured below is "hdfs:///user/pheno/spring-index/".
offline_dir_path = "hdfs:///user/pheno/spring-index/"
geoTiff_dir = "kmeans_BloomFinal_LeafFinal"
# Band number; it is one component of the GeoTiff file names.
band_num = 3
# Satellite years between (inclusive) 1989 - 2014
# Model years between (inclusive) 1980 - 2015
# NOTE(review): both values are embedded in the file names, but the cells
# below only load the years first_year .. last_year - 1 (last_year itself is
# excluded) - confirm this matches what kmeans wrote.
first_year = 1980
last_year = 2015
# Kmeans number of iterations and clusters (both are part of the file names)
numIterations = 75
numClusters = 60
In [4]:
# Build the HDFS path of every per-year cluster GeoTiff.
#
# File names are assembled as
#   clusters_<band>_<numClusters>_<numIterations>_<first_year>_<last_year>_<year>.tif
# and one path is produced for each year in [first_year, last_year).

# Guard against an inverted range: collapse it so that nothing is loaded.
if first_year > last_year:
    last_year = first_year

# Years to load, derived from the configuration. A fixed 1980-based list
# indexed from 0 would put the wrong year in the file name whenever
# first_year != 1980, and would run out of entries for ranges longer than
# the list.
years = list(range(first_year, last_year))

# Everything in the file name except the trailing year is the same for all files.
path_prefix = (
    offline_dir_path + geoTiff_dir + '/clusters_'
    + str(band_num) + '_' + str(numClusters) + '_' + str(numIterations) + '_'
    + str(first_year) + '_' + str(last_year) + '_'
)
geotiff_hdfs_paths = [path_prefix + str(year) + '.tif' for year in years]
In [5]:
# Fetch every cluster GeoTiff from HDFS and decode its first band.
#
# clusters_dataByteArrays keeps the raw file content (one bytearray per file);
# imgs keeps the decoded first band of each file. Both follow the order of
# geotiff_hdfs_paths.
clusters_dataByteArrays = []
imgs = []
for tiff_path in geotiff_hdfs_paths:
    print(tiff_path)
    # binaryFiles yields (path, content) pairs; only the first match is used.
    _, tiff_content = sc.binaryFiles(tiff_path).take(1)[0]
    tiff_bytes = bytearray(tiff_content)
    clusters_dataByteArrays.append(tiff_bytes)
    # Context managers release both the in-memory file and the dataset opened
    # on it, even if decoding fails part-way (previously the MemoryFile itself
    # was never closed).
    with MemoryFile(tiff_bytes) as memfile:
        with memfile.open() as dataset:
            # Band indexes are 1-based in rasterio; read(1) returns the same
            # 2-D array as read()[0] without loading any other bands.
            imgs.append(dataset.read(1))
In [36]:
# Build a downsampled copy of every cluster image so that plotting stays fast.
imgs_5 = []
# NOTE(review): i_width / i_height are not used in any visible cell; kept in
# case other code reads them (presumably the full-resolution raster size).
i_width = 7808
i_height = 3892
# Scale applied to both image axes (0.1 -> one tenth of the original size).
zoom_factor = 0.1
for full_res in imgs:
    # NaNs are replaced by 0 before the cast to unsigned 8-bit integers.
    cluster_ids = np.uint8(np.nan_to_num(full_res))
    # order=0 selects nearest-neighbour resampling. The pixel values are
    # cluster IDs, i.e. labels: spline interpolation (the default) would blend
    # neighbouring IDs into values that do not correspond to any real cluster.
    imgs_5.append(zoom(cluster_ids, zoom_factor, order=0))
In [39]:
def showImageForYear(year = 1980):
    """Plot the downsampled cluster image of a single year.

    Parameters
    ----------
    year : int
        Year to display. It must be one of the loaded years, i.e.
        ``first_year <= year < first_year + len(imgs_5)``.

    Raises
    ------
    IndexError
        If no image was loaded for ``year``.
    """
    # imgs_5 is filled starting at first_year, so the offset must be taken
    # from first_year rather than from a hard-coded 1980.
    year_id = year - first_year
    # A negative index would silently pick an image from the end of the list,
    # so reject it explicitly together with indexes past the end.
    if not 0 <= year_id < len(imgs_5):
        raise IndexError("No cluster image loaded for year " + str(year))
    plt.figure(figsize=(20,20))
    plt.imshow(imgs_5[year_id])
    plt.axis('off')
    plt.show()
In [40]:
# Interactive year browser: a Play button drives a slider, and the slider
# drives showImageForYear.

# Bounds follow the configured range. Images exist for first_year up to
# last_year - 1; max() keeps the range valid when no year is selected.
min_year = first_year
max_year = max(first_year, last_year - 1)

play = widgets.Play(
    interval=1000,  # milliseconds between two steps
    value=min_year,
    min=min_year,
    max=max_year,
    step=1,
    description="Press play",
    disabled=False
)
slider = widgets.IntSlider(min=min_year, max=max_year, step=1, value=min_year)
# Keep both widgets in sync in the browser, without a kernel round-trip.
widgets.jslink((play, 'value'), (slider, 'value'))
interact(showImageForYear, year=slider)
# Last expression of the cell: renders the Play control.
widgets.HBox([play])
In [ ]: