Title

Text

Initialization

This section initializes the notebook.

Dependencies

Here, all necessary libraries are imported.


In [1]:
#Add all dependencies to PYTHON_PATH
import sys
sys.path.append("/usr/lib/spark/python")
sys.path.append("/usr/lib/spark/python/lib/py4j-0.10.4-src.zip")
sys.path.append("/usr/lib/python3/dist-packages")
sys.path.append("/data/local/jupyterhub/modules/python")

#Define environment variables
import os
os.environ["HADOOP_CONF_DIR"] = "/etc/hadoop/conf"
os.environ["PYSPARK_PYTHON"] = "python3"
os.environ["PYSPARK_DRIVER_PYTHON"] = "ipython"

import subprocess

#Load PySpark to connect to a Spark cluster
from pyspark import SparkConf, SparkContext
from hdfs import InsecureClient
from tempfile import TemporaryFile

#from osgeo import gdal
#To read GeoTiffs as a ByteArray
from io import BytesIO
from rasterio.io import MemoryFile

import numpy
import numpy as np
import pandas
import datetime
import matplotlib.pyplot as plt
import rasterio
from rasterio import plot
from os import listdir
from os.path import isfile, join
import scipy.linalg

Configuration

This configuration determines whether functions print logs during the execution.


In [2]:
debugMode = True

Connect to Spark

Here, the Spark context is loaded, which allows for a connection to HDFS.


In [3]:
appName = "plot_GeoTiff"
masterURL = "spark://emma0.emma.nlesc.nl:7077"

#A context needs to be created if it does not already exist
try:
    sc.stop()
except NameError:
    print("A new Spark Context will be created.")

sc = SparkContext(conf = SparkConf().setAppName(appName).setMaster(masterURL))
conf = sc.getConf()


A new Spark Context will be created.

Subtitle


In [4]:
def getModeAsArray(filePath):
    data = sc.binaryFiles(filePath).take(1)
    byteArray = bytearray(data[0][1])
    memfile = MemoryFile(byteArray)
    dataset = memfile.open()
    array = np.array(dataset.read()[0], dtype=np.float64)
    memfile.close()
    array = array.flatten()
    array = array[~np.isnan(array)]
    return array

In [5]:
def detemineNorm(array1, array2):
    if array1.shape != array2.shape:
        print("Error: shapes are not the same: (" + str(array1.shape) + " vs " + str(array2.shape) + ")")
        return 0
    value = scipy.linalg.norm(array1 - array2)
    if value > 1:
        value = scipy.linalg.norm(array1 + array2)
    return value

In [6]:
textFile1 = sc.textFile("hdfs:///user/emma/svd/spark/BloomGridmetLeafGridmetCali3/U.csv").map(lambda line: (line.split(','))).map(lambda m: [ float(i) for i in m]).collect()

In [7]:
array1 = numpy.array(textFile1, dtype=np.float64)
vector11 = array1.T[0]
vector12 = array1.T[1]
vector13 = array1.T[2]

In [8]:
textFile2 = sc.textFile("hdfs:///user/emma/svd/BloomGridmetLeafGridmetCali/U.csv").map(lambda line: (line.split(','))).map(lambda m: [ np.float64(i) for i in m]).collect()

In [9]:
array2 = numpy.array(textFile2, dtype=np.float64).reshape(37,23926)
vector21 = array2[0]
vector22 = array2[1]
vector23 = array2[2]

In [10]:
array2.shape


Out[10]:
(26, 23926)

In [11]:
print(detemineNorm(vector11, vector21))
print(detemineNorm(vector12, vector22))
print(detemineNorm(vector13, vector23))


0.019877920048689643
0.22515439980564012
0.5539126057618723

In [12]:
array1 = getModeAsArray("hdfs:///user/emma/svd/spark/BloomGridmetLeafGridmetCali3/u_tiffs/svd_u_0_26.tif")
array2 = getModeAsArray("hdfs:///user/emma/svd/BloomGridmetLeafGridmetCali/ModeU01.tif")
detemineNorm(array1, array2)


Out[12]:
0.019877920048689643

In [13]:
print(detemineNorm(array1, vector11))
print(detemineNorm(array1, vector21))
print(detemineNorm(array2, vector11))
print(detemineNorm(array2, vector21))


0.0
0.019877920048689643
0.019877920048689643
0.0

In [14]:
~np.in1d(array1, vector21)


Out[14]:
array([ True,  True,  True, ...,  True,  True,  True], dtype=bool)

In [15]:
for i in range(10):
    print("%.19f %0.19f %0.19f" % (array1[i], array2[i], (array1[i]+array2[i])))


0.0054475668246696104 0.0054758155880260369 0.0109233824126956473
0.0060040926559457716 0.0060814415638501228 0.0120855342197958952
0.0063498673700583394 0.0064090184856642873 0.0127588858557226258
0.0066775983071042967 0.0067640268435454360 0.0134416251506497327
0.0071891643730413405 0.0071925245790763958 0.0143816889521177355
0.0074920533789200119 0.0074329244892621001 0.0149249778681821120
0.0080115963666863507 0.0079135797838804072 0.0159251761505667597
0.0080884104673247127 0.0080470660225824050 0.0161354764899071176
0.0082395320915717492 0.0081619449603445877 0.0164014770519163369
0.0082126407883930195 0.0080970752124450082 0.0163097160008380260

BloomFinalLowPR and LeafFinalLowPR


In [16]:
array1 = getModeAsArray("hdfs:///user/emma/svd/BloomFinalLowPRLeafFinalLowPR/ModeU01.tif")
array2 = getModeAsArray("hdfs:///user/emma/svd/spark/BloomFinalLowPRLeafFinalLowPR3/u_tiffs/svd_u_0_3.tif")
detemineNorm(array1, array2)


Out[16]:
9.28098946087584e-16

In [17]:
array1 = getModeAsArray("hdfs:///user/emma/svd/BloomFinalLowPRLeafFinalLowPR/ModeU02.tif")
array2 = getModeAsArray("hdfs:///user/emma/svd/spark/BloomFinalLowPRLeafFinalLowPR3/u_tiffs/svd_u_1_3.tif")
detemineNorm(array1, array2)


Out[17]:
8.816536190170412e-13

In [18]:
array1 = getModeAsArray("hdfs:///user/emma/svd/BloomFinalLowPRLeafFinalLowPR/ModeU01.tif")
array2 = getModeAsArray("hdfs:///user/emma/svd/spark/BloomFinalLowPRLeafFinalLowPR3/u_tiffs/svd_u_0_3.tif")
detemineNorm(array1, array2)


Out[18]:
9.28098946087584e-16

BloomGridmet and LeafGridmet


In [36]:
for i in range(37):
    if (i < 9):
        path1 = "hdfs:///user/emma/svd/BloomGridmetLeafGridmetCali/ModeU0"+ str(i+1) + ".tif"
    else:
        path1 = "hdfs:///user/emma/svd/BloomGridmetLeafGridmetCali/ModeU"+ str(i+1) + ".tif"
    array1 = getModeAsArray(path1)
    array2 = getModeAsArray("hdfs:///user/emma/svd/spark/BloomGridmetLeafGridmetCali3/u_tiffs/svd_u_" +str(i) +"_26.tif")
    print(detemineNorm(array1, array2))


0.019877920048689643
0.22515439980564012
0.5539126057618723
0.7117353852776293
0.6851165621257903
1.4807162426411247
1.1137233966422266
1.313815863083196
1.2163766695516485
1.0750196677899924
1.4332856347586593
1.556381876954485
1.5303061254016956
1.4356203489793442
1.7161962709948404
1.1844712251458485
1.245426934074946
1.5116259272830825
1.1301663108598696
1.4550808460826279
1.5220695730859273
1.4407635322689494
1.2046921666560537
1.4782273745166723
1.4577514911203793
1.5840442665155274

End of Notebook


In [ ]: