notebook.community

Edit and run



In [1]:

    
# import reader module from sparkxarray
from sparkxarray import reader
from pyspark.sql import SparkSession



In [2]:

    
# Create sparksession
spark = SparkSession.builder.appName("bias").getOrCreate()
sc = spark.sparkContext



In [3]:

    
FILE_1 = "/home/abanihi/Documents/Github/spark-xarray/datasets/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc"
FILE_2 = "/home/abanihi/Documents/Github/spark-xarray/datasets/AFRICA_UC-WRF311_CTL_ERAINT_MM_50km-rg_1989-2008_tasmax.nc"



In [4]:

    
knmi = reader.ncread(sc, FILE_1, mode='single', partition_on=['rlat', 'rlon'], partitions=500, decode_times=False)



In [5]:

    
knmi.first()









    Out[5]:





<xarray.Dataset>
Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)
Coordinates:
  * time          (time) float64 468.0 469.0 470.0 471.0 472.0 473.0 474.0 ...
  * rlon          (rlon) float32 -24.64
    lon           (rlat, rlon) float32 -24.64
  * rlat          (rlat) float32 -45.76
    lat           (rlat, rlon) float32 -45.76
  * height        (height) float32 2.0
Dimensions without coordinates: bnds
Data variables:
    rotated_pole  |S1 b''
    time_bnds     (time, bnds) float64 468.0 469.0 469.0 470.0 470.0 471.0 ...
    tasmax        (time, height, rlat, rlon) float64 283.4 284.2 284.2 284.6 ...
Attributes:
    institution:     KNMI
    Conventions:     CF-1.0
    conventionsURL:  http://www.cgd.ucar.edu/cms/eaton/cf-metadata/index.html
    source:          RACMO2.2b
    project_id:      ENSEMBLES
    experiment_id:   ERAIN CORDEX-Africa-50km
    realization:     1
    comments:        beta-version RACMO2.2 with default physics from ECMWF CY...
    creation_date:   2010-04-09 13:53:22



In [6]:

    
wrf = reader.ncread(sc, FILE_2, mode='single', partition_on=['rlat', 'rlon'], partitions=500, decode_times=False)



In [7]:

    
wrf.first()









    Out[7]:





<xarray.Dataset>
Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)
Coordinates:
    lon           (rlat, rlon) float64 -24.64
    lat           (rlat, rlon) float64 -45.76
  * height        (height) float32 2.0
  * time          (time) float64 1.426e+04 1.429e+04 1.432e+04 1.435e+04 ...
  * rlat          (rlat) float64 -45.76
  * rlon          (rlon) float64 -24.64
Dimensions without coordinates: bnds
Data variables:
    tasmax        (time, height, rlat, rlon) float64 283.4 284.2 284.3 284.6 ...
    rotated_pole  |S1 b''
    time_bnds     (time, bnds) float64 1.424e+04 1.428e+04 1.428e+04 ...
Attributes:
    Conventions:               CF-1.4
    institution:               Universidad de Cantabria (Spain)
    title:                     CORDEX Africa Sensitivity Run
    comment:                   The simulation was forced with ERA-Interim 2x2...
    nco_openmp_thread_number:  1



In [8]:

    
%time wrf.count()









    



CPU times: user 72 ms, sys: 28 ms, total: 100 ms
Wall time: 35.5 s






    Out[8]:





38994



In [12]:

    
def create_indices(element):
    lat = round(float(element.rlat.data), 1)
    lon = round(float(element.rlon.data), 1)
    key = (lat, lon)
    return (key, element)



In [13]:

    
knmi2 = knmi.map(create_indices)
knmi2.first()









    Out[13]:





((-45.8, -24.6), <xarray.Dataset>
 Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)
 Coordinates:
   * time          (time) float64 468.0 469.0 470.0 471.0 472.0 473.0 474.0 ...
   * rlon          (rlon) float32 -24.64
     lon           (rlat, rlon) float32 -24.64
   * rlat          (rlat) float32 -45.76
     lat           (rlat, rlon) float32 -45.76
   * height        (height) float32 2.0
 Dimensions without coordinates: bnds
 Data variables:
     rotated_pole  |S1 b''
     time_bnds     (time, bnds) float64 468.0 469.0 469.0 470.0 470.0 471.0 ...
     tasmax        (time, height, rlat, rlon) float64 283.4 284.2 284.2 284.6 ...
 Attributes:
     institution:     KNMI
     Conventions:     CF-1.0
     conventionsURL:  http://www.cgd.ucar.edu/cms/eaton/cf-metadata/index.html
     source:          RACMO2.2b
     project_id:      ENSEMBLES
     experiment_id:   ERAIN CORDEX-Africa-50km
     realization:     1
     comments:        beta-version RACMO2.2 with default physics from ECMWF CY...
     creation_date:   2010-04-09 13:53:22)



In [14]:

    
wrf2 = wrf.map(create_indices)
wrf2.first()









    Out[14]:





((-45.8, -24.6), <xarray.Dataset>
 Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)
 Coordinates:
     lon           (rlat, rlon) float64 -24.64
     lat           (rlat, rlon) float64 -45.76
   * height        (height) float32 2.0
   * time          (time) float64 1.426e+04 1.429e+04 1.432e+04 1.435e+04 ...
   * rlat          (rlat) float64 -45.76
   * rlon          (rlon) float64 -24.64
 Dimensions without coordinates: bnds
 Data variables:
     tasmax        (time, height, rlat, rlon) float64 283.4 284.2 284.3 284.6 ...
     rotated_pole  |S1 b''
     time_bnds     (time, bnds) float64 1.424e+04 1.428e+04 1.428e+04 ...
 Attributes:
     Conventions:               CF-1.4
     institution:               Universidad de Cantabria (Spain)
     title:                     CORDEX Africa Sensitivity Run
     comment:                   The simulation was forced with ERA-Interim 2x2...
     nco_openmp_thread_number:  1)



In [15]:

    
rdd = wrf2.join(knmi2, numPartitions=500)
rdd.first()









    Out[15]:





((-44.4, -14.1), (<xarray.Dataset>
  Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)
  Coordinates:
      lon           (rlat, rlon) float64 -14.08
      lat           (rlat, rlon) float64 -44.44
    * height        (height) float32 2.0
    * time          (time) float64 1.426e+04 1.429e+04 1.432e+04 1.435e+04 ...
    * rlat          (rlat) float64 -44.44
    * rlon          (rlon) float64 -14.08
  Dimensions without coordinates: bnds
  Data variables:
      tasmax        (time, height, rlat, rlon) float64 284.4 285.3 284.2 285.0 ...
      rotated_pole  |S1 b''
      time_bnds     (time, bnds) float64 1.424e+04 1.428e+04 1.428e+04 ...
  Attributes:
      Conventions:               CF-1.4
      institution:               Universidad de Cantabria (Spain)
      title:                     CORDEX Africa Sensitivity Run
      comment:                   The simulation was forced with ERA-Interim 2x2...
      nco_openmp_thread_number:  1, <xarray.Dataset>
  Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)
  Coordinates:
    * time          (time) float64 468.0 469.0 470.0 471.0 472.0 473.0 474.0 ...
    * rlon          (rlon) float32 -14.08
      lon           (rlat, rlon) float32 -14.08
    * rlat          (rlat) float32 -44.44
      lat           (rlat, rlon) float32 -44.44
    * height        (height) float32 2.0
  Dimensions without coordinates: bnds
  Data variables:
      rotated_pole  |S1 b''
      time_bnds     (time, bnds) float64 468.0 469.0 469.0 470.0 470.0 471.0 ...
      tasmax        (time, height, rlat, rlon) float64 284.1 285.2 284.2 285.0 ...
  Attributes:
      institution:     KNMI
      Conventions:     CF-1.0
      conventionsURL:  http://www.cgd.ucar.edu/cms/eaton/cf-metadata/index.html
      source:          RACMO2.2b
      project_id:      ENSEMBLES
      experiment_id:   ERAIN CORDEX-Africa-50km
      realization:     1
      comments:        beta-version RACMO2.2 with default physics from ECMWF CY...
      creation_date:   2010-04-09 13:53:22))



In [16]:

    
rdd.getNumPartitions()









    Out[16]:





500



In [17]:

    
rdd.count()









    Out[17]:





38994



In [18]:

    
a = rdd.first()
a









    Out[18]:





((-44.4, -14.1), (<xarray.Dataset>
  Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)
  Coordinates:
      lon           (rlat, rlon) float64 -14.08
      lat           (rlat, rlon) float64 -44.44
    * height        (height) float32 2.0
    * time          (time) float64 1.426e+04 1.429e+04 1.432e+04 1.435e+04 ...
    * rlat          (rlat) float64 -44.44
    * rlon          (rlon) float64 -14.08
  Dimensions without coordinates: bnds
  Data variables:
      tasmax        (time, height, rlat, rlon) float64 284.4 285.3 284.2 285.0 ...
      rotated_pole  |S1 b''
      time_bnds     (time, bnds) float64 1.424e+04 1.428e+04 1.428e+04 ...
  Attributes:
      Conventions:               CF-1.4
      institution:               Universidad de Cantabria (Spain)
      title:                     CORDEX Africa Sensitivity Run
      comment:                   The simulation was forced with ERA-Interim 2x2...
      nco_openmp_thread_number:  1, <xarray.Dataset>
  Dimensions:       (bnds: 2, height: 1, rlat: 1, rlon: 1, time: 240)
  Coordinates:
    * time          (time) float64 468.0 469.0 470.0 471.0 472.0 473.0 474.0 ...
    * rlon          (rlon) float32 -14.08
      lon           (rlat, rlon) float32 -14.08
    * rlat          (rlat) float32 -44.44
      lat           (rlat, rlon) float32 -44.44
    * height        (height) float32 2.0
  Dimensions without coordinates: bnds
  Data variables:
      rotated_pole  |S1 b''
      time_bnds     (time, bnds) float64 468.0 469.0 469.0 470.0 470.0 471.0 ...
      tasmax        (time, height, rlat, rlon) float64 284.1 285.2 284.2 285.0 ...
  Attributes:
      institution:     KNMI
      Conventions:     CF-1.0
      conventionsURL:  http://www.cgd.ucar.edu/cms/eaton/cf-metadata/index.html
      source:          RACMO2.2b
      project_id:      ENSEMBLES
      experiment_id:   ERAIN CORDEX-Africa-50km
      realization:     1
      comments:        beta-version RACMO2.2 with default physics from ECMWF CY...
      creation_date:   2010-04-09 13:53:22))



In [139]:

    
def bias_correct(element):
    import numpy as np
    obs = element[1][1].tasmax.values.ravel()
    mod = element[1][0].tasmax.values.ravel()
    
    cdfn = 30.0
    
    obs = np.sort(obs)
    mod = np.sort(mod)
    
    global_max = max(np.amax(obs), np.amax(mod))
    
    wide = global_max / cdfn
    
    xbins = np.arange(0.0, global_max+wide, wide)
    
    pdfobs, bins = np.histogram(obs, bins=xbins)
    pdfmod, bins = np.histogram(mod, bins=xbins)
    
    cdfobs = np.insert(np.cumsum(pdfobs), 0, 0.0)
    cdfmod = np.insert(np.cumsum(pdfmod), 0, 0.0) 
    
    vals = [150., 256.6, 100000]
    
    def bias_map(vals, xbins, cdfmod, cdfobs):
        xbins = xbins
        cdfmod = cdfmod
        cdfobs = cdfobs
        
        cdf1 = np.interp(vals, xbins, cdfmod)
        
        corrected = np.interp(cdf1, cdfobs, xbins)
        
        return corrected 

    results = bias_map(vals, xbins, cdfmod, cdfobs)
        
    return results



In [140]:

    
bias_corrected = rdd.map(bias_correct)



In [142]:

    
bias_corrected.take(10)









    Out[142]:





[array([ 276.68960063,  276.68960063,  286.23062134]),
 array([ 277.6843516 ,  277.6843516 ,  287.25967407]),
 array([ 280.57859904,  280.57859904,  290.25372314]),
 array([ 280.19751383,  280.19751383,  299.52148031]),
 array([ 283.49803975,  283.49803975,  293.27383423]),
 array([ 285.22876485,  285.22876485,  304.89971415]),
 array([ 285.08433126,  285.08433126,  294.91482544]),
 array([ 283.94258016,  283.94258016,  293.73370361]),
 array([ 283.80044759,  283.80044759,  293.58666992]),
 array([ 288.61755575,  288.61755575,  298.56988525])]



In [102]:

    
bias_corrected.first().mean()









    Out[102]:





282.32231330871582



In [ ]:

Table of Contents