In [1]:

    
import glob

import iris
import iris.coord_categorisation
from iris.experimental.equalise_cubes import equalise_attributes

#import warnings
#warnings.filterwarnings('ignore')

import xarray as xr



In [3]:

    
# CMIP5 model whose piControl ocean potential temperature (thetao) is analysed.
model = 'NorESM1-M'

# Glob pattern matching every monthly thetao file of the r1i1p1 piControl run.
# The model name appears twice (directory and file name), so a single named
# placeholder keeps the two occurrences in sync.
thetao_files = (
    '/g/data/ua6/DRSv2/CMIP5/{model}/piControl/mon/ocean/r1i1p1/thetao/latest/'
    'thetao_Omon_{model}_piControl_r1i1p1_*.nc'
).format(model=model)

# glob.glob() returns matches in arbitrary (filesystem) order; sort them so the
# list is deterministic and, given the date-stamped file names, chronological.
thetao_file_list = sorted(glob.glob(thetao_files))

Pick a dask approach

The documentation for using dask on a single machine can be found in the dask documentation (https://docs.dask.org/).

(When running this notebook I execute only one of the next three cells — the distributed client, the multiprocessing scheduler, or the threaded scheduler — depending on which approach I want to use.)



In [2]:

    
# Option 1 of 3: the dask.distributed scheduler.
# A disabled alternative for this cell was `client = LocalCluster()` (with
# `Client, progress, LocalCluster` imported from dask.distributed); only the
# plain `Client()` form below is active.

from dask.distributed import Client
client = Client()
# Ending the cell with `client` displays its summary as the cell output
# (scheduler address, dashboard URL, and worker / core / memory counts).
client









    Out[2]:







Client

  Scheduler: tcp://127.0.0.1:33954
  
Dashboard: http://127.0.0.1:8787/status



Cluster

  Workers: 8
  Cores: 8
  Memory: 33.67 GB



In [2]:

    
import dask
import dask.multiprocessing

# Option 2 of 3: run dask graphs on the local multiprocessing scheduler.
# `dask.set_options(get=...)` was deprecated in dask 0.18 in favour of
# `dask.config.set(scheduler=...)` and is absent from current releases, so use
# the new API when it is available and fall back to the legacy call otherwise.
if hasattr(dask, 'config'):
    dask.config.set(scheduler='processes')
else:
    dask.set_options(get=dask.multiprocessing.get)



In [1]:

    
import dask
import dask.threaded

# Option 3 of 3: run dask graphs on the local threaded scheduler.
# `dask.set_options(get=...)` was deprecated in dask 0.18 in favour of
# `dask.config.set(scheduler=...)` and is absent from current releases, so use
# the new API when it is available and fall back to the legacy call otherwise.
if hasattr(dask, 'config'):
    dask.config.set(scheduler='threads')
else:
    dask.set_options(get=dask.threaded.get)

xarray



In [3]:

    
# Open every file matched by the glob pattern as one multi-file dataset.
ds = xr.open_mfdataset(thetao_files)

# Pull out the potential-temperature variable that the rest of the notebook
# works with.
thetao = ds.thetao



In [5]:

    
# Display the DataArray summary: dimensions, dask chunking, coordinates and
# attributes (see the recorded output of this cell).
thetao









    Out[5]:





<xarray.DataArray 'thetao' (time: 6012, lev: 70, j: 384, i: 320)>
dask.array<shape=(6012, 70, 384, 320), dtype=float32, chunksize=(48, 70, 384, 320)>
Coordinates:
  * lev      (lev) float64 0.0 5.0 10.0 15.0 20.0 25.0 30.0 40.0 50.0 62.5 ...
  * j        (j) int32 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 ...
  * i        (i) int32 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 ...
    lat      (j, i) float32 -79.22052 -79.22052 -79.22052 -79.22052 ...
    lon      (j, i) float32 320.5625 321.6875 322.8125 323.9375 325.0625 ...
  * time     (time) object 0700-01-16 12:00:00 0700-02-15 00:00:00 ...
Attributes:
    standard_name:     sea_water_potential_temperature
    long_name:         Sea Water Potential Temperature
    units:             K
    original_name:     templvl
    original_units:    degC
    cell_methods:      time: mean
    cell_measures:     area: areacello volume: volcello
    associated_files:  baseURL: http://cmip-pcmdi.llnl.gov/CMIP5/dataLocation...



In [6]:

    
# Size of the full thetao array in gigabytes (1 GB = 1e9 bytes), i.e. how much
# data the reduction has to work through.
BYTES_PER_GB = 1e9
ds['thetao'].nbytes / BYTES_PER_GB









    Out[6]:





206.8512768



In [4]:

    
# Annual mean with xarray: group the time axis by calendar year, average each
# group over time, then trigger the computation with .load().
yearly_groups = thetao.groupby('time.year')
annual_mean = yearly_groups.mean('time')
test = annual_mean.load()
test

iris



In [6]:

    
# Same reduction with iris. Load the potential-temperature data from every
# input file; the result is a collection that still has to be joined.
cube_list = iris.load(thetao_file_list, 'sea_water_potential_temperature')

# Modifies cube_list in place (the return value is not used); presumably this
# strips attributes that differ between files so concatenation can succeed.
equalise_attributes(cube_list)

# Join the per-file pieces into a single cube.
cube = cube_list.concatenate_cube()

# Attach a 'year' coordinate derived from 'time'; the aggregation cell groups
# on it.
iris.coord_categorisation.add_year(cube, 'time')



In [ ]:

    
# Annual mean with iris: average all time steps that share the same value of
# the 'year' coordinate. Note that this rebinds `cube` to the aggregated result.
group_coords = ['year']
cube = cube.aggregated_by(group_coords, iris.analysis.MEAN)



In [ ]: