In [1]:
import glob

import iris
import iris.coord_categorisation
from iris.experimental.equalise_cubes import equalise_attributes

#import warnings
#warnings.filterwarnings('ignore')

import xarray as xr

In [3]:
# CMIP5 model whose piControl potential-temperature (thetao) files are analysed.
model = 'NorESM1-M'

# Glob pattern for the thetao files of this model. The pattern string itself
# is what the xarray cell opens; the iris cell uses the expanded list below.
thetao_files = '/g/data/ua6/DRSv2/CMIP5/%s/piControl/mon/ocean/r1i1p1/thetao/latest/thetao_Omon_%s_piControl_r1i1p1_*.nc' %(model, model)

# glob.glob() returns matches in an arbitrary, filesystem-dependent order;
# sorting makes the file list identical from one run to the next.
thetao_file_list = sorted(glob.glob(thetao_files))

Pick a dask approach

The Dask documentation on running dask on a single machine (the local distributed, multiprocessing and threaded schedulers) is the reference for the three options below.

(When running this notebook I execute only one of the next three cells, depending on which scheduler I want to use.)


In [2]:
# Option 1: the dask.distributed scheduler.
#
# Earlier variant, kept for reference (it binds a LocalCluster, not a Client,
# to the name `client`):
#from dask.distributed import Client, progress, LocalCluster
#client = LocalCluster()
#client

from dask.distributed import Client
# Client() is called with no arguments; the cell output reports the workers,
# cores and memory of the cluster it is attached to.
client = Client()
# Bare last expression so the notebook renders the client summary.
client


Out[2]:

Client

Cluster

  • Workers: 8
  • Cores: 8
  • Memory: 33.67 GB

In [2]:
import dask
import dask.multiprocessing

# Option 2: make the process-based single-machine scheduler the default for
# every later compute()/load() call.
#
# dask.set_options(get=...) was superseded by dask.config.set(scheduler=...)
# and is not available in current dask releases, so use the new API whenever
# dask.config exists and keep the original call only as a fallback for very
# old installations.
if hasattr(dask, 'config'):
    dask.config.set(scheduler='processes')
else:
    dask.set_options(get=dask.multiprocessing.get)

In [1]:
import dask
import dask.threaded

# Option 3: make the thread-based single-machine scheduler the default for
# every later compute()/load() call.
#
# dask.set_options(get=...) was superseded by dask.config.set(scheduler=...)
# and is not available in current dask releases, so use the new API whenever
# dask.config exists and keep the original call only as a fallback for very
# old installations.
if hasattr(dask, 'config'):
    dask.config.set(scheduler='threads')
else:
    dask.set_options(get=dask.threaded.get)

xarray


In [3]:
# Open all files matched by the glob pattern as a single multi-file dataset.
ds = xr.open_mfdataset(thetao_files)

# Pull out the potential-temperature variable (attribute-style access is
# equivalent to ds['thetao']).
thetao = ds.thetao

In [5]:
# Bare expression: render the DataArray summary (dimensions, dask chunking,
# coordinates and attributes).
thetao


Out[5]:
<xarray.DataArray 'thetao' (time: 6012, lev: 70, j: 384, i: 320)>
dask.array<shape=(6012, 70, 384, 320), dtype=float32, chunksize=(48, 70, 384, 320)>
Coordinates:
  * lev      (lev) float64 0.0 5.0 10.0 15.0 20.0 25.0 30.0 40.0 50.0 62.5 ...
  * j        (j) int32 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 ...
  * i        (i) int32 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 ...
    lat      (j, i) float32 -79.22052 -79.22052 -79.22052 -79.22052 ...
    lon      (j, i) float32 320.5625 321.6875 322.8125 323.9375 325.0625 ...
  * time     (time) object 0700-01-16 12:00:00 0700-02-15 00:00:00 ...
Attributes:
    standard_name:     sea_water_potential_temperature
    long_name:         Sea Water Potential Temperature
    units:             K
    original_name:     templvl
    original_units:    degC
    cell_methods:      time: mean
    cell_measures:     area: areacello volume: volcello
    associated_files:  baseURL: http://cmip-pcmdi.llnl.gov/CMIP5/dataLocation...

In [6]:
# Total size of the thetao array in gigabytes (1 GB = 1e9 bytes), i.e. how
# much data the annual-mean reduction has to work through.
BYTES_PER_GB = 1e9
ds['thetao'].nbytes / BYTES_PER_GB


Out[6]:
206.8512768

In [4]:
# Group the time axis by calendar year (xarray's 'time.year' accessor).
yearly_groups = thetao.groupby('time.year')

# Average over the time steps inside each year.
annual_mean = yearly_groups.mean('time')

# load() is the step that actually runs the computation and pulls the result
# into memory; it returns the same (now in-memory) object.
test = annual_mean.load()
test

iris


In [6]:
# Load the potential-temperature cubes from every input file.
cube_list = iris.load(thetao_file_list, 'sea_water_potential_temperature')

# Make the cubes' attributes agree (the list is modified in place) so that
# per-file attribute differences do not block concatenation.
equalise_attributes(cube_list)

# Join the cubes into a single cube.
cube = cube_list.concatenate_cube()

# Attach a year coordinate derived from 'time'; the aggregation cell that
# follows groups on a coordinate named 'year'.
iris.coord_categorisation.add_year(cube, 'time')

In [ ]:
# Collapse `cube` to the mean over each distinct value of its 'year'
# coordinate.
# NOTE(review): this rebinds `cube`, so the un-aggregated cube is no longer
# reachable afterwards, and the cell relies on a 'year' coordinate already
# being present on `cube`.
cube = cube.aggregated_by(['year'], iris.analysis.MEAN)

In [ ]: