Here I am trying to make it unecessary to used vectorize for the regression function, since that is slow, and also causes some funkyness with recent xarray/numpy versions.


In [4]:
import xarray as xr
import numpy as np

In [10]:
from xarrayutils.utils import xr_linregress, _linregress_ufunc

In [5]:
# test dataset
dim_order = ('x', 'y', 'time')
a = xr.DataArray(np.random.rand(3, 13, 5), dims=['x', 'time', 'y'])
b = xr.DataArray(np.random.rand(3, 5, 13), dims=['x','y', 'time'])
reg = xr_linregress(a,b)#
reg


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-b5cea691722a> in <module>
      3 a = xr.DataArray(np.random.rand(3, 13, 5), dims=['x', 'time', 'y'])
      4 b = xr.DataArray(np.random.rand(3, 5, 13), dims=['x','y', 'time'])
----> 5 reg = xr_linregress(a,b)#
      6 reg

NameError: name 'xr_linregress' is not defined

In [17]:
np.testing.assert_allclose(reg.isel(x=xx, y=yy)['slope'].data, _linregress_ufunc(a.isel(x=xx, y=yy), b.isel(x=xx, y=yy))[0])

In [11]:
xx = 1
yy = 0

In [14]:



Out[14]:
array(0.065724)

In [16]:



Out[16]:
0.06572399788434902

Lets try to reproduce this with a simple example


In [1]:
import xarray as xr
import numpy as np
from scipy.stats import linregress


def _ufunc(aa,bb):
    out = linregress(aa,bb)
    return np.array([out.slope, out.intercept])

def wrapper(a, b, dim='time'):
    return xr.apply_ufunc(
        _ufunc,a,b,
        input_core_dims=[[dim], [dim]],
        output_core_dims=[["parameter"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[a.dtype],
        output_sizes={"parameter": 2},)

In [4]:
a = xr.DataArray(np.random.rand(3, 13, 5), dims=['x', 'time', 'y'])
b = xr.DataArray(np.random.rand(3, 5, 13), dims=['x','y', 'time'])

a_dsk = a.chunk({'x':2, 'time':-1})
b_dsk = b.chunk({'x':2, 'time':-1})
a_dsk.data


Out[4]:
Array Chunk
Bytes 1.56 kB 1.04 kB
Shape (3, 13, 5) (2, 13, 5)
Count 3 Tasks 2 Chunks
Type float64 numpy.ndarray
5 13 3

In [2]:
wrapper(a,b)#.chunk({'x':2, 'time':-1})


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-55269825020d> in <module>
----> 1 wrapper(a,b)#.chunk({'x':2, 'time':-1})

NameError: name 'a' is not defined

In [4]:
a = xr.DataArray(np.random.rand(3, 13, 5), dims=['x', 'time', 'y'])
b = xr.DataArray(np.random.rand(3, 5, 13), dims=['x','y', 'time'])
wrapper(a_dsk,b.chunk({'x':2, 'time':-1}))


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-4-303b400356e2> in <module>
      1 a = xr.DataArray(np.random.rand(3, 13, 5), dims=['x', 'time', 'y'])
      2 b = xr.DataArray(np.random.rand(3, 5, 13), dims=['x','y', 'time'])
----> 3 wrapper(a.chunk({'x':2, 'time':-1}),b.chunk({'x':2, 'time':-1}))

<ipython-input-1-4094fd485c95> in wrapper(a, b, dim)
     16         dask="parallelized",
     17         output_dtypes=[a.dtype],
---> 18         output_sizes={"parameter": 2},)

~/miniconda/envs/euc_dynamics/lib/python3.7/site-packages/xarray/core/computation.py in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, *args)
   1042             join=join,
   1043             exclude_dims=exclude_dims,
-> 1044             keep_attrs=keep_attrs
   1045         )
   1046     elif any(isinstance(a, Variable) for a in args):

~/miniconda/envs/euc_dynamics/lib/python3.7/site-packages/xarray/core/computation.py in apply_dataarray_vfunc(func, signature, join, exclude_dims, keep_attrs, *args)
    232 
    233     data_vars = [getattr(a, "variable", a) for a in args]
--> 234     result_var = func(*data_vars)
    235 
    236     if signature.num_outputs > 1:

~/miniconda/envs/euc_dynamics/lib/python3.7/site-packages/xarray/core/computation.py in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, output_sizes, keep_attrs, *args)
    601                 "apply_ufunc: {}".format(dask)
    602             )
--> 603     result_data = func(*input_data)
    604 
    605     if signature.num_outputs == 1:

~/miniconda/envs/euc_dynamics/lib/python3.7/site-packages/xarray/core/computation.py in func(*arrays)
    591                     signature,
    592                     output_dtypes,
--> 593                     output_sizes,
    594                 )
    595 

~/miniconda/envs/euc_dynamics/lib/python3.7/site-packages/xarray/core/computation.py in _apply_blockwise(func, args, input_dims, output_dims, signature, output_dtypes, output_sizes)
    721         dtype=dtype,
    722         concatenate=True,
--> 723         new_axes=output_sizes
    724     )
    725 

~/miniconda/envs/euc_dynamics/lib/python3.7/site-packages/dask/array/blockwise.py in blockwise(func, out_ind, name, token, dtype, adjust_chunks, new_axes, align_arrays, concatenate, meta, *args, **kwargs)
    231         from .utils import compute_meta
    232 
--> 233         meta = compute_meta(func, dtype, *args[::2], **kwargs)
    234     if meta is not None:
    235         return Array(graph, out, chunks, meta=meta)

~/miniconda/envs/euc_dynamics/lib/python3.7/site-packages/dask/array/utils.py in compute_meta(func, _dtype, *args, **kwargs)
    119         # with np.vectorize, such as dask.array.routines._isnonzero_vec().
    120         if isinstance(func, np.vectorize):
--> 121             meta = func(*args_meta)
    122         else:
    123             try:

~/miniconda/envs/euc_dynamics/lib/python3.7/site-packages/numpy/lib/function_base.py in __call__(self, *args, **kwargs)
   2089             vargs.extend([kwargs[_n] for _n in names])
   2090 
-> 2091         return self._vectorize_call(func=func, args=vargs)
   2092 
   2093     def _get_ufunc_and_otypes(self, func, args):

~/miniconda/envs/euc_dynamics/lib/python3.7/site-packages/numpy/lib/function_base.py in _vectorize_call(self, func, args)
   2155         """Vectorized call to `func` over positional `args`."""
   2156         if self.signature is not None:
-> 2157             res = self._vectorize_call_with_signature(func, args)
   2158         elif not args:
   2159             res = func()

~/miniconda/envs/euc_dynamics/lib/python3.7/site-packages/numpy/lib/function_base.py in _vectorize_call_with_signature(self, func, args)
   2229                             for dims in output_core_dims
   2230                             for dim in dims):
-> 2231                 raise ValueError('cannot call `vectorize` with a signature '
   2232                                  'including new output dimensions on size 0 '
   2233                                  'inputs')

ValueError: cannot call `vectorize` with a signature including new output dimensions on size 0 inputs

In [ ]: