Here I am trying to make it unecessary to used vectorize for the regression function, since that is slow, and also causes some funkyness with recent xarray/numpy versions.
In [4]:
import xarray as xr
import numpy as np
In [10]:
from xarrayutils.utils import xr_linregress, _linregress_ufunc
In [5]:
# test dataset
dim_order = ('x', 'y', 'time')
a = xr.DataArray(np.random.rand(3, 13, 5), dims=['x', 'time', 'y'])
b = xr.DataArray(np.random.rand(3, 5, 13), dims=['x','y', 'time'])
reg = xr_linregress(a,b)#
reg
In [17]:
np.testing.assert_allclose(reg.isel(x=xx, y=yy)['slope'].data, _linregress_ufunc(a.isel(x=xx, y=yy), b.isel(x=xx, y=yy))[0])
In [11]:
xx = 1
yy = 0
In [14]:
Out[14]:
In [16]:
Out[16]:
Lets try to reproduce this with a simple example
In [1]:
import xarray as xr
import numpy as np
from scipy.stats import linregress
def _ufunc(aa,bb):
out = linregress(aa,bb)
return np.array([out.slope, out.intercept])
def wrapper(a, b, dim='time'):
return xr.apply_ufunc(
_ufunc,a,b,
input_core_dims=[[dim], [dim]],
output_core_dims=[["parameter"]],
vectorize=True,
dask="parallelized",
output_dtypes=[a.dtype],
output_sizes={"parameter": 2},)
In [4]:
a = xr.DataArray(np.random.rand(3, 13, 5), dims=['x', 'time', 'y'])
b = xr.DataArray(np.random.rand(3, 5, 13), dims=['x','y', 'time'])
a_dsk = a.chunk({'x':2, 'time':-1})
b_dsk = b.chunk({'x':2, 'time':-1})
a_dsk.data
Out[4]:
In [2]:
wrapper(a,b)#.chunk({'x':2, 'time':-1})
In [4]:
a = xr.DataArray(np.random.rand(3, 13, 5), dims=['x', 'time', 'y'])
b = xr.DataArray(np.random.rand(3, 5, 13), dims=['x','y', 'time'])
wrapper(a_dsk,b.chunk({'x':2, 'time':-1}))
In [ ]: