In [1]:
import numpy as np
import numpy.testing as npt
import xarray as xr
import xrft
import dask.array as dsar
from matplotlib import colors
import matplotlib.pyplot as plt
%matplotlib inline

Parallelized Bartlett's Method

For long data sets that have reached statistical equilibrium, it is useful to chunk the data, calculate the periodogram for each chunk and then take the average to reduce variance.


In [2]:
n = int(2**8)
da = xr.DataArray(np.random.rand(n,int(n/2),int(n/2)), dims=['time','y','x'])
da


Out[2]:
<xarray.DataArray (time: 256, y: 128, x: 128)>
array([[[ 0.493341,  0.28303 , ...,  0.434256,  0.616031],
        [ 0.777314,  0.629644, ...,  0.152931,  0.445424],
        ..., 
        [ 0.562456,  0.022227, ...,  0.88538 ,  0.054687],
        [ 0.381456,  0.908454, ...,  0.843443,  0.706326]],

       [[ 0.469143,  0.241104, ...,  0.249369,  0.830898],
        [ 0.283305,  0.438634, ...,  0.893666,  0.242556],
        ..., 
        [ 0.897823,  0.187038, ...,  0.977466,  0.270899],
        [ 0.252733,  0.425873, ...,  0.228847,  0.954393]],

       ..., 
       [[ 0.936424,  0.793693, ...,  0.406293,  0.272336],
        [ 0.917752,  0.83908 , ...,  0.954489,  0.151129],
        ..., 
        [ 0.081756,  0.016332, ...,  0.524886,  0.87095 ],
        [ 0.677224,  0.41488 , ...,  0.12199 ,  0.689685]],

       [[ 0.193302,  0.113419, ...,  0.083486,  0.784332],
        [ 0.695728,  0.376776, ...,  0.278004,  0.026373],
        ..., 
        [ 0.677775,  0.255296, ...,  0.112851,  0.46325 ],
        [ 0.598086,  0.529324, ...,  0.267431,  0.65419 ]]])
Dimensions without coordinates: time, y, x

One dimension

Discrete Fourier Transform


In [3]:
daft = xrft.dft(da.chunk({'time':int(n/4)}), dim=['time'], shift=False , chunks_to_segments=True).compute()
daft


Out[3]:
<xarray.DataArray 'fftn-917988d0ce7d7da01b5f7a3cf2bb9a26' (time_segment: 4, freq_time: 64, y: 128, x: 128)>
array([[[[ 30.737014+0.j      , ...,  31.659135+0.j      ],
         ..., 
         [ 31.308938+0.j      , ...,  31.768846+0.j      ]],

        ..., 
        [[  1.928097-0.118076j, ...,   0.732440+2.07656j ],
         ..., 
         [  0.225814+1.256083j, ...,   0.244113-1.276807j]]],


       ..., 
       [[[ 37.777908+0.j      , ...,  30.996848+0.j      ],
         ..., 
         [ 28.650088+0.j      , ...,  35.362874+0.j      ]],

        ..., 
        [[ -1.780642+0.477772j, ...,   2.575858+1.71943j ],
         ..., 
         [  3.149759-2.664934j, ...,   1.872009-2.977565j]]]])
Coordinates:
  * time_segment       (time_segment) int64 0 1 2 3
  * freq_time          (freq_time) float64 0.0 0.01562 0.03125 0.04688 ...
  * y                  (y) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 ...
  * x                  (x) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 ...
    freq_time_spacing  float64 0.01562

In [4]:
data = da.chunk({'time':int(n/4)}).data
data_rs = data.reshape((4,int(n/4),int(n/2),int(n/2)))
da_rs = xr.DataArray(data_rs, dims=['time_segment','time','y','x'])
da1 = xr.DataArray(dsar.fft.fftn(data_rs, axes=[1]).compute(),
                   dims=['time_segment','freq_time','y','x'])
da1


Out[4]:
<xarray.DataArray (time_segment: 4, freq_time: 64, y: 128, x: 128)>
array([[[[ 30.737014+0.j      , ...,  31.659135+0.j      ],
         ..., 
         [ 31.308938+0.j      , ...,  31.768846+0.j      ]],

        ..., 
        [[  1.928097-0.118076j, ...,   0.732440+2.07656j ],
         ..., 
         [  0.225814+1.256083j, ...,   0.244113-1.276807j]]],


       ..., 
       [[[ 37.777908+0.j      , ...,  30.996848+0.j      ],
         ..., 
         [ 28.650088+0.j      , ...,  35.362874+0.j      ]],

        ..., 
        [[ -1.780642+0.477772j, ...,   2.575858+1.71943j ],
         ..., 
         [  3.149759-2.664934j, ...,   1.872009-2.977565j]]]])
Dimensions without coordinates: time_segment, freq_time, y, x

We assert that our calculations give equal results.


In [5]:
npt.assert_almost_equal(da1, daft.values)

Power Spectrum


In [6]:
ps = xrft.power_spectrum(da.chunk({'time':int(n/4)}), dim=['time'], chunks_to_segments=True)
ps


Out[6]:
<xarray.DataArray 'concatenate-183433100cd82e429170a4fe2f9c4cbb' (time_segment: 4, freq_time: 64, y: 128, x: 128)>
dask.array<truediv, shape=(4, 64, 128, 128), dtype=float64, chunksize=(1, 32, 128, 128)>
Coordinates:
  * time_segment       (time_segment) int64 0 1 2 3
  * freq_time          (freq_time) float64 -0.5 -0.4844 -0.4688 -0.4531 ...
  * y                  (y) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 ...
  * x                  (x) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 ...
    freq_time_spacing  float64 0.01562

Taking the mean over the segments gives the Barlett's estimate.


In [7]:
ps = ps.mean(['time_segment','y','x'])
ps


Out[7]:
<xarray.DataArray 'concatenate-183433100cd82e429170a4fe2f9c4cbb' (freq_time: 64)>
dask.array<mean_agg-aggregate, shape=(64,), dtype=float64, chunksize=(32,)>
Coordinates:
  * freq_time          (freq_time) float64 -0.5 -0.4844 -0.4688 -0.4531 ...
    freq_time_spacing  float64 0.01562

In [8]:
fig, ax = plt.subplots()
ax.semilogx(ps.freq_time[int(n/8)+1:], ps[int(n/8)+1:])


Out[8]:
[<matplotlib.lines.Line2D at 0x10ebc9518>]

Two dimension

Discrete Fourier Transform


In [9]:
daft = xrft.dft(da.chunk({'y':32,'x':32}), dim=['y','x'], shift=False , chunks_to_segments=True).compute()
daft


Out[9]:
<xarray.DataArray 'fftn-8077935acd6b48b40d6593c688c326b2' (time: 256, y_segment: 4, freq_y: 32, x_segment: 4, freq_x: 32)>
array([[[[[ 505.090962 +0.j      , ...,    3.673241 +2.033024j],
          ..., 
          [ 506.979486 +0.j      , ...,    2.672219 +8.645102j]],

         ..., 
         [[  -1.746757 -1.347122j, ...,   -2.183099+17.472835j],
          ..., 
          [   3.450049 +3.832201j, ...,   -4.072164 -7.279733j]]],


        ..., 
        [[[ 504.971751 +0.j      , ...,   -6.610465-12.385931j],
          ..., 
          [ 512.756185 +0.j      , ...,   -4.344255 -8.458134j]],

         ..., 
         [[  -7.979198 -7.454325j, ...,   -2.962019 +6.43059j ],
          ..., 
          [   4.024805 +3.72519j , ...,   -8.242673 -8.259182j]]]],



       ..., 
       [[[[ 518.573138 +0.j      , ...,    0.573928-10.006888j],
          ..., 
          [ 520.423164 +0.j      , ...,   -1.110088 +0.141936j]],

         ..., 
         [[   2.043005 -3.116515j, ...,    8.697924 -5.116488j],
          ..., 
          [   3.702009 -7.202762j, ...,  -12.007770 +3.514272j]]],


        ..., 
        [[[ 523.615806 +0.j      , ...,   -9.301065 +4.935474j],
          ..., 
          [ 521.535950 +0.j      , ...,    6.826755 +1.688166j]],

         ..., 
         [[   2.157400-14.676636j, ...,   -1.865237-11.408717j],
          ..., 
          [   0.651302 +0.531716j, ...,    5.861882 +5.968681j]]]]])
Coordinates:
  * time            (time) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 ...
  * y_segment       (y_segment) int64 0 1 2 3
  * freq_y          (freq_y) float64 0.0 0.03125 0.0625 0.09375 0.125 0.1562 ...
  * x_segment       (x_segment) int64 0 1 2 3
  * freq_x          (freq_x) float64 0.0 0.03125 0.0625 0.09375 0.125 0.1562 ...
    freq_y_spacing  float64 0.03125
    freq_x_spacing  float64 0.03125

In [10]:
data = da.chunk({'y':32,'x':32}).data
data_rs = data.reshape((256,4,32,4,32))
da_rs = xr.DataArray(data_rs, dims=['time','y_segment','y','x_segment','x'])
da2 = xr.DataArray(dsar.fft.fftn(data_rs, axes=[2,4]).compute(),
                   dims=['time','y_segment','freq_y','x_segment','freq_x'])
da2


Out[10]:
<xarray.DataArray (time: 256, y_segment: 4, freq_y: 32, x_segment: 4, freq_x: 32)>
array([[[[[ 505.090962 +0.j      , ...,    3.673241 +2.033024j],
          ..., 
          [ 506.979486 +0.j      , ...,    2.672219 +8.645102j]],

         ..., 
         [[  -1.746757 -1.347122j, ...,   -2.183099+17.472835j],
          ..., 
          [   3.450049 +3.832201j, ...,   -4.072164 -7.279733j]]],


        ..., 
        [[[ 504.971751 +0.j      , ...,   -6.610465-12.385931j],
          ..., 
          [ 512.756185 +0.j      , ...,   -4.344255 -8.458134j]],

         ..., 
         [[  -7.979198 -7.454325j, ...,   -2.962019 +6.43059j ],
          ..., 
          [   4.024805 +3.72519j , ...,   -8.242673 -8.259182j]]]],



       ..., 
       [[[[ 518.573138 +0.j      , ...,    0.573928-10.006888j],
          ..., 
          [ 520.423164 +0.j      , ...,   -1.110088 +0.141936j]],

         ..., 
         [[   2.043005 -3.116515j, ...,    8.697924 -5.116488j],
          ..., 
          [   3.702009 -7.202762j, ...,  -12.007770 +3.514272j]]],


        ..., 
        [[[ 523.615806 +0.j      , ...,   -9.301065 +4.935474j],
          ..., 
          [ 521.535950 +0.j      , ...,    6.826755 +1.688166j]],

         ..., 
         [[   2.157400-14.676636j, ...,   -1.865237-11.408717j],
          ..., 
          [   0.651302 +0.531716j, ...,    5.861882 +5.968681j]]]]])
Dimensions without coordinates: time, y_segment, freq_y, x_segment, freq_x

We assert that our calculations give equal results.


In [11]:
npt.assert_almost_equal(da2, daft.values)

Power Spectrum


In [14]:
ps = xrft.power_spectrum(da.chunk({'time':1,'y':64,'x':64}), dim=['y','x'], 
                         chunks_to_segments=True, window='True', detrend='linear')
ps = ps.mean(['time','y_segment','x_segment'])
ps


Out[14]:
<xarray.DataArray 'concatenate-34ef1d78d80632d6b25c65df82f67753' (freq_y: 64, freq_x: 64)>
dask.array<mean_agg-aggregate, shape=(64, 64), dtype=float64, chunksize=(32, 32)>
Coordinates:
  * freq_y          (freq_y) float64 -0.5 -0.4844 -0.4688 -0.4531 -0.4375 ...
  * freq_x          (freq_x) float64 -0.5 -0.4844 -0.4688 -0.4531 -0.4375 ...
    freq_y_spacing  float64 0.01562
    freq_x_spacing  float64 0.01562

In [19]:
fig, ax = plt.subplots()
ps.plot(ax=ax, norm=colors.LogNorm(), vmin=6.5e-4, vmax=7.5e-4)


Out[19]:
<matplotlib.collections.QuadMesh at 0x1210117b8>

In [ ]: