Benchmarking searchsorted (1) vs. unique (2) based get_windows

tl;dr unique-based is much faster if there are a lot of empty windows (i.e. sparse data)


In [1]:
import numpy as np

Old version searchsorted()-based.


In [2]:
def get_windows_1(times, window_size, start=None, end=None):
        t = times
        if start is None:
            start = times[0]
        if end is None:
            end = times[-1]
        n_windows = 1 + int((end - start) / window_size)
        windows = []
        idx_last = 0
        num_requests = 0
        n_empty = 0
        for i_window in range(0, n_windows):
            idx_included = np.searchsorted(t[idx_last:] , start + (i_window + 1) * window_size, "right")
            if idx_included == 0:
                n_empty += 1
            else:
                windows.append(np.arange(idx_last, idx_last + idx_included))
                idx_last += idx_included
                num_requests += len(windows[-1])
        assert num_requests == len(t), (num_requests, len(t))
        assert n_windows == len(windows) + n_empty, (n_windows, len(windows), n_empty)

        return windows
    
get_windows_1(np.concatenate((np.zeros(4), np.arange(8))), 0.1)


Out[2]:
[array([0, 1, 2, 3, 4]),
 array([5]),
 array([6]),
 array([7]),
 array([8]),
 array([9]),
 array([10]),
 array([11])]

New version unique()-based.


In [3]:
def get_windows_2(times, window_size, start=None, end_unused=None):
    if start is None:
            start = times[0]
    window_idx = ((times - start) / window_size).astype(int)
    #print(times)
    #print(window_idx)
    #print(window_size)
    first_idx = np.unique(window_idx, return_index=True)[1]
    # print(first_idx)
    windows = []
    for i, j in enumerate(first_idx):
        if i < len(first_idx) - 1:
            end = first_idx[i+1]
        else:
            end = len(window_idx)
        #print(i, j, end, np.arange(j, end)
        windows.append(np.arange(j, end))
    return windows

get_windows_2(np.concatenate((np.zeros(4), np.arange(8))), 0.1)


Out[3]:
[array([0, 1, 2, 3, 4]),
 array([5]),
 array([6]),
 array([7]),
 array([8]),
 array([9]),
 array([10]),
 array([11])]

In [5]:
data1 = np.concatenate((np.zeros(4), np.arange(8)))
data2 = np.random.rand(20)
data2.sort()
data3 = np.random.rand(1000)
data3.sort()

Verify that both algorithms give the same results


In [14]:
r1 = get_windows_1(data3, 1e-3)
r2 = get_windows_2(data3, 1e-3)
print("same length" if len(r1) == len(r2) else "different length!")
for i1, i2 in zip(r1, r2):
    if len(i1) != len(i2) or (i1 != i2).any():
        print(i1, i2)
        break
else:
    print("equal items")


same length
equal items

Benchmarking results


In [13]:
for n, data in enumerate((data1, data2, data3), 1):
    print("data #{}".format(n))
    for sz in (0.1, 0.01, 0.001, 1e-4):
        print("  window size: {}".format(sz))
        print("    1:", end="")
        %timeit get_windows_1(data3, sz, 0, 7)
        print("    2:", end="")
        %timeit get_windows_2(data3, sz, 0, 7)


data #1
  window size: 0.1
    1:10000 loops, best of 3: 185 µs per loop
    2:10000 loops, best of 3: 61.2 µs per loop
  window size: 0.01
    1:1000 loops, best of 3: 1.84 ms per loop
    2:1000 loops, best of 3: 283 µs per loop
  window size: 0.001
    1:100 loops, best of 3: 17.4 ms per loop
    2:1000 loops, best of 3: 1.59 ms per loop
  window size: 0.0001
    1:10 loops, best of 3: 160 ms per loop
    2:100 loops, best of 3: 2.5 ms per loop
data #2
  window size: 0.1
    1:10000 loops, best of 3: 196 µs per loop
    2:10000 loops, best of 3: 63.6 µs per loop
  window size: 0.01
    1:100 loops, best of 3: 1.9 ms per loop
    2:1000 loops, best of 3: 283 µs per loop
  window size: 0.001
    1:100 loops, best of 3: 18 ms per loop
    2:1000 loops, best of 3: 1.61 ms per loop
  window size: 0.0001
    1:10 loops, best of 3: 167 ms per loop
    2:100 loops, best of 3: 2.38 ms per loop
data #3
  window size: 0.1
    1:10000 loops, best of 3: 196 µs per loop
    2:10000 loops, best of 3: 62 µs per loop
  window size: 0.01
    1:100 loops, best of 3: 1.87 ms per loop
    2:1000 loops, best of 3: 279 µs per loop
  window size: 0.001
    1:100 loops, best of 3: 18 ms per loop
    2:1000 loops, best of 3: 1.6 ms per loop
  window size: 0.0001
    1:10 loops, best of 3: 165 ms per loop
    2:100 loops, best of 3: 2.37 ms per loop