In [1]:
import array as ar
import numpy as np
from bisect import bisect
from itertools import accumulate
from random import random, randrange

In [2]:
N = 1000
tpcounts = tuple(int(5*random()) for i in range(N))
tpcuml = tuple(accumulate(tpcounts))
tot = tpcuml[-1]

arcounts = ar.array('l', tpcounts)
arcuml = ar.array('l', accumulate(tpcounts))
arcounts = ar.array('l', tpcounts)
arcuml = ar.array('l', accumulate(tpcounts))
npcounts = np.array(tpcounts, dtype=np.uint32)
npcuml = np.cumsum(npcounts, dtype=np.uint32)

In [3]:
seek = list(randrange(tot) for i in range(N))
npseek = np.array(seek, dtype=np.uint32)

In [4]:
type(tpcounts[0]), type(tpcuml[0]), type(arcounts[0]), type(arcuml[0]), type(npcounts[0]), type(npcuml[0]), type(seek[0])


Out[4]:
(int, int, int, int, numpy.uint32, numpy.uint32, int)

In [5]:
%timeit for i in seek:    bisect(tpcuml, i)
%timeit for i in seek:    bisect(arcuml, i)
%timeit for i in npseek:  bisect(npcuml, i)  # Good enough?
%timeit for i in npseek:  np.searchsorted(npcuml, i, "right")    
%timeit for i in seek:    np.searchsorted(npcuml, i, "right")
%timeit -n 10 for i in seek:    bisect(npcuml, i)  # Worst


1000 loops, best of 3: 316 µs per loop
1000 loops, best of 3: 393 µs per loop
1000 loops, best of 3: 797 µs per loop
1000 loops, best of 3: 1.16 ms per loop
100 loops, best of 3: 1.93 ms per loop
100 loops, best of 3: 16.2 ms per loop

In [6]:
arloc = [bisect(arcuml, i) for i in seek ]
tploc = [bisect(tpcuml, i) for i in seek]
nploc = [bisect(npcuml, i) for i in npseek]

In [7]:
%%timeit -n 1
for loc in arloc: 
    val = arcuml[loc]
    if loc:
        val -= arcuml[loc-1]
    for j in range(loc, N):
        arcuml[j] -= val//2


1 loop, best of 3: 72.7 ms per loop

In [8]:
tpcuml = list(tpcuml)

In [9]:
%%timeit -n 1
for loc in tploc: 
    val = tpcuml[loc]
    if loc:
        val -= tpcuml[loc-1]
    for j in range(loc, N):
        tpcuml[j] -= val//2


1 loop, best of 3: 53.8 ms per loop

In [10]:
%%timeit -n 1
for loc in tploc: 
    val = npcuml[loc]
    if loc:
        val -= npcuml[loc-1]
    npcuml[loc:] -= val


1 loop, best of 3: 2.81 ms per loop

In [ ]: