In [1]:
import array as ar
import numpy as np
from bisect import bisect
from itertools import accumulate
from random import random, randrange
In [2]:
N = 1000
tpcounts = tuple(int(5*random()) for i in range(N))
tpcuml = tuple(accumulate(tpcounts))
tot = tpcuml[-1]
arcounts = ar.array('l', tpcounts)
arcuml = ar.array('l', accumulate(tpcounts))
arcounts = ar.array('l', tpcounts)
arcuml = ar.array('l', accumulate(tpcounts))
npcounts = np.array(tpcounts, dtype=np.uint32)
npcuml = np.cumsum(npcounts, dtype=np.uint32)
In [3]:
seek = list(randrange(tot) for i in range(N))
npseek = np.array(seek, dtype=np.uint32)
In [4]:
type(tpcounts[0]), type(tpcuml[0]), type(arcounts[0]), type(arcuml[0]), type(npcounts[0]), type(npcuml[0]), type(seek[0])
Out[4]:
In [5]:
%timeit for i in seek: bisect(tpcuml, i)
%timeit for i in seek: bisect(arcuml, i)
%timeit for i in npseek: bisect(npcuml, i) # Good enough?
%timeit for i in npseek: np.searchsorted(npcuml, i, "right")
%timeit for i in seek: np.searchsorted(npcuml, i, "right")
%timeit -n 10 for i in seek: bisect(npcuml, i) # Worst
In [6]:
arloc = [bisect(arcuml, i) for i in seek ]
tploc = [bisect(tpcuml, i) for i in seek]
nploc = [bisect(npcuml, i) for i in npseek]
In [7]:
%%timeit -n 1
for loc in arloc:
val = arcuml[loc]
if loc:
val -= arcuml[loc-1]
for j in range(loc, N):
arcuml[j] -= val//2
In [8]:
tpcuml = list(tpcuml)
In [9]:
%%timeit -n 1
for loc in tploc:
val = tpcuml[loc]
if loc:
val -= tpcuml[loc-1]
for j in range(loc, N):
tpcuml[j] -= val//2
In [10]:
%%timeit -n 1
for loc in tploc:
val = npcuml[loc]
if loc:
val -= npcuml[loc-1]
npcuml[loc:] -= val
In [ ]: