Interesting and quite simple problem to cound 1 bits in buffer.
I think you might get different results if your data is not long enough. I got these from some links and then modified to use numpy and bytearray. These are generally untested.
http://www.valuedlessons.com/2009/01/popcount-in-python-with-benchmarks.html
http://stackoverflow.com/questions/9829578/fast-way-of-counting-bits-in-python
https://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation
http://www.expobrain.net/2013/07/29/hamming-weights-python-implementation/
http://stackoverflow.com/questions/8220801/how-to-use-timeit-module
http://graphics.stanford.edu/~seander/bithacks.html
Clearly best algorithm depends on data and CPU you currently have. Is data 32-bit, 64-bit or big blob of bytearray? I think it clearly matters how data is arranged. Can you use big numpy darrays or only small arrays or no array at all?
In [1]:
import random, struct
import numpy as np
import gmpy2
from gmpy2 import mpz
# not meant to be random
random.seed(1)
d = bytearray([random.randint(0,255) for i in range(4096)])
print(len(d))
v = np.frombuffer(d, dtype=np.uint32)
print(v.shape, v[0])
v = np.frombuffer(d, dtype=np.uint64)*mpz(1)
print(v.shape, v[0], gmpy2.popcount(v[0]))
def count1s64(d):
v = np.frombuffer(d, dtype=np.uint64)
v = np.bitwise_and(v, 0x5555555555555555) + np.right_shift(np.bitwise_and(v, 0xAAAAAAAAAAAAAAAA), 1)
v = np.bitwise_and(v, 0x3333333333333333) + np.right_shift(np.bitwise_and(v, 0xCCCCCCCCCCCCCCCC), 2)
v = np.bitwise_and(v, 0x0F0F0F0F0F0F0F0F) + np.right_shift(np.bitwise_and(v, 0xF0F0F0F0F0F0F0F0), 4)
v = np.bitwise_and(v, 0x00FF00FF00FF00FF) + np.right_shift(np.bitwise_and(v, 0xFF00FF00FF00FF00), 8)
v = np.bitwise_and(v, 0x0000FFFF0000FFFF) + np.right_shift(np.bitwise_and(v, 0xFFFF0000FFFF0000), 16)
v = np.bitwise_and(v, 0x00000000FFFFFFFF) + np.right_shift(v, 32)
return v.sum()
v = np.frombuffer(d, dtype=np.uint64)
print(count1s64(d))
In [2]:
import timeit
from functools import partial
import random
import copy
In [3]:
class popcount:
# Everty actual function has prefix: count1s_
#
# after that is function arg type:
# s: takes a number
# l: takes list etc for iterable
# a: takes numpy.ndarray
# d: takes bitarray
#
# If function is limited to 32/64 bits it has suffix _32/_64
def count1s_s_naive(v):
c = 0
while(v):
c += v & 1
v >>= 1
return c
def count1s_l_naive(ve):
c = 0
for v in ve:
while(v):
c += v & 1
v >>= 1
return c
def count1s_ls_naive(ve):
return sum(popcount.count1s_s_naive(v) for v in ve)
def count1s_al_naive(ve):
c = 0
for v in ve.tolist():
while(v):
c += v & 1
v >>= 1
return c
def count1s_als_naive(ve):
return sum(popcount.count1s_s_naive(v) for v in ve.tolist())
def count1s_ai_naive(ve):
c = 0
for v in np.nditer(ve, flags=['buffered']):
x = int(v)
while(x):
c += x & 1
x >>= 1
return c
def count1s_ais_naive(ve):
return sum(popcount.count1s_s_naive(int(v)) for v in np.nditer(ve))
def count1s_d_naive(d):
c = 0
dtype = np.uint64
if len(d) <= 4:
dtype = np.uint32
for v in np.frombuffer(d, dtype=dtype).tolist():
while(v):
c += v & 1
v >>= 1
return c
########################################################################
def count1s_s_pythonic(v):
return bin(v).count("1")
def count1s_l_pythonic(ve):
c = 0
for v in ve:
c += bin(v).count("1")
return c
def count1s_ls_pythonic(ve):
return sum(popcount.count1s_s_pythonic(v) for v in ve)
def count1s_al_pythonic(ve):
c = 0
for v in ve.tolist():
c += bin(v).count("1")
return c
def count1s_als_pythonic(ve):
return sum(popcount.count1s_s_pythonic(v) for v in ve.tolist())
def count1s_ai_pythonic(ve):
c = 0
for v in np.nditer(ve):
c += bin(v).count("1")
return c
def count1s_ais_pythonic(ve):
return sum(popcount.count1s_s_pythonic(v) for v in np.nditer(ve))
def count1s_dl_pythonic(d):
c = 0
dtype = np.uint64
if len(d) <= 4:
dtype = np.uint32
for v in np.frombuffer(d, dtype=dtype).tolist():
c += bin(v).count("1")
return c
def count1s_di_pythonic(d):
c = 0
dtype = np.uint64
if len(d) <= 4:
dtype = np.uint32
for v in np.nditer(np.frombuffer(d, dtype=dtype)):
c += bin(v).count("1")
return c
########################################################################
# Brian Kernighan way
def count1s_s_zero(x):
c = 0
while x:
x &= x - 1
c += 1
return c
def count1s_l_zero(ve):
c = 0
for x in ve:
while x:
x &= x - 1
c += 1
return c
def count1s_al_zero(ve):
c = 0
for x in ve.tolist():
while x:
x &= x - 1
c += 1
return c
def count1s_ai_zero(ve):
c = 0
for v in np.nditer(ve):
x = int(v)
while x:
x &= x - 1
c += 1
return c
def count1s_du_zero(d):
c = 0
(div, mod) = divmod(len(d), 8)
fmts = '{:d}Q'.format(div)
if mod: fmts += 'I'
for x in struct.unpack(fmts, d):
while x:
x &= x - 1
c += 1
return c
# Seems I can't get this to work, result of minus is l but x can be I
def broken_count1s_di_zero(d):
c = 0
dtype = np.uint64
if len(d) <= 4:
dtype = np.uint32
for x in np.nditer(np.frombuffer(d, dtype=dtype), op_flags=['readwrite']):
while x:
x &= x - 1
c += 1
return c
########################################################################
TABLE8 = [0] * 2**8
for index in range(len(TABLE8)):
TABLE8[index] = (index & 1) + TABLE8[index >> 1]
def popcount32_table8(v):
return (popcount.TABLE8[ v & 0xff ] +
popcount.TABLE8[(v >> 8) & 0xff ] +
popcount.TABLE8[(v >> 16) & 0xff ] +
popcount.TABLE8[(v >> 24) ])
def popcount64_table8(v):
return (popcount.TABLE8[ v & 0xff ] +
popcount.TABLE8[(v >> 8) & 0xff ] +
popcount.TABLE8[(v >> 16) & 0xff ] +
popcount.TABLE8[(v >> 24) & 0xff ] +
popcount.TABLE8[(v >> 32) & 0xff ] +
popcount.TABLE8[(v >> 40) & 0xff ] +
popcount.TABLE8[(v >> 48) & 0xff ] +
popcount.TABLE8[(v >> 56) ])
def count1s_s_lut8_32(v):
return popcount.popcount32_table8(v)
def count1s_l1_lut8_32(ve):
return sum(popcount.popcount32_table8(v) for v in ve)
def count1s_l2_lut8_32(ve):
c = 0
for v in ve:
c += popcount.popcount32_table8(v)
return c
def count1s_dl_lut8_32(d):
c = 0
for v in np.frombuffer(d, dtype=np.uint32).tolist():
c += popcount.popcount32_table8(v)
return c
def count1s_di_lut8_32(d):
return sum(popcount.popcount32_table8(v)
for v in np.nditer(np.frombuffer(d, dtype=np.uint32)))
def count1s_l_lut8_32(ve):
c = 0
for v in ve:
c += popcount.popcount32_table8(v)
return c
def count1s_s_lut8_64(v):
return popcount.popcount64_table8(v)
def count1s_l_lut8_64(ve):
c = 0
for v in ve:
c += popcount.popcount64_table8(v)
return c
def count1s_l_lut8kc_64(ve):
# Variant from: https://gist.github.com/KrzysztofCiba/5714765
c = 0
e = lambda x, y: x + int( bool( y ) ) << 3
for x in ve:
arg = int(x)
c += sum([
popcount.TABLE8[ ( arg >> i ) & 255 ]
for i in range(0,
e( *divmod( arg.bit_length(), 8)),
min(8,divmod( arg.bit_length(), 4)[0]))
])
return c
TABLE16 = [0] * 2**16
for index in range(len(TABLE16)):
TABLE16[index] = (index & 1) + TABLE16[index >> 1]
def popcount32_table16(v):
return (popcount.TABLE16[ v & 0xffff ] +
popcount.TABLE16[ v >> 16 ])
def popcount64_table16(v):
return (popcount.TABLE16[ v & 0xffff] +
popcount.TABLE16[(v >> 16) & 0xffff] +
popcount.TABLE16[(v >> 32) & 0xffff] +
popcount.TABLE16[(v >> 48) ])
def count1s_l_lut16_32(ve):
c = 0
for v in ve:
c += popcount.popcount32_table16(v)
return c
def count1s_l_lut16_64(ve):
c = 0
for v in ve:
c += popcount.popcount64_table16(v)
return c
def count1s_l_lut8kc_64(ve):
# Adapted from: https://gist.github.com/KrzysztofCiba/5714765
e = lambda x, y: x + int( bool( y ) ) << 7
c = 0
for x in ve:
arg = int(x)
c += sum([
popcount.TABLE16[ ( arg >> i ) & 0xffff ]
for i in range(0,
e(*divmod( arg.bit_length(), 16)),
16)
])
return c
TABLE16_npi = np.zeros(2**16, dtype=int) #has to be an array
for index in range(len(TABLE16_npi)):
TABLE16_npi[index] = (index & 1) + TABLE16_npi[index >> 1]
def popcount32_table16_npi(v):
return (popcount.TABLE16_npi[ v & 0xffff ] +
popcount.TABLE16_npi[ v >> 16 ])
def popcount64_table16_npi(v):
return (popcount.TABLE16_npi[ v & 0xffff ] +
popcount.TABLE16_npi[(v >> 16) & 0xffff ] +
popcount.TABLE16_npi[(v >> 32) & 0xffff ] +
popcount.TABLE16_npi[(v >> 48) ])
def count1s_s_nplut16i_32(v):
return popcount.popcount32_table16_npi(np.array(v)).sum()
def count1s_l_nplut16i_32(v):
return popcount.popcount32_table16_npi(np.array(v)).sum()
def count1s_a_nplut16i_32(v):
return popcount.popcount32_table16_npi(v).sum()
def count1s_d_nplut16i_32(d):
v = np.frombuffer(d, dtype=np.uint32)
return popcount.popcount32_table16_npi(v).sum()
def count1s_a_nplut16i_64(v):
return popcount.popcount64_table16_npi(v).sum()
def count1s_l_lut16i_32(ve):
c = 0
for v in ve:
c += popcount.popcount32_table16_npi(v)
return c
def count1s_l_lut16i_64(ve):
c = 0
for v in ve:
c += popcount.popcount64_table16_npi(v)
return c
TABLE16_npb = np.zeros(2**16, dtype=np.byte) #has to be an array
for index in range(len(TABLE16_npb)):
TABLE16_npb[index] = (index & 1) + TABLE16_npb[index >> 1]
def popcount32_table16_npb(v):
return (popcount.TABLE16_npb[ v & 0xffff ] +
popcount.TABLE16_npb[ v >> 16 ])
def popcount64_table16_npb(v):
return (popcount.TABLE16_npb[ v & 0xffff ] +
popcount.TABLE16_npb[(v >> 16) & 0xffff ] +
popcount.TABLE16_npb[(v >> 32) & 0xffff ] +
popcount.TABLE16_npb[(v >> 48) ])
def count1s_a_nplut16b_32(v):
return popcount.popcount32_table16_npb(v).sum()
def count1s_a_nplut16b_64(v):
return popcount.popcount64_table16_npb(v).sum()
def count1s_l_lut16b_32(ve):
c = 0
for v in ve:
c += popcount.popcount32_table16_npb(v)
return c
def count1s_l_lut16b_64(ve):
c = 0
for v in ve:
c += popcount.popcount64_table16_npb(v)
return c
########################################################################
m1 = 0x5555555555555555
m1b = 0xAAAAAAAAAAAAAAAA
m2 = 0x3333333333333333
m2b = 0xCCCCCCCCCCCCCCCC
m4 = 0x0f0f0f0f0f0f0f0f
m4b = 0xf0f0f0f0f0f0f0f0
m8 = 0x00ff00ff00ff00ff
m8b = 0xff00ff00ff00ff00
m16 = 0x0000ffff0000ffff
m16b = 0xffff0000ffff0000
m32 = 0x00000000ffffffff
h01 = 0x0101010101010101
def count1s_s_bw1a_64(v):
v = (v & popcount.m1 ) + ((v & popcount.m1b ) >> 1 )
v = (v & popcount.m2 ) + ((v & popcount.m2b ) >> 2 )
v = (v & popcount.m4 ) + ((v & popcount.m4b ) >> 4 )
v = (v & popcount.m8 ) + ((v & popcount.m8b ) >> 8 )
v = (v & popcount.m16) + ((v & popcount.m16b) >> 16)
return (v & popcount.m32) + (v >> 32)
def count1s_l_bw1a_64(ve):
c = 0
for v in ve:
v = (v & popcount.m1 ) + ((v & popcount.m1b ) >> 1 )
v = (v & popcount.m2 ) + ((v & popcount.m2b ) >> 2 )
v = (v & popcount.m4 ) + ((v & popcount.m4b ) >> 4 )
v = (v & popcount.m8 ) + ((v & popcount.m8b ) >> 8 )
v = (v & popcount.m16) + ((v & popcount.m16b) >> 16)
v = (v & popcount.m32) + (v >> 32)
c += v
return c
def count1s_a_npbw1a_64(v):
v = np.bitwise_and(v, popcount.m1 ) + np.right_shift(np.bitwise_and(v, popcount.m1b ), 1)
v = np.bitwise_and(v, popcount.m2 ) + np.right_shift(np.bitwise_and(v, popcount.m2b ), 2)
v = np.bitwise_and(v, popcount.m4 ) + np.right_shift(np.bitwise_and(v, popcount.m4b ), 4)
v = np.bitwise_and(v, popcount.m8 ) + np.right_shift(np.bitwise_and(v, popcount.m8b ), 8)
v = np.bitwise_and(v, popcount.m16) + np.right_shift(np.bitwise_and(v, popcount.m16b), 16)
v = np.bitwise_and(v, popcount.m32) + np.right_shift(v, 32)
return v.sum()
def count1s_d_npbw1a_64(d):
dtype = np.uint64
if len(d) <= 4:
dtype = np.uint32
return popcount.count1s_a_npbw1a_64(np.frombuffer(d, dtype=dtype))
def count1s_s_bw1b_64(v):
v = (v & popcount.m1 ) + ((v >> 1) & popcount.m1 )
v = (v & popcount.m2 ) + ((v >> 2) & popcount.m2 )
v = (v & popcount.m4 ) + ((v >> 4) & popcount.m4 )
v = (v & popcount.m8 ) + ((v >> 8) & popcount.m8 )
v = (v & popcount.m16) + ((v >> 16) & popcount.m16)
return (v & popcount.m32) + ((v >> 32) & popcount.m32)
def count1s_l_bw1b_64(ve):
c = 0
for v in ve:
v = (v & popcount.m1 ) + ((v >> 1) & popcount.m1 )
v = (v & popcount.m2 ) + ((v >> 2) & popcount.m2 )
v = (v & popcount.m4 ) + ((v >> 4) & popcount.m4 )
v = (v & popcount.m8 ) + ((v >> 8) & popcount.m8 )
v = (v & popcount.m16) + ((v >> 16) & popcount.m16)
v = (v & popcount.m32) + ((v >> 32) & popcount.m32)
c += v
return c
def count1s_a_npbw1b_64(v):
v = np.bitwise_and(v, popcount.m1 ) + np.bitwise_and(np.right_shift(v, 1), popcount.m1 )
v = np.bitwise_and(v, popcount.m2 ) + np.bitwise_and(np.right_shift(v, 2), popcount.m2 )
v = np.bitwise_and(v, popcount.m4 ) + np.bitwise_and(np.right_shift(v, 4), popcount.m4 )
v = np.bitwise_and(v, popcount.m8 ) + np.bitwise_and(np.right_shift(v, 8), popcount.m8 )
v = np.bitwise_and(v, popcount.m16) + np.bitwise_and(np.right_shift(v, 16), popcount.m16)
v = np.bitwise_and(v, popcount.m32) + np.bitwise_and(np.right_shift(v, 32), popcount.m32)
return v.sum()
def count1s_d_npbw1b_64(d):
dtype = np.uint64
if len(d) <= 4:
dtype = np.uint32
return popcount.count1s_a_npbw1b_64(np.frombuffer(d, dtype=dtype))
def count1s_s_bw2_64(v):
v -= (v >> 1) & popcount.m1
v = (v & popcount.m2) + ((v >> 2) & popcount.m2)
v = (v + (v >> 4)) & popcount.m4
v += v >> 8
v += v >> 16
v += v >> 32
return (v & 0x7f)
def count1s_l_bw2_64(ve):
c = 0
for v in ve:
v -= (v >> 1) & popcount.m1
v = (v & popcount.m2) + ((v >> 2) & popcount.m2)
v = (v + (v >> 4)) & popcount.m4
v += v >> 8
v += v >> 16
v += v >> 32
c += (v & 0x7f)
return c
def count1s_a_npbw2_64(v):
v = v - np.bitwise_and(np.right_shift(v, 1), popcount.m1)
v = np.bitwise_and(v, popcount.m2) + np.bitwise_and(np.right_shift(v, 2), popcount.m2)
v = np.bitwise_and(v + np.right_shift(v, 4), popcount.m4)
v += np.right_shift(v, 8)
v += np.right_shift(v, 16)
v = np.bitwise_and(v + np.right_shift(v, 32), 0x7f)
return v.sum()
def count1s_d_npbw2_64(d):
dtype = np.uint64
if len(d) <= 4:
dtype = np.uint32
return popcount.count1s_a_npbw2_64(np.frombuffer(d, dtype=dtype))
def count1s_s_bw3_64(v):
v -= (v >> 1) & popcount.m1
v = (v & popcount.m2) + ((v >> 2) & popcount.m2)
v = (v + (v >> 4)) & popcount.m4
return (v * popcount.h01 & 0xffffffffffffffff) >> 56
def count1s_l_bw3_64(ve):
c = 0
for v in ve:
v -= (v >> 1) & popcount.m1
v = (v & popcount.m2) + ((v >> 2) & popcount.m2)
v = (v + (v >> 4)) & popcount.m4
v = (v * popcount.h01 & 0xffffffffffffffff) >> 56
c += v
return c
def count1s_a_npbw3_64(v):
v = v - np.bitwise_and(np.right_shift(v, 1), popcount.m1)
v = np.bitwise_and(v, popcount.m2) + np.bitwise_and(np.right_shift(v, 2), popcount.m2)
v = np.bitwise_and(v + np.right_shift(v, 4), popcount.m4)
v = np.right_shift(v * popcount.h01, 56)
return v.sum()
def count1s_d_npbw3_64(d):
dtype = np.uint64
if len(d) <= 4:
dtype = np.uint32
return popcount.count1s_a_npbw3_64(np.frombuffer(d, dtype=dtype))
ma = 0x01001001001001
mb = 0x84210842108421
def count1s_s_bw64_32(v):
return ((((v & 0xfff) * popcount.ma & popcount.mb) % 0x1f)
+ ((((v & 0xfff000) >> 12) * popcount.ma & popcount.mb) % 0x1f)
+ (((v >> 24) * popcount.ma & popcount.mb) % 0x1f))
def count1s_l_bw64_32(ve):
c = 0
for v in ve:
c += ((((v & 0xfff) * popcount.ma & popcount.mb) % 0x1f)
+ ((((v & 0xfff000) >> 12) * popcount.ma & popcount.mb) % 0x1f)
+ (((v >> 24) * popcount.ma & popcount.mb) % 0x1f))
return c
def count1s_a_npbw64_32(v):
c = ( np.mod(np.bitwise_and(np.bitwise_and(v, 0xfff) * popcount.ma, popcount.mb), 0x1f)
+ np.mod(np.bitwise_and(np.right_shift(np.bitwise_and(v, 0xfff000), 12) * popcount.ma, popcount.mb), 0x1f)
+ np.mod(np.bitwise_and(np.right_shift(v, 24) * popcount.ma, popcount.mb), 0x1f)
)
return c.sum()
########################################################################
def count1s_s_gmpy2(v):
return gmpy2.popcount(v*mpz(1))
def count1s_a_gmpy2(v):
v2 = v*mpz(1)
return sum(gmpy2.popcount(a) for a in v2)
########################################################################
def test_helper_start(a, d, dadi):
fmt = '' # formatstring for unpack
dlen = 0 # real bytelen for calcs
v = None # value to test
orig_v = None # original value before testing
typ = None # type of func to test
# Can't test too short array
if a.endswith('_64') and dadi < 8: return None
if a.endswith('_32') or dadi < 8:
fmt = 'I'
dlen = 4
else:
fmt = 'Q'
dlen = 8
if a.startswith('count1s_s'): # scalar
if dadi >= 16: return None
v = struct.unpack(fmt, d[:dlen])[0]
orig_v = v
typ = 's'
elif a.startswith('count1s_l'): # list
fmts = '{:d}{:s}'.format(divmod(len(d), dlen)[0], fmt)
v = list(struct.unpack(fmts, d))
orig_v = copy.copy(v)
dlen = dadi
typ = 'l'
elif a.startswith('count1s_a'): # np.array
if dlen == 4:
v = np.frombuffer(d, dtype=np.uint32)
elif dlen == 8:
v = np.frombuffer(d, dtype=np.uint64)
else: raise
orig_v = np.copy(v)
dlen = dadi
typ = 'a'
elif a.startswith('count1s_d'): # bytearray
v = d
orig_v = d
dlen = dadi
typ = 'd'
else: raise
return (dlen, v, orig_v, typ)
def test_helper_check(a, dlen, v, orig_v, typ):
# Check if data has survived
ok = True
if typ == 'd':
if len(v) != len(orig_v):
print('d: len')
ok = False
elif not v.startswith(orig_v):
print("d: startswith")
ok = False
elif typ == 'a':
if len(v.tolist()) != len(orig_v.tolist()):
print("a: len")
ok = False
else:
for x in range(len(v.tolist())):
if v[x] != orig_v[x]:
print("a: item", v[x], orig_v[x])
ok = False
break
elif typ == 'l':
if len(v) != len(orig_v):
print('l: len')
ok = False
else:
for x in range(len(v)):
if v[x] != orig_v[x]:
print("l: item", v[x], orig_v[x])
ok = False
break
elif typ == 's':
if v != orig_v:
print('s: !=')
ok = False
else:
print("Could not check:", a, dlen)
if not ok:
print("ERROR no data ok:", typ, a, dlen, v, orig_v)
popcount_methods = [ a for a in dir(popcount) if a.startswith('count1s') ]
popcount_methods.sort()
popcount_methods
Out[3]:
In [4]:
da = (4, 8, 16)
numa = (0, 1, 255)
ra = (0, 1, 8) # result len per item
for di in range(len(da)):
for i in range(len(numa)):
num = numa[i]
d = bytearray([num for i in range(da[di])])
l = []
for a in popcount_methods:
start = popcount.test_helper_start(a, d, da[di])
if start is None: continue
(dlen, v, orig_v, typ) = start
r = getattr(popcount, a)(v)
popcount.test_helper_check(a, dlen, v, orig_v, typ)
l.append({a: r})
if r != ra[i]*dlen:
print("ERROR:", a, dlen, '*', numa[i], ',', r, "!=", ra[i]*dlen)
#print(num, l)
Run through different vector sizes.
In [5]:
da = (4, 8, 16, 1024, 4096, 409600)
for di in range(len(da)):
random.seed(1)
d = bytearray([random.randint(0,255) for i in range(da[di])])
repeat = 3
l = []
res = {4: 0, 8: 0}
for a in popcount_methods:
start = popcount.test_helper_start(a, d, da[di])
if start is None: continue
(dlen, v, orig_v, typ) = start
r = getattr(popcount, a)(v)
# Need to take into account scalars
if dlen == 4: ri = 4
else: ri = 8
if res[ri] == 0: res[ri] = r
if res[ri] != r:
print("ERROR:", a, di, dlen, r, "!=", res[ri])
l.append({a: r})
#print(da[di], l)
In [ ]:
unit = [['Mi', 1024*1024], ['ki', 1024], ['', 1]]
ro15 = [['.0f', 150], ['.1f', 15], ['.2f',1.5]]
test_target = 2.0 # seconds, real test can be about twice as much
repeat = 3
max_repeats = 30
for di in range(len(da)):
random.seed(1)
d = bytearray([random.randint(0,255) for i in range(da[di])])
for a in popcount_methods:
if a.endswith('_64') and da[di] < 8: continue
r = [0.0001]
number = 1
total_number = 0
last_run_repeat = 0
while min(r) < (test_target / 2.0):
if min(r) < (test_target / 2.0 / 10.0):
number *= 10
else:
number_new = int(number * test_target / min(r))
if number_new <= (number*1.25):
number *= 1.25
else:
number = number_new
start = popcount.test_helper_start(a, d, da[di])
if start is None: continue
(dlen, v, orig_v, typ) = start
# Try to get minimum by running tests again if there is too big values
# But only run tests limited times
# And only if this is last run, aka min(r) is big enough
r = []
last_run_repeats = 0
max_r = None
while len(r) < repeat and max_repeats > last_run_repeats:
r += timeit.repeat(partial(getattr(popcount, a), v), number=number, repeat=repeat)
total_number += number * repeat
last_run_repeats += repeat
if min(r) < (test_target / 2.0):
break
while max(r) > (min(r) * 1.025):
if max_r is None or max(r) < max_r:
max_r = max(r)
r.remove(max(r))
popcount.test_helper_check(a, dlen, v, orig_v, typ)
ra = dlen * number / min(r)
fmt = ''
for u in range(len(unit)):
for ro in range(len(ro15)):
if fmt == '' and ra > unit[u][1] * ro15[ro][1]:
fmt = '{0:' + ro15[ro][0] + '}' + unit[u][0] + 'B/s'
ra = fmt.format(ra/unit[u][1])
if fmt == '':
ra = '{0:.2f}B/s'.format(ra)
out = '{:24s} {:6d} {:>12s} {:2d} {:9d} {:10d}'.format(a, da[di], ra, last_run_repeats, number, total_number)
r.sort()
for re in r:
out += ' {0:.03f}'.format(re)
if max_r is not None:
out += ' max:{0:.03f}'.format(max_r)
print(out)
In [ ]: