In [1]:
%%time
from eden import fast_hash_2, fast_hash_4

# Collision check for fast_hash_4: hash every (dat1, dat2, dat1, dat2)
# combination with dat1, dat2 in [0, n) and count the distinct results.
nbits = 30
bitmask = (1 << nbits) - 1  # integer with the low `nbits` bits set
n = 1000
res = set()
for dat1 in range(n):
    for dat2 in range(n):
        res.add(fast_hash_4(dat1, dat2, dat1, dat2, bitmask))
# Columns: distinct values, inputs hashed, inputs minus distinct values
# (i.e. lost to collisions), distinct/inputs ratio.
# A single-argument print(...) emits the same line on Python 2 and Python 3.
print("%s %s %s %s" % (len(res), n * n, n * n - len(res), len(res) / float(n * n)))


999210 1000000 790 0.99921
CPU times: user 935 ms, sys: 56.7 ms, total: 992 ms
Wall time: 1 s

In [5]:
%%time
# Same collision check, but using Python's built-in hash() on the 4-tuple
# and masking the result with `bitmask`.
nbits = 30
bitmask = (1 << nbits) - 1  # integer with the low `nbits` bits set
n = 1000
res = set()
for dat1 in range(n):
    for dat2 in range(n):
        res.add(hash((dat1, dat2, dat1, dat2)) & bitmask)
# Columns: distinct values, inputs hashed, inputs minus distinct values
# (i.e. lost to collisions), distinct/inputs ratio.
# A single-argument print(...) emits the same line on Python 2 and Python 3.
print("%s %s %s %s" % (len(res), n * n, n * n - len(res), len(res) / float(n * n)))


999210 1000000 790 0.99921
CPU times: user 667 ms, sys: 27.2 ms, total: 695 ms
Wall time: 681 ms

In [7]:
def calc_running_hash(running_hash, list_item, counter):
    """Return the mixing value for one item of a running hash.

    Two formulas alternate with the parity of ``counter``:

    * even ``counter``:
      ``(running_hash << 7) ^ (list_item * (running_hash >> 3))``
    * odd ``counter``:
      ``~(((running_hash << 11) + list_item) ^ (running_hash >> 5))``

    Args:
        running_hash: current hash state (integer).
        list_item: the item being folded in (integer).
        counter: position of the item; only its lowest bit is inspected.

    Returns:
        The integer produced by the formula selected by ``counter``'s parity.
        The caller decides how to combine it with the running state.
    """
    # Explicit branch instead of indexing a 2-tuple with a bool: only the
    # formula that is actually needed gets evaluated, and the parity -> formula
    # mapping is readable at a glance.
    if (counter & 1) == 0:
        # `*` binds tighter than `^`; the parentheses just make that visible.
        return (running_hash << 7) ^ (list_item * (running_hash >> 3))
    return ~(((running_hash << 11) + list_item) ^ (running_hash >> 5))


def fast_hash(vec, bitmask):
    """Hash a sequence of integers down to a masked integer code.

    The state starts at 0xAAAAAAAA. For every element, the value returned by
    ``calc_running_hash(state, element, position)`` is XOR-ed into the state.

    Args:
        vec: iterable of integers to hash.
        bitmask: integer mask AND-ed with the final state.

    Returns:
        ``int(state & bitmask) + 1``; for a non-negative ``bitmask`` this lies
        in ``1 .. bitmask + 1``.
    """
    state = 0xAAAAAAAA
    position = 0
    for element in vec:
        state = state ^ calc_running_hash(state, element, position)
        position += 1
    masked = state & bitmask
    return int(masked) + 1


# Collision check for the fast_hash defined in this cell: hash every
# [dat1, dat2, dat1, dat2] list with dat1, dat2 in [0, n) and count the
# distinct results.
nbits = 30
bitmask = (1 << nbits) - 1  # integer with the low `nbits` bits set
n = 1000
res = set()
for dat1 in range(n):
    for dat2 in range(n):
        res.add(fast_hash([dat1, dat2, dat1, dat2], bitmask))
# Columns: distinct values, inputs hashed, inputs minus distinct values
# (i.e. lost to collisions), distinct/inputs ratio.
# A single-argument print(...) emits the same line on Python 2 and Python 3.
print("%s %s %s %s" % (len(res), n * n, n * n - len(res), len(res) / float(n * n)))


999544 1000000 456 0.999544

In [8]:
%%time
from eden import fast_hash
# Same collision check, run against the fast_hash imported from eden at the
# top of this cell: hash every [dat1, dat2, dat1, dat2] list with
# dat1, dat2 in [0, n) and count the distinct results.
nbits = 30
bitmask = (1 << nbits) - 1  # integer with the low `nbits` bits set
n = 1000
res = set()
for dat1 in range(n):
    for dat2 in range(n):
        res.add(fast_hash([dat1, dat2, dat1, dat2], bitmask))
# Columns: distinct values, inputs hashed, inputs minus distinct values
# (i.e. lost to collisions), distinct/inputs ratio.
# A single-argument print(...) emits the same line on Python 2 and Python 3.
print("%s %s %s %s" % (len(res), n * n, n * n - len(res), len(res) / float(n * n)))


999544 1000000 456 0.999544
CPU times: user 5.61 s, sys: 199 ms, total: 5.81 s
Wall time: 5.68 s

In [ ]: