Calculate sum over boolean arrays



In [1]:

    
import numpy as np
%load_ext cythonmagic



In [2]:

    
def direct(phen, env):
    protected = np.sum(phen, axis = 1)
    infected = np.sum((~phen & env), axis=1)
    return protected, infected



In [3]:

    
%%cython
import cython
import numpy as np
cimport numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
def cython(np.ndarray[np.uint8_t, cast=True, ndim=2] phen,
           np.ndarray[np.uint8_t, cast=True, ndim=1] env):
    cdef np.ndarray[np.uint32_t, ndim=1] protected = np.empty(phen.shape[0], dtype = np.uint32)
    cdef np.ndarray[np.uint32_t, ndim=1] infected = np.empty(phen.shape[0], dtype = np.uint32)
# alternatively use memoryview access (slightly faster but less convenient)
#def cython(unsigned int[:, ::1] phen,
#           unsigned int[:] env):
#    cdef unsigned int[:] protected = np.empty(phen.shape[0], dtype = np.uint32)
#    cdef unsigned int[:] infected = np.empty(phen.shape[0], dtype = np.uint32)
    cdef Py_ssize_t nind = phen.shape[0]
    cdef Py_ssize_t L = phen.shape[1]
    cdef Py_ssize_t i, j
    cdef unsigned int tprotected, tinfected
    for i in range(nind):
        tprotected = 0
        tinfected = 0
        for j in range(L):
            if phen[i, j]:
                tprotected += 1
            elif env[j]:
                tinfected += 1
        protected[i] = tprotected
        infected[i] = tinfected
    return protected, infected



In [4]:

    
import numexpr as ne
def numexpr(phen, env):
    protected = np.sum(phen, axis=1)
    infected = ne.evaluate('~phen & env').sum(axis=1)
    # the following is not working as boolean do not support addition in numexpr
    #infected = ne.evaluate('sum((~phen & env), axis=1)')
    return protected, infected



In [5]:

    
L = 100
nind = 1000
phen = np.random.rand(nind, L) < 0.5
env = np.random.rand(L) < 0.5
p1, i1 = direct(phen, env)
p2, i2 = cython(phen, env)
p3, i3 = numexpr(phen, env)
print np.alltrue(p1 == p2) and np.alltrue(i1 == i2)
print np.alltrue(p1 == p3) and np.alltrue(i1 == i3)









    



True
True



In [6]:

    
for L in [1, 10, 100]:
    nind = 100
    phen = np.random.rand(nind, L) < 0.5
    env = np.random.rand(L) < 0.5
    %timeit direct(phen, env)
    %timeit cython(phen, env)
    %timeit numexpr(phen, env)
    print









    



100000 loops, best of 3: 9.9 µs per loop
100000 loops, best of 3: 3.52 µs per loop
100000 loops, best of 3: 18.9 µs per loop

100000 loops, best of 3: 16 µs per loop
100000 loops, best of 3: 4.33 µs per loop
10000 loops, best of 3: 27 µs per loop

10000 loops, best of 3: 41 µs per loop
10000 loops, best of 3: 39.2 µs per loop
10000 loops, best of 3: 135 µs per loop

Winner depends on size: for large sizes numpy leads, whereas for very small arrays cython wins out. As expected there is no large gain to be made over the already fully vectorized numpy code.