In [2]:
import numpy as np
from typing import Any, Dict, List, Optional, Tuple, TypeVar, Union, cast

import numba
import cython
import pythran

IntOrIntArray = TypeVar("IntOrIntArray", np.ndarray, int)
NumberOrArrayUnion = Union[np.ndarray, float]

In [3]:
%load_ext cython

In [4]:
import pythran
%load_ext pythran.magic

In [5]:
a = np.array([10])

Regular Python


In [6]:
def count_bits_single_element(n: IntOrIntArray
                               ) -> IntOrIntArray:  # pragma: no cover
    """
    Count the number of bits that are set in `n`.

    Parameters
    ----------
    n : int | np.ndarray
        An integer number or a numpy array of integer numbers.

    Returns
    -------
    Number of bits that are equal to 1 in the bit representation of the
    number `n`.

    Examples
    --------
    >>> a = np.array([3, 0, 2])
    >>> print(count_bits(a))
    [2 0 1]

    """
    count = 0
    while n > 0:
        if n & 1 == 1:
            count += 1
        n >>= 1
    return count


count_bits = np.vectorize(count_bits_single_element)

In [7]:
time_regular_python_single_element = %timeit -o count_bits_single_element(10)


402 ns ± 12.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

In [8]:
time_regular_python = %timeit -o count_bits(a)


12.2 µs ± 121 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)

Cython


In [9]:
%%cython

cimport numpy as np
import numpy as np
cimport cython

cdef int count_bits_single_element_cython(int n):
    """
    Count the number of bits that are set in an integer number.

    Parameters
    ----------
    n : int
        The integer number.

    Returns
    -------
    int
        Number of bits that are equal to 1 in the bit representation of
        the number `n`.
    """
    cdef int count = 0
    while n > 0:
        if n & 1 == 1:
            count += 1
        n >>= 1
    return count

def count_bits_single_element_cython2(int n):
    """
    Count the number of bits that are set in an integer number.

    Parameters
    ----------
    n : int
        The integer number.

    Returns
    -------
    int
        Number of bits that are equal to 1 in the bit representation of
        the number `n`.
    """
    cdef int count = 0
    while n > 0:
        if n & 1 == 1:
            count += 1
        n >>= 1
    return count



@cython.boundscheck(False)
@cython.wraparound(False)
def count_bits_1D_array(np.ndarray[np.int_t, ndim=1] n):
    """
    Count the number of bits that are set.

    Parameters
    ----------
    n : np.ndarray
        An integer number or a numpy array of integer numbers.

    Returns
    -------
    num_bits : np.ndarray
        1D numpy array with the number of bits that are set for each
        element in `n`

    """
    assert n.dtype == np.int

    cdef int num_el = len(n)
    cdef Py_ssize_t index  # Since we will use index for indexing 'n', then
                           # using Py_ssize_t as the type for index give
                           # faster results then using a simple int.
    cdef np.ndarray[np.int_t, ndim=1] num_bits = np.empty(num_el, dtype=np.int)
    for index in range(num_el):
        num_bits[index] = count_bits_single_element_cython(n[index])

    return num_bits

@cython.boundscheck(False)
@cython.wraparound(False)
def count_bits_ND_array(n):
    cdef np.ndarray[np.int_t, ndim=1] flattened_input = n.flatten()

    cdef int num_el = len(flattened_input)
    cdef Py_ssize_t index  # Since we will use index for indexing 'n', then
                           # using Py_ssize_t as the type for index give
                           # faster results then using a simple int.
    cdef np.ndarray[np.int_t, ndim=1] num_bits_flat = np.empty(num_el, dtype=np.int)
    for index in range(num_el):
        num_bits_flat[index] = count_bits_single_element_cython(
            flattened_input[index])

    return np.reshape(num_bits_flat, n.shape)


def count_bits_cython(n):
    """
    Count the number of bits that are set in `n`.

    Parameters
    ----------
    n : int | np.ndarray
        An integer number or a numpy array of integer numbers.

    Returns
    -------
    num_bits : int | np.ndarray
        Number of bits that are set in `n`. If `n` is a numpy array then
        `num_bits` will also be a numpy array with the number of bits that
        are set for each element in `n`
    """
    if not isinstance(n, np.ndarray):
        # If the input is not a numpy array we assume it to be an integer
        # and we call _count_bits_single_element directly
        return count_bits_single_element_cython(n)

    assert n.dtype == np.int

    if n.ndim == 1:
        return count_bits_1D_array(n)

    # General case
    return count_bits_ND_array(n)

In [10]:
time_cython_single_element = %timeit -o count_bits_single_element_cython2(10)


29.6 ns ± 0.275 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)

In [11]:
time_cython = %timeit -o count_bits_cython(a)


1.37 µs ± 10.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

Numba


In [12]:
@numba.jit(nopython=True)
def count_bits_single_element_jit(n: IntOrIntArray
                               ) -> IntOrIntArray:  # pragma: no cover
    """
    Count the number of bits that are set in `n`.

    Parameters
    ----------
    n : int | np.ndarray
        An integer number or a numpy array of integer numbers.

    Returns
    -------
    Number of bits that are equal to 1 in the bit representation of the
    number `n`.

    Examples
    --------
    >>> a = np.array([3, 0, 2])
    >>> print(count_bits(a))
    [2 0 1]

    """
    count = 0
    while n > 0:
        if n & 1 == 1:
            count += 1
        n >>= 1
    return count

In [13]:
@numba.vectorize
def count_bits_numba(n: IntOrIntArray
                               ) -> IntOrIntArray:  # pragma: no cover
    """
    Count the number of bits that are set in `n`.

    Parameters
    ----------
    n : int | np.ndarray
        An integer number or a numpy array of integer numbers.

    Returns
    -------
    Number of bits that are equal to 1 in the bit representation of the
    number `n`.

    Examples
    --------
    >>> a = np.array([3, 0, 2])
    >>> print(count_bits(a))
    [2 0 1]

    """
    count = 0
    while n > 0:
        if n & 1 == 1:
            count += 1
        n >>= 1
    return count

In [14]:
count_bits_single_element_jit(10)
count_bits_numba(np.array([10]))


Out[14]:
array([2])

In [15]:
time_numba_single_element = %timeit -o count_bits_single_element_jit(10)


131 ns ± 0.248 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)

In [16]:
time_numba = %timeit -o count_bits_numba(a)


320 ns ± 1.81 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

Pythran


In [30]:
%%pythran
#pythran export count_bits_single_element_pythran(int)
def count_bits_single_element_pythran(n):
    """
    Count the number of bits that are set in `n`.

    Parameters
    ----------
    n : int | np.ndarray
        An integer number or a numpy array of integer numbers.

    Returns
    -------
    Number of bits that are equal to 1 in the bit representation of the
    number `n`.

    Examples
    --------
    >>> a = np.array([3, 0, 2])
    >>> print(count_bits(a))
    [2 0 1]

    """
    count = 0
    while n > 0:
        if n & 1 == 1:
            count += 1
        n >>= 1
    return count

#pythran export count_bits_pythran(int[])
def count_bits_pythran(n_arr):    
    return [count_bits_single_element_pythran(n) for n in n_arr]

In [18]:
time_pythran_single_element = %timeit -o count_bits_single_element_pythran(10)


115 ns ± 0.346 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)

In [31]:
time_pythran = %timeit -o count_bits_pythran(a)


208 ns ± 8.36 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)

Reports


In [20]:
print(f"Python Single element (micro seconds): {1e6 * time_regular_python_single_element.average:.2}")
print(f"Cython Single element (micro seconds): {1e6 * time_cython_single_element.average:.2}")
print(f"Numba Single element (micro seconds): {1e6 * time_numba_single_element.average:.2}")
print(f"Pythran Single element (micro seconds): {1e6 * time_pythran_single_element.average:.2}")


Python Single element (micro seconds): 0.4
Cython Single element (micro seconds): 0.03
Numba Single element (micro seconds): 0.13
Pythran Single element (micro seconds): 0.12

In [32]:
print(f"Python (micro seconds): {1e6 * time_regular_python.average:.2}")
print(f"Cython (micro seconds): {1e6 * time_cython.average:.2}")
print(f"Numba (micro seconds): {1e6 * time_numba.average:.2}")
print(f"Pythran (micro seconds): {1e6 * time_pythran.average:.2}")


Python (micro seconds): 1.2e+01
Cython (micro seconds): 1.4
Numba (micro seconds): 0.32
Pythran (micro seconds): 0.21