-
-
Save craffel/e470421958cad33df550 to your computer and use it in GitHub Desktop.
Popcount of a numpy array of integers of any dtype
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Functions for computing the population count (aka Hamming weight) of an array | |
in-place using the built-in popcount routine. | |
Works with any integer datatype 64 bit-width or smaller. | |
Compile with gcc flag -mpopcnt | |
Adapted from | |
https://gist.github.com/aldro61/f604a3fa79b3dec5436a by Alexandre Drouin | |
""" | |
import numpy as np | |
cimport numpy as np | |
cimport cython | |
from libc.stdint cimport uint32_t, uint64_t | |
cdef extern int __builtin_popcount(unsigned int) nogil | |
cdef extern int __builtin_popcountll(unsigned long long) nogil | |
@cython.boundscheck(False) | |
@cython.wraparound(False) | |
cdef void _inplace_popcount_32(uint32_t[:, :] array) nogil: | |
""" | |
Iterates over the elements of an 2D array of unsigned 32 bit integers and | |
replaces them by their popcount. | |
Parameters | |
---------- | |
array : np.ndarray, dtype=np.uint32 | |
The array for which the popcounts should be computed. | |
""" | |
cdef int i | |
cdef int j | |
for i in xrange(array.shape[0]): | |
for j in xrange(array.shape[1]): | |
# Use __builtin_popcount for unsigned 32-bit integers | |
array[i, j] = __builtin_popcount(array[i, j]) | |
@cython.boundscheck(False) | |
@cython.wraparound(False) | |
cdef void _inplace_popcount_64(uint64_t[:, :] array) nogil: | |
""" | |
Iterates over the elements of an 2D array of unsigned 64-bit integers and | |
replaces them by their popcount. | |
Parameters | |
---------- | |
array : np.ndarray, dtype=np.uint32 | |
The array for which the popcounts should be computed. | |
""" | |
cdef int i | |
cdef int j | |
for i in xrange(array.shape[0]): | |
for j in xrange(array.shape[1]): | |
# Use __builtin_popcountll for unsigned 64-bit integers | |
array[i, j] = __builtin_popcountll(array[i, j]) | |
def inplace_popcount(array): | |
""" | |
Computes the bitwise popcount of each element of a 2D numpy array in place. | |
Parameters | |
---------- | |
array : np.ndarray | |
The array of integers for which the popcounts should be computed. | |
Note | |
---- | |
All integer dtypes with 64 bits per element or fewer are hypotheticaly | |
supported, but any dtype with fewer than 32 bits per element will incur an | |
additional copy and up and downcast. | |
""" | |
# Choose the correct __builtin_popcount depending on dtype | |
if array.dtype == np.uint32: | |
_inplace_popcount_32(array) | |
elif array.dtype == np.uint64: | |
_inplace_popcount_64(array) | |
elif not np.issubdtype(array.dtype, np.integer): | |
raise ValueError("dtype {} not supported.".format(array.dtype)) | |
else: | |
if array.nbytes / array.size < 4: | |
# Allow integer dtypes to be used with fewer than 32 bits by using | |
# astype. We can't use view here because if the array doesn't have | |
# a total size which is divisible by 32 bits, it will raise an | |
# error. | |
array_32 = array.astype(np.uint32, copy=False) | |
_inplace_popcount_32(array_32) | |
array[:] = array_32 | |
# Use np.view for anything with 32 or 64 bits | |
elif array.nbytes / array.size == 4: | |
_inplace_popcount_32(array.view(np.uint32)) | |
elif array.nbytes / array.size == 8: | |
_inplace_popcount_64(array.view(np.uint64)) | |
# We don't support any datatypes with > 64 bits per element | |
else: | |
raise ValueError("dtype {} not supported.".format(array.dtype)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment