ENH: Added countbits (popcount)

ganesh-k13 · ganesh-k13 · commit 050127a9c706 · 2022-11-26T18:46:09.000+05:30
ENH, DOC: Added countbits (popcount) ENH: Popcount implementation ENH: Add popcount to umath ENH: Added countbits (popcount) to umath `__all__` ENH: Refined popcount logic DOC: Added `bit_count` Co-authored-by: Eric Wieser <wieser.eric@gmail.com> MAINT: Renamed `countbits` to `bit_count` MAINT: Fixed 4 1s magic number DOC: Added `popcount` to docstring ENH: Added bit_count annotations ENH: Added GNU/CLANG popcount DOC: Added `popcount` language example ENH, BUG: Moved `bitcount` to npy_math.h as `popcount` | Fixed final right shift ENH: Enable `popcount` for signed TST: Tests for `bit_count` BUG, DOC: (BUG) Added missing typecast causing an unwanted upcast (DOC) Added more details on `popcount` implementation MAINT, BUG: (MAINT) Refined `popcount` TC to use typecode (BUG) Fixed ufunc.ntypes to include signed ints ENH: Added windows builtin support ENH: Added `popcount` implementation for big python ints natively [1/2] `popcount` object loop changes ENH: Object loop for `bit_count` [2/2] `popcount` object loop changes TST: Refined `bit_count` tests and added object type ENH: Added `bit_count` to `np.int*` DOC: Added `np.bit_count` (#19355) MAINT: Various linting and minor fixes: 1. Fixed passing all args to _internals umath bitcount. Note: We use kwargs here that might hinder performance 2. Fixed linting errors. 3. Improved verbosity of logs 4. Made a generic TO_BITS_LEN macro to accommodate more length based functions in future BENCH: Added bit_count (popcount) MAINT: Style nits | Added signed case DOC, MAINT: Improved example ENH: Added annotations for bit_count TST: Added annotations tests for bit_count MAINT: Fixed linting errors MAINT: Moved Magic constants to npy_math_internal MAINT: Remove python implementation | Added 3.10 check to tests DOC: Added abs value usage to doc MAINT: Resolved merge conflicts
diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py
@@ -4,7 +4,7 @@
 
 
 ufuncs = ['abs', 'absolute', 'add', 'arccos', 'arccosh', 'arcsin', 'arcsinh',
-          'arctan', 'arctan2', 'arctanh', 'bitwise_and', 'bitwise_not',
+          'arctan', 'arctan2', 'arctanh', 'bit_count', 'bitwise_and', 'bitwise_not',
           'bitwise_or', 'bitwise_xor', 'cbrt', 'ceil', 'conj', 'conjugate',
           'copysign', 'cos', 'cosh', 'deg2rad', 'degrees', 'divide', 'divmod',
           'equal', 'exp', 'exp2', 'expm1', 'fabs', 'float_power', 'floor',
diff --git a/doc/release/upcoming_changes/19355.new_feature.rst b/doc/release/upcoming_changes/19355.new_feature.rst
@@ -0,0 +1,12 @@
+`np.bit_count` to compute the number of 1-bits in an integer
+------------------------------------------------------------
+
+This new function counts the number of 1-bits in a number.
+It works on all the numpy integer types, as well as on Python's
+builtin arbitrary-precision `int` type (Python 3.10 and later).
+
+.. code-block:: python
+
+    >>> a = np.array([2**i - 1 for i in range(16)])
+    >>> np.bit_count(a)
+    array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
diff --git a/doc/source/reference/routines.math.rst b/doc/source/reference/routines.math.rst
@@ -179,3 +179,5 @@ Miscellaneous
    real_if_close
 
    interp
+
+   bit_count
diff --git a/numpy/__init__.pyi b/numpy/__init__.pyi
@@ -2517,6 +2517,17 @@ class ndarray(_ArrayOrScalarCommon, Generic[_ShapeType, _DType_co]):
     def __dlpack__(self: NDArray[number[Any]], *, stream: None = ...) -> _PyCapsule: ...
     def __dlpack_device__(self) -> tuple[int, L[0]]: ...
 
+    def bit_count(
+        self,
+        out: None | NDArray[Any] = ...,
+        *,
+        where: _ArrayLikeBool_co = ...,
+        casting: _CastingKind = ...,
+        order: _OrderKACF = ...,
+        dtype: DTypeLike = ...,
+        subok: bool = ...,
+    ) -> NDArray[Any]: ...
+
     # Keep `dtype` at the bottom to avoid name conflicts with `np.dtype`
     @property
     def dtype(self) -> _DType_co: ...
@@ -2660,6 +2671,17 @@ class generic(_ArrayOrScalarCommon):
         self: _ScalarType, *shape: SupportsIndex, order: _OrderACF = ...
     ) -> ndarray[Any, _dtype[_ScalarType]]: ...
 
+    def bit_count(
+        self,
+        out: None | NDArray[Any] = ...,
+        *,
+        where: _ArrayLikeBool_co = ...,
+        casting: _CastingKind = ...,
+        order: _OrderKACF = ...,
+        dtype: DTypeLike = ...,
+        subok: bool = ...,
+    ) -> Any: ...
+
     def squeeze(
         self: _ScalarType, axis: None | L[0] | tuple[()] = ...
     ) -> _ScalarType: ...
@@ -3229,6 +3251,7 @@ arcsinh: _UFunc_Nin1_Nout1[L['arcsinh'], L[8], None]
 arctan2: _UFunc_Nin2_Nout1[L['arctan2'], L[5], None]
 arctan: _UFunc_Nin1_Nout1[L['arctan'], L[8], None]
 arctanh: _UFunc_Nin1_Nout1[L['arctanh'], L[8], None]
+bit_count: _UFunc_Nin1_Nout1[L['bit_count'], L[11], None]
 bitwise_and: _UFunc_Nin2_Nout1[L['bitwise_and'], L[12], L[-1]]
 bitwise_not: _UFunc_Nin1_Nout1[L['invert'], L[12], None]
 bitwise_or: _UFunc_Nin2_Nout1[L['bitwise_or'], L[12], L[0]]
diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py
@@ -20,6 +20,7 @@
 umr_minimum = um.minimum.reduce
 umr_sum = um.add.reduce
 umr_prod = um.multiply.reduce
+umr_bit_count = um.bit_count
 umr_any = um.logical_or.reduce
 umr_all = um.logical_and.reduce
 
@@ -295,3 +296,8 @@ def _dump(self, file, protocol=2):
 
 def _dumps(self, protocol=2):
     return pickle.dumps(self, protocol=protocol)
+
+def _bit_count(a, out=None, *, where=True, casting='same_kind',
+          order='K', dtype=None, subok=True):
+    return umr_bit_count(a, out, where=where, casting=casting,
+            order=order, dtype=dtype, subok=subok)
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
@@ -981,6 +981,13 @@ def english_upper(s):
           TD(ints),
           TD('O', f='npy_ObjectLCM'),
           ),
+'bit_count':
+    Ufunc(1, 1, None,
+          docstrings.get('numpy.core.umath.bit_count'),
+          None,
+          TD(ints),
+          TD('O', f='npy_ObjectPopCount'),
+          ),
 'matmul' :
     Ufunc(2, 1, None,
           docstrings.get('numpy.core.umath.matmul'),
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
@@ -4214,3 +4214,37 @@ def add_newdoc(place, name, doc):
     array([ 0, 20, 20, 60, 20, 20])
 
     """)
+
+add_newdoc('numpy.core.umath', 'bit_count',
+    """
+    Computes the number of 1-bits in the absolute value of ``x``.
+    Analogous to the builtin `int.bit_count` or ``popcount`` in C++.
+
+    Parameters
+    ----------
+    x : array_like, integer
+        Input array.
+    $PARAMS
+
+    Returns
+    -------
+    y : ndarray
+        The corresponding number of 1-bits in the input.
+        $OUT_SCALAR_1
+
+    References
+    ----------
+    .. [1] https://stackoverflow.com/a/109025/5671364
+
+    .. [2] Wikipedia, "Hamming weight",
+           https://en.wikipedia.org/wiki/Hamming_weight
+
+    Examples
+    --------
+    >>> np.bit_count(1023)
+    10
+    >>> a = np.array([2**i - 1 for i in range(16)])
+    >>> np.bit_count(a)
+    array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
+
+    """)
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
@@ -354,6 +354,12 @@ array_ptp(PyArrayObject *self, PyObject *args, PyObject *kwds)
     NPY_FORWARD_NDARRAY_METHOD("_ptp");
 }
 
+static PyObject *
+array_bit_count(PyArrayObject *self, PyObject *args, PyObject *kwds)
+{
+    NPY_FORWARD_NDARRAY_METHOD("_bit_count");
+}
+
 
 static PyObject *
 array_swapaxes(PyArrayObject *self, PyObject *args)
@@ -3076,9 +3082,11 @@ NPY_NO_EXPORT PyMethodDef array_methods[] = {
     {"__dlpack__",
         (PyCFunction)array_dlpack,
         METH_FASTCALL | METH_KEYWORDS, NULL},
-
     {"__dlpack_device__",
         (PyCFunction)array_dlpack_device,
         METH_NOARGS, NULL},
+    {"bit_count",
+        (PyCFunction)array_bit_count,
+        METH_VARARGS | METH_KEYWORDS, NULL},
     {NULL, NULL, 0, NULL}           /* sentinel */
 };
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
@@ -1566,7 +1566,7 @@ gentype_byteswap(PyObject *self, PyObject *args, PyObject *kwds)
  *         std, var, sum, cumsum, prod, cumprod, compress, sort, argsort,
  *         round, argmax, argmin, max, min, ptp, any, all, astype, resize,
  *         reshape, choose, tostring, tobytes, copy, searchsorted, view,
- *         flatten, ravel, squeeze#
+ *         flatten, ravel, squeeze, bit_count#
  */
 static PyObject *
 gentype_@name@(PyObject *self, PyObject *args, PyObject *kwds)
@@ -2192,6 +2192,9 @@ static PyMethodDef gentype_methods[] = {
     {"sum",
         (PyCFunction)gentype_sum,
         METH_VARARGS | METH_KEYWORDS, NULL},
+    {"bit_count",
+        (PyCFunction)gentype_bit_count,
+        METH_VARARGS | METH_KEYWORDS, NULL},
     {"cumsum",
         (PyCFunction)gentype_cumsum,
         METH_VARARGS | METH_KEYWORDS, NULL},
diff --git a/numpy/core/src/npymath/npy_math_internal.h.src b/numpy/core/src/npymath/npy_math_internal.h.src
@@ -678,7 +678,6 @@ npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)
 /**end repeat1**/
 /**end repeat**/
 
-
 #define __popcnt32 __popcnt
 /**begin repeat
  *
diff --git a/numpy/core/src/umath/funcs.inc.src b/numpy/core/src/umath/funcs.inc.src
@@ -267,6 +267,21 @@ npy_ObjectClip(PyObject *arr, PyObject *min, PyObject *max) {
     return o;
 }
 
+static PyObject *
+npy_ObjectPopCount(PyObject *obj) {
+    PyObject *result = NULL;
+
+    /* Try to use inbuilt popcount if available */
+    static PyObject *builtin_popcount_func = NULL;
+    builtin_popcount_func = PyObject_GetAttrString(obj, "bit_count");
+
+    if (builtin_popcount_func != NULL) {
+        result = PyObject_CallFunction(builtin_popcount_func, NULL);
+    }
+
+    return result;
+}
+
 /*
  *****************************************************************************
  **                           COMPLEX FUNCTIONS                             **
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
@@ -569,6 +569,15 @@ NPY_NO_EXPORT void
     UNARY_LOOP_FAST(@type@, @type@, *out = +in);
 }
 
+NPY_NO_EXPORT void
+@TYPE@_bit_count(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
+{
+    UNARY_LOOP {
+        const @type@ in1 = *(@type@ *)ip1;
+        *((@type@ *)op1) = npy_popcount@c@(in1);
+    }
+}
+
 /**begin repeat1
  * #isa = , _avx2#
  * #CHK = 1, defined(HAVE_ATTRIBUTE_TARGET_AVX2)#
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
@@ -202,8 +202,10 @@ NPY_NO_EXPORT void
 @S@@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
 /**end repeat2**/
 
-/**end repeat1**/
+NPY_NO_EXPORT void
+@S@@TYPE@_bit_count(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func));
 
+/**end repeat1**/
 /**end repeat**/
 
 /*
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
@@ -2361,7 +2361,15 @@ def test_reduce(self):
 
 class TestBitwiseUFuncs:
 
-    bitwise_types = [np.dtype(c) for c in '?' + 'bBhHiIlLqQ' + 'O']
+    _all_ints_bits = [
+        np.dtype(c).itemsize * 8 for c in np.typecodes["AllInteger"]]
+    bitwise_types = [
+        np.dtype(c) for c in '?' + np.typecodes["AllInteger"] + 'O']
+    bitwise_bits = [
+        2,  # boolean type
+        *_all_ints_bits,  # All integers
+        max(_all_ints_bits) + 1,  # Object_ type
+    ]
 
     def test_values(self):
         for dt in self.bitwise_types:
@@ -2442,6 +2450,30 @@ def test_reduction(self):
             btype = np.array([True], dtype=object)
             assert_(type(f.reduce(btype)) is bool, msg)
 
+    @pytest.mark.parametrize("input_dtype_obj, bitsize",
+            zip(bitwise_types, bitwise_bits))
+    def test_popcount(self, input_dtype_obj, bitsize):
+        input_dtype = input_dtype_obj.type
+
+        # bit_count is only in-built in 3.10+
+        if sys.version_info < (3, 10) and input_dtype == np.object_:
+            pytest.skip()
+
+        for i in range(1, bitsize):
+            num = 2**i - 1
+            msg = f"bit_count for {num}"
+            assert i == np.bit_count(input_dtype(num)), msg
+            if np.issubdtype(
+                input_dtype, np.signedinteger) or input_dtype == np.object_:
+                assert i == np.bit_count(input_dtype(-num)), msg
+
+        a = np.array([2**i-1 for i in range(1, bitsize)], dtype=input_dtype)
+        bit_count_a = np.bit_count(a)
+        expected = np.arange(1, bitsize, dtype=input_dtype)
+
+        msg = f"array bit_count for {input_dtype}"
+        assert all(bit_count_a == expected), msg
+
 
 class TestInt:
     def test_logical_not(self):
diff --git a/numpy/core/umath.py b/numpy/core/umath.py
@@ -22,7 +22,7 @@
     'UFUNC_PYVALS_NAME', '_add_newdoc_ufunc', 'absolute', 'add',
     'arccos', 'arccosh', 'arcsin', 'arcsinh', 'arctan', 'arctan2', 'arctanh',
     'bitwise_and', 'bitwise_or', 'bitwise_xor', 'cbrt', 'ceil', 'conj',
-    'conjugate', 'copysign', 'cos', 'cosh', 'deg2rad', 'degrees', 'divide',
+    'conjugate', 'copysign', 'cos', 'cosh', 'bit_count', 'deg2rad', 'degrees', 'divide',
     'divmod', 'e', 'equal', 'euler_gamma', 'exp', 'exp2', 'expm1', 'fabs',
     'floor', 'floor_divide', 'float_power', 'fmax', 'fmin', 'fmod', 'frexp',
     'frompyfunc', 'gcd', 'geterrobj', 'greater', 'greater_equal', 'heaviside',
diff --git a/numpy/matrixlib/tests/test_defmatrix.py b/numpy/matrixlib/tests/test_defmatrix.py
@@ -286,7 +286,7 @@ def test_instance_methods(self):
             'partition', 'argpartition',
             'take', 'tofile', 'tolist', 'tostring', 'tobytes', 'all', 'any',
             'sum', 'argmax', 'argmin', 'min', 'max', 'mean', 'var', 'ptp',
-            'prod', 'std', 'ctypes', 'itemset',
+            'prod', 'std', 'ctypes', 'itemset', 'bit_count',
             ]
         for attrib in dir(a):
             if attrib.startswith('_') or attrib in excluded_methods:
diff --git a/numpy/typing/tests/data/reveal/ufuncs.pyi b/numpy/typing/tests/data/reveal/ufuncs.pyi
@@ -1,6 +1,7 @@
 import numpy as np
 import numpy.typing as npt
 
+i8: np.int64
 f8: np.float64
 AR_f8: npt.NDArray[np.float64]
 AR_i8: npt.NDArray[np.int64]
@@ -66,3 +67,14 @@ reveal_type(np.matmul.signature)  # E: Literal['(n?,k),(k,m?)->(n?,m?)']
 reveal_type(np.matmul.identity)  # E: None
 reveal_type(np.matmul(AR_f8, AR_f8))  # E: Any
 reveal_type(np.matmul(AR_f8, AR_f8, axes=[(0, 1), (0, 1), (0, 1)]))  # E: Any
+
+reveal_type(np.bit_count.__name__)  # E: Literal['bit_count']
+reveal_type(np.bit_count.ntypes)  # E: Literal[11]
+reveal_type(np.bit_count.identity)  # E: None
+reveal_type(np.bit_count.nin)  # E: Literal[1]
+reveal_type(np.bit_count.nout)  # E: Literal[1]
+reveal_type(np.bit_count.nargs)  # E: Literal[2]
+reveal_type(np.bit_count.signature)  # E: None
+reveal_type(np.bit_count.identity)  # E: None
+reveal_type(np.bit_count(i8))  # E: Any
+reveal_type(np.bit_count(AR_i8))  # E: Any

Original file line number	Diff line number	Diff line change
`@@ -678,7 +678,6 @@ npy_rshift@u@@c@(npy_@u@@type@ a, npy_@u@@type@ b)`
`678`	`678`	`/**end repeat1**/`
`679`	`679`	`/**end repeat**/`
`680`	`680`
`681`		`-`
`682`	`681`	`#define __popcnt32 __popcnt`
`683`	`682`	`/**begin repeat`
`684`	`683`	`*`
Original file line number	Diff line number	Diff line change
`@@ -286,7 +286,7 @@ def test_instance_methods(self):`
`286`	`286`	`'partition', 'argpartition',`
`287`	`287`	`'take', 'tofile', 'tolist', 'tostring', 'tobytes', 'all', 'any',`
`288`	`288`	`'sum', 'argmax', 'argmin', 'min', 'max', 'mean', 'var', 'ptp',`
`289`		`- 'prod', 'std', 'ctypes', 'itemset',`
	`289`	`+ 'prod', 'std', 'ctypes', 'itemset', 'bit_count',`
`290`	`290`	`]`
`291`	`291`	`for attrib in dir(a):`
`292`	`292`	`if attrib.startswith('_') or attrib in excluded_methods:`