@@ -503,6 +503,31 @@ def is_scalar_or_string(val):
503
503
return isinstance (val , str ) or not np .iterable (val )
504
504
505
505
506
def duplicate_if_scalar(obj, n=2, raises=True):
    """
    Return *obj* expanded to a sequence of length *n* if it is scalar-like.

    Scalars and strings are repeated *n* times into a list.  Length-1
    containers are likewise repeated into a list of length *n*.  Containers
    that already have length *n* are returned unchanged.  Any other length
    raises a ``ValueError`` when *raises* is true; when *raises* is false,
    an empty container is duplicated as ``[obj] * n`` and other mismatched
    containers are returned as-is.
    """
    # Scalars (and strings, which are iterable but treated as atoms) are
    # simply repeated.
    if is_scalar_or_string(obj):
        return [obj] * n

    length = len(obj)

    if length == 0:
        if not raises:
            return [obj] * n
        raise ValueError(f'Cannot duplicate empty {type(obj)}.')

    # A single-element container is broadcast to length n.
    if length == 1:
        return list(obj) * n

    if raises and length != n:
        raise ValueError(
            f'Input object of type {type(obj)} has incorrect size. Expected '
            f'either a scalar type object, or a Container with length in {{1, '
            f'{n}}}.'
        )

    return obj
529
+
530
+
506
531
@_api .delete_parameter (
507
532
"3.8" , "np_load" , alternative = "open(get_sample_data(..., asfileobj=False))" )
508
533
def get_sample_data (fname , asfileobj = True , * , np_load = True ):
@@ -567,6 +592,23 @@ def flatten(seq, scalarp=is_scalar_or_string):
567
592
yield from flatten (item , scalarp )
568
593
569
594
595
def pairwise(iterable):
    """
    Yield successive overlapping pairs taken from *iterable*.

    take(4, pairwise(count()))
    [(0, 1), (1, 2), (2, 3), (3, 4)]

    From more_itertools:
    https://more-itertools.readthedocs.io/en/stable/_modules/more_itertools/recipes.html#pairwise

    Can be removed on python >3.10 in favour of itertools.pairwise
    """
    it = iter(iterable)
    try:
        previous = next(it)
    except StopIteration:
        # Fewer than two items: nothing to pair.
        return
    for current in it:
        yield previous, current
        previous = current
610
+
611
+
570
612
@_api .deprecated ("3.8" )
571
613
class Stack :
572
614
"""
@@ -1473,6 +1515,120 @@ def _reshape_2D(X, name):
1473
1515
return result
1474
1516
1475
1517
1518
def hexbin(x, y, C=None, gridsize=100,
           xscale='linear', yscale='linear', extent=None,
           reduce_C_function=np.mean, mincnt=None):
    """
    Bin the points *x*, *y* into a hexagonal grid and accumulate values.

    Parameters
    ----------
    x, y : array-like
        The data positions.  Assumed to be of equal length — TODO confirm
        against callers; the C-accumulation loop indexes both by the same i.
    C : array-like, optional
        If None, each hexagon accumulates the count of points falling in
        it.  Otherwise, the C values of those points are reduced to one
        float per hexagon with *reduce_C_function*.
    gridsize : int or (int, int), default: 100
        Number of hexagons in the x-direction, or a pair (nx, ny).  When a
        single int is given, ny is derived as ``int(nx / sqrt(3))``.
    xscale, yscale : {'linear', 'log'}, default: 'linear'
        If 'log', the binning is performed on log10 of the data, which must
        then be strictly positive.
    extent : (xmin, xmax, ymin, ymax), optional
        Limits of the binned region.  Defaults to the (scaled) data range,
        expanded to avoid singular limits.
    reduce_C_function : callable, default: `numpy.mean`
        Reduces the list of C values in one hexagon to a single value.
    mincnt : int, optional
        Hexagons containing fewer points than this get NaN as their
        accumulated value and are dropped from the output.  When C is
        given and *mincnt* is None, it defaults to 1.

    Returns
    -------
    (xout, yout, accum), (xmin, xmax), (ymin, ymax), (nx, ny)
        Hexagon center coordinates and their accumulated values (bins
        whose accumulation is NaN are removed), the final binning limits
        (after padding/expansion), and the grid shape used.

    Raises
    ------
    ValueError
        If log scaling is requested for non-positive data, or *extent*
        has a min greater than its max.
    """
    # local import to avoid circular import
    import matplotlib.transforms as mtransforms

    # Set the size of the hexagon grid
    if np.iterable(gridsize):
        nx, ny = gridsize
    else:
        nx = gridsize
        # sqrt(3) ratio makes the hexagons approximately regular.
        ny = int(nx / math.sqrt(3))

    # Will be log()'d if necessary, and then rescaled.
    tx = x
    ty = y

    if xscale == 'log':
        if np.any(x <= 0.0):
            raise ValueError(
                "x contains non-positive values, so cannot be log-scaled")
        tx = np.log10(tx)
    if yscale == 'log':
        if np.any(y <= 0.0):
            raise ValueError(
                "y contains non-positive values, so cannot be log-scaled")
        ty = np.log10(ty)
    if extent is not None:
        xmin, xmax, ymin, ymax = extent
        if xmin > xmax:
            raise ValueError("In extent, xmax must be greater than xmin")
        if ymin > ymax:
            raise ValueError("In extent, ymax must be greater than ymin")
    else:
        xmin, xmax = (tx.min(), tx.max()) if len(x) else (0, 1)
        ymin, ymax = (ty.min(), ty.max()) if len(y) else (0, 1)

        # to avoid issues with singular data, expand the min/max pairs
        xmin, xmax = mtransforms.nonsingular(xmin, xmax, expander=0.1)
        ymin, ymax = mtransforms.nonsingular(ymin, ymax, expander=0.1)

    # The hexagonal lattice is represented as two interleaved rectangular
    # grids of hexagon centers: grid 1 is (nx+1) x (ny+1), grid 2 is
    # nx x ny and offset by half a cell in both directions (see the
    # offsets construction below).
    nx1 = nx + 1
    ny1 = ny + 1
    nx2 = nx
    ny2 = ny
    n = nx1 * ny1 + nx2 * ny2

    # In the x-direction, the hexagons exactly cover the region from
    # xmin to xmax. Need some padding to avoid roundoff errors.
    padding = 1.e-9 * (xmax - xmin)
    xmin -= padding
    xmax += padding
    sx = (xmax - xmin) / nx
    sy = (ymax - ymin) / ny
    # Positions in hexagon index coordinates.
    ix = (tx - xmin) / sx
    iy = (ty - ymin) / sy
    # Nearest centers on each of the two grids: grid 1 by rounding,
    # grid 2 (half-cell-shifted) by flooring.
    ix1 = np.round(ix).astype(int)
    iy1 = np.round(iy).astype(int)
    ix2 = np.floor(ix).astype(int)
    iy2 = np.floor(iy).astype(int)
    # flat indices, plus one so that out-of-range points go to position 0.
    i1 = np.where((0 <= ix1) & (ix1 < nx1) & (0 <= iy1) & (iy1 < ny1),
                  ix1 * ny1 + iy1 + 1, 0)
    i2 = np.where((0 <= ix2) & (ix2 < nx2) & (0 <= iy2) & (iy2 < ny2),
                  ix2 * ny2 + iy2 + 1, 0)

    # Squared distances to each candidate center; the factor 3 on the
    # y-term compensates for the ny ~ nx/sqrt(3) grid aspect so the
    # comparison picks the truly nearest hexagon center.
    d1 = (ix - ix1) ** 2 + 3.0 * (iy - iy1) ** 2
    d2 = (ix - ix2 - 0.5) ** 2 + 3.0 * (iy - iy2 - 0.5) ** 2
    # bdist[i] is True when point i belongs to grid 1, False for grid 2.
    bdist = (d1 < d2)

    if C is None:  # [1:] drops out-of-range points.
        counts1 = np.bincount(i1[bdist], minlength=1 + nx1 * ny1)[1:]
        counts2 = np.bincount(i2[~bdist], minlength=1 + nx2 * ny2)[1:]
        accum = np.concatenate([counts1, counts2]).astype(float)
        if mincnt is not None:
            accum[accum < mincnt] = np.nan

    else:
        # store the C values in a list per hexagon index
        Cs_at_i1 = [[] for _ in range(1 + nx1 * ny1)]
        Cs_at_i2 = [[] for _ in range(1 + nx2 * ny2)]
        for i in range(len(x)):
            if bdist[i]:
                Cs_at_i1[i1[i]].append(C[i])
            else:
                Cs_at_i2[i2[i]].append(C[i])
        if mincnt is None:
            mincnt = 1
        accum = np.array(
            [reduce_C_function(acc) if len(acc) >= mincnt else np.nan
             for Cs_at_i in [Cs_at_i1, Cs_at_i2]
             for acc in Cs_at_i[1:]],  # [1:] drops out-of-range points.
            float)

    # Bins to keep: those whose accumulation is not NaN (NaN marks bins
    # below mincnt).
    good_idxs = ~np.isnan(accum)

    # Hexagon center coordinates, first grid 1 then grid 2 (shifted by
    # half a cell), scaled back to data coordinates.
    offsets = np.zeros((n, 2), float)
    offsets[:nx1 * ny1, 0] = np.repeat(np.arange(nx1), ny1)
    offsets[:nx1 * ny1, 1] = np.tile(np.arange(ny1), nx1)
    offsets[nx1 * ny1:, 0] = np.repeat(np.arange(nx2) + 0.5, ny2)
    offsets[nx1 * ny1:, 1] = np.tile(np.arange(ny2), nx2) + 0.5
    offsets[:, 0] *= sx
    offsets[:, 1] *= sy
    offsets[:, 0] += xmin
    offsets[:, 1] += ymin
    # remove accumulation bins with no data
    offsets = offsets[good_idxs, :]
    accum = accum[good_idxs]

    return (*offsets.T, accum), (xmin, xmax), (ymin, ymax), (nx, ny)
1630
+
1631
+
1476
1632
def violin_stats (X , method , points = 100 , quantiles = None ):
1477
1633
"""
1478
1634
Return a list of dictionaries of data which can be used to draw a series
0 commit comments