FIX: get_datalim for collection #13642


Merged 2 commits on Jul 19, 2019
35 changes: 35 additions & 0 deletions doc/api/next_api_changes/2019-03-04-AL.rst
@@ -13,6 +13,41 @@ without an explicit call to `Axes.autoscale_view`.
In some cases, this can result in different limits being reported. If this is
an issue, consider triggering a draw with `fig.canvas.draw`.
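
For example, a minimal sketch of forcing the limits to be resolved before
they are queried (any standard backend)::

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.plot([0, 1], [0, 1])
    fig.canvas.draw()      # runs autoscaling now, not at show() time
    print(ax.get_xlim())   # the reported limits are now final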

Autoscaling changes for Collections
```````````````````````````````````

Autoscaling has also changed for artists that are based on the `.Collection`
class. Previously, `.Collection.get_datalim`, the method that calculates the
automatic limits, tried to take into account the size of objects in the
collection and make the limits large enough to not clip any of the objects,
i.e., for `.Axes.scatter` it would make the limits large enough to not clip
any markers in the scatter. This is problematic when the object size is
specified in physical or figure-relative space, because the transform from
physical units to data limits requires knowing the data limits, and becomes
invalid when the new limits are applied. This is an inverse problem that is
theoretically solvable (if the objects are physically smaller than the axes),
but the extra complexity was not deemed worth it, particularly as the most
common use case is markers in scatter plots, which are usually small enough
to be accommodated by the default data-limit margins.

While the new behavior is algorithmically simpler, it is conditional on
properties of the `.Collection` object:

1. ``offsets = None``, ``transform`` is a child of `.Axes.transData`: use the
   paths for the automatic limits (i.e. for `.LineCollection` in
   `.Axes.streamplot`).
2. ``offsets != None``, and ``offset_transform`` is a child of
   `.Axes.transData`:

   a) ``transform`` is a child of `.Axes.transData`: use the
      ``path + offset`` for the limits (i.e., for `.Axes.bar`).
   b) ``transform`` is not a child of `.Axes.transData`: just use the
      offsets for the limits (i.e. for scatter).

3. otherwise return a null `.Bbox`.

While this seems complicated, the logic is simply to use, for the limits, the
information from the object that is in data space, but not the information
that is in physical units, as the sketch below illustrates.
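For instance, a minimal sketch of case 2b (a hypothetical scatter; marker
sizes are in points, i.e. physical units, so only the offsets feed the
automatic limits)::

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    # Positions are in data space; sizes (s) are in points**2.
    ax.scatter([0, 1, 2], [0, 1, 2], s=5000)
    # The data limits reflect only the positions, (0, 0) to (2, 2); the
    # default margins, not get_datalim, keep the big markers visible.
    print(ax.dataLim.bounds)  # (0.0, 0.0, 2.0, 2.0)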

LogLocator.nonsingular now maintains the orders of its arguments
````````````````````````````````````````````````````````````````

55 changes: 47 additions & 8 deletions lib/matplotlib/collections.py
@@ -146,6 +146,8 @@ def __init__(self,
        self._joinstyle = None

        self._offsets = np.zeros((1, 2))
+        # Save whether offsets were passed in as None.
+        self._offsetsNone = offsets is None
        self._uniform_offsets = None
        if offsets is not None:
            offsets = np.asanyarray(offsets, float)
@@ -179,9 +181,30 @@ def get_offset_transform(self):
        return t

    def get_datalim(self, transData):
+
+        # Get the automatic datalim of the collection.
+        #
+        # This operation depends on the transforms for the data in the
+        # collection and whether the collection has offsets.
+        #
+        # 1) offsets = None, transform child of transData: use the paths for
+        #    the automatic limits (i.e. for LineCollection in streamplot).
+        # 2) offsets != None, offset_transform is child of transData:
+        #    a) transform is child of transData: use the path + offset for
+        #       limits (i.e. for bar).
+        #    b) transform is not a child of transData: just use the offsets
+        #       for the limits (i.e. for scatter)
+        # 3) otherwise return a null Bbox.
+
        transform = self.get_transform()
        transOffset = self.get_offset_transform()
+        if (not self._offsetsNone and
+                not transOffset.contains_branch(transData)):
Contributor: I guess(?) you could get a few extra points by checking
contains_branch_separately and then deciding to use the x or the y data
limits if one of them does contain transData; this could be a separate PR
too, as the use case is a bit obscure.

Contributor: let's just forget this case for now

Member (author): As discussed on gitter, I think that's best for
simplicity's sake. For instance, I don't know what the datalim handling does
with infs, and adding this case is a good chunk of code. As also discussed,
I'm very happy to revisit if it turns out to be needed.
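
For reference, a minimal sketch of the branch checks discussed above (note
that the per-axis variant is actually spelled contains_branch_seperately in
matplotlib.transforms):

    import matplotlib.pyplot as plt
    import matplotlib.transforms as mtransforms

    fig, ax = plt.subplots()

    # A scatter's offset transform is transData itself, so the new check in
    # get_datalim passes and the offsets feed the automatic limits.
    coll = ax.scatter([0, 1], [0, 1])
    print(coll.get_offset_transform().contains_branch(ax.transData))  # True

    # A blended transform (x in data space, y in axes space) fails the
    # combined check, but the per-axis variant reports the asymmetry.
    blended = mtransforms.blended_transform_factory(ax.transData, ax.transAxes)
    print(blended.contains_branch(ax.transData))             # False
    print(blended.contains_branch_seperately(ax.transData))  # (True, False)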

+            # if there are offsets but in some co-ords other than data,
+            # then don't use them for autoscaling.
+            return transforms.Bbox.null()
        offsets = self._offsets
+
        paths = self.get_paths()

        if not transform.is_affine:
@@ -196,13 +219,30 @@ def get_datalim(self, transData):
            # get_path_collection_extents handles nan but not masked arrays

        if len(paths) and len(offsets):
-            result = mpath.get_path_collection_extents(
-                transform.frozen(), paths, self.get_transforms(),
-                offsets, transOffset.frozen())
-            result = result.inverse_transformed(transData)
-        else:
-            result = transforms.Bbox.null()
-        return result
+            if transform.contains_branch(transData):
+                # collections that are just in data units (like quiver)
+                # can properly have the axes limits set by their shape +
+                # offset. LineCollections that have no offsets can
+                # also use this algorithm (like streamplot).
+                result = mpath.get_path_collection_extents(
+                    transform.frozen(), paths, self.get_transforms(),
+                    offsets, transOffset.frozen())
+                return result.inverse_transformed(transData)
+        if not self._offsetsNone:
+            # this is for collections that have their paths (shapes)
+            # in physical, axes-relative, or figure-relative units
+            # (i.e. like scatter). We can't uniquely set limits based on
+            # those shapes, so we just set the limits based on their
+            # location.
+            # Finish the transform:
Contributor: in the same spirit as #14845 this could be

    offset_to_data = transOffset - transData
    offsets = offset_to_data.transform(offsets)

(you also save a transform by doing so).

Member (author): I wasn't paying attention when #14845 went in. This
requires remembering that transA - transB is the same as what I wrote here.
I don't think it helps the readability/comprehensibility of all the
transform magic to have arithmetic operators that do stuff versus just using
explicit methods.

Contributor: The main operator is trA + trB, which just means "perform trA,
then perform trB"; once you know it, it's quite natural that trA - trB means
"perform trA, then perform the inverse of trB". You could write
mtransforms.composite_transform_factory(trA, trB.inverted()) if you prefer
explicit method names, but the point is that it is more efficient to first
multiply two small (3x3) matrices and then use the result to transform a big
(N, 2) array, rather than to do two transformations in series.

Member (author): Good point. Split the difference: (trA + trB.inverted())
seems straightforward enough. I understand the __sub__ convention, I just
find it a bit obscure. The __add__ convention is fine.
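
For illustration, a minimal sketch of the equivalence discussed above
(hypothetical offsets in axes-relative coordinates, mapped into data space):

    import numpy as np
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.set(xlim=(0, 10), ylim=(0, 5))

    offsets = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]])

    # Both spellings resolve the composition before transforming the
    # whole (N, 2) array.
    a = (ax.transAxes - ax.transData).transform(offsets)
    b = (ax.transAxes + ax.transData.inverted()).transform(offsets)
    np.testing.assert_allclose(a, b)
    print(a)  # [[0, 0], [5, 2.5], [10, 5]] in data coordinates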

+            offsets = (transOffset +
+                       transData.inverted()).transform(offsets)
+            offsets = np.ma.masked_invalid(offsets)
+            if not offsets.mask.all():
+                points = np.row_stack((offsets.min(axis=0),
+                                       offsets.max(axis=0)))
+                return transforms.Bbox(points)
+        return transforms.Bbox.null()

    def get_window_extent(self, renderer):
        # TODO: check to ensure that this does not fail for

@@ -1299,7 +1339,6 @@ def __init__(self, segments,  # Can be None.
            antialiaseds = (mpl.rcParams['lines.antialiased'],)

        colors = mcolors.to_rgba_array(colors)
-
        Collection.__init__(
            self,
            edgecolors=colors,
Binary file modified lib/matplotlib/tests/baseline_images/test_axes/scatter.pdf
Binary file modified lib/matplotlib/tests/baseline_images/test_axes/scatter.png