From 08ebd60036634254da6c8558f9cb114f5efd461a Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Fri, 7 Jun 2019 16:00:51 -0500
Subject: [PATCH 1/3] DOC: Mention and try to explain pairwise summation in sum

Note that this behaviour is of course inherited into `np.add.reduce` and
many other reductions such as `mean` or users of this reduction, such
as `cov`. This is ignored here.

Closes gh-11331, gh-9393, gh-13734
---
 numpy/core/fromnumeric.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index af2a5298de08..13a12817a069 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -2104,6 +2104,8 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
     --------
     ndarray.sum : Equivalent method.
 
+    add.reduce : Equivalent functionality of `add`.
+
     cumsum : Cumulative sum of array elements.
 
     trapz : Integration of array values using the composite trapezoidal rule.
@@ -2120,6 +2122,19 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
     >>> np.sum([])
     0.0
 
+    The numerical precision of sum (and ``np.add.reduce``) is in general
+    limited by directly adding each number individually to the result
+    causing rounding errors in every step.
+    However, often numpy will use a  numerically better approach
+    (pairwise summation) leading to improved precision in many use cases.
+    This improved precision is always provided when no ``axis`` is given.
+    When ``axis`` is given, it will depend on which axis is summed.
+    Technically, to provide the best speed possible, the improved precision
+    is only used when the summation is along the fast axis in memory.
+    Note that the exact precision may vary depending on other parameters.
+    In contrast to NumPy, Python's ``sum`` function uses a slower but more
+    precise approach to summation.
+
     Examples
     --------
     >>> np.sum([0.5, 1.5])

From c1fdef890146221764e2cec4f261cea9dd14de31 Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Sat, 8 Jun 2019 11:50:41 -0500
Subject: [PATCH 2/3] DOC: Fixup review comment, also it's math.fsum not sum
 itself

---
 numpy/core/fromnumeric.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index 13a12817a069..50a0149a6e1a 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -2122,9 +2122,9 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
     >>> np.sum([])
     0.0
 
-    The numerical precision of sum (and ``np.add.reduce``) is in general
-    limited by directly adding each number individually to the result
-    causing rounding errors in every step.
+    For floating point numbers the numerical precision of sum (and
+    ``np.add.reduce``) is in general limited by directly adding each number
+    individually to the result causing rounding errors in every step.
     However, often numpy will use a  numerically better approach
     (pairwise summation) leading to improved precision in many use cases.
     This improved precision is always provided when no ``axis`` is given.
@@ -2132,8 +2132,8 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
     Technically, to provide the best speed possible, the improved precision
     is only used when the summation is along the fast axis in memory.
     Note that the exact precision may vary depending on other parameters.
-    In contrast to NumPy, Python's ``sum`` function uses a slower but more
-    precise approach to summation.
+    In contrast to NumPy, Python's ``math.fsum`` function uses a slower but
+    more precise approach to summation.
 
     Examples
     --------

From 86235ecb420f1df41a3189000c16400b6b45bfaf Mon Sep 17 00:00:00 2001
From: Sebastian Berg <sebastian@sipsolutions.net>
Date: Mon, 10 Jun 2019 15:50:06 -0500
Subject: [PATCH 3/3] DOC: add Marten's comment about f8, and smuggle in
 "partial" to pairwise

---
 numpy/core/fromnumeric.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py
index 50a0149a6e1a..f262f8552108 100644
--- a/numpy/core/fromnumeric.py
+++ b/numpy/core/fromnumeric.py
@@ -2125,8 +2125,8 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
     For floating point numbers the numerical precision of sum (and
     ``np.add.reduce``) is in general limited by directly adding each number
     individually to the result causing rounding errors in every step.
-    However, often numpy will use a  numerically better approach
-    (pairwise summation) leading to improved precision in many use cases.
+    However, often numpy will use a numerically better approach (partial
+    pairwise summation) leading to improved precision in many use-cases.
     This improved precision is always provided when no ``axis`` is given.
     When ``axis`` is given, it will depend on which axis is summed.
     Technically, to provide the best speed possible, the improved precision
@@ -2134,6 +2134,10 @@ def sum(a, axis=None, dtype=None, out=None, keepdims=np._NoValue,
     Note that the exact precision may vary depending on other parameters.
     In contrast to NumPy, Python's ``math.fsum`` function uses a slower but
     more precise approach to summation.
+    Especially when summing a large number of lower precision floating point
+    numbers, such as ``float32``, numerical errors can become significant.
+    In such cases it can be advisable to use ``dtype="float64"`` to use a
+    higher precision for the output.
 
     Examples
     --------