From 03b88d6a6d3756c2c1fa5ad5aebf56da2686e8a5 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Tue, 19 Oct 2021 18:39:05 -0500 Subject: [PATCH 1/9] First stab. About 40% speedup on tupsort.py's "(float,)" case. --- Objects/listobject.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index ed5324155f6275..f257e21696d636 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2195,25 +2195,32 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; + /* See whether fast compares of the first elements settle it. */ + k = ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0], ms); + if (k) /* error, or v < w */ + return k; + k = ms->tuple_elem_compare(wt->ob_item[0], vt->ob_item[0], ms); + if (k < 0) /* error */ + return -1; + if (k > 0) /* w < v */ + return 0; + + /* first elements are equal */ vlen = Py_SIZE(vt); wlen = Py_SIZE(wt); - - for (i = 0; i < vlen && i < wlen; i++) { + for (i = 1; i < vlen && i < wlen; i++) { k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ); if (k < 0) return -1; - if (!k) - break; + if (!k) { + return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], + Py_LT); + } } + /* all equal until we fell off the end */ + return vlen < wlen; - if (i >= vlen || i >= wlen) - return vlen < wlen; - - if (i == 0) - return ms->tuple_elem_compare(vt->ob_item[i], wt->ob_item[i], ms); - else - return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_LT); -} + } /* An adaptive, stable, natural mergesort. See listsort.txt. * Returns Py_None on success, NULL on error. 
Even in case of error, the From 01bbf651d73066c73365d3ae841fbf3e59a2c5a9 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Wed, 20 Oct 2021 01:28:28 +0000 Subject: [PATCH 2/9] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2021-10-20-01-28-26.bpo-45530.5r7n4m.rst | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst new file mode 100644 index 00000000000000..6104a5092a299b --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst @@ -0,0 +1,6 @@ +Cases of sorting using tuples as keys may be significantly faster +in some cases. This is worth mentioning because, if the tuple +elements don't define a total ordering, the order of the result +may differ from earlier releases. It's generally true that the +result of sorting simply isn't well-defined in the absence of a +total ordering on list elements. 
\ No newline at end of file From 4551c498d9a37c10f6630513acf91d705bbb1d84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Wed, 20 Oct 2021 20:35:23 +0200 Subject: [PATCH 3/9] Add reordering notification to whatsnew, clarify Blurb a little --- Doc/reference/expressions.rst | 2 ++ Doc/whatsnew/3.11.rst | 7 +++++++ .../2021-10-20-01-28-26.bpo-45530.5r7n4m.rst | 14 ++++++++------ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index d70fcb34d2168e..d21a44431e52aa 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1424,6 +1424,8 @@ Note that ``a op1 b op2 c`` doesn't imply any kind of comparison between *a* and *c*, so that, e.g., ``x < y > z`` is perfectly legal (though perhaps not pretty). +.. _expressions-value-comparisons: + Value comparisons ----------------- diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index 2e57f0cea53db2..1b6d799dae5dc7 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -450,6 +450,13 @@ Changes in the Python API the ``'utf-8'`` encoding. (Contributed by Srinivas Reddy Thatiparthy (శ్రీనివాస్ రెడ్డి తాటిపర్తి) in :issue:`41137`.) +* When sorting using tuples as keys, the order of the result may differ + from earlier releases if the tuple elements don't define a total + ordering (see :ref:`expressions-value-comparisons` for + information on total ordering). It's generally true that the result + of sorting simply isn't well-defined in the absence of a total ordering + on list elements. 
+ Build Changes ============= diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst index 6104a5092a299b..dafb0bc3633a13 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst @@ -1,6 +1,8 @@ -Cases of sorting using tuples as keys may be significantly faster -in some cases. This is worth mentioning because, if the tuple -elements don't define a total ordering, the order of the result -may differ from earlier releases. It's generally true that the -result of sorting simply isn't well-defined in the absence of a -total ordering on list elements. \ No newline at end of file +Cases of sorting using tuples as keys may now be significantly faster +in some cases. Patch by Tim Peters. + +The order of the result may differ from earlier releases if the tuple +elements don't define a total ordering (see +:ref:`reference/expressions:value-comparisons` for information on +total ordering). It's generally true that the result of sorting simply +isn't well-defined in the absence of a total ordering on list elements. From 67c4cc95e1bd50d50d645fa1670d74a8615a8e8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Wed, 20 Oct 2021 20:36:37 +0200 Subject: [PATCH 4/9] Fix invalid ref in Blurb --- .../Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst index dafb0bc3633a13..a8b155e7ccfcd8 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-20-01-28-26.bpo-45530.5r7n4m.rst @@ -3,6 +3,6 @@ in some cases. Patch by Tim Peters. 
The order of the result may differ from earlier releases if the tuple elements don't define a total ordering (see -:ref:`reference/expressions:value-comparisons` for information on +:ref:`expressions-value-comparisons` for information on total ordering). It's generally true that the result of sorting simply isn't well-defined in the absence of a total ordering on list elements. From 5b2870399e3500fc688240f2ab4ef841da40feb2 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Wed, 20 Oct 2021 20:08:35 -0500 Subject: [PATCH 5/9] Keep track of whether unsafe_tuple_compare() calls are resolved by the very first tuple elements, and adjust strategy accordingly. --- Objects/listobject.c | 55 +++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index f257e21696d636..ab966d9ee46fec 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1208,6 +1208,13 @@ struct s_MergeState { * of tuples. It may be set to safe_object_compare, but the idea is that hopefully * we can assume more, and use one of the special-case compares. */ int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *); + + /* Used by unsafe_tuple_compare to record whether the very first tuple + * elements resolved the last comparison attempt. If so, next time a + * method that may avoid PyObject_RichCompareBool() entirely is tried. + * 0 for false, 1 for true. 
+ */ + int first_tuple_items_resolved_it; }; /* binarysort is the best method for sorting small arrays: it does @@ -2183,7 +2190,7 @@ static int unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) { PyTupleObject *vt, *wt; - Py_ssize_t i, vlen, wlen; + Py_ssize_t i, vlen, wlen, firsti; int k; /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */ @@ -2194,27 +2201,42 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; + firsti = 0; + if (ms->first_tuple_items_resolved_it) { + /* See whether fast compares of the first elements settle it. */ + k = ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0], ms); + if (k) /* error, or v < w */ + return k; + k = ms->tuple_elem_compare(wt->ob_item[0], vt->ob_item[0], ms); + if (k < 0) /* error */ + return -1; + if (k > 0) /* w < v */ + return 0; + /* We have + * not (v[0] < w[0]) and not (w[0] < v[0]) + * which implies, for a total order, that the first elements are + * equal. So skip them in the loop. + */ + firsti = 1; + ms->first_tuple_items_resolved_it = 0; + } - /* See whether fast compares of the first elements settle it. 
*/ - k = ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0], ms); - if (k) /* error, or v < w */ - return k; - k = ms->tuple_elem_compare(wt->ob_item[0], vt->ob_item[0], ms); - if (k < 0) /* error */ - return -1; - if (k > 0) /* w < v */ - return 0; - - /* first elements are equal */ vlen = Py_SIZE(vt); wlen = Py_SIZE(wt); - for (i = 1; i < vlen && i < wlen; i++) { + for (i = firsti; i < vlen && i < wlen; i++) { k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ); if (k < 0) return -1; - if (!k) { - return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], - Py_LT); + if (!k) { /* not equal */ + if (!i) { + ms->first_tuple_items_resolved_it = 1; + return ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0], + ms); + } + else { + return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], + Py_LT); + } } } /* all equal until we fell off the end */ @@ -2403,6 +2425,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) } ms.key_compare = unsafe_tuple_compare; + ms.first_tuple_items_resolved_it = 1; /* be optimistic */ } } /* End of pre-sort check: ms is now set properly! */ From b0b394392e11080288fb069032f7cc7c2bb16e6d Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Wed, 20 Oct 2021 20:51:36 -0500 Subject: [PATCH 6/9] Add comments explaining the ms->first_tuple_items_resolved_it strategy. This looks to be quite successful. It loses a few per cent in speed in cases that always want to use the cheaper tests, but can gain far more (compared to this branch's state before this commit) in some cases where PyObject_RichCompareBool(..., Py_EQ) typically returns 1 (they're equal) when applied to the first pair. 
--- Objects/listobject.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index ab966d9ee46fec..db96bbe6cb7f83 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2185,7 +2185,24 @@ unsafe_float_compare(PyObject *v, PyObject *w, MergeState *ms) * using the same pre-sort check as we use for ms->key_compare, * but run on the list [x[0] for x in L]. This allows us to optimize compares * on two levels (as long as [x[0] for x in L] is type-homogeneous.) The idea is - * that most tuple compares don't involve x[1:]. */ + * that most tuple compares don't involve x[1:]. + * However, that may not be right. When it is right, we can win by calling the + * relatively cheap ms->tuple_elem_compare on the first pair of elements, to + * see whether v[0] < w[0] or w[0] < v[0]. If either are so, we're done. + * Else we proceed as in the tuple compare, comparing the remaining pairs via + * the probably more expensive PyObject_RichCompareBool(..., Py_EQ) until (if + * ever) that says "no, not equal!". Then, if we're still on the first pair, + * ms->tuple_elem_compare can resolve it, else PyObject_RichCompareBool(..., + * Py_LT) finishes the job. + * In any case, ms->first_tuple_items_resolved_it keeps track of whether the + * most recent tuple comparison was resolved by the first pair. If so, the + * next attempt starts by trying the cheap tests on the first pair again, else + * PyObject_RichCompareBool(..., Py_EQ) is used from the start. + * There are cases where PyObject_RichCompareBool(..., Py_EQ) is much cheaper! + * For example, that can return "almost immediately" if passed the same + * object twice (it special-cases object identity for Py_EQ), which can, + * potentially, be unboundedly faster than ms->tuple_elem_compare. 
+ */ static int unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) { From ff23c7bcf1dcfcd966e8e6b3a59a6fa4be309b1f Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Thu, 21 Oct 2021 13:09:31 -0500 Subject: [PATCH 7/9] Add a clarifying assert. Swap the order of if/else blocks to put the more likely block first. --- Objects/listobject.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index db96bbe6cb7f83..76bbeaf49c0be4 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2237,6 +2237,10 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) firsti = 1; ms->first_tuple_items_resolved_it = 0; } + /* Now first_tuple_items_resolved_it was 0 on entry, or was forced to 0 + * at the end of the `if` block just above. + */ + assert(! ms->first_tuple_items_resolved_it); vlen = Py_SIZE(vt); wlen = Py_SIZE(wt); @@ -2245,15 +2249,15 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) if (k < 0) return -1; if (!k) { /* not equal */ - if (!i) { + if (i) { + return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], + Py_LT); + } + else { ms->first_tuple_items_resolved_it = 1; return ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0], ms); } - else { - return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], - Py_LT); - } } } /* all equal until we fell off the end */ From cd69e8b8af29a34065434e75eddac9109e23a8fd Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Thu, 21 Oct 2021 23:52:00 -0500 Subject: [PATCH 8/9] Remove the new `firsti` variable - needless name proliferation. 
--- Objects/listobject.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 76bbeaf49c0be4..5db8c1cecc8bc7 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2207,7 +2207,7 @@ static int unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) { PyTupleObject *vt, *wt; - Py_ssize_t i, vlen, wlen, firsti; + Py_ssize_t i, vlen, wlen; int k; /* Modified from Objects/tupleobject.c:tuplerichcompare, assuming: */ @@ -2218,7 +2218,7 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) vt = (PyTupleObject *)v; wt = (PyTupleObject *)w; - firsti = 0; + i = 0; if (ms->first_tuple_items_resolved_it) { /* See whether fast compares of the first elements settle it. */ k = ms->tuple_elem_compare(vt->ob_item[0], wt->ob_item[0], ms); @@ -2234,7 +2234,7 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) * which implies, for a total order, that the first elements are * equal. So skip them in the loop. */ - firsti = 1; + i = 1; ms->first_tuple_items_resolved_it = 0; } /* Now first_tuple_items_resolved_it was 0 on entry, or was forced to 0 @@ -2244,7 +2244,7 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) vlen = Py_SIZE(vt); wlen = Py_SIZE(wt); - for (i = firsti; i < vlen && i < wlen; i++) { + for (; i < vlen && i < wlen; i++) { k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ); if (k < 0) return -1; From 7c158f67e26619d5ee03ac17c0fcb4c6040d68fa Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Fri, 22 Oct 2021 11:54:49 -0500 Subject: [PATCH 9/9] Errors from comparisons are rare, so rearrange code to act on normal outcomes first. 
--- Objects/listobject.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 5db8c1cecc8bc7..08dfdefa3e97a2 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -2225,10 +2225,10 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) if (k) /* error, or v < w */ return k; k = ms->tuple_elem_compare(wt->ob_item[0], vt->ob_item[0], ms); - if (k < 0) /* error */ - return -1; if (k > 0) /* w < v */ return 0; + if (k < 0) /* error */ + return -1; /* We have * not (v[0] < w[0]) and not (w[0] < v[0]) * which implies, for a total order, that the first elements are @@ -2246,8 +2246,6 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) wlen = Py_SIZE(wt); for (; i < vlen && i < wlen; i++) { k = PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], Py_EQ); - if (k < 0) - return -1; if (!k) { /* not equal */ if (i) { return PyObject_RichCompareBool(vt->ob_item[i], wt->ob_item[i], @@ -2259,6 +2257,8 @@ unsafe_tuple_compare(PyObject *v, PyObject *w, MergeState *ms) ms); } } + if (k < 0) + return -1; } /* all equal until we fell off the end */ return vlen < wlen;