From 035b3e23c7c3f40ebe3423e6fa890184d80e1efa Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Thu, 11 Apr 2024 21:40:12 +0200 Subject: [PATCH 1/9] Improve performance of startswith by eliminating double work in tailmatch --- Objects/unicodeobject.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2c259b7e869efe..f8e487a3c92c0a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9272,24 +9272,23 @@ tailmatch(PyObject *self, else offset = start; - if (PyUnicode_READ(kind_self, data_self, offset) == - PyUnicode_READ(kind_sub, data_sub, 0) && - PyUnicode_READ(kind_self, data_self, offset + end_sub) == - PyUnicode_READ(kind_sub, data_sub, end_sub)) { + int last_character_matches = PyUnicode_READ(kind_self, data_self, offset + end_sub) == + PyUnicode_READ(kind_sub, data_sub, end_sub); + + if (last_character_matches) { + if (end_sub==0) + return 1; /* If both are of the same kind, memcmp is sufficient */ if (kind_self == kind_sub) { - return ! memcmp((char *)data_self + - (offset * PyUnicode_KIND(substring)), - data_sub, - PyUnicode_GET_LENGTH(substring) * - PyUnicode_KIND(substring)); + return ! memcmp((char *)data_self + (offset * kind_sub), + data_sub, end_sub * kind_sub); } /* otherwise we have to compare each character by first accessing it */ else { /* We do not need to compare 0 and len(substring)-1 because the if statement above ensured already that they are equal when we end up here. */ - for (i = 1; i < end_sub; ++i) { + for (i = 0; i < end_sub; ++i) { if (PyUnicode_READ(kind_self, data_self, offset + i) != PyUnicode_READ(kind_sub, data_sub, i)) return 0; From 4f4b084eadd50e65f165ad011777e5a7991ff240 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Thu, 11 Apr 2024 22:57:02 +0200 Subject: [PATCH 2/9] code style --- Objects/unicodeobject.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f8e487a3c92c0a..e9417adf7035b4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9272,12 +9272,13 @@ tailmatch(PyObject *self, else offset = start; - int last_character_matches = PyUnicode_READ(kind_self, data_self, offset + end_sub) == - PyUnicode_READ(kind_sub, data_sub, end_sub); + int match_last = PyUnicode_READ(kind_self, data_self, offset + end_sub) == + PyUnicode_READ(kind_sub, data_sub, end_sub); - if (last_character_matches) { - if (end_sub==0) + if (match_last) { + if (end_sub==0) { return 1; + } /* If both are of the same kind, memcmp is sufficient */ if (kind_self == kind_sub) { return ! memcmp((char *)data_self + (offset * kind_sub), From 9f201b16c6d38a3b89a54fdc794410a1c0eb5f0a Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 21:17:26 +0000 Subject: [PATCH 3/9] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst new file mode 100644 index 00000000000000..ea449637abc68e --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst @@ -0,0 +1 @@ +Improve performance of :func:`str.startswith` and `str.endswith`. From 8792d0b9d001a4c8a1b7e523e60f9450098a1e21 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Thu, 11 Apr 2024 23:44:01 +0200 Subject: [PATCH 4/9] lint --- .../2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst index ea449637abc68e..b6be9b7b66ba4f 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst @@ -1 +1 @@ -Improve performance of :func:`str.startswith` and `str.endswith`. +Improve performance of :func:`str.startswith` and :func:`str.endswith`. From 2a2cfb36840bd096ed4d1679d9ac37290f8c75e6 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Mon, 20 May 2024 23:04:00 +0200 Subject: [PATCH 5/9] Update Objects/unicodeobject.c Co-authored-by: Erlend E. Aasland --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1d13227fef282a..7fd29531ad55a4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9276,7 +9276,7 @@ tailmatch(PyObject *self, PyUnicode_READ(kind_sub, data_sub, end_sub); if (match_last) { - if (end_sub==0) { + if (end_sub == 0) { return 1; } /* If both are of the same kind, memcmp is sufficient */ From 9f8e4b880c9c8d08f4a4f5973e1b64db846ab0c7 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Mon, 20 May 2024 23:04:18 +0200 Subject: [PATCH 6/9] update comment --- Objects/unicodeobject.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1d13227fef282a..c27cb27763dc35 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9286,9 +9286,9 @@ tailmatch(PyObject *self, } /* otherwise we have to compare each character by first accessing it */ else { - /* We do not need to compare 0 and len(substring)-1 because - the if statement above ensured already that they are equal - when we end up here. */ + /* We do not need to compare len(substring)-1 because the if + statement above ensured already that they are equal when we + end up here. */ for (i = 0; i < end_sub; ++i) { if (PyUnicode_READ(kind_self, data_self, offset + i) != PyUnicode_READ(kind_sub, data_sub, i)) From 8a7b9fe363d838b8d7e7930e3cf9487a55ec23c8 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Tue, 21 May 2024 22:12:33 +0200 Subject: [PATCH 7/9] update news entry --- .../2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst index b6be9b7b66ba4f..19dc551118ae67 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst @@ -1 +1 @@ -Improve performance of :func:`str.startswith` and :func:`str.endswith`. +Improve performance of :func:`str.startswith`, :func:`str.endswith`, :func:`str.removeprefix` and :func:`str.removesuffix`. From ed8b9d3c35e1813dfb7eaf71c47fd2327025f7a3 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Tue, 21 May 2024 22:17:38 +0200 Subject: [PATCH 8/9] Update Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst Co-authored-by: Erlend E. Aasland --- .../2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst index 19dc551118ae67..1e77d5ba1413b8 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-11-21-17-23.gh-issue-117431.ZxdAFN.rst @@ -1 +1 @@ -Improve performance of :func:`str.startswith`, :func:`str.endswith`, :func:`str.removeprefix` and :func:`str.removesuffix`. +Improve performance of :meth:`str.startswith`, :meth:`str.endswith`, :meth:`str.removeprefix` and :meth:`str.removesuffix`. From abe35e8c3a53c8120bdffb395d18fbdfd1d99b68 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Mon, 10 Feb 2025 13:27:10 +0100 Subject: [PATCH 9/9] reduce churn --- Objects/unicodeobject.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c529d6e9b4435b..2970dbfe7fbb4b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9947,9 +9947,6 @@ tailmatch(PyObject *self, PyUnicode_READ(kind_sub, data_sub, end_sub); if (match_last) { - if (end_sub == 0) { - return 1; - } /* If both are of the same kind, memcmp is sufficient */ if (kind_self == kind_sub) { return ! memcmp((char *)data_self + (offset * kind_sub), @@ -9957,8 +9954,8 @@ tailmatch(PyObject *self, } /* otherwise we have to compare each character by first accessing it */ else { - /* We do not need to compare len(substring)-1 because the if - statement above ensured already that they are equal when we + /* We do not need to compare len(substring)-1 because the check on + match_last above ensured already that they are equal when we end up here. */ for (i = 0; i < end_sub; ++i) { if (PyUnicode_READ(kind_self, data_self, offset + i) !=