From 4c678174bfae84ae9a27b5740d478adeaa866eff Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 8 Oct 2024 14:05:29 +0200 Subject: [PATCH 1/3] gh-124502: Optimize unicode_eq() * Clean up unicode_compare_eq() code. * Copy unicode_compare_eq() code into unicode_eq(): the two functions are now identical. --- Objects/stringlib/eq.h | 20 +++++++++++++------- Objects/unicodeobject.c | 22 ++++++++++------------ 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/Objects/stringlib/eq.h b/Objects/stringlib/eq.h index 2eac4baf5ca9ce..55e0ebf171be5b 100644 --- a/Objects/stringlib/eq.h +++ b/Objects/stringlib/eq.h @@ -4,14 +4,20 @@ * unicode_eq() is called when the hash of two unicode objects is equal. */ Py_LOCAL_INLINE(int) -unicode_eq(PyObject *a, PyObject *b) +unicode_eq(PyObject *str1, PyObject *str2) { - if (PyUnicode_GET_LENGTH(a) != PyUnicode_GET_LENGTH(b)) + Py_ssize_t len = PyUnicode_GET_LENGTH(str1); + if (PyUnicode_GET_LENGTH(str2) != len) { return 0; - if (PyUnicode_GET_LENGTH(a) == 0) - return 1; - if (PyUnicode_KIND(a) != PyUnicode_KIND(b)) + } + + int kind = PyUnicode_KIND(str1); + if (PyUnicode_KIND(str2) != kind) { return 0; - return memcmp(PyUnicode_1BYTE_DATA(a), PyUnicode_1BYTE_DATA(b), - PyUnicode_GET_LENGTH(a) * PyUnicode_KIND(a)) == 0; + } + + const void *data1 = PyUnicode_DATA(str1); + const void *data2 = PyUnicode_DATA(str2); + int cmp = memcmp(data1, data2, len * kind); + return (cmp == 0); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 60d4875d3b393e..5d31ad56351b16 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10971,21 +10971,19 @@ unicode_compare(PyObject *str1, PyObject *str2) static int unicode_compare_eq(PyObject *str1, PyObject *str2) { - int kind; - const void *data1, *data2; - Py_ssize_t len; - int cmp; - - len = PyUnicode_GET_LENGTH(str1); - if (PyUnicode_GET_LENGTH(str2) != len) + Py_ssize_t len = PyUnicode_GET_LENGTH(str1); + if (PyUnicode_GET_LENGTH(str2) != len) { return 
0; - kind = PyUnicode_KIND(str1); - if (PyUnicode_KIND(str2) != kind) + } + + int kind = PyUnicode_KIND(str1); + if (PyUnicode_KIND(str2) != kind) { return 0; - data1 = PyUnicode_DATA(str1); - data2 = PyUnicode_DATA(str2); + } - cmp = memcmp(data1, data2, len * kind); + const void *data1 = PyUnicode_DATA(str1); + const void *data2 = PyUnicode_DATA(str2); + int cmp = memcmp(data1, data2, len * kind); return (cmp == 0); } From 8592ef1fa62565c6dff76dd4d495fbc295d2bc3e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 8 Oct 2024 15:18:04 +0200 Subject: [PATCH 2/3] Address review: remove 'cmp' variable --- Objects/stringlib/eq.h | 3 +-- Objects/unicodeobject.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Objects/stringlib/eq.h b/Objects/stringlib/eq.h index 55e0ebf171be5b..821b692f26b830 100644 --- a/Objects/stringlib/eq.h +++ b/Objects/stringlib/eq.h @@ -18,6 +18,5 @@ unicode_eq(PyObject *str1, PyObject *str2) const void *data1 = PyUnicode_DATA(str1); const void *data2 = PyUnicode_DATA(str2); - int cmp = memcmp(data1, data2, len * kind); - return (cmp == 0); + return (memcmp(data1, data2, len * kind) == 0); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 5d31ad56351b16..f4ac7e8dc63977 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10983,8 +10983,7 @@ unicode_compare_eq(PyObject *str1, PyObject *str2) const void *data1 = PyUnicode_DATA(str1); const void *data2 = PyUnicode_DATA(str2); - int cmp = memcmp(data1, data2, len * kind); - return (cmp == 0); + return (memcmp(data1, data2, len * kind) == 0); } int From c83bb276e7cc119c626414addbd901e9685d1c11 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 8 Oct 2024 15:44:03 +0200 Subject: [PATCH 3/3] Revert unicode_compare_eq() changes --- Objects/unicodeobject.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f4ac7e8dc63977..60d4875d3b393e 
100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -10971,19 +10971,22 @@ unicode_compare(PyObject *str1, PyObject *str2) static int unicode_compare_eq(PyObject *str1, PyObject *str2) { - Py_ssize_t len = PyUnicode_GET_LENGTH(str1); - if (PyUnicode_GET_LENGTH(str2) != len) { - return 0; - } + int kind; + const void *data1, *data2; + Py_ssize_t len; + int cmp; - int kind = PyUnicode_KIND(str1); - if (PyUnicode_KIND(str2) != kind) { + len = PyUnicode_GET_LENGTH(str1); + if (PyUnicode_GET_LENGTH(str2) != len) return 0; - } + kind = PyUnicode_KIND(str1); + if (PyUnicode_KIND(str2) != kind) + return 0; + data1 = PyUnicode_DATA(str1); + data2 = PyUnicode_DATA(str2); - const void *data1 = PyUnicode_DATA(str1); - const void *data2 = PyUnicode_DATA(str2); - return (memcmp(data1, data2, len * kind) == 0); + cmp = memcmp(data1, data2, len * kind); + return (cmp == 0); } int