From 80c1c6cd0621d7f1bc258bd42a5918eab2f6cddb Mon Sep 17 00:00:00 2001 From: fluhus Date: Sat, 22 Feb 2025 12:38:37 -0800 Subject: [PATCH 1/8] Add failing regression test for _TO_BOOL_STR --- Lib/test/test_capi/test_opt.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index b7083dbfb89db8..904c216f0f2de0 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1499,6 +1499,39 @@ def f(n): # But all of the appends we care about are still there: self.assertEqual(uops.count("_CALL_LIST_APPEND"), len("ABCDEFG")) + def test_narrow_type_to_constant_str_empty(self): + def f(n): + trace = [] + for i in range(n): + empty = "" + # Hopefully the optimizer can't guess what the value is. + # f is always "", but we can only prove that it's a string: + f = empty + empty + trace.append("A") + if not f: # Kept. + trace.append("B") + if not f: # Removed! + trace.append("C") + trace.append("D") + if f: # Removed! + trace.append("X") + trace.append("E") + trace.append("F") + if f: # Removed! 
+ trace.append("X") + trace.append("G") + return trace + + trace, ex = self._run_with_optimizer(f, TIER2_THRESHOLD) + self.assertEqual(trace, list("ABCDEFG") * TIER2_THRESHOLD) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + # Only one guard remains: + self.assertEqual(uops.count("_GUARD_IS_FALSE_POP"), 1) + self.assertEqual(uops.count("_GUARD_IS_TRUE_POP"), 0) + # But all of the appends we care about are still there: + self.assertEqual(uops.count("_CALL_LIST_APPEND"), len("ABCDEFG")) + def global_identity(x): return x From 23695d6705cda01e3eb68a9cb98028bf0c254035 Mon Sep 17 00:00:00 2001 From: fluhus Date: Sat, 22 Feb 2025 13:02:30 -0800 Subject: [PATCH 2/8] Improve test to outsmart JIT --- Lib/test/test_capi/test_opt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 904c216f0f2de0..55509e5888cf1b 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1503,10 +1503,10 @@ def test_narrow_type_to_constant_str_empty(self): def f(n): trace = [] for i in range(n): - empty = "" + dummy = "aaa" # Hopefully the optimizer can't guess what the value is. # f is always "", but we can only prove that it's a string: - f = empty + empty + f = dummy[:0] trace.append("A") if not f: # Kept. 
trace.append("B") From fcf411621544952cb4224e5eb81b4f41de6f97cd Mon Sep 17 00:00:00 2001 From: fluhus Date: Sat, 22 Feb 2025 13:30:42 -0800 Subject: [PATCH 3/8] Add optimization path to _TO_BOOL_STR --- Python/optimizer_bytecodes.c | 13 +++++++++++++ Python/optimizer_cases.c.h | 14 ++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index c4e4b28e50d211..7cbbf26fb23c19 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -429,6 +429,19 @@ dummy_func(void) { res = sym_new_type(ctx, &PyBool_Type); sym_set_type(value, &PyUnicode_Type); } + if (!sym_is_const(value)) { + assert(sym_matches_type(value, &PyUnicode_Type)); + int next_opcode = (this_instr + 1)->opcode; + assert(next_opcode == _CHECK_VALIDITY_AND_SET_IP); + next_opcode = (this_instr + 2)->opcode; + // If the next uop is a guard, we can narrow value. However, we + // *can't* narrow res, since that would cause the guard to be + // removed and the narrowed value to be invalid: + if (next_opcode == _GUARD_IS_FALSE_POP) { + sym_set_const(value, Py_GetConstant(Py_CONSTANT_EMPTY_STR)); + res = sym_new_type(ctx, &PyUnicode_Type); + } + } } op(_UNARY_NOT, (value -- res)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 397184dd87ad7d..5a7cf895169998 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -216,6 +216,20 @@ res = sym_new_type(ctx, &PyBool_Type); sym_set_type(value, &PyUnicode_Type); } + if (!sym_is_const(value)) { + assert(sym_matches_type(value, &PyUnicode_Type)); + int next_opcode = (this_instr + 1)->opcode; + assert(next_opcode == _CHECK_VALIDITY_AND_SET_IP); + next_opcode = (this_instr + 2)->opcode; + // If the next uop is a guard, we can narrow value. 
However, we + // *can't* narrow res, since that would cause the guard to be + // removed and the narrowed value to be invalid: + if (next_opcode == _GUARD_IS_FALSE_POP) { + stack_pointer[-1] = res; + sym_set_const(value, Py_GetConstant(Py_CONSTANT_EMPTY_STR)); + res = sym_new_type(ctx, &PyUnicode_Type); + } + } stack_pointer[-1] = res; break; } From e23b84a3fecf824f43cd900c0c21d020c2de0cac Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sat, 22 Feb 2025 22:49:01 +0000 Subject: [PATCH 4/8] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2025-02-22-22-49-00.gh-issue-130415.WyxBYS.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-02-22-22-49-00.gh-issue-130415.WyxBYS.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-02-22-22-49-00.gh-issue-130415.WyxBYS.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-22-22-49-00.gh-issue-130415.WyxBYS.rst new file mode 100644 index 00000000000000..2559aa8e212bfa --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-02-22-22-49-00.gh-issue-130415.WyxBYS.rst @@ -0,0 +1 @@ +Improve JIT's ability to optimize strings in boolean contexts. 
From ab263fe32a73500a8760ebb0d4f9e306247a4543 Mon Sep 17 00:00:00 2001 From: fluhus Date: Sat, 22 Feb 2025 15:02:12 -0800 Subject: [PATCH 5/8] Correct res type and change `f` var to `empty` --- Lib/test/test_capi/test_opt.py | 12 ++++++------ Python/optimizer_bytecodes.c | 2 +- Python/optimizer_cases.c.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 55509e5888cf1b..71f5b092a2a1d0 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1505,19 +1505,19 @@ def f(n): for i in range(n): dummy = "aaa" # Hopefully the optimizer can't guess what the value is. - # f is always "", but we can only prove that it's a string: - f = dummy[:0] + # empty is always "", but we can only prove that it's a string: + empty = dummy[:0] trace.append("A") - if not f: # Kept. + if not empty: # Kept. trace.append("B") - if not f: # Removed! + if not empty: # Removed! trace.append("C") trace.append("D") - if f: # Removed! + if empty: # Removed! trace.append("X") trace.append("E") trace.append("F") - if f: # Removed! + if empty: # Removed! 
trace.append("X") trace.append("G") return trace diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 7cbbf26fb23c19..1795087521a7a5 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -439,7 +439,7 @@ dummy_func(void) { // removed and the narrowed value to be invalid: if (next_opcode == _GUARD_IS_FALSE_POP) { sym_set_const(value, Py_GetConstant(Py_CONSTANT_EMPTY_STR)); - res = sym_new_type(ctx, &PyUnicode_Type); + res = sym_new_type(ctx, &PyBool_Type); } } } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 5a7cf895169998..d47a6c1206e291 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -227,7 +227,7 @@ if (next_opcode == _GUARD_IS_FALSE_POP) { stack_pointer[-1] = res; sym_set_const(value, Py_GetConstant(Py_CONSTANT_EMPTY_STR)); - res = sym_new_type(ctx, &PyUnicode_Type); + res = sym_new_type(ctx, &PyBool_Type); } } stack_pointer[-1] = res; From d5bfcdcced0074e78dbe99729b12ee12b313eb9e Mon Sep 17 00:00:00 2001 From: fluhus Date: Sat, 22 Feb 2025 16:39:03 -0800 Subject: [PATCH 6/8] Add Amit Lavon to ACKS --- Misc/ACKS | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/ACKS b/Misc/ACKS index 2a68b69f161041..e623c3e28d0554 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1063,6 +1063,7 @@ Amos Latteier Piers Lauder Ben Laurie Yoni Lavi +Amit Lavon Simon Law Julia Lawall Chris Lawrence From 601392df5fadcc94c1ef91e570123bdac1b9d3f2 Mon Sep 17 00:00:00 2001 From: fluhus Date: Sun, 2 Mar 2025 17:09:46 -0800 Subject: [PATCH 7/8] Assign truthiness to empty strings in JIT --- Python/optimizer_bytecodes.c | 15 +-------------- Python/optimizer_cases.c.h | 16 +--------------- Python/optimizer_symbols.c | 3 +++ 3 files changed, 5 insertions(+), 29 deletions(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 1795087521a7a5..d29d547c5be6f3 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -426,22 +426,9 @@ 
dummy_func(void) { op(_TO_BOOL_STR, (value -- res)) { if (!optimize_to_bool(this_instr, ctx, value, &res)) { - res = sym_new_type(ctx, &PyBool_Type); + res = sym_new_truthiness(ctx, value, true); sym_set_type(value, &PyUnicode_Type); } - if (!sym_is_const(value)) { - assert(sym_matches_type(value, &PyUnicode_Type)); - int next_opcode = (this_instr + 1)->opcode; - assert(next_opcode == _CHECK_VALIDITY_AND_SET_IP); - next_opcode = (this_instr + 2)->opcode; - // If the next uop is a guard, we can narrow value. However, we - // *can't* narrow res, since that would cause the guard to be - // removed and the narrowed value to be invalid: - if (next_opcode == _GUARD_IS_FALSE_POP) { - sym_set_const(value, Py_GetConstant(Py_CONSTANT_EMPTY_STR)); - res = sym_new_type(ctx, &PyBool_Type); - } - } } op(_UNARY_NOT, (value -- res)) { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index d47a6c1206e291..6bd4c4ae3c0200 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -213,23 +213,9 @@ JitOptSymbol *res; value = stack_pointer[-1]; if (!optimize_to_bool(this_instr, ctx, value, &res)) { - res = sym_new_type(ctx, &PyBool_Type); + res = sym_new_truthiness(ctx, value, true); sym_set_type(value, &PyUnicode_Type); } - if (!sym_is_const(value)) { - assert(sym_matches_type(value, &PyUnicode_Type)); - int next_opcode = (this_instr + 1)->opcode; - assert(next_opcode == _CHECK_VALIDITY_AND_SET_IP); - next_opcode = (this_instr + 2)->opcode; - // If the next uop is a guard, we can narrow value. 
However, we - // *can't* narrow res, since that would cause the guard to be - // removed and the narrowed value to be invalid: - if (next_opcode == _GUARD_IS_FALSE_POP) { - stack_pointer[-1] = res; - sym_set_const(value, Py_GetConstant(Py_CONSTANT_EMPTY_STR)); - res = sym_new_type(ctx, &PyBool_Type); - } - } stack_pointer[-1] = res; break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index e8a3b918e56a94..0beb8028bd7921 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -299,6 +299,9 @@ _Py_uop_sym_set_const(JitOptContext *ctx, JitOptSymbol *sym, PyObject *const_val else if (type == &PyBool_Type) { _Py_uop_sym_set_const(ctx, value, Py_False); } + else if (type == &PyUnicode_Type) { + _Py_uop_sym_set_const(ctx, value, Py_GetConstant(Py_CONSTANT_EMPTY_STR)); + } // TODO: More types (GH-130415)! make_const(sym, const_val); return; From 58884ab4e9066b3e4a2cc168c1c7ee4b8bec1f0c Mon Sep 17 00:00:00 2001 From: fluhus Date: Sun, 2 Mar 2025 17:24:07 -0800 Subject: [PATCH 8/8] Improve unprovable empty string in JIT test --- Lib/test/test_capi/test_opt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 71f5b092a2a1d0..2028384fec5547 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1503,10 +1503,10 @@ def test_narrow_type_to_constant_str_empty(self): def f(n): trace = [] for i in range(n): - dummy = "aaa" # Hopefully the optimizer can't guess what the value is. # empty is always "", but we can only prove that it's a string: - empty = dummy[:0] + false = i == TIER2_THRESHOLD + empty = "X"[:false] trace.append("A") if not empty: # Kept. trace.append("B")