From 007e7d749bb0f3f289920666e5f894735316a8a8 Mon Sep 17 00:00:00 2001 From: Seonkyo Ok Date: Sun, 12 Mar 2023 07:25:28 +0900 Subject: [PATCH 1/8] pythongh-100061 Restore the global Input Stream pointer --- Lib/test/test_re.py | 2 ++ Modules/_sre/sre_lib.h | 3 +++ 2 files changed, 5 insertions(+) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 11628a236ade9a..3badead91ac0da 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2303,6 +2303,8 @@ def test_possessive_quantifiers(self): self.assertIsNone(re.match("^x{}+$", "xxx")) self.assertTrue(re.match("^x{}+$", "x{}")) + self.assertEqual(re.match('((.(?!C))++)', 'ABCD').span(), (0, 1)) + def test_fullmatch_possessive_quantifiers(self): self.assertTrue(re.fullmatch(r'a++', 'a')) self.assertTrue(re.fullmatch(r'a*+', 'a')) diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index fb4c18b63d643d..9e0eeb5c4e1be0 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1334,6 +1334,9 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) MARK_POP(ctx->lastmark); LASTMARK_RESTORE(); + /* Return to the previous Input Stream pointer */ + state->ptr = ptr; + /* We have sufficient matches, so exit loop. */ break; } From 43dc3cf2627c9909f45a4f156e0d542dcc2d9389 Mon Sep 17 00:00:00 2001 From: Seonkyo Ok Date: Sun, 12 Mar 2023 07:32:09 +0900 Subject: [PATCH 2/8] pythongh-100061 Refine comment --- Modules/_sre/sre_lib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index 9e0eeb5c4e1be0..58951062150c5d 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1334,7 +1334,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) MARK_POP(ctx->lastmark); LASTMARK_RESTORE(); - /* Return to the previous Input Stream pointer */ + /* Restore the global Input Stream pointer */ state->ptr = ptr; /* We have sufficient matches, so exit loop. 
*/ From d33c01266b3687239ca56db3efef443543e9ca8d Mon Sep 17 00:00:00 2001 From: Seonkyo Ok Date: Mon, 13 Mar 2023 13:12:44 +0900 Subject: [PATCH 3/8] gh-100061 Add github issue number on comments --- Lib/test/test_re.py | 1 + Modules/_sre/sre_lib.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 3badead91ac0da..b846128813bdb6 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2303,6 +2303,7 @@ def test_possessive_quantifiers(self): self.assertIsNone(re.match("^x{}+$", "xxx")) self.assertTrue(re.match("^x{}+$", "x{}")) + # gh-100061 self.assertEqual(re.match('((.(?!C))++)', 'ABCD').span(), (0, 1)) def test_fullmatch_possessive_quantifiers(self): diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index 58951062150c5d..01e53476640200 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1334,7 +1334,9 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) MARK_POP(ctx->lastmark); LASTMARK_RESTORE(); - /* Restore the global Input Stream pointer */ + /* Restore the global Input Stream pointer + since it can change after the jump. + See gh-100061 */ state->ptr = ptr; /* We have sufficient matches, so exit loop. */ From 8a958f125f19e6bf6870be3769279e6497a46f22 Mon Sep 17 00:00:00 2001 From: Seonkyo Ok Date: Mon, 13 Mar 2023 23:52:24 +0900 Subject: [PATCH 4/8] gh-100061 Remove unneeded comment --- Modules/_sre/sre_lib.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index 01e53476640200..e83149825e2cdb 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1335,8 +1335,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) LASTMARK_RESTORE(); /* Restore the global Input Stream pointer - since it can change after the jump. - See gh-100061 */ + since it can change after jumps. */ state->ptr = ptr; /* We have sufficient matches, so exit loop. 
*/ From b5476e416f23b8f0d7944e93967580c6f89fbe02 Mon Sep 17 00:00:00 2001 From: Seonkyo Ok Date: Tue, 14 Mar 2023 00:07:24 +0900 Subject: [PATCH 5/8] gh-100061 Refine test case --- Lib/test/test_re.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index b846128813bdb6..99b0727ad83a99 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2304,7 +2304,7 @@ def test_possessive_quantifiers(self): self.assertTrue(re.match("^x{}+$", "x{}")) # gh-100061 - self.assertEqual(re.match('((.(?!C))++)', 'ABCD').span(), (0, 1)) + self.assertEqual(re.match('((.(?!D))++)', 'ABCDE').span(), (0, 2)) def test_fullmatch_possessive_quantifiers(self): self.assertTrue(re.fullmatch(r'a++', 'a')) From 2ffcc1679d50f05dc831bdd641bf910f417ea5e3 Mon Sep 17 00:00:00 2001 From: Seonkyo Ok Date: Tue, 14 Mar 2023 01:20:25 +0900 Subject: [PATCH 6/8] Add blurb entry --- .../next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst diff --git a/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst new file mode 100644 index 00000000000000..dfed34f6ae9768 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst @@ -0,0 +1,2 @@ +Fix a bug that causes wrong matches for regular expressions with possessive +qualifier. From 86ca7e71fc7b5ea195f731355849df9ccec01f07 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 16 Aug 2023 09:19:01 +0300 Subject: [PATCH 7/8] Re-enable possessive quantifiers in the compiler. 
--- Lib/re/_compiler.py | 7 ------- Lib/test/test_re.py | 2 -- 2 files changed, 9 deletions(-) diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index f5fd160ba00435..d0a4c55caf6e41 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -100,13 +100,6 @@ def _compile(code, pattern, flags): emit(ANY_ALL) else: emit(ANY) - elif op is POSSESSIVE_REPEAT: - # gh-106052: Possessive quantifiers do not work when the - # subpattern contains backtracking, i.e. "(?:ab?c)*+". - # Implement it as equivalent greedy qualifier in atomic group. - p = [(MAX_REPEAT, av)] - p = [(ATOMIC_GROUP, p)] - _compile(code, p, flags) elif op in REPEATING_CODES: if _simple(av[2]): emit(REPEATING_CODES[op][2]) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 0c6767e651a30d..b3dc86a342b189 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2454,7 +2454,6 @@ def test_atomic_group(self): 17: SUCCESS ''') - @unittest.expectedFailure # gh-106052 def test_possesive_repeat_one(self): self.assertEqual(get_debug_out(r'a?+'), '''\ POSSESSIVE_REPEAT 0 1 @@ -2467,7 +2466,6 @@ def test_possesive_repeat_one(self): 12: SUCCESS ''') - @unittest.expectedFailure # gh-106052 def test_possesive_repeat(self): self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\ POSSESSIVE_REPEAT 0 1 From 6656aa20d530400a2f6f2c9cd88934b8aa06a4ec Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 16 Aug 2023 09:43:39 +0300 Subject: [PATCH 8/8] Refine tests. 
--- Lib/test/test_re.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index b3dc86a342b189..042f97f57ecf18 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2248,9 +2248,6 @@ def test_possessive_quantifiers(self): self.assertIsNone(re.match("^x{}+$", "xxx")) self.assertTrue(re.match("^x{}+$", "x{}")) - # gh-100061 - self.assertEqual(re.match('((.(?!D))++)', 'ABCDE').span(), (0, 2)) - def test_fullmatch_possessive_quantifiers(self): self.assertTrue(re.fullmatch(r'a++', 'a')) self.assertTrue(re.fullmatch(r'a*+', 'a')) @@ -2345,7 +2342,17 @@ def test_bug_gh91616(self): self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer self.assertTrue(re.fullmatch(r'(?s:(?=(?P.*?\.))(?P=g0).*)\Z', "a.txt")) - def test_bug_gh106052(self): + def test_bug_gh100061(self): + # gh-100061 + self.assertEqual(re.match('(?>(?:.(?!D))+)', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D))++', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?>(?:.(?!D))*)', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D))*+', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?>(?:.(?!D))?)', 'CDE').span(), (0, 0)) + self.assertEqual(re.match('(?:.(?!D))?+', 'CDE').span(), (0, 0)) + self.assertEqual(re.match('(?>(?:.(?!D)){1,3})', 'ABCDE').span(), (0, 2)) + self.assertEqual(re.match('(?:.(?!D)){1,3}+', 'ABCDE').span(), (0, 2)) + # gh-106052 self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2)) self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2)) self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2))