From bfcee6c1e3c9973bed8ba45b0cded6ab884a8bed Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 9 Aug 2023 08:47:57 +0300 Subject: [PATCH] [3.12] gh-106052: Fix bug in the matching of possessive quantifiers (gh-106515) It did not work in the case of a subpattern containing backtracking. Temporarily implement possessive quantifiers as equivalent greedy quantifiers in atomic groups. (cherry picked from commit 7b6e34e5baeb4162815ffa4d943b09a58e3f6580) Co-authored-by: Serhiy Storchaka --- Lib/re/_compiler.py | 7 +++++++ Lib/test/test_re.py | 12 ++++++++++++ .../2023-07-07-14-52-31.gh-issue-106052.ak8nbs.rst | 2 ++ 3 files changed, 21 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-07-07-14-52-31.gh-issue-106052.ak8nbs.rst diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index d8e0d2fdefdcca..e30740b9c30b0e 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -100,6 +100,13 @@ def _compile(code, pattern, flags): emit(ANY_ALL) else: emit(ANY) + elif op is POSSESSIVE_REPEAT: + # gh-106052: Possessive quantifiers do not work when the + # subpattern contains backtracking, e.g. "(?:ab?c)*+". + # Implement it as equivalent greedy quantifier in an atomic group. 
+ p = [(MAX_REPEAT, av)] + p = [(ATOMIC_GROUP, p)] + _compile(code, p, flags) elif op in REPEATING_CODES: if flags & SRE_FLAG_TEMPLATE: raise error("internal: unsupported template operator %r" % (op,)) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 50b9ad701f0ce7..85541f4451d031 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2365,6 +2365,16 @@ def test_template_function_and_flag_is_deprecated(self): self.assertTrue(template_re1.match('ahoy')) self.assertFalse(template_re1.match('nope')) + def test_bug_gh106052(self): + self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2)) + self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2)) + self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2)) + self.assertEqual(re.match("(?:ab?c)*+", "aca").span(), (0, 2)) + self.assertEqual(re.match("(?>(?:ab?c)?)", "a").span(), (0, 0)) + self.assertEqual(re.match("(?:ab?c)?+", "a").span(), (0, 0)) + self.assertEqual(re.match("(?>(?:ab?c){1,3})", "aca").span(), (0, 2)) + self.assertEqual(re.match("(?:ab?c){1,3}+", "aca").span(), (0, 2)) + @unittest.skipIf(multiprocessing is None, 'test requires multiprocessing') def test_regression_gh94675(self): pattern = re.compile(r'(?<=[({}])(((//[^\n]*)?[\n])([\000-\040])*)*' @@ -2461,6 +2471,7 @@ def test_atomic_group(self): 17: SUCCESS ''') + @unittest.expectedFailure # gh-106052 def test_possesive_repeat_one(self): self.assertEqual(get_debug_out(r'a?+'), '''\ POSSESSIVE_REPEAT 0 1 @@ -2473,6 +2484,7 @@ def test_possesive_repeat_one(self): 12: SUCCESS ''') + @unittest.expectedFailure # gh-106052 def test_possesive_repeat(self): self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\ POSSESSIVE_REPEAT 0 1 diff --git a/Misc/NEWS.d/next/Library/2023-07-07-14-52-31.gh-issue-106052.ak8nbs.rst b/Misc/NEWS.d/next/Library/2023-07-07-14-52-31.gh-issue-106052.ak8nbs.rst new file mode 100644 index 00000000000000..f2d4c2f7b18ec7 --- /dev/null +++ 
b/Misc/NEWS.d/next/Library/2023-07-07-14-52-31.gh-issue-106052.ak8nbs.rst @@ -0,0 +1,2 @@ +:mod:`re` module: fix the matching of possessive quantifiers in the case of +a subpattern containing backtracking.