Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
gh-140797 forbid capturing groups in re.Scanner lexicon patterns- Rev-7
  • Loading branch information
Abhi210 committed Nov 4, 2025
commit feaee4e3e82dd7727ea8173178e0513329fd7bd7
8 changes: 2 additions & 6 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,12 +398,8 @@ def __init__(self, lexicon, flags=0):
s.flags = flags
for phrase, action in lexicon:
sub_pattern = _parser.parse(phrase, flags)
if sub_pattern.state.groups != 1: # <- 1 means always has \0
raise ValueError(
"re.Scanner lexicon patterns must not contain capturing groups;\n"
"Please use non-capturing groups (?:...) instead"
)

if sub_pattern.state.groups != 1:
raise ValueError("Cannot use capturing groups in re.Scanner")
gid = s.opengroup()
Comment thread
Abhi210 marked this conversation as resolved.
p.append(_parser.SubPattern(s, [
(SUBPATTERN, (gid, 0, 0, sub_pattern)),
Expand Down
23 changes: 11 additions & 12 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -1639,25 +1639,24 @@ def s_int(scanner, token): return int(token)
(['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
'op+', 'bar'], ''))

def test_bug_140797(self):
#bug 140797: remove capturing groups compilation from re.Scanner
def test_bug_gh140797(self):
# gh140797: capturing groups are not allowed in re.Scanner
Comment thread
Abhi210 marked this conversation as resolved.
Outdated

#Presence of Capturing group throws an error
lex = [("(a)b", None)]
with self.assertRaises(ValueError):
Scanner(lex)
msg = "Cannot use capturing groups in re.Scanner"
Comment thread
Abhi210 marked this conversation as resolved.
Outdated
# Capturing group throws an error
with self.assertRaisesRegex(ValueError, msg):
Scanner([("(a)b", None)])

#Presence of non-capturing groups should pass normally
# Named Group
with self.assertRaisesRegex(ValueError, msg):
Scanner([("(?P<name>a)", None)])

# Non-capturing groups should pass normally
s = Scanner([("(?:a)b", lambda scanner, token: token)])
result, rem = s.scan("ab")
self.assertEqual(result,['ab'])
self.assertEqual(rem,'')
Comment thread
serhiy-storchaka marked this conversation as resolved.

#Testing a very complex capturing group
pattern= "(?P<name>a)"
with self.assertRaises(ValueError):
Scanner([(pattern, None)])

def test_bug_448951(self):
# bug 448951 (similar to 429357, but with single char match)
# (Also test greedy matches.)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
The re.Scanner class now forbids regular expressions containing capturing
groups in its lexicon patterns. Patterns using capturing groups could
previously lead to crashes with segmentation fault. Use non-capturing groups
(?:...) instead.
The undocumented :class:`!re.Scanner` class now forbids regular expressions containing capturing groups in its lexicon patterns. Patterns using capturing groups could
previously lead to crashes with a segmentation fault. Use non-capturing groups (?:...) instead.
Loading
You are viewing a condensed version of this merge commit. You can view the full changes here.