Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

[3.13] gh-124130: Increase test coverage for \b and \B in regular expressions (GH-124330) #124413

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 113 additions & 7 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,31 +884,137 @@ def test_named_unicode_escapes(self):
self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)

def test_string_boundaries(self):
def test_word_boundaries(self):
# Checks where r"\b" (word boundary) and r"\B" (non-boundary) match.
# Each case is exercised for str patterns (default flags and re.ASCII)
# and for bytes patterns (default flags and re.LOCALE), using Latin and,
# for str only, Cyrillic sample text.
# With re.ASCII the assertions below expect Cyrillic letters to behave as
# non-word characters, which is why the last assertion of several groups
# has the opposite expectation from the rest of its group.
# NOTE(review): this text looks like a flattened diff view -- two `def`
# headers and several superseded assertions appear next to their
# replacements, and indentation is missing. Confirm against the upstream
# file before treating it as runnable code.
# See http://bugs.python.org/issue10713
self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
"abc")
self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1), "abc")
self.assertEqual(re.search(r"\b(abc)\b", "abc", re.ASCII).group(1), "abc")
self.assertEqual(re.search(br"\b(abc)\b", b"abc").group(1), b"abc")
self.assertEqual(re.search(br"\b(abc)\b", b"abc", re.LOCALE).group(1), b"abc")
self.assertEqual(re.search(r"\b(ьюя)\b", "ьюя").group(1), "ьюя")
self.assertIsNone(re.search(r"\b(ьюя)\b", "ьюя", re.ASCII))
# There's a word boundary between a word and a non-word.
self.assertTrue(re.match(r".\b", "a="))
self.assertTrue(re.match(r".\b", "a=", re.ASCII))
self.assertTrue(re.match(br".\b", b"a="))
self.assertTrue(re.match(br".\b", b"a=", re.LOCALE))
self.assertTrue(re.match(r".\b", "я="))
self.assertIsNone(re.match(r".\b", "я=", re.ASCII))
# There's a word boundary between a non-word and a word.
self.assertTrue(re.match(r".\b", "=a"))
self.assertTrue(re.match(r".\b", "=a", re.ASCII))
self.assertTrue(re.match(br".\b", b"=a"))
self.assertTrue(re.match(br".\b", b"=a", re.LOCALE))
self.assertTrue(re.match(r".\b", "=я"))
self.assertIsNone(re.match(r".\b", "=я", re.ASCII))
# There is no word boundary inside a word.
self.assertIsNone(re.match(r".\b", "ab"))
self.assertIsNone(re.match(r".\b", "ab", re.ASCII))
self.assertIsNone(re.match(br".\b", b"ab"))
self.assertIsNone(re.match(br".\b", b"ab", re.LOCALE))
self.assertIsNone(re.match(r".\b", "юя"))
self.assertIsNone(re.match(r".\b", "юя", re.ASCII))
# There is no word boundary between non-word characters.
self.assertIsNone(re.match(r".\b", "=-"))
self.assertIsNone(re.match(r".\b", "=-", re.ASCII))
self.assertIsNone(re.match(br".\b", b"=-"))
self.assertIsNone(re.match(br".\b", b"=-", re.LOCALE))
# There is no non-boundary match between a word and a non-word.
self.assertIsNone(re.match(r".\B", "a="))
self.assertIsNone(re.match(r".\B", "a=", re.ASCII))
self.assertIsNone(re.match(br".\B", b"a="))
self.assertIsNone(re.match(br".\B", b"a=", re.LOCALE))
self.assertIsNone(re.match(r".\B", "я="))
self.assertTrue(re.match(r".\B", "я=", re.ASCII))
# There is no non-boundary match between a non-word and a word.
self.assertIsNone(re.match(r".\B", "=a"))
self.assertIsNone(re.match(r".\B", "=a", re.ASCII))
self.assertIsNone(re.match(br".\B", b"=a"))
self.assertIsNone(re.match(br".\B", b"=a", re.LOCALE))
self.assertIsNone(re.match(r".\B", "=я"))
self.assertTrue(re.match(r".\B", "=я", re.ASCII))
# There's a non-boundary match inside a word.
self.assertTrue(re.match(r".\B", "ab"))
self.assertTrue(re.match(r".\B", "ab", re.ASCII))
self.assertTrue(re.match(br".\B", b"ab"))
self.assertTrue(re.match(br".\B", b"ab", re.LOCALE))
self.assertTrue(re.match(r".\B", "юя"))
self.assertTrue(re.match(r".\B", "юя", re.ASCII))
# There's a non-boundary match between non-word characters.
self.assertTrue(re.match(r".\B", "=-"))
self.assertTrue(re.match(r".\B", "=-", re.ASCII))
self.assertTrue(re.match(br".\B", b"=-"))
self.assertTrue(re.match(br".\B", b"=-", re.LOCALE))
# There's a word boundary at the start of a string.
self.assertTrue(re.match(r"\b", "abc"))
self.assertTrue(re.match(r"\b", "abc", re.ASCII))
self.assertTrue(re.match(br"\b", b"abc"))
self.assertTrue(re.match(br"\b", b"abc", re.LOCALE))
self.assertTrue(re.match(r"\b", "ьюя"))
self.assertIsNone(re.match(r"\b", "ьюя", re.ASCII))
# There's a word boundary at the end of a string.
self.assertTrue(re.fullmatch(r".+\b", "abc"))
self.assertTrue(re.fullmatch(r".+\b", "abc", re.ASCII))
self.assertTrue(re.fullmatch(br".+\b", b"abc"))
self.assertTrue(re.fullmatch(br".+\b", b"abc", re.LOCALE))
self.assertTrue(re.fullmatch(r".+\b", "ьюя"))
# Unlike the fullmatch() calls above, this one uses search(): with
# re.ASCII no boundary is expected anywhere in the Cyrillic string.
self.assertIsNone(re.search(r"\b", "ьюя", re.ASCII))
# A non-empty string includes a non-boundary zero-length match.
self.assertTrue(re.search(r"\B", "abc"))
self.assertEqual(re.search(r"\B", "abc").span(), (1, 1))
self.assertEqual(re.search(r"\B", "abc", re.ASCII).span(), (1, 1))
self.assertEqual(re.search(br"\B", b"abc").span(), (1, 1))
self.assertEqual(re.search(br"\B", b"abc", re.LOCALE).span(), (1, 1))
self.assertEqual(re.search(r"\B", "ьюя").span(), (1, 1))
# With re.ASCII the first non-boundary is expected at position 0.
self.assertEqual(re.search(r"\B", "ьюя", re.ASCII).span(), (0, 0))
# There is no non-boundary match at the start of a string.
self.assertFalse(re.match(r"\B", "abc"))
self.assertIsNone(re.match(r"\B", "abc"))
self.assertIsNone(re.match(r"\B", "abc", re.ASCII))
self.assertIsNone(re.match(br"\B", b"abc"))
self.assertIsNone(re.match(br"\B", b"abc", re.LOCALE))
self.assertIsNone(re.match(r"\B", "ьюя"))
self.assertTrue(re.match(r"\B", "ьюя", re.ASCII))
# There is no non-boundary match at the end of a string.
self.assertIsNone(re.fullmatch(r".+\B", "abc"))
self.assertIsNone(re.fullmatch(r".+\B", "abc", re.ASCII))
self.assertIsNone(re.fullmatch(br".+\B", b"abc"))
self.assertIsNone(re.fullmatch(br".+\B", b"abc", re.LOCALE))
self.assertIsNone(re.fullmatch(r".+\B", "ьюя"))
self.assertTrue(re.fullmatch(r".+\B", "ьюя", re.ASCII))
# However, an empty string contains no word boundaries, and also no
# non-boundaries.
self.assertIsNone(re.search(r"\B", ""))
self.assertIsNone(re.search(r"\b", ""))
self.assertIsNone(re.search(r"\b", "", re.ASCII))
self.assertIsNone(re.search(br"\b", b""))
self.assertIsNone(re.search(br"\b", b"", re.LOCALE))
# This one is questionable and different from the perlre behaviour,
# but describes current behavior.
self.assertIsNone(re.search(r"\b", ""))
self.assertIsNone(re.search(r"\B", ""))
self.assertIsNone(re.search(r"\B", "", re.ASCII))
self.assertIsNone(re.search(br"\B", b""))
self.assertIsNone(re.search(br"\B", b"", re.LOCALE))
# A single word-character string has two boundaries, but no
# non-boundary gaps.
self.assertEqual(len(re.findall(r"\b", "a")), 2)
self.assertEqual(len(re.findall(r"\b", "a", re.ASCII)), 2)
self.assertEqual(len(re.findall(br"\b", b"a")), 2)
self.assertEqual(len(re.findall(br"\b", b"a", re.LOCALE)), 2)
self.assertEqual(len(re.findall(r"\B", "a")), 0)
self.assertEqual(len(re.findall(r"\B", "a", re.ASCII)), 0)
self.assertEqual(len(re.findall(br"\B", b"a")), 0)
self.assertEqual(len(re.findall(br"\B", b"a", re.LOCALE)), 0)
# If there are no words, there are no boundaries
self.assertEqual(len(re.findall(r"\b", " ")), 0)
self.assertEqual(len(re.findall(r"\b", " ", re.ASCII)), 0)
self.assertEqual(len(re.findall(br"\b", b" ")), 0)
self.assertEqual(len(re.findall(br"\b", b" ", re.LOCALE)), 0)
# NOTE(review): the next four assertions repeat the four above verbatim;
# presumably the whitespace literal differed before this text was
# flattened -- confirm against the upstream file.
self.assertEqual(len(re.findall(r"\b", " ")), 0)
self.assertEqual(len(re.findall(r"\b", " ", re.ASCII)), 0)
self.assertEqual(len(re.findall(br"\b", b" ")), 0)
self.assertEqual(len(re.findall(br"\b", b" ", re.LOCALE)), 0)
# Can match around the whitespace.
self.assertEqual(len(re.findall(r"\B", " ")), 2)
self.assertEqual(len(re.findall(r"\B", " ", re.ASCII)), 2)
self.assertEqual(len(re.findall(br"\B", b" ")), 2)
self.assertEqual(len(re.findall(br"\B", b" ", re.LOCALE)), 2)

def test_bigcharset(self):
self.assertEqual(re.match("([\u2222\u2223])",
Expand Down
Loading