Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a380dc6

Browse files
[3.13] gh-124130: Increase test coverage for \b and \B in regular expressions (GH-124330) (GH-124413)
(cherry picked from commit b82f076) Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent 566983d commit a380dc6

File tree

1 file changed

+113
-7
lines changed

1 file changed

+113
-7
lines changed

Lib/test/test_re.py

Lines changed: 113 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -884,31 +884,137 @@ def test_named_unicode_escapes(self):
884884
self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
885885
self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)
886886

887-
def test_string_boundaries(self):
887+
def test_word_boundaries(self):
888888
# See http://bugs.python.org/issue10713
889-
self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
890-
"abc")
889+
self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1), "abc")
890+
self.assertEqual(re.search(r"\b(abc)\b", "abc", re.ASCII).group(1), "abc")
891+
self.assertEqual(re.search(br"\b(abc)\b", b"abc").group(1), b"abc")
892+
self.assertEqual(re.search(br"\b(abc)\b", b"abc", re.LOCALE).group(1), b"abc")
893+
self.assertEqual(re.search(r"\b(ьюя)\b", "ьюя").group(1), "ьюя")
894+
self.assertIsNone(re.search(r"\b(ьюя)\b", "ьюя", re.ASCII))
895+
# There's a word boundary between a word and a non-word.
896+
self.assertTrue(re.match(r".\b", "a="))
897+
self.assertTrue(re.match(r".\b", "a=", re.ASCII))
898+
self.assertTrue(re.match(br".\b", b"a="))
899+
self.assertTrue(re.match(br".\b", b"a=", re.LOCALE))
900+
self.assertTrue(re.match(r".\b", "я="))
901+
self.assertIsNone(re.match(r".\b", "я=", re.ASCII))
902+
# There's a word boundary between a non-word and a word.
903+
self.assertTrue(re.match(r".\b", "=a"))
904+
self.assertTrue(re.match(r".\b", "=a", re.ASCII))
905+
self.assertTrue(re.match(br".\b", b"=a"))
906+
self.assertTrue(re.match(br".\b", b"=a", re.LOCALE))
907+
self.assertTrue(re.match(r".\b", "=я"))
908+
self.assertIsNone(re.match(r".\b", "=я", re.ASCII))
909+
# There is no word boundary inside a word.
910+
self.assertIsNone(re.match(r".\b", "ab"))
911+
self.assertIsNone(re.match(r".\b", "ab", re.ASCII))
912+
self.assertIsNone(re.match(br".\b", b"ab"))
913+
self.assertIsNone(re.match(br".\b", b"ab", re.LOCALE))
914+
self.assertIsNone(re.match(r".\b", "юя"))
915+
self.assertIsNone(re.match(r".\b", "юя", re.ASCII))
916+
# There is no word boundary between non-word characters.
917+
self.assertIsNone(re.match(r".\b", "=-"))
918+
self.assertIsNone(re.match(r".\b", "=-", re.ASCII))
919+
self.assertIsNone(re.match(br".\b", b"=-"))
920+
self.assertIsNone(re.match(br".\b", b"=-", re.LOCALE))
921+
# There is no non-boundary match between a word and a non-word.
922+
self.assertIsNone(re.match(r".\B", "a="))
923+
self.assertIsNone(re.match(r".\B", "a=", re.ASCII))
924+
self.assertIsNone(re.match(br".\B", b"a="))
925+
self.assertIsNone(re.match(br".\B", b"a=", re.LOCALE))
926+
self.assertIsNone(re.match(r".\B", "я="))
927+
self.assertTrue(re.match(r".\B", "я=", re.ASCII))
928+
# There is no non-boundary match between a non-word and a word.
929+
self.assertIsNone(re.match(r".\B", "=a"))
930+
self.assertIsNone(re.match(r".\B", "=a", re.ASCII))
931+
self.assertIsNone(re.match(br".\B", b"=a"))
932+
self.assertIsNone(re.match(br".\B", b"=a", re.LOCALE))
933+
self.assertIsNone(re.match(r".\B", "=я"))
934+
self.assertTrue(re.match(r".\B", "=я", re.ASCII))
935+
# There's a non-boundary match inside a word.
936+
self.assertTrue(re.match(r".\B", "ab"))
937+
self.assertTrue(re.match(r".\B", "ab", re.ASCII))
938+
self.assertTrue(re.match(br".\B", b"ab"))
939+
self.assertTrue(re.match(br".\B", b"ab", re.LOCALE))
940+
self.assertTrue(re.match(r".\B", "юя"))
941+
self.assertTrue(re.match(r".\B", "юя", re.ASCII))
942+
# There's a non-boundary match between non-word characters.
943+
self.assertTrue(re.match(r".\B", "=-"))
944+
self.assertTrue(re.match(r".\B", "=-", re.ASCII))
945+
self.assertTrue(re.match(br".\B", b"=-"))
946+
self.assertTrue(re.match(br".\B", b"=-", re.LOCALE))
891947
# There's a word boundary at the start of a string.
892948
self.assertTrue(re.match(r"\b", "abc"))
949+
self.assertTrue(re.match(r"\b", "abc", re.ASCII))
950+
self.assertTrue(re.match(br"\b", b"abc"))
951+
self.assertTrue(re.match(br"\b", b"abc", re.LOCALE))
952+
self.assertTrue(re.match(r"\b", "ьюя"))
953+
self.assertIsNone(re.match(r"\b", "ьюя", re.ASCII))
954+
# There's a word boundary at the end of a string.
955+
self.assertTrue(re.fullmatch(r".+\b", "abc"))
956+
self.assertTrue(re.fullmatch(r".+\b", "abc", re.ASCII))
957+
self.assertTrue(re.fullmatch(br".+\b", b"abc"))
958+
self.assertTrue(re.fullmatch(br".+\b", b"abc", re.LOCALE))
959+
self.assertTrue(re.fullmatch(r".+\b", "ьюя"))
960+
self.assertIsNone(re.search(r"\b", "ьюя", re.ASCII))
893961
# A non-empty string includes a non-boundary zero-length match.
894-
self.assertTrue(re.search(r"\B", "abc"))
962+
self.assertEqual(re.search(r"\B", "abc").span(), (1, 1))
963+
self.assertEqual(re.search(r"\B", "abc", re.ASCII).span(), (1, 1))
964+
self.assertEqual(re.search(br"\B", b"abc").span(), (1, 1))
965+
self.assertEqual(re.search(br"\B", b"abc", re.LOCALE).span(), (1, 1))
966+
self.assertEqual(re.search(r"\B", "ьюя").span(), (1, 1))
967+
self.assertEqual(re.search(r"\B", "ьюя", re.ASCII).span(), (0, 0))
895968
# There is no non-boundary match at the start of a string.
896-
self.assertFalse(re.match(r"\B", "abc"))
969+
self.assertIsNone(re.match(r"\B", "abc"))
970+
self.assertIsNone(re.match(r"\B", "abc", re.ASCII))
971+
self.assertIsNone(re.match(br"\B", b"abc"))
972+
self.assertIsNone(re.match(br"\B", b"abc", re.LOCALE))
973+
self.assertIsNone(re.match(r"\B", "ьюя"))
974+
self.assertTrue(re.match(r"\B", "ьюя", re.ASCII))
975+
# There is no non-boundary match at the end of a string.
976+
self.assertIsNone(re.fullmatch(r".+\B", "abc"))
977+
self.assertIsNone(re.fullmatch(r".+\B", "abc", re.ASCII))
978+
self.assertIsNone(re.fullmatch(br".+\B", b"abc"))
979+
self.assertIsNone(re.fullmatch(br".+\B", b"abc", re.LOCALE))
980+
self.assertIsNone(re.fullmatch(r".+\B", "ьюя"))
981+
self.assertTrue(re.fullmatch(r".+\B", "ьюя", re.ASCII))
897982
# However, an empty string contains no word boundaries, and also no
898983
# non-boundaries.
899-
self.assertIsNone(re.search(r"\B", ""))
984+
self.assertIsNone(re.search(r"\b", ""))
985+
self.assertIsNone(re.search(r"\b", "", re.ASCII))
986+
self.assertIsNone(re.search(br"\b", b""))
987+
self.assertIsNone(re.search(br"\b", b"", re.LOCALE))
900988
# This one is questionable and different from the perlre behaviour,
901989
# but describes current behavior.
902-
self.assertIsNone(re.search(r"\b", ""))
990+
self.assertIsNone(re.search(r"\B", ""))
991+
self.assertIsNone(re.search(r"\B", "", re.ASCII))
992+
self.assertIsNone(re.search(br"\B", b""))
993+
self.assertIsNone(re.search(br"\B", b"", re.LOCALE))
903994
# A single word-character string has two boundaries, but no
904995
# non-boundary gaps.
905996
self.assertEqual(len(re.findall(r"\b", "a")), 2)
997+
self.assertEqual(len(re.findall(r"\b", "a", re.ASCII)), 2)
998+
self.assertEqual(len(re.findall(br"\b", b"a")), 2)
999+
self.assertEqual(len(re.findall(br"\b", b"a", re.LOCALE)), 2)
9061000
self.assertEqual(len(re.findall(r"\B", "a")), 0)
1001+
self.assertEqual(len(re.findall(r"\B", "a", re.ASCII)), 0)
1002+
self.assertEqual(len(re.findall(br"\B", b"a")), 0)
1003+
self.assertEqual(len(re.findall(br"\B", b"a", re.LOCALE)), 0)
9071004
# If there are no words, there are no boundaries
9081005
self.assertEqual(len(re.findall(r"\b", " ")), 0)
1006+
self.assertEqual(len(re.findall(r"\b", " ", re.ASCII)), 0)
1007+
self.assertEqual(len(re.findall(br"\b", b" ")), 0)
1008+
self.assertEqual(len(re.findall(br"\b", b" ", re.LOCALE)), 0)
9091009
self.assertEqual(len(re.findall(r"\b", " ")), 0)
1010+
self.assertEqual(len(re.findall(r"\b", " ", re.ASCII)), 0)
1011+
self.assertEqual(len(re.findall(br"\b", b" ")), 0)
1012+
self.assertEqual(len(re.findall(br"\b", b" ", re.LOCALE)), 0)
9101013
# Can match around the whitespace.
9111014
self.assertEqual(len(re.findall(r"\B", " ")), 2)
1015+
self.assertEqual(len(re.findall(r"\B", " ", re.ASCII)), 2)
1016+
self.assertEqual(len(re.findall(br"\B", b" ")), 2)
1017+
self.assertEqual(len(re.findall(br"\B", b" ", re.LOCALE)), 2)
9121018

9131019
def test_bigcharset(self):
9141020
self.assertEqual(re.match("([\u2222\u2223])",

0 commit comments

Comments
 (0)