@@ -884,31 +884,137 @@ def test_named_unicode_escapes(self):
884
884
self .checkPatternError (br'\N{LESS-THAN SIGN}' , r'bad escape \N' , 0 )
885
885
self .checkPatternError (br'[\N{LESS-THAN SIGN}]' , r'bad escape \N' , 1 )
886
886
887
- def test_string_boundaries (self ):
887
+ def test_word_boundaries (self ):
888
888
# See http://bugs.python.org/issue10713
889
- self .assertEqual (re .search (r"\b(abc)\b" , "abc" ).group (1 ),
890
- "abc" )
889
+ self .assertEqual (re .search (r"\b(abc)\b" , "abc" ).group (1 ), "abc" )
890
+ self .assertEqual (re .search (r"\b(abc)\b" , "abc" , re .ASCII ).group (1 ), "abc" )
891
+ self .assertEqual (re .search (br"\b(abc)\b" , b"abc" ).group (1 ), b"abc" )
892
+ self .assertEqual (re .search (br"\b(abc)\b" , b"abc" , re .LOCALE ).group (1 ), b"abc" )
893
+ self .assertEqual (re .search (r"\b(ьюя)\b" , "ьюя" ).group (1 ), "ьюя" )
894
+ self .assertIsNone (re .search (r"\b(ьюя)\b" , "ьюя" , re .ASCII ))
895
+ # There's a word boundary between a word and a non-word.
896
+ self .assertTrue (re .match (r".\b" , "a=" ))
897
+ self .assertTrue (re .match (r".\b" , "a=" , re .ASCII ))
898
+ self .assertTrue (re .match (br".\b" , b"a=" ))
899
+ self .assertTrue (re .match (br".\b" , b"a=" , re .LOCALE ))
900
+ self .assertTrue (re .match (r".\b" , "я=" ))
901
+ self .assertIsNone (re .match (r".\b" , "я=" , re .ASCII ))
902
+ # There's a word boundary between a non-word and a word.
903
+ self .assertTrue (re .match (r".\b" , "=a" ))
904
+ self .assertTrue (re .match (r".\b" , "=a" , re .ASCII ))
905
+ self .assertTrue (re .match (br".\b" , b"=a" ))
906
+ self .assertTrue (re .match (br".\b" , b"=a" , re .LOCALE ))
907
+ self .assertTrue (re .match (r".\b" , "=я" ))
908
+ self .assertIsNone (re .match (r".\b" , "=я" , re .ASCII ))
909
+ # There is no word boundary inside a word.
910
+ self .assertIsNone (re .match (r".\b" , "ab" ))
911
+ self .assertIsNone (re .match (r".\b" , "ab" , re .ASCII ))
912
+ self .assertIsNone (re .match (br".\b" , b"ab" ))
913
+ self .assertIsNone (re .match (br".\b" , b"ab" , re .LOCALE ))
914
+ self .assertIsNone (re .match (r".\b" , "юя" ))
915
+ self .assertIsNone (re .match (r".\b" , "юя" , re .ASCII ))
916
+ # There is no word boundary between two non-word characters.
917
+ self .assertIsNone (re .match (r".\b" , "=-" ))
918
+ self .assertIsNone (re .match (r".\b" , "=-" , re .ASCII ))
919
+ self .assertIsNone (re .match (br".\b" , b"=-" ))
920
+ self .assertIsNone (re .match (br".\b" , b"=-" , re .LOCALE ))
921
+ # There is no non-boundary match between a word and a non-word.
922
+ self .assertIsNone (re .match (r".\B" , "a=" ))
923
+ self .assertIsNone (re .match (r".\B" , "a=" , re .ASCII ))
924
+ self .assertIsNone (re .match (br".\B" , b"a=" ))
925
+ self .assertIsNone (re .match (br".\B" , b"a=" , re .LOCALE ))
926
+ self .assertIsNone (re .match (r".\B" , "я=" ))
927
+ self .assertTrue (re .match (r".\B" , "я=" , re .ASCII ))
928
+ # There is no non-boundary match between a non-word and a word.
929
+ self .assertIsNone (re .match (r".\B" , "=a" ))
930
+ self .assertIsNone (re .match (r".\B" , "=a" , re .ASCII ))
931
+ self .assertIsNone (re .match (br".\B" , b"=a" ))
932
+ self .assertIsNone (re .match (br".\B" , b"=a" , re .LOCALE ))
933
+ self .assertIsNone (re .match (r".\B" , "=я" ))
934
+ self .assertTrue (re .match (r".\B" , "=я" , re .ASCII ))
935
+ # There's a non-boundary match inside a word.
936
+ self .assertTrue (re .match (r".\B" , "ab" ))
937
+ self .assertTrue (re .match (r".\B" , "ab" , re .ASCII ))
938
+ self .assertTrue (re .match (br".\B" , b"ab" ))
939
+ self .assertTrue (re .match (br".\B" , b"ab" , re .LOCALE ))
940
+ self .assertTrue (re .match (r".\B" , "юя" ))
941
+ self .assertTrue (re .match (r".\B" , "юя" , re .ASCII ))
942
+ # There's a non-boundary match between two non-word characters.
943
+ self .assertTrue (re .match (r".\B" , "=-" ))
944
+ self .assertTrue (re .match (r".\B" , "=-" , re .ASCII ))
945
+ self .assertTrue (re .match (br".\B" , b"=-" ))
946
+ self .assertTrue (re .match (br".\B" , b"=-" , re .LOCALE ))
891
947
# There's a word boundary at the start of a string.
892
948
self .assertTrue (re .match (r"\b" , "abc" ))
949
+ self .assertTrue (re .match (r"\b" , "abc" , re .ASCII ))
950
+ self .assertTrue (re .match (br"\b" , b"abc" ))
951
+ self .assertTrue (re .match (br"\b" , b"abc" , re .LOCALE ))
952
+ self .assertTrue (re .match (r"\b" , "ьюя" ))
953
+ self .assertIsNone (re .match (r"\b" , "ьюя" , re .ASCII ))
954
+ # There's a word boundary at the end of a string.
955
+ self .assertTrue (re .fullmatch (r".+\b" , "abc" ))
956
+ self .assertTrue (re .fullmatch (r".+\b" , "abc" , re .ASCII ))
957
+ self .assertTrue (re .fullmatch (br".+\b" , b"abc" ))
958
+ self .assertTrue (re .fullmatch (br".+\b" , b"abc" , re .LOCALE ))
959
+ self .assertTrue (re .fullmatch (r".+\b" , "ьюя" ))
960
+ self .assertIsNone (re .search (r"\b" , "ьюя" , re .ASCII ))
893
961
# A non-empty string includes a non-boundary zero-length match.
894
- self .assertTrue (re .search (r"\B" , "abc" ))
962
+ self .assertEqual (re .search (r"\B" , "abc" ).span (), (1 , 1 ))
963
+ self .assertEqual (re .search (r"\B" , "abc" , re .ASCII ).span (), (1 , 1 ))
964
+ self .assertEqual (re .search (br"\B" , b"abc" ).span (), (1 , 1 ))
965
+ self .assertEqual (re .search (br"\B" , b"abc" , re .LOCALE ).span (), (1 , 1 ))
966
+ self .assertEqual (re .search (r"\B" , "ьюя" ).span (), (1 , 1 ))
967
+ self .assertEqual (re .search (r"\B" , "ьюя" , re .ASCII ).span (), (0 , 0 ))
895
968
# There is no non-boundary match at the start of a string.
896
- self .assertFalse (re .match (r"\B" , "abc" ))
969
+ self .assertIsNone (re .match (r"\B" , "abc" ))
970
+ self .assertIsNone (re .match (r"\B" , "abc" , re .ASCII ))
971
+ self .assertIsNone (re .match (br"\B" , b"abc" ))
972
+ self .assertIsNone (re .match (br"\B" , b"abc" , re .LOCALE ))
973
+ self .assertIsNone (re .match (r"\B" , "ьюя" ))
974
+ self .assertTrue (re .match (r"\B" , "ьюя" , re .ASCII ))
975
+ # There is no non-boundary match at the end of a string.
976
+ self .assertIsNone (re .fullmatch (r".+\B" , "abc" ))
977
+ self .assertIsNone (re .fullmatch (r".+\B" , "abc" , re .ASCII ))
978
+ self .assertIsNone (re .fullmatch (br".+\B" , b"abc" ))
979
+ self .assertIsNone (re .fullmatch (br".+\B" , b"abc" , re .LOCALE ))
980
+ self .assertIsNone (re .fullmatch (r".+\B" , "ьюя" ))
981
+ self .assertTrue (re .fullmatch (r".+\B" , "ьюя" , re .ASCII ))
897
982
# However, an empty string contains no word boundaries, and also no
898
983
# non-boundaries.
899
- self .assertIsNone (re .search (r"\B" , "" ))
984
+ self .assertIsNone (re .search (r"\b" , "" ))
985
+ self .assertIsNone (re .search (r"\b" , "" , re .ASCII ))
986
+ self .assertIsNone (re .search (br"\b" , b"" ))
987
+ self .assertIsNone (re .search (br"\b" , b"" , re .LOCALE ))
900
988
# This one is questionable and different from the perlre behaviour,
901
989
# but describes current behavior.
902
- self .assertIsNone (re .search (r"\b" , "" ))
990
+ self .assertIsNone (re .search (r"\B" , "" ))
991
+ self .assertIsNone (re .search (r"\B" , "" , re .ASCII ))
992
+ self .assertIsNone (re .search (br"\B" , b"" ))
993
+ self .assertIsNone (re .search (br"\B" , b"" , re .LOCALE ))
903
994
# A single word-character string has two boundaries, but no
904
995
# non-boundary gaps.
905
996
self .assertEqual (len (re .findall (r"\b" , "a" )), 2 )
997
+ self .assertEqual (len (re .findall (r"\b" , "a" , re .ASCII )), 2 )
998
+ self .assertEqual (len (re .findall (br"\b" , b"a" )), 2 )
999
+ self .assertEqual (len (re .findall (br"\b" , b"a" , re .LOCALE )), 2 )
906
1000
self .assertEqual (len (re .findall (r"\B" , "a" )), 0 )
1001
+ self .assertEqual (len (re .findall (r"\B" , "a" , re .ASCII )), 0 )
1002
+ self .assertEqual (len (re .findall (br"\B" , b"a" )), 0 )
1003
+ self .assertEqual (len (re .findall (br"\B" , b"a" , re .LOCALE )), 0 )
907
1004
# If there are no words, there are no boundaries
908
1005
self .assertEqual (len (re .findall (r"\b" , " " )), 0 )
1006
+ self .assertEqual (len (re .findall (r"\b" , " " , re .ASCII )), 0 )
1007
+ self .assertEqual (len (re .findall (br"\b" , b" " )), 0 )
1008
+ self .assertEqual (len (re .findall (br"\b" , b" " , re .LOCALE )), 0 )
909
1009
self .assertEqual (len (re .findall (r"\b" , " " )), 0 )
1010
+ self .assertEqual (len (re .findall (r"\b" , " " , re .ASCII )), 0 )
1011
+ self .assertEqual (len (re .findall (br"\b" , b" " )), 0 )
1012
+ self .assertEqual (len (re .findall (br"\b" , b" " , re .LOCALE )), 0 )
910
1013
# Can match around the whitespace.
911
1014
self .assertEqual (len (re .findall (r"\B" , " " )), 2 )
1015
+ self .assertEqual (len (re .findall (r"\B" , " " , re .ASCII )), 2 )
1016
+ self .assertEqual (len (re .findall (br"\B" , b" " )), 2 )
1017
+ self .assertEqual (len (re .findall (br"\B" , b" " , re .LOCALE )), 2 )
912
1018
913
1019
def test_bigcharset (self ):
914
1020
self .assertEqual (re .match ("([\u2222 \u2223 ])" ,
0 commit comments