Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 22628c4

Browse files
committed
#3231: re.compile fails with some bytes patterns
1 parent 943f339 commit 22628c4

3 files changed

Lines changed: 26 additions & 27 deletions

File tree

Lib/sre_parse.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,7 @@ def __next(self):
200200
except IndexError:
201201
raise error("bogus escape (end of line)")
202202
if isinstance(self.string, bytes):
203-
char = chr(c)
203+
c = chr(c)
204204
char = char + c
205205
self.index = self.index + len(char)
206206
self.next = char

Lib/test/re_tests.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -661,12 +661,8 @@
661661
('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
662662
]
663663

664-
try:
665-
u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
666-
except SyntaxError:
667-
pass
668-
else:
669-
tests.extend([
664+
u = '\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'
665+
tests.extend([
670666
# bug 410271: \b broken under locales
671667
(r'\b.\b', 'a', SUCCEED, 'found', 'a'),
672668
(r'(?u)\b.\b', u, SUCCEED, 'found', u),

Lib/test/test_re.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -732,23 +732,25 @@ def run_re_tests():
732732
else:
733733
print('=== Failed incorrectly', t)
734734

735-
# Try the match on a unicode string, and check that it
736-
# still succeeds.
735+
# Try the match with both pattern and string converted to
736+
# bytes, and check that it still succeeds.
737737
try:
738-
result = obj.search(str(s, "latin-1"))
739-
if result is None:
740-
print('=== Fails on unicode match', t)
741-
except NameError:
742-
continue # 1.5.2
743-
except TypeError:
744-
continue # unicode test case
745-
746-
# Try the match on a unicode pattern, and check that it
747-
# still succeeds.
748-
obj=re.compile(str(pattern, "latin-1"))
749-
result = obj.search(s)
750-
if result is None:
751-
print('=== Fails on unicode pattern match', t)
738+
bpat = bytes(pattern, "ascii")
739+
bs = bytes(s, "ascii")
740+
except UnicodeEncodeError:
741+
# skip non-ascii tests
742+
pass
743+
else:
744+
try:
745+
bpat = re.compile(bpat)
746+
except Exception:
747+
print('=== Fails on bytes pattern compile', t)
748+
if verbose:
749+
traceback.print_exc(file=sys.stdout)
750+
else:
751+
bytes_result = bpat.search(bs)
752+
if bytes_result is None:
753+
print('=== Fails on bytes pattern match', t)
752754

753755
# Try the match with the search area limited to the extent
754756
# of the match and see if it still succeeds. \B will
@@ -771,10 +773,11 @@ def run_re_tests():
771773

772774
# Try the match with LOCALE enabled, and check that it
773775
# still succeeds.
774-
obj = re.compile(pattern, re.LOCALE)
775-
result = obj.search(s)
776-
if result is None:
777-
print('=== Fails on locale-sensitive match', t)
776+
if '(?u)' not in pattern:
777+
obj = re.compile(pattern, re.LOCALE)
778+
result = obj.search(s)
779+
if result is None:
780+
print('=== Fails on locale-sensitive match', t)
778781

779782
# Try the match with UNICODE locale enabled, and check
780783
# that it still succeeds.

0 commit comments

Comments
 (0)