Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit c1b59d4

Browse files
Issue #16688: Fix backreferences that made case-insensitive regexes fail on non-ASCII strings.
Patch by Matthew Barnett.
1 parent 2d8298d commit c1b59d4

4 files changed

Lines changed: 15 additions & 5 deletions

File tree

Lib/test/test_re.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,6 +968,11 @@ def test_large_subn(self, size):
968968
self.assertEqual(r, s)
969969
self.assertEqual(n, size + 1)
970970

971+
def test_bug_16688(self):
972+
# Issue 16688: Backreferences make case-insensitive regex fail on
973+
# non-ASCII strings.
974+
self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
975+
self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
971976

972977
def run_re_tests():
973978
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR

Misc/ACKS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ Anton Barkovsky
7070
Nick Barnes
7171
Quentin Barnes
7272
David Barnett
73+
Matthew Barnett
7374
Richard Barran
7475
Cesar Eduardo Barros
7576
Des Barry

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ Core and Builtins
124124
Library
125125
-------
126126

127+
- Issue #16688: Fix backreferences that made case-insensitive regexes fail on
128+
non-ASCII strings. Patch by Matthew Barnett.
129+
127130
- Issue #16485: Fix file descriptor not being closed if file header patching
128131
fails on closing of aifc file.
129132

Modules/_sre.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ SRE_COUNT(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
492492
Py_ssize_t i;
493493

494494
/* adjust end */
495-
if (maxcount < end - ptr && maxcount != 65535)
495+
if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
496496
end = ptr + maxcount*state->charsize;
497497

498498
switch (pattern[0]) {
@@ -583,7 +583,7 @@ SRE_INFO(SRE_STATE* state, SRE_CODE* pattern)
583583
Py_ssize_t i;
584584

585585
/* check minimal length */
586-
if (pattern[3] && (end - ptr) < pattern[3])
586+
if (pattern[3] && (end - ptr)/state->charsize < pattern[3])
587587
return 0;
588588

589589
/* check known prefix */
@@ -801,7 +801,7 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
801801
/* <INFO> <1=skip> <2=flags> <3=min> ... */
802802
if (ctx->pattern[3] && (end - ctx->ptr)/state->charsize < ctx->pattern[3]) {
803803
TRACE(("reject (got %d chars, need %d)\n",
804-
(end - ctx->ptr), ctx->pattern[3]));
804+
(end - ctx->ptr)/state->charsize, ctx->pattern[3]));
805805
RETURN_FAILURE;
806806
}
807807
ctx->pattern += ctx->pattern[1] + 1;
@@ -1329,9 +1329,10 @@ SRE_MATCH(SRE_STATE* state, SRE_CODE* pattern)
13291329
RETURN_FAILURE;
13301330
while (p < e) {
13311331
if (ctx->ptr >= end ||
1332-
state->lower(SRE_CHARGET(state, ctx->ptr, 0)) != state->lower(*p))
1332+
state->lower(SRE_CHARGET(state, ctx->ptr, 0)) !=
1333+
state->lower(SRE_CHARGET(state, p, 0)))
13331334
RETURN_FAILURE;
1334-
p++;
1335+
p += state->charsize;
13351336
ctx->ptr += state->charsize;
13361337
}
13371338
}

0 commit comments

Comments
 (0)