Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit be80fc9

Browse files
Issue #19327: Fixed the handling of regular expressions with a very large charset.
1 parent b82a3dc commit be80fc9

4 files changed

Lines changed: 8 additions & 3 deletions

File tree

Lib/sre_compile.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -345,7 +345,7 @@ def _optimize_unicode(charset, fixup):
345345
else:
346346
code = 'I'
347347
# Convert block indices to byte array of 256 bytes
348-
mapping = array.array('b', mapping).tobytes()
348+
mapping = array.array('B', mapping).tobytes()
349349
# Convert byte array to word array
350350
mapping = array.array(code, mapping)
351351
assert mapping.itemsize == _sre.CODESIZE

Lib/test/test_re.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -428,6 +428,9 @@ def test_bigcharset(self):
428428
"\u2222").group(1), "\u2222")
429429
self.assertEqual(re.match("([\u2222\u2223])",
430430
"\u2222", re.UNICODE).group(1), "\u2222")
431+
r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
432+
self.assertEqual(re.match(r,
433+
"\uff01", re.UNICODE).group(), "\uff01")
431434

432435
def test_big_codesize(self):
433436
# Issue #1160

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,8 @@ Core and Builtins
8181
Library
8282
-------
8383

84+
- Issue #19327: Fixed the handling of regular expressions with a very large charset.
85+
8486
- Issue #19350: Increasing the test coverage of macurl2path. Patch by Colin
8587
Williams.
8688

Modules/_sre.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -451,7 +451,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
451451
count = *(set++);
452452

453453
if (sizeof(SRE_CODE) == 2) {
454-
block = ((char*)set)[ch >> 8];
454+
block = ((unsigned char*)set)[ch >> 8];
455455
set += 128;
456456
if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
457457
return ok;
@@ -461,7 +461,7 @@ SRE_CHARSET(SRE_CODE* set, SRE_CODE ch)
461461
/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
462462
* warnings when c's type supports only numbers < N+1 */
463463
if (!(ch & ~65535))
464-
block = ((char*)set)[ch >> 8];
464+
block = ((unsigned char*)set)[ch >> 8];
465465
else
466466
block = -1;
467467
set += 64;

0 commit comments

Comments
 (0)