Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 7d9c6c7

Browse files
committed
Fix _sre.CODESIZE on 64-bit machines in UCS-4 mode. Fixes #931848.
Backported to 2.3.
1 parent 156c49a commit 7d9c6c7

3 files changed

Lines changed: 15 additions & 4 deletions

File tree

Lib/sre_compile.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -333,14 +333,16 @@ def _optimize_unicode(charset, fixup):
333333
block = block + 1
334334
data = data + _mk_bitmap(chunk)
335335
header = [block]
336-
if MAXCODE == 65535:
336+
if _sre.CODESIZE == 2:
337337
code = 'H'
338338
else:
339-
code = 'L'
339+
code = 'I'
340340
# Convert block indices to byte array of 256 bytes
341341
mapping = array.array('b', mapping).tostring()
342342
# Convert byte array to word array
343-
header = header + array.array(code, mapping).tolist()
343+
mapping = array.array(code, mapping)
344+
assert mapping.itemsize == _sre.CODESIZE
345+
header = header + mapping.tolist()
344346
data[0:0] = header
345347
return [(BIGCHARSET, data)]
346348

Lib/test/test_re.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,15 @@ def test_bug_926075(self):
497497
self.assert_(re.compile('bug_926075') is not
498498
re.compile(eval("u'bug_926075'")))
499499

500+
def test_bug_931848(self):
501+
try:
502+
unicode
503+
except NameError:
504+
pass
505+
pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
506+
self.assertEqual(re.compile(pattern).split("a.b.c"),
507+
['a','b','c'])
508+
500509
def run_re_tests():
501510
from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
502511
if verbose:

Modules/sre.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
/* size of a code word (must be unsigned short or larger, and
1717
large enough to hold a Py_UNICODE character) */
1818
#ifdef Py_UNICODE_WIDE
19-
#define SRE_CODE unsigned long
19+
#define SRE_CODE Py_UCS4
2020
#else
2121
#define SRE_CODE unsigned short
2222
#endif

0 commit comments

Comments
 (0)