Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 39bdad8

Browse files
committed
Issue #1160: Fix compiling large regular expressions on UCS2 builds.
Patch by Serhiy Storchaka.
1 parent 4a1fdcf commit 39bdad8

4 files changed

Lines changed: 17 additions & 9 deletions

File tree

Lib/test/test_re.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,12 @@ def test_bigcharset(self):
419419
self.assertEqual(re.match("([\u2222\u2223])",
420420
"\u2222", re.UNICODE).group(1), "\u2222")
421421

422+
def test_big_codesize(self):
423+
# Issue #1160
424+
r = re.compile('|'.join(('%d'%x for x in range(10000))))
425+
self.assertIsNotNone(r.match('1000'))
426+
self.assertIsNotNone(r.match('9999'))
427+
422428
def test_anyall(self):
423429
self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
424430
"a\nb")

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,9 @@ Core and Builtins
167167
Library
168168
-------
169169

170+
- Issue #1160: Fix compiling large regular expressions on UCS2 builds.
171+
Patch by Serhiy Storchaka.
172+
170173
- Issue #14313: zipfile now raises NotImplementedError when the compression
171174
type is unknown.
172175

Modules/_sre.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2695,6 +2695,13 @@ _compile(PyObject* self_, PyObject* args)
26952695
for (i = 0; i < n; i++) {
26962696
PyObject *o = PyList_GET_ITEM(code, i);
26972697
unsigned long value = PyLong_AsUnsignedLong(o);
2698+
if (value == (unsigned long)-1 && PyErr_Occurred()) {
2699+
if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
2700+
PyErr_SetString(PyExc_OverflowError,
2701+
"regular expression code size limit exceeded");
2702+
}
2703+
break;
2704+
}
26982705
self->code[i] = (SRE_CODE) value;
26992706
if ((unsigned long) self->code[i] != value) {
27002707
PyErr_SetString(PyExc_OverflowError,
@@ -3065,10 +3072,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
30653072
GET_ARG; max = arg;
30663073
if (min > max)
30673074
FAIL;
3068-
#ifdef Py_UNICODE_WIDE
30693075
if (max > 65535)
30703076
FAIL;
3071-
#endif
30723077
if (!_validate_inner(code, code+skip-4, groups))
30733078
FAIL;
30743079
code += skip-4;
@@ -3086,10 +3091,8 @@ _validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
30863091
GET_ARG; max = arg;
30873092
if (min > max)
30883093
FAIL;
3089-
#ifdef Py_UNICODE_WIDE
30903094
if (max > 65535)
30913095
FAIL;
3092-
#endif
30933096
if (!_validate_inner(code, code+skip-3, groups))
30943097
FAIL;
30953098
code += skip-3;

Modules/sre.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,8 @@
1414
#include "sre_constants.h"
1515

1616
/* size of a code word (must be unsigned short or larger, and
17-
large enough to hold a Py_UNICODE character) */
18-
#ifdef Py_UNICODE_WIDE
17+
large enough to hold a UCS4 character) */
1918
#define SRE_CODE Py_UCS4
20-
#else
21-
#define SRE_CODE unsigned short
22-
#endif
2319

2420
typedef struct {
2521
PyObject_VAR_HEAD

0 commit comments

Comments
 (0)