Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 02eae6b

Browse files
Issue #18684: Fixed reading out of the buffer in the re module.
2 parents e12aa62 + 03d6ee3 commit 02eae6b

3 files changed

Lines changed: 43 additions & 14 deletions

File tree

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ Core and Builtins
1717
Library
1818
-------
1919

20+
- Issue #18684: Fixed reading out of the buffer in the re module.
21+
2022
- Issue #24259: tarfile now raises a ReadError if an archive is truncated
2123
inside a data segment.
2224

Modules/_sre.c

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -994,7 +994,7 @@ _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
994994
}
995995

996996
if (state.start == state.ptr) {
997-
if (last == state.end)
997+
if (last == state.end || state.ptr == state.end)
998998
break;
999999
/* skip one character */
10001000
state.start = (void*) ((char*) state.ptr + state.charsize);
@@ -1191,6 +1191,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
11911191

11921192
next:
11931193
/* move on */
1194+
if (state.ptr == state.end)
1195+
break;
11941196
if (state.ptr == state.start)
11951197
state.start = (void*) ((char*) state.ptr + state.charsize);
11961198
else
@@ -2564,6 +2566,9 @@ _sre_SRE_Scanner_match_impl(ScannerObject *self)
25642566
PyObject* match;
25652567
Py_ssize_t status;
25662568

2569+
if (state->start == NULL)
2570+
Py_RETURN_NONE;
2571+
25672572
state_reset(state);
25682573

25692574
state->ptr = state->start;
@@ -2575,10 +2580,14 @@ _sre_SRE_Scanner_match_impl(ScannerObject *self)
25752580
match = pattern_new_match((PatternObject*) self->pattern,
25762581
state, status);
25772582

2578-
if (status == 0 || state->ptr == state->start)
2583+
if (status == 0)
2584+
state->start = NULL;
2585+
else if (state->ptr != state->start)
2586+
state->start = state->ptr;
2587+
else if (state->ptr != state->end)
25792588
state->start = (void*) ((char*) state->ptr + state->charsize);
25802589
else
2581-
state->start = state->ptr;
2590+
state->start = NULL;
25822591

25832592
return match;
25842593
}
@@ -2597,6 +2606,9 @@ _sre_SRE_Scanner_search_impl(ScannerObject *self)
25972606
PyObject* match;
25982607
Py_ssize_t status;
25992608

2609+
if (state->start == NULL)
2610+
Py_RETURN_NONE;
2611+
26002612
state_reset(state);
26012613

26022614
state->ptr = state->start;
@@ -2608,10 +2620,14 @@ _sre_SRE_Scanner_search_impl(ScannerObject *self)
26082620
match = pattern_new_match((PatternObject*) self->pattern,
26092621
state, status);
26102622

2611-
if (status == 0 || state->ptr == state->start)
2623+
if (status == 0)
2624+
state->start = NULL;
2625+
else if (state->ptr != state->start)
2626+
state->start = state->ptr;
2627+
else if (state->ptr != state->end)
26122628
state->start = (void*) ((char*) state->ptr + state->charsize);
26132629
else
2614-
state->start = state->ptr;
2630+
state->start = NULL;
26152631

26162632
return match;
26172633
}

Modules/sre_lib.h

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
3030
SRE_IS_LINEBREAK((int) ptr[-1]));
3131

3232
case SRE_AT_END:
33-
return (((void*) (ptr+1) == state->end &&
33+
return (((SRE_CHAR *)state->end - ptr == 1 &&
3434
SRE_IS_LINEBREAK((int) ptr[0])) ||
3535
((void*) ptr == state->end));
3636

@@ -1109,9 +1109,9 @@ SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all)
11091109
/* <ASSERT> <skip> <back> <pattern> */
11101110
TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
11111111
ctx->ptr, ctx->pattern[1]));
1112-
state->ptr = ctx->ptr - ctx->pattern[1];
1113-
if (state->ptr < state->beginning)
1112+
if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
11141113
RETURN_FAILURE;
1114+
state->ptr = ctx->ptr - ctx->pattern[1];
11151115
DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
11161116
RETURN_ON_FAILURE(ret);
11171117
ctx->pattern += ctx->pattern[0];
@@ -1122,8 +1122,8 @@ SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all)
11221122
/* <ASSERT_NOT> <skip> <back> <pattern> */
11231123
TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
11241124
ctx->ptr, ctx->pattern[1]));
1125-
state->ptr = ctx->ptr - ctx->pattern[1];
1126-
if (state->ptr >= state->beginning) {
1125+
if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
1126+
state->ptr = ctx->ptr - ctx->pattern[1];
11271127
DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
11281128
if (ret) {
11291129
RETURN_ON_ERROR(ret);
@@ -1215,12 +1215,20 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
12151215
SRE_CODE* overlap = NULL;
12161216
int flags = 0;
12171217

1218+
if (ptr > end)
1219+
return 0;
1220+
12181221
if (pattern[0] == SRE_OP_INFO) {
12191222
/* optimization info block */
12201223
/* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
12211224

12221225
flags = pattern[2];
12231226

1227+
if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1228+
TRACE(("reject (got %u chars, need %u)\n",
1229+
(unsigned int)(end - ptr), pattern[3]));
1230+
return 0;
1231+
}
12241232
if (pattern[3] > 1) {
12251233
/* adjust end point (but make sure we leave at least one
12261234
character in there, so literal search will work) */
@@ -1338,15 +1346,18 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
13381346
break;
13391347
ptr++;
13401348
}
1341-
} else
1349+
} else {
13421350
/* general case */
1343-
while (ptr <= end) {
1351+
assert(ptr <= end);
1352+
while (1) {
13441353
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1345-
state->start = state->ptr = ptr++;
1354+
state->start = state->ptr = ptr;
13461355
status = SRE(match)(state, pattern, 0);
1347-
if (status != 0)
1356+
if (status != 0 || ptr >= end)
13481357
break;
1358+
ptr++;
13491359
}
1360+
}
13501361

13511362
return status;
13521363
}

0 commit comments

Comments
 (0)