Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 03d6ee3

Browse files
Issue #18684: Fixed reading out of the buffer in the re module.
1 parent 0357268 commit 03d6ee3

3 files changed

Lines changed: 43 additions & 14 deletions

File tree

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,8 @@ Core and Builtins
6666
Library
6767
-------
6868

69+
- Issue #18684: Fixed reading out of the buffer in the re module.
70+
6971
- Issue #24259: tarfile now raises a ReadError if an archive is truncated
7072
inside a data segment.
7173

Modules/_sre.c

Lines changed: 21 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -883,7 +883,7 @@ pattern_split(PatternObject* self, PyObject* args, PyObject* kw)
883883
}
884884

885885
if (state.start == state.ptr) {
886-
if (last == state.end)
886+
if (last == state.end || state.ptr == state.end)
887887
break;
888888
/* skip one character */
889889
state.start = (void*) ((char*) state.ptr + state.charsize);
@@ -1081,6 +1081,8 @@ pattern_subx(PatternObject* self, PyObject* ptemplate, PyObject* string,
10811081

10821082
next:
10831083
/* move on */
1084+
if (state.ptr == state.end)
1085+
break;
10841086
if (state.ptr == state.start)
10851087
state.start = (void*) ((char*) state.ptr + state.charsize);
10861088
else
@@ -2567,6 +2569,9 @@ scanner_match(ScannerObject* self, PyObject *unused)
25672569
PyObject* match;
25682570
Py_ssize_t status;
25692571

2572+
if (state->start == NULL)
2573+
Py_RETURN_NONE;
2574+
25702575
state_reset(state);
25712576

25722577
state->ptr = state->start;
@@ -2578,10 +2583,14 @@ scanner_match(ScannerObject* self, PyObject *unused)
25782583
match = pattern_new_match((PatternObject*) self->pattern,
25792584
state, status);
25802585

2581-
if (status == 0 || state->ptr == state->start)
2586+
if (status == 0)
2587+
state->start = NULL;
2588+
else if (state->ptr != state->start)
2589+
state->start = state->ptr;
2590+
else if (state->ptr != state->end)
25822591
state->start = (void*) ((char*) state->ptr + state->charsize);
25832592
else
2584-
state->start = state->ptr;
2593+
state->start = NULL;
25852594

25862595
return match;
25872596
}
@@ -2594,6 +2603,9 @@ scanner_search(ScannerObject* self, PyObject *unused)
25942603
PyObject* match;
25952604
Py_ssize_t status;
25962605

2606+
if (state->start == NULL)
2607+
Py_RETURN_NONE;
2608+
25972609
state_reset(state);
25982610

25992611
state->ptr = state->start;
@@ -2605,10 +2617,14 @@ scanner_search(ScannerObject* self, PyObject *unused)
26052617
match = pattern_new_match((PatternObject*) self->pattern,
26062618
state, status);
26072619

2608-
if (status == 0 || state->ptr == state->start)
2620+
if (status == 0)
2621+
state->start = NULL;
2622+
else if (state->ptr != state->start)
2623+
state->start = state->ptr;
2624+
else if (state->ptr != state->end)
26092625
state->start = (void*) ((char*) state->ptr + state->charsize);
26102626
else
2611-
state->start = state->ptr;
2627+
state->start = NULL;
26122628

26132629
return match;
26142630
}

Modules/sre_lib.h

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ SRE(at)(SRE_STATE* state, SRE_CHAR* ptr, SRE_CODE at)
3030
SRE_IS_LINEBREAK((int) ptr[-1]));
3131

3232
case SRE_AT_END:
33-
return (((void*) (ptr+1) == state->end &&
33+
return (((SRE_CHAR *)state->end - ptr == 1 &&
3434
SRE_IS_LINEBREAK((int) ptr[0])) ||
3535
((void*) ptr == state->end));
3636

@@ -1093,9 +1093,9 @@ SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all)
10931093
/* <ASSERT> <skip> <back> <pattern> */
10941094
TRACE(("|%p|%p|ASSERT %d\n", ctx->pattern,
10951095
ctx->ptr, ctx->pattern[1]));
1096-
state->ptr = ctx->ptr - ctx->pattern[1];
1097-
if (state->ptr < state->beginning)
1096+
if (ctx->ptr - (SRE_CHAR *)state->beginning < (Py_ssize_t)ctx->pattern[1])
10981097
RETURN_FAILURE;
1098+
state->ptr = ctx->ptr - ctx->pattern[1];
10991099
DO_JUMP0(JUMP_ASSERT, jump_assert, ctx->pattern+2);
11001100
RETURN_ON_FAILURE(ret);
11011101
ctx->pattern += ctx->pattern[0];
@@ -1106,8 +1106,8 @@ SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int match_all)
11061106
/* <ASSERT_NOT> <skip> <back> <pattern> */
11071107
TRACE(("|%p|%p|ASSERT_NOT %d\n", ctx->pattern,
11081108
ctx->ptr, ctx->pattern[1]));
1109-
state->ptr = ctx->ptr - ctx->pattern[1];
1110-
if (state->ptr >= state->beginning) {
1109+
if (ctx->ptr - (SRE_CHAR *)state->beginning >= (Py_ssize_t)ctx->pattern[1]) {
1110+
state->ptr = ctx->ptr - ctx->pattern[1];
11111111
DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, ctx->pattern+2);
11121112
if (ret) {
11131113
RETURN_ON_ERROR(ret);
@@ -1199,12 +1199,20 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
11991199
SRE_CODE* overlap = NULL;
12001200
int flags = 0;
12011201

1202+
if (ptr > end)
1203+
return 0;
1204+
12021205
if (pattern[0] == SRE_OP_INFO) {
12031206
/* optimization info block */
12041207
/* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info> */
12051208

12061209
flags = pattern[2];
12071210

1211+
if (pattern[3] && end - ptr < (Py_ssize_t)pattern[3]) {
1212+
TRACE(("reject (got %u chars, need %u)\n",
1213+
(unsigned int)(end - ptr), pattern[3]));
1214+
return 0;
1215+
}
12081216
if (pattern[3] > 1) {
12091217
/* adjust end point (but make sure we leave at least one
12101218
character in there, so literal search will work) */
@@ -1322,15 +1330,18 @@ SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
13221330
break;
13231331
ptr++;
13241332
}
1325-
} else
1333+
} else {
13261334
/* general case */
1327-
while (ptr <= end) {
1335+
assert(ptr <= end);
1336+
while (1) {
13281337
TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
1329-
state->start = state->ptr = ptr++;
1338+
state->start = state->ptr = ptr;
13301339
status = SRE(match)(state, pattern, 0);
1331-
if (status != 0)
1340+
if (status != 0 || ptr >= end)
13321341
break;
1342+
ptr++;
13331343
}
1344+
}
13341345

13351346
return status;
13361347
}

0 commit comments

Comments
 (0)