Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 4894c30

Browse files
committed
Fix a bug in the memory reallocation code of PyUnicode_TranslateCharmap().
charmaptranslate_makespace() allocated more memory than required for the next replacement but didn't remember that fact, so the memory size grew exponentially every time a replacement string was longer than one character. This fixes SF bug #828737.
1 parent 6a5b027 commit 4894c30

2 files changed

Lines changed: 32 additions & 19 deletions

File tree

Lib/test/test_codeccallbacks.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,18 @@ def __getitem__(self, key):
690690
self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
691691
self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
692692

693+
def test_bug828737(self):
694+
charmap = {
695+
ord("&"): u"&",
696+
ord("<"): u"&lt;",
697+
ord(">"): u"&gt;",
698+
ord('"'): u"&quot;",
699+
}
700+
701+
for n in (1, 10, 100, 1000):
702+
text = u'abc<def>ghi'*n
703+
text.translate(charmap)
704+
693705
def test_main():
694706
test.test_support.run_unittest(CodecCallbackTest)
695707

Objects/unicodeobject.c

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3222,19 +3222,19 @@ int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result)
32223222
if not reallocate and adjust various state variables.
32233223
Return 0 on success, -1 on error */
32243224
static
3225-
int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsize,
3225+
int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp,
32263226
int requiredsize)
32273227
{
3228-
if (requiredsize > *outsize) {
3228+
int oldsize = PyUnicode_GET_SIZE(*outobj);
3229+
if (requiredsize > oldsize) {
32293230
/* remember old output position */
32303231
int outpos = *outp-PyUnicode_AS_UNICODE(*outobj);
32313232
/* exponentially overallocate to minimize reallocations */
3232-
if (requiredsize < 2 * *outsize)
3233-
requiredsize = 2 * *outsize;
3233+
if (requiredsize < 2 * oldsize)
3234+
requiredsize = 2 * oldsize;
32343235
if (_PyUnicode_Resize(outobj, requiredsize) < 0)
32353236
return -1;
32363237
*outp = PyUnicode_AS_UNICODE(*outobj) + outpos;
3237-
*outsize = requiredsize;
32383238
}
32393239
return 0;
32403240
}
@@ -3245,14 +3245,15 @@ int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsiz
32453245
The caller must decref result.
32463246
Return 0 on success, -1 on error. */
32473247
static
3248-
int charmaptranslate_output(Py_UNICODE c, PyObject *mapping,
3249-
PyObject **outobj, int *outsize, Py_UNICODE **outp, PyObject **res)
3248+
int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp,
3249+
int insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp,
3250+
PyObject **res)
32503251
{
3251-
if (charmaptranslate_lookup(c, mapping, res))
3252+
if (charmaptranslate_lookup(*curinp, mapping, res))
32523253
return -1;
32533254
if (*res==NULL) {
32543255
/* not found => default to 1:1 mapping */
3255-
*(*outp)++ = (Py_UNICODE)c;
3256+
*(*outp)++ = *curinp;
32563257
}
32573258
else if (*res==Py_None)
32583259
;
@@ -3268,8 +3269,10 @@ int charmaptranslate_output(Py_UNICODE c, PyObject *mapping,
32683269
}
32693270
else if (repsize!=0) {
32703271
/* more than one character */
3271-
int requiredsize = *outsize + repsize - 1;
3272-
if (charmaptranslate_makespace(outobj, outp, outsize, requiredsize))
3272+
int requiredsize = (*outp-PyUnicode_AS_UNICODE(*outobj)) +
3273+
(insize - (*curinp-*startinp)) +
3274+
repsize - 1;
3275+
if (charmaptranslate_makespace(outobj, outp, requiredsize))
32733276
return -1;
32743277
memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize);
32753278
*outp += repsize;
@@ -3294,7 +3297,6 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
32943297
Py_UNICODE *str;
32953298
/* current output position */
32963299
int respos = 0;
3297-
int ressize;
32983300
char *reason = "character maps to <undefined>";
32993301
PyObject *errorHandler = NULL;
33003302
PyObject *exc = NULL;
@@ -3312,16 +3314,15 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
33123314
replacements, if we need more, we'll resize */
33133315
res = PyUnicode_FromUnicode(NULL, size);
33143316
if (res == NULL)
3315-
goto onError;
3317+
goto onError;
33163318
if (size == 0)
33173319
return res;
33183320
str = PyUnicode_AS_UNICODE(res);
3319-
ressize = size;
33203321

33213322
while (p<endp) {
33223323
/* try to encode it */
33233324
PyObject *x = NULL;
3324-
if (charmaptranslate_output(*p, mapping, &res, &ressize, &str, &x)) {
3325+
if (charmaptranslate_output(startp, p, size, mapping, &res, &str, &x)) {
33253326
Py_XDECREF(x);
33263327
goto onError;
33273328
}
@@ -3340,7 +3341,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
33403341

33413342
/* find all untranslatable characters */
33423343
while (collend < endp) {
3343-
if (charmaptranslate_lookup(*collend, mapping, &x))
3344+
if (charmaptranslate_lookup(*collend, mapping, &x))
33443345
goto onError;
33453346
Py_XDECREF(x);
33463347
if (x!=Py_None)
@@ -3379,7 +3380,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
33793380
char buffer[2+29+1+1];
33803381
char *cp;
33813382
sprintf(buffer, "&#%d;", (int)*p);
3382-
if (charmaptranslate_makespace(&res, &str, &ressize,
3383+
if (charmaptranslate_makespace(&res, &str,
33833384
(str-PyUnicode_AS_UNICODE(res))+strlen(buffer)+(endp-collend)))
33843385
goto onError;
33853386
for (cp = buffer; *cp; ++cp)
@@ -3395,7 +3396,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
33953396
goto onError;
33963397
/* generate replacement */
33973398
repsize = PyUnicode_GET_SIZE(repunicode);
3398-
if (charmaptranslate_makespace(&res, &str, &ressize,
3399+
if (charmaptranslate_makespace(&res, &str,
33993400
(str-PyUnicode_AS_UNICODE(res))+repsize+(endp-collend))) {
34003401
Py_DECREF(repunicode);
34013402
goto onError;
@@ -3409,7 +3410,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
34093410
}
34103411
/* Resize if we allocated too much */
34113412
respos = str-PyUnicode_AS_UNICODE(res);
3412-
if (respos<ressize) {
3413+
if (respos<PyUnicode_GET_SIZE(res)) {
34133414
if (_PyUnicode_Resize(&res, respos) < 0)
34143415
goto onError;
34153416
}

0 commit comments

Comments
 (0)