Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit eeb7eea

Browse files
committed
Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or lists.
This introduces a small private API for this common pattern. The issue has been discovered thanks to Martin's huge-mem buildbot.
1 parent bb2095f commit eeb7eea

13 files changed

Lines changed: 271 additions & 87 deletions

File tree

Include/Python.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@
100100
#include "warnings.h"
101101
#include "weakrefobject.h"
102102
#include "structseq.h"
103-
103+
#include "accu.h"
104104

105105
#include "codecs.h"
106106
#include "pyerrors.h"
@@ -141,7 +141,7 @@ PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);
141141
#endif
142142

143143
/* Argument must be a char or an int in [-128, 127] or [0, 255]. */
144-
#define Py_CHARMASK(c) ((unsigned char)((c) & 0xff))
144+
#define Py_CHARMASK(c) ((unsigned char)((c) & 0xff))
145145

146146
#include "pyfpe.h"
147147

Include/accu.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#ifndef Py_LIMITED_API
2+
#ifndef Py_ACCU_H
3+
#define Py_ACCU_H
4+
5+
/*** This is a private API for use by the interpreter and the stdlib.
6+
*** Its definition may be changed or removed at any moment.
7+
***/
8+
9+
/*
10+
* A two-level accumulator of unicode objects that avoids both the overhead
11+
* of keeping a huge number of small separate objects, and the quadratic
12+
* behaviour of using a naive repeated concatenation scheme.
13+
*/
14+
15+
#ifdef __cplusplus
16+
extern "C" {
17+
#endif
18+
19+
/* Accumulator state.  `small` collects the strings passed to
 * _PyAccu_Accumulate(); `large` is created lazily (it is NULL until the
 * first batch of pending strings has been joined and moved into it).
 */
typedef struct {
20+
PyObject *large; /* A list of previously accumulated large strings */
21+
PyObject *small; /* Pending small strings */
22+
} _PyAccu;
23+
24+
/* Prepare `acc` for use: `large` is set to NULL and `small` to a new empty
 * list.  Returns 0 on success, -1 if the list could not be allocated.
 */
PyAPI_FUNC(int) _PyAccu_Init(_PyAccu *acc);
25+
/* Append `unicode` to the pending strings.  Once enough strings are
 * pending, they are joined into a single string that is moved to `large`.
 * Returns 0 on success, -1 on failure.
 */
PyAPI_FUNC(int) _PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode);
26+
/* Join any pending strings, release `small` and hand the `large` list over
 * to the caller.  On failure both members are released and NULL is
 * returned.  Either way, both members are NULL afterwards.
 */
PyAPI_FUNC(PyObject *) _PyAccu_FinishAsList(_PyAccu *acc);
27+
/* Return one string made of everything accumulated so far, or NULL on
 * failure.  The accumulator no longer owns any object afterwards.
 */
PyAPI_FUNC(PyObject *) _PyAccu_Finish(_PyAccu *acc);
28+
/* Release whatever `acc` still references and reset both members to NULL
 * (e.g. on an error path where neither Finish function is called).
 */
PyAPI_FUNC(void) _PyAccu_Destroy(_PyAccu *acc);
29+
30+
#ifdef __cplusplus
31+
}
32+
#endif
33+
34+
#endif /* Py_ACCU_H */
35+
#endif /* Py_LIMITED_API */

Lib/test/test_list.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,17 @@ def imul(a, b): a *= b
5959
self.assertRaises((MemoryError, OverflowError), mul, lst, n)
6060
self.assertRaises((MemoryError, OverflowError), imul, lst, n)
6161

62+
def test_repr_large(self):
63+
# Check the repr of large list objects
64+
def check(n):
65+
l = [0] * n
66+
s = repr(l)
67+
self.assertEqual(s,
68+
'[' + ', '.join(['0'] * n) + ']')
69+
check(10) # check our checking code
70+
check(1000000)
71+
72+
6273
def test_main(verbose=None):
6374
support.run_unittest(ListTest)
6475

Lib/test/test_tuple.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,16 @@ def test_bug7466(self):
154154
# Trying to untrack an unfinished tuple could crash Python
155155
self._not_tracked(tuple(gc.collect() for i in range(101)))
156156

157+
def test_repr_large(self):
158+
# Check the repr of large list objects
159+
def check(n):
160+
l = (0,) * n
161+
s = repr(l)
162+
self.assertEqual(s,
163+
'(' + ', '.join(['0'] * n) + ')')
164+
check(10) # check our checking code
165+
check(1000000)
166+
157167
def test_main():
158168
support.run_unittest(TupleTest)
159169

Makefile.pre.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ PYTHON_OBJS= \
342342
# Objects
343343
OBJECT_OBJS= \
344344
Objects/abstract.o \
345+
Objects/accu.o \
345346
Objects/boolobject.o \
346347
Objects/bytes_methods.o \
347348
Objects/bytearrayobject.o \
@@ -664,6 +665,7 @@ PYTHON_HEADERS= \
664665
Include/Python-ast.h \
665666
Include/Python.h \
666667
Include/abstract.h \
668+
Include/accu.h \
667669
Include/asdl.h \
668670
Include/ast.h \
669671
Include/bltinmodule.h \

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.2.3?
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #12911: Fix memory consumption when calculating the repr() of huge
14+
tuples or lists.
15+
1316
- Issue #7732: Don't open a directory as a file anymore while importing a
1417
module. Ignore the directory if its name matches the module name (e.g.
1518
"__init__.py") and raise an ImportError instead.

Objects/accu.c

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/* Accumulator struct implementation */
2+
3+
#include "Python.h"
4+
5+
/* Equivalent of ''.join(lst).
 *
 * Returns a new reference to the joined string, or NULL if either the empty
 * separator could not be created or the join itself failed.
 */
static PyObject *
join_list_unicode(PyObject *lst)
{
    PyObject *sep, *ret;

    sep = PyUnicode_FromStringAndSize("", 0);
    /* Bail out before touching `sep`: Py_DECREF(NULL) would crash. */
    if (sep == NULL)
        return NULL;
    ret = PyUnicode_Join(sep, lst);
    Py_DECREF(sep);
    return ret;
}
15+
16+
int
17+
_PyAccu_Init(_PyAccu *acc)
18+
{
19+
/* Lazily allocated */
20+
acc->large = NULL;
21+
acc->small = PyList_New(0);
22+
if (acc->small == NULL)
23+
return -1;
24+
return 0;
25+
}
26+
27+
static int
28+
flush_accumulator(_PyAccu *acc)
29+
{
30+
Py_ssize_t nsmall = PyList_GET_SIZE(acc->small);
31+
if (nsmall) {
32+
int ret;
33+
PyObject *joined;
34+
if (acc->large == NULL) {
35+
acc->large = PyList_New(0);
36+
if (acc->large == NULL)
37+
return -1;
38+
}
39+
joined = join_list_unicode(acc->small);
40+
if (joined == NULL)
41+
return -1;
42+
if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {
43+
Py_DECREF(joined);
44+
return -1;
45+
}
46+
ret = PyList_Append(acc->large, joined);
47+
Py_DECREF(joined);
48+
return ret;
49+
}
50+
return 0;
51+
}
52+
53+
int
54+
_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
55+
{
56+
Py_ssize_t nsmall;
57+
assert(PyUnicode_Check(unicode));
58+
59+
if (PyList_Append(acc->small, unicode))
60+
return -1;
61+
nsmall = PyList_GET_SIZE(acc->small);
62+
/* Each item in a list of unicode objects has an overhead (in 64-bit
63+
* builds) of:
64+
* - 8 bytes for the list slot
65+
* - 56 bytes for the header of the unicode object
66+
* that is, 64 bytes. 100000 such objects waste more than 6MB
67+
* compared to a single concatenated string.
68+
*/
69+
if (nsmall < 100000)
70+
return 0;
71+
return flush_accumulator(acc);
72+
}
73+
74+
PyObject *
75+
_PyAccu_FinishAsList(_PyAccu *acc)
76+
{
77+
int ret;
78+
PyObject *res;
79+
80+
ret = flush_accumulator(acc);
81+
Py_CLEAR(acc->small);
82+
if (ret) {
83+
Py_CLEAR(acc->large);
84+
return NULL;
85+
}
86+
res = acc->large;
87+
acc->large = NULL;
88+
return res;
89+
}
90+
91+
PyObject *
92+
_PyAccu_Finish(_PyAccu *acc)
93+
{
94+
PyObject *list, *res;
95+
if (acc->large == NULL) {
96+
list = acc->small;
97+
acc->small = NULL;
98+
}
99+
else {
100+
list = _PyAccu_FinishAsList(acc);
101+
if (!list)
102+
return NULL;
103+
}
104+
res = join_list_unicode(list);
105+
Py_DECREF(list);
106+
return res;
107+
}
108+
109+
void
110+
_PyAccu_Destroy(_PyAccu *acc)
111+
{
112+
Py_CLEAR(acc->small);
113+
Py_CLEAR(acc->large);
114+
}

Objects/listobject.c

Lines changed: 35 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -321,70 +321,59 @@ static PyObject *
321321
list_repr(PyListObject *v)
322322
{
323323
Py_ssize_t i;
324-
PyObject *s, *temp;
325-
PyObject *pieces = NULL, *result = NULL;
324+
PyObject *s = NULL;
325+
_PyAccu acc;
326+
static PyObject *sep = NULL;
327+
328+
if (Py_SIZE(v) == 0) {
329+
return PyUnicode_FromString("[]");
330+
}
331+
332+
if (sep == NULL) {
333+
sep = PyUnicode_FromString(", ");
334+
if (sep == NULL)
335+
return NULL;
336+
}
326337

327338
i = Py_ReprEnter((PyObject*)v);
328339
if (i != 0) {
329340
return i > 0 ? PyUnicode_FromString("[...]") : NULL;
330341
}
331342

332-
if (Py_SIZE(v) == 0) {
333-
result = PyUnicode_FromString("[]");
334-
goto Done;
335-
}
343+
if (_PyAccu_Init(&acc))
344+
goto error;
336345

337-
pieces = PyList_New(0);
338-
if (pieces == NULL)
339-
goto Done;
346+
s = PyUnicode_FromString("[");
347+
if (s == NULL || _PyAccu_Accumulate(&acc, s))
348+
goto error;
349+
Py_CLEAR(s);
340350

341351
/* Do repr() on each element. Note that this may mutate the list,
342352
so must refetch the list size on each iteration. */
343353
for (i = 0; i < Py_SIZE(v); ++i) {
344-
int status;
345354
if (Py_EnterRecursiveCall(" while getting the repr of a list"))
346-
goto Done;
355+
goto error;
347356
s = PyObject_Repr(v->ob_item[i]);
348357
Py_LeaveRecursiveCall();
349-
if (s == NULL)
350-
goto Done;
351-
status = PyList_Append(pieces, s);
352-
Py_DECREF(s); /* append created a new ref */
353-
if (status < 0)
354-
goto Done;
358+
if (i > 0 && _PyAccu_Accumulate(&acc, sep))
359+
goto error;
360+
if (s == NULL || _PyAccu_Accumulate(&acc, s))
361+
goto error;
362+
Py_CLEAR(s);
355363
}
364+
s = PyUnicode_FromString("]");
365+
if (s == NULL || _PyAccu_Accumulate(&acc, s))
366+
goto error;
367+
Py_CLEAR(s);
356368

357-
/* Add "[]" decorations to the first and last items. */
358-
assert(PyList_GET_SIZE(pieces) > 0);
359-
s = PyUnicode_FromString("[");
360-
if (s == NULL)
361-
goto Done;
362-
temp = PyList_GET_ITEM(pieces, 0);
363-
PyUnicode_AppendAndDel(&s, temp);
364-
PyList_SET_ITEM(pieces, 0, s);
365-
if (s == NULL)
366-
goto Done;
369+
Py_ReprLeave((PyObject *)v);
370+
return _PyAccu_Finish(&acc);
367371

368-
s = PyUnicode_FromString("]");
369-
if (s == NULL)
370-
goto Done;
371-
temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
372-
PyUnicode_AppendAndDel(&temp, s);
373-
PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
374-
if (temp == NULL)
375-
goto Done;
376-
377-
/* Paste them all together with ", " between. */
378-
s = PyUnicode_FromString(", ");
379-
if (s == NULL)
380-
goto Done;
381-
result = PyUnicode_Join(s, pieces);
382-
Py_DECREF(s);
383-
384-
Done:
385-
Py_XDECREF(pieces);
372+
error:
373+
_PyAccu_Destroy(&acc);
374+
Py_XDECREF(s);
386375
Py_ReprLeave((PyObject *)v);
387-
return result;
376+
return NULL;
388377
}
389378

390379
static Py_ssize_t

0 commit comments

Comments
 (0)