Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 184984d

Browse files
Support arbitrary non-contiguous memo keys.
1 parent 1e487ca commit 184984d

3 files changed

Lines changed: 82 additions & 72 deletions

File tree

Lib/test/pickletester.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,6 +1127,18 @@ def itersize(self, start, stop):
11271127
size <<= 1
11281128
yield stop
11291129

1130+
def test_too_large_put(self):
1131+
data = lambda n: (b'((lp' + str(n).encode() + b'\n' +
1132+
b'g' + str(n).encode() + b'\nt.')
1133+
for idx in [10**6, 10**9, 10**12]:
1134+
self.assertEqual(self.loads(data(idx)), ([],)*2)
1135+
1136+
def test_too_large_long_binput(self):
1137+
data = lambda n: (b'(]r' + struct.pack('<I', n) +
1138+
b'j' + struct.pack('<I', n) + b't.')
1139+
for idx in self.itersize(1 << 20, min(sys.maxsize, (1 << 32) - 1)):
1140+
self.assertEqual(self.loads(data(idx)), ([],)*2)
1141+
11301142
def _test_truncated_data(self, dumped, expected_error=None):
11311143
if expected_error is None:
11321144
expected_error = self.truncated_data_error

Lib/test/test_pickle.py

Lines changed: 1 addition & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -57,18 +57,6 @@ def loads(self, buf, **kwds):
5757
u = self.unpickler(f, **kwds)
5858
return u.load()
5959

60-
def test_too_large_put(self):
61-
data = lambda n: (b'((lp' + str(n).encode() + b'\n' +
62-
b'g' + str(n).encode() + b'\nt.')
63-
for idx in [10**6, 10**9, 10**12]:
64-
self.assertEqual(self.loads(data(idx)), ([],)*2)
65-
66-
def test_too_large_long_binput(self):
67-
data = lambda n: (b'(]r' + struct.pack('<I', n) +
68-
b'j' + struct.pack('<I', n) + b't.')
69-
for idx in self.itersize(1 << 17, min(sys.maxsize, (1 << 32) - 1)):
70-
self.assertEqual(self.loads(data(idx)), ([],)*2)
71-
7260

7361
class PyPicklingErrorTests(AbstractPicklingErrorTests, unittest.TestCase):
7462

@@ -300,24 +288,6 @@ class CUnpicklerTests(PyUnpicklerTests):
300288
truncated_data_error = (pickle.UnpicklingError, 'truncated')
301289
size_overflow_error = (OverflowError, 'exceeds')
302290

303-
def test_too_large_put(self):
304-
data = lambda n: (b'((lp' + str(n).encode() + b'\n' +
305-
b'g' + str(n).encode() + b'\nt.')
306-
self.assertEqual(self.loads(data(100000)), ([],)*2) # self-testing
307-
for idx in [10**6, 10**9, min(sys.maxsize, 10**12)]:
308-
with self.assertRaisesRegex(pickle.UnpicklingError,
309-
'too sparse memo indices'):
310-
self.loads(data(idx))
311-
312-
def test_too_large_long_binput(self):
313-
data = lambda n: (b'(]r' + struct.pack('<I', n) +
314-
b'j' + struct.pack('<I', n) + b't.')
315-
self.assertEqual(self.loads(data(1 << 16)), ([],)*2) # self-testing
316-
for idx in self.itersize(1 << 20, min(sys.maxsize, (1 << 32) - 1)):
317-
with self.assertRaisesRegex(pickle.UnpicklingError,
318-
'too sparse memo indices'):
319-
self.loads(data(idx))
320-
321291
class CPicklingErrorTests(PyPicklingErrorTests):
322292
pickler = _pickle.Pickler
323293

@@ -420,7 +390,7 @@ def test_pickler(self):
420390
0) # Write buffer is cleared after every dump().
421391

422392
def test_unpickler(self):
423-
basesize = support.calcobjsize('2P2nP 2P2n2i5P 2P3n8P2n2i')
393+
basesize = support.calcobjsize('2P2n2P 2P2n2i5P 2P3n8P2n2i')
424394
unpickler = _pickle.Unpickler
425395
P = struct.calcsize('P') # Size of memo table entry.
426396
n = struct.calcsize('n') # Size of mark table entry.

Modules/_pickle.c

Lines changed: 69 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -651,10 +651,11 @@ typedef struct UnpicklerObject {
651651
Pdata *stack; /* Pickle data stack, store unpickled objects. */
652652

653653
/* The unpickler memo is just an array of PyObject *s. Using a dict
654-
is unnecessary, since the keys are contiguous ints. */
654+
is unnecessary, since the keys usually are contiguous ints. */
655655
PyObject **memo;
656656
size_t memo_size; /* Capacity of the memo array */
657657
size_t memo_len; /* Number of objects in the memo */
658+
PyObject *memo_dict; /* The backup memo dict for non-continuous keys. */
658659

659660
PyObject *persistent_load; /* persistent_load() method, can be NULL. */
660661

@@ -1546,32 +1547,64 @@ _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
15461547

15471548
/* Returns NULL if idx is out of bounds. */
15481549
static PyObject *
1549-
_Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
1550+
_Unpickler_MemoGet(PickleState *st, UnpicklerObject *self, size_t idx)
15501551
{
1551-
if (idx >= self->memo_size)
1552-
return NULL;
1553-
1554-
return self->memo[idx];
1552+
PyObject *value;
1553+
if (idx < self->memo_size) {
1554+
value = self->memo[idx];
1555+
if (value != NULL) {
1556+
return value;
1557+
}
1558+
}
1559+
if (self->memo_dict != NULL) {
1560+
PyObject *key = PyLong_FromSsize_t(idx);
1561+
if (key == NULL) {
1562+
return NULL;
1563+
}
1564+
if (idx < self->memo_size) {
1565+
(void)PyDict_Pop(self->memo_dict, key, &value);
1566+
self->memo[idx] = value;
1567+
}
1568+
else {
1569+
value = PyDict_GetItemWithError(self->memo_dict, key);
1570+
}
1571+
Py_DECREF(key);
1572+
if (value != NULL || PyErr_Occurred()) {
1573+
return value;
1574+
}
1575+
}
1576+
PyErr_Format(st->UnpicklingError, "Memo value not found at index %zd", idx);
1577+
return NULL;
15551578
}
15561579

15571580
/* Returns -1 (with an exception set) on failure, 0 on success.
15581581
This takes its own reference to `value`. */
15591582
static int
1560-
_Unpickler_MemoPut(PickleState *st, UnpicklerObject *self, size_t idx, PyObject *value)
1583+
_Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
15611584
{
15621585
PyObject *old_item;
15631586

15641587
if (idx >= self->memo_size) {
1565-
/* MAX_MEMO_INDICES_GAP was introduced mainly for making testing of
1566-
* PUT, BINPUT and LONG_BINPUT opcodes simpler. It should be more
1567-
* than 1<<16 for LONG_BINPUT.
1568-
* The standard pickler never produces data that requires more than 0.
1569-
* The Python code does not have such limitation.
1570-
*/
1571-
const int MAX_MEMO_INDICES_GAP = 1 << 17;
1572-
if (idx > self->memo_len * 2 + MAX_MEMO_INDICES_GAP) {
1573-
PyErr_SetString(st->UnpicklingError, "too sparse memo indices");
1574-
return -1;
1588+
if (idx > self->memo_len * 2) {
1589+
/* The memo keys are too sparse. Use a dict instead of
1590+
* a continuous array for the memo. */
1591+
if (self->memo_dict == NULL) {
1592+
self->memo_dict = PyDict_New();
1593+
if (self->memo_dict == NULL) {
1594+
return -1;
1595+
}
1596+
}
1597+
PyObject *key = PyLong_FromSize_t(idx);
1598+
if (key == NULL) {
1599+
return -1;
1600+
}
1601+
1602+
if (PyDict_SetItem(self->memo_dict, key, value) < 0) {
1603+
Py_DECREF(key);
1604+
return -1;
1605+
}
1606+
Py_DECREF(key);
1607+
return 0;
15751608
}
15761609
if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
15771610
return -1;
@@ -1642,6 +1675,7 @@ _Unpickler_New(PyObject *module)
16421675
self->memo = memo;
16431676
self->memo_size = MEMO_SIZE;
16441677
self->memo_len = 0;
1678+
self->memo_dict = NULL;
16451679
self->persistent_load = NULL;
16461680
memset(&self->buffer, 0, sizeof(Py_buffer));
16471681
self->input_buffer = NULL;
@@ -6149,20 +6183,15 @@ load_get(PickleState *st, UnpicklerObject *self)
61496183
if (key == NULL)
61506184
return -1;
61516185
idx = PyLong_AsSsize_t(key);
6186+
Py_DECREF(key);
61526187
if (idx == -1 && PyErr_Occurred()) {
6153-
Py_DECREF(key);
61546188
return -1;
61556189
}
61566190

6157-
value = _Unpickler_MemoGet(self, idx);
6191+
value = _Unpickler_MemoGet(st, self, idx);
61586192
if (value == NULL) {
6159-
if (!PyErr_Occurred()) {
6160-
PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6161-
}
6162-
Py_DECREF(key);
61636193
return -1;
61646194
}
6165-
Py_DECREF(key);
61666195

61676196
PDATA_APPEND(self->stack, value, -1);
61686197
return 0;
@@ -6180,13 +6209,8 @@ load_binget(PickleState *st, UnpicklerObject *self)
61806209

61816210
idx = Py_CHARMASK(s[0]);
61826211

6183-
value = _Unpickler_MemoGet(self, idx);
6212+
value = _Unpickler_MemoGet(st, self, idx);
61846213
if (value == NULL) {
6185-
PyObject *key = PyLong_FromSsize_t(idx);
6186-
if (key != NULL) {
6187-
PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6188-
Py_DECREF(key);
6189-
}
61906214
return -1;
61916215
}
61926216

@@ -6206,13 +6230,8 @@ load_long_binget(PickleState *st, UnpicklerObject *self)
62066230

62076231
idx = calc_binsize(s, 4);
62086232

6209-
value = _Unpickler_MemoGet(self, idx);
6233+
value = _Unpickler_MemoGet(st, self, idx);
62106234
if (value == NULL) {
6211-
PyObject *key = PyLong_FromSsize_t(idx);
6212-
if (key != NULL) {
6213-
PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6214-
Py_DECREF(key);
6215-
}
62166235
return -1;
62176236
}
62186237

@@ -6337,7 +6356,7 @@ load_put(PickleState *state, UnpicklerObject *self)
63376356
return -1;
63386357
}
63396358

6340-
return _Unpickler_MemoPut(state, self, idx, value);
6359+
return _Unpickler_MemoPut(self, idx, value);
63416360
}
63426361

63436362
static int
@@ -6356,7 +6375,7 @@ load_binput(PickleState *state, UnpicklerObject *self)
63566375

63576376
idx = Py_CHARMASK(s[0]);
63586377

6359-
return _Unpickler_MemoPut(state, self, idx, value);
6378+
return _Unpickler_MemoPut(self, idx, value);
63606379
}
63616380

63626381
static int
@@ -6380,7 +6399,7 @@ load_long_binput(PickleState *state, UnpicklerObject *self)
63806399
return -1;
63816400
}
63826401

6383-
return _Unpickler_MemoPut(state, self, idx, value);
6402+
return _Unpickler_MemoPut(self, idx, value);
63846403
}
63856404

63866405
static int
@@ -6392,7 +6411,7 @@ load_memoize(PickleState *state, UnpicklerObject *self)
63926411
return Pdata_stack_underflow(state, self->stack);
63936412
value = self->stack->data[Py_SIZE(self->stack) - 1];
63946413

6395-
return _Unpickler_MemoPut(state, self, self->memo_len, value);
6414+
return _Unpickler_MemoPut(self, self->memo_len, value);
63966415
}
63976416

63986417
static int
@@ -7141,6 +7160,13 @@ _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
71417160
size_t res = _PyObject_SIZE(Py_TYPE(self));
71427161
if (self->memo != NULL)
71437162
res += self->memo_size * sizeof(PyObject *);
7163+
if (self->memo_dict != NULL) {
7164+
size_t s = _PySys_GetSizeOf(self->memo_dict);
7165+
if (s == (size_t)-1) {
7166+
return -1;
7167+
}
7168+
res += s;
7169+
}
71447170
if (self->marks != NULL)
71457171
res += (size_t)self->marks_size * sizeof(Py_ssize_t);
71467172
if (self->input_line != NULL)
@@ -7175,6 +7201,7 @@ Unpickler_clear(UnpicklerObject *self)
71757201
self->buffer.buf = NULL;
71767202
}
71777203

7204+
Py_CLEAR(self->memo_dict);
71787205
_Unpickler_MemoCleanup(self);
71797206
PyMem_Free(self->marks);
71807207
self->marks = NULL;
@@ -7209,6 +7236,7 @@ Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
72097236
Py_VISIT(self->stack);
72107237
Py_VISIT(self->persistent_load);
72117238
Py_VISIT(self->buffers);
7239+
Py_VISIT(self->memo_dict);
72127240
PyObject **memo = self->memo;
72137241
if (memo) {
72147242
Py_ssize_t i = self->memo_size;
@@ -7514,7 +7542,7 @@ Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored
75147542
"memo key must be positive integers.");
75157543
goto error;
75167544
}
7517-
if (_Unpickler_MemoPut(state, self, idx, value) < 0)
7545+
if (_Unpickler_MemoPut(self, idx, value) < 0)
75187546
goto error;
75197547
}
75207548
}

0 commit comments

Comments
 (0)