Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
822230d
gh-115952: Fix vulnerability in the pickle module
serhiy-storchaka May 20, 2024
88f1461
Try to fix tests of 32-bit platforms.
serhiy-storchaka May 20, 2024
048099b
Try to fix more tests on 32-bit platforms.
serhiy-storchaka May 20, 2024
d9d1d1d
Apply suggestions from code review
serhiy-storchaka May 22, 2024
6f6f765
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka May 22, 2024
d0e667e
Remove empty lines.
serhiy-storchaka Jun 29, 2024
3462d0e
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Jun 29, 2024
becbd25
Merge remote-tracking branch 'refs/remotes/origin/unpickle-overalloca…
serhiy-storchaka Jun 29, 2024
b257974
Change names, add more comments and update the NEWS entry.
serhiy-storchaka Jun 30, 2024
1e487ca
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Sep 6, 2024
184984d
Support arbitrary non-continuous memo keys.
serhiy-storchaka Sep 6, 2024
f0c0728
Reworded NEWS a bit.
gpshead Sep 27, 2024
1f4e2f1
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Sep 28, 2024
c72d095
Fix C to Python integer conversion.
serhiy-storchaka Sep 28, 2024
e89bfea
Add more comments.
serhiy-storchaka Sep 28, 2024
a80106c
Fix test on 32-bit platforms.
serhiy-storchaka Sep 28, 2024
01bc6b9
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Apr 8, 2025
20aa1bf
Fix __sizeof__.
serhiy-storchaka Apr 8, 2025
ab58869
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Apr 9, 2025
2a1cff8
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Nov 18, 2025
9d4af4e
Improve security in pickle module
serhiy-storchaka Nov 18, 2025
572a2f2
reword NEWS a bit
gpshead Nov 23, 2025
d6279ae
add a couple of comments
gpshead Nov 23, 2025
022108d
expand comment in test_too_large_long_binput
gpshead Nov 23, 2025
f5f50e7
Add memory DoS impact benchmark for pickle module
gpshead Nov 24, 2025
44dbe03
fix docs build?
gpshead Nov 24, 2025
a29c90c
Merge branch 'main' into unpickle-overallocate
gpshead Nov 24, 2025
583df53
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Nov 27, 2025
54dfd58
Merge branch 'main' into unpickle-overallocate
serhiy-storchaka Dec 1, 2025
7afe4e1
Update comments.
serhiy-storchaka Dec 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Support arbitrary non-continuous memo keys.
  • Loading branch information
serhiy-storchaka committed Sep 6, 2024
commit 184984db7e85c19fae6258a471e7cbe527069c2f
12 changes: 12 additions & 0 deletions Lib/test/pickletester.py
Original file line number Diff line number Diff line change
Expand Up @@ -1127,6 +1127,18 @@ def itersize(self, start, stop):
size <<= 1
yield stop

def test_too_large_put(self):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment explaining why this and the next test method result in ([], []) being returned no matter what rather than an error when the values are too large? (I suspect readers with a knowledge of the specific pickle protocol may understand, but it isn't obvious otherwise)

data = lambda n: (b'((lp' + str(n).encode() + b'\n' +
b'g' + str(n).encode() + b'\nt.')
for idx in [10**6, 10**9, 10**12]:
self.assertEqual(self.loads(data(idx)), ([],)*2)

def test_too_large_long_binput(self):
data = lambda n: (b'(]r' + struct.pack('<I', n) +
b'j' + struct.pack('<I', n) + b't.')
for idx in self.itersize(1 << 20, min(sys.maxsize, (1 << 32) - 1)):
self.assertEqual(self.loads(data(idx)), ([],)*2)

def _test_truncated_data(self, dumped, expected_error=None):
if expected_error is None:
expected_error = self.truncated_data_error
Expand Down
32 changes: 1 addition & 31 deletions Lib/test/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,6 @@ def loads(self, buf, **kwds):
u = self.unpickler(f, **kwds)
return u.load()

def test_too_large_put(self):
data = lambda n: (b'((lp' + str(n).encode() + b'\n' +
b'g' + str(n).encode() + b'\nt.')
for idx in [10**6, 10**9, 10**12]:
self.assertEqual(self.loads(data(idx)), ([],)*2)

def test_too_large_long_binput(self):
data = lambda n: (b'(]r' + struct.pack('<I', n) +
b'j' + struct.pack('<I', n) + b't.')
for idx in self.itersize(1 << 17, min(sys.maxsize, (1 << 32) - 1)):
self.assertEqual(self.loads(data(idx)), ([],)*2)


class PyPicklingErrorTests(AbstractPicklingErrorTests, unittest.TestCase):

Expand Down Expand Up @@ -300,24 +288,6 @@ class CUnpicklerTests(PyUnpicklerTests):
truncated_data_error = (pickle.UnpicklingError, 'truncated')
size_overflow_error = (OverflowError, 'exceeds')

def test_too_large_put(self):
data = lambda n: (b'((lp' + str(n).encode() + b'\n' +
b'g' + str(n).encode() + b'\nt.')
self.assertEqual(self.loads(data(100000)), ([],)*2) # self-testing
for idx in [10**6, 10**9, min(sys.maxsize, 10**12)]:
with self.assertRaisesRegex(pickle.UnpicklingError,
'too sparse memo indices'):
self.loads(data(idx))

def test_too_large_long_binput(self):
data = lambda n: (b'(]r' + struct.pack('<I', n) +
b'j' + struct.pack('<I', n) + b't.')
self.assertEqual(self.loads(data(1 << 16)), ([],)*2) # self-testing
for idx in self.itersize(1 << 20, min(sys.maxsize, (1 << 32) - 1)):
with self.assertRaisesRegex(pickle.UnpicklingError,
'too sparse memo indices'):
self.loads(data(idx))

class CPicklingErrorTests(PyPicklingErrorTests):
pickler = _pickle.Pickler

Expand Down Expand Up @@ -420,7 +390,7 @@ def test_pickler(self):
0) # Write buffer is cleared after every dump().

def test_unpickler(self):
basesize = support.calcobjsize('2P2nP 2P2n2i5P 2P3n8P2n2i')
basesize = support.calcobjsize('2P2n2P 2P2n2i5P 2P3n8P2n2i')
unpickler = _pickle.Unpickler
P = struct.calcsize('P') # Size of memo table entry.
n = struct.calcsize('n') # Size of mark table entry.
Expand Down
110 changes: 69 additions & 41 deletions Modules/_pickle.c
Original file line number Diff line number Diff line change
Expand Up @@ -651,10 +651,11 @@ typedef struct UnpicklerObject {
Pdata *stack; /* Pickle data stack, store unpickled objects. */

/* The unpickler memo is just an array of PyObject *s. Using a dict
is unnecessary, since the keys are contiguous ints. */
is unnecessary, since the keys usually are contiguous ints. */
PyObject **memo;
size_t memo_size; /* Capacity of the memo array */
size_t memo_len; /* Number of objects in the memo */
PyObject *memo_dict; /* The backup memo dict for non-continuous keys. */

PyObject *persistent_load; /* persistent_load() method, can be NULL. */

Expand Down Expand Up @@ -1546,32 +1547,64 @@ _Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)

/* Returns NULL if idx is out of bounds. */
static PyObject *
_Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
_Unpickler_MemoGet(PickleState *st, UnpicklerObject *self, size_t idx)
{
if (idx >= self->memo_size)
return NULL;

return self->memo[idx];
PyObject *value;
if (idx < self->memo_size) {
value = self->memo[idx];
if (value != NULL) {
return value;
}
}
if (self->memo_dict != NULL) {
PyObject *key = PyLong_FromSsize_t(idx);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Feeding a size_t into PyLong_FromSsize_t could be a problem if size_t idx were large enough to overflow Py_ssize_t. Is that plausible in this code?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Actually, the argument of _Unpickler_MemoGet() is always a Py_ssize_t cast to size_t, but it is better to always treat it as size_t here.

if (key == NULL) {
return NULL;
}
if (idx < self->memo_size) {
(void)PyDict_Pop(self->memo_dict, key, &value);
self->memo[idx] = value;
}
else {
value = PyDict_GetItemWithError(self->memo_dict, key);
}
Py_DECREF(key);
if (value != NULL || PyErr_Occurred()) {
return value;
}
}
PyErr_Format(st->UnpicklingError, "Memo value not found at index %zd", idx);
return NULL;
}

/* Returns -1 (with an exception set) on failure, 0 on success.
This takes its own reference to `value`. */
static int
_Unpickler_MemoPut(PickleState *st, UnpicklerObject *self, size_t idx, PyObject *value)
_Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
{
PyObject *old_item;

if (idx >= self->memo_size) {
/* MAX_MEMO_INDICES_GAP was introduced mainly for making testing of
* PUT, BINPUT and LONG_BINPUT opcodes simpler. It should be more
* than 1<<16 for LONG_BINPUT.
* The standard pickler never produces data that requires more than 0.
* The Python code does not have such limitation.
*/
const int MAX_MEMO_INDICES_GAP = 1 << 17;
if (idx > self->memo_len * 2 + MAX_MEMO_INDICES_GAP) {
PyErr_SetString(st->UnpicklingError, "too sparse memo indices");
return -1;
if (idx > self->memo_len * 2) {
/* The memo keys are too sparse. Use a dict instead of
* a continuous array for the memo. */
if (self->memo_dict == NULL) {
self->memo_dict = PyDict_New();
if (self->memo_dict == NULL) {
return -1;
}
}
PyObject *key = PyLong_FromSize_t(idx);
if (key == NULL) {
return -1;
}

if (PyDict_SetItem(self->memo_dict, key, value) < 0) {
Py_DECREF(key);
return -1;
}
Py_DECREF(key);
return 0;
}
if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
return -1;
Expand Down Expand Up @@ -1642,6 +1675,7 @@ _Unpickler_New(PyObject *module)
self->memo = memo;
self->memo_size = MEMO_SIZE;
self->memo_len = 0;
self->memo_dict = NULL;
self->persistent_load = NULL;
memset(&self->buffer, 0, sizeof(Py_buffer));
self->input_buffer = NULL;
Expand Down Expand Up @@ -6149,20 +6183,15 @@ load_get(PickleState *st, UnpicklerObject *self)
if (key == NULL)
return -1;
idx = PyLong_AsSsize_t(key);
Py_DECREF(key);
if (idx == -1 && PyErr_Occurred()) {
Py_DECREF(key);
return -1;
}

value = _Unpickler_MemoGet(self, idx);
value = _Unpickler_MemoGet(st, self, idx);
if (value == NULL) {
if (!PyErr_Occurred()) {
PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
}
Py_DECREF(key);
return -1;
}
Py_DECREF(key);

PDATA_APPEND(self->stack, value, -1);
return 0;
Expand All @@ -6180,13 +6209,8 @@ load_binget(PickleState *st, UnpicklerObject *self)

idx = Py_CHARMASK(s[0]);

value = _Unpickler_MemoGet(self, idx);
value = _Unpickler_MemoGet(st, self, idx);
if (value == NULL) {
PyObject *key = PyLong_FromSsize_t(idx);
if (key != NULL) {
PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
Py_DECREF(key);
}
return -1;
}

Expand All @@ -6206,13 +6230,8 @@ load_long_binget(PickleState *st, UnpicklerObject *self)

idx = calc_binsize(s, 4);

value = _Unpickler_MemoGet(self, idx);
value = _Unpickler_MemoGet(st, self, idx);
if (value == NULL) {
PyObject *key = PyLong_FromSsize_t(idx);
if (key != NULL) {
PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
Py_DECREF(key);
}
return -1;
}

Expand Down Expand Up @@ -6337,7 +6356,7 @@ load_put(PickleState *state, UnpicklerObject *self)
return -1;
}

return _Unpickler_MemoPut(state, self, idx, value);
return _Unpickler_MemoPut(self, idx, value);
}

static int
Expand All @@ -6356,7 +6375,7 @@ load_binput(PickleState *state, UnpicklerObject *self)

idx = Py_CHARMASK(s[0]);

return _Unpickler_MemoPut(state, self, idx, value);
return _Unpickler_MemoPut(self, idx, value);
}

static int
Expand All @@ -6380,7 +6399,7 @@ load_long_binput(PickleState *state, UnpicklerObject *self)
return -1;
}

return _Unpickler_MemoPut(state, self, idx, value);
return _Unpickler_MemoPut(self, idx, value);
}

static int
Expand All @@ -6392,7 +6411,7 @@ load_memoize(PickleState *state, UnpicklerObject *self)
return Pdata_stack_underflow(state, self->stack);
value = self->stack->data[Py_SIZE(self->stack) - 1];

return _Unpickler_MemoPut(state, self, self->memo_len, value);
return _Unpickler_MemoPut(self, self->memo_len, value);
}

static int
Expand Down Expand Up @@ -7141,6 +7160,13 @@ _pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
size_t res = _PyObject_SIZE(Py_TYPE(self));
if (self->memo != NULL)
res += self->memo_size * sizeof(PyObject *);
if (self->memo_dict != NULL) {
size_t s = _PySys_GetSizeOf(self->memo_dict);
if (s == (size_t)-1) {
return -1;
}
res += s;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be pedantic, check for the possibility of overflow first. (Practically speaking, I don't think res or s could ever be large enough for that to actually happen.) The existing code being replaced already had this "problem", FWIW.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I wrote the code for all these __sizeof__ implementations, I initially added integer overflow checks, but MvL told me to remove them. If we are going to add integer overflow checks, we should add them not only here, but also a few lines above (for res += self->memo_size * sizeof(PyObject *)) and in dozens of other places. This is a separate large issue.

}
if (self->marks != NULL)
res += (size_t)self->marks_size * sizeof(Py_ssize_t);
if (self->input_line != NULL)
Expand Down Expand Up @@ -7175,6 +7201,7 @@ Unpickler_clear(UnpicklerObject *self)
self->buffer.buf = NULL;
}

Py_CLEAR(self->memo_dict);
_Unpickler_MemoCleanup(self);
PyMem_Free(self->marks);
self->marks = NULL;
Expand Down Expand Up @@ -7209,6 +7236,7 @@ Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
Py_VISIT(self->stack);
Py_VISIT(self->persistent_load);
Py_VISIT(self->buffers);
Py_VISIT(self->memo_dict);
PyObject **memo = self->memo;
if (memo) {
Py_ssize_t i = self->memo_size;
Expand Down Expand Up @@ -7514,7 +7542,7 @@ Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored
"memo key must be positive integers.");
goto error;
}
if (_Unpickler_MemoPut(state, self, idx, value) < 0)
if (_Unpickler_MemoPut(self, idx, value) < 0)
goto error;
}
}
Expand Down