From dd71915ef6b902fa3a16a253664f4e1542a7c2b4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 22 Jul 2024 23:51:32 +0300 Subject: [PATCH 1/3] gh-82951: Fix serializing by name in pickle protocols < 4 Serializing objects with complex __qualname__ (such as unbound methods and nested classes) by name no longer involves serializing parent objects by value in pickle protocols < 4. --- Lib/pickle.py | 35 ++++++++--- Lib/test/pickletester.py | 12 ++++ ...4-07-23-09-14-44.gh-issue-82951.-F5p5A.rst | 3 + Modules/_pickle.c | 61 +++++++++---------- 4 files changed, 73 insertions(+), 38 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-23-09-14-44.gh-issue-82951.-F5p5A.rst diff --git a/Lib/pickle.py b/Lib/pickle.py index 115bd893ca1a38..93da6bebce117b 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -1110,11 +1110,34 @@ def save_global(self, obj, name=None): self.save(module_name) self.save(name) write(STACK_GLOBAL) - elif parent is not module: - self.save_reduce(getattr, (parent, lastname)) - elif self.proto >= 3: - write(GLOBAL + bytes(module_name, "utf-8") + b'\n' + - bytes(name, "utf-8") + b'\n') + elif '.' in name: + dotted_path = name.split('.') + name = dotted_path.pop(0) + write = self.write + save = self.save + for attrname in dotted_path: + save(getattr) + if self.proto < 2: + write(MARK) + self._save_by_name(module_name, name) + for attrname in dotted_path: + save(attrname) + if self.proto < 2: + write(TUPLE) + else: + write(TUPLE2) + write(REDUCE) + else: + self._save_by_name(module_name, name) + + self.memoize(obj) + + def _save_by_name(self, module_name, name): + write = self.write + if self.proto >= 3: + # Non-ASCII identifiers are supported only with protocols >= 3. + self.write(GLOBAL + bytes(module_name, "utf-8") + b'\n' + + bytes(name, "utf-8") + b'\n') else: if self.fix_imports: r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING @@ -1131,8 +1154,6 @@ def save_global(self, obj, name=None): "can't pickle global identifier '%s.%s' using " "pickle protocol %i" % (module, name, self.proto)) from None - self.memoize(obj) - def save_type(self, obj): if obj is type(None): return self.save_reduce(type, (None,), obj=obj) diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index 9922591ce7114a..13663220fc77ea 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -2818,6 +2818,18 @@ class Recursive: self.assertIs(unpickled, Recursive) del Recursive.mod # break reference loop + def test_recursive_nested_names2(self): + global Recursive + class Recursive: + pass + Recursive.ref = Recursive + Recursive.__qualname__ = 'Recursive.ref' + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(proto=proto): + unpickled = self.loads(self.dumps(Recursive, proto)) + self.assertIs(unpickled, Recursive) + del Recursive.ref # break reference loop + def test_py_methods(self): global PyMethodsTest class PyMethodsTest: diff --git a/Misc/NEWS.d/next/Library/2024-07-23-09-14-44.gh-issue-82951.-F5p5A.rst b/Misc/NEWS.d/next/Library/2024-07-23-09-14-44.gh-issue-82951.-F5p5A.rst new file mode 100644 index 00000000000000..b3f07889119c9f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-23-09-14-44.gh-issue-82951.-F5p5A.rst @@ -0,0 +1,3 @@ +Serializing objects with complex ``__qualname__`` (such as unbound methods +and nested classes) by name no longer involves serializing parent objects by +value in pickle protocols < 4. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 7eebe922c93ca1..49d9b9783a395f 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1829,27 +1829,22 @@ get_dotted_path(PyObject *obj, PyObject *name) } static PyObject * -get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent) +get_deep_attribute(PyObject *obj, PyObject *names) { Py_ssize_t i, n; - PyObject *parent = NULL; assert(PyList_CheckExact(names)); Py_INCREF(obj); n = PyList_GET_SIZE(names); for (i = 0; i < n; i++) { PyObject *name = PyList_GET_ITEM(names, i); - Py_XSETREF(parent, obj); + PyObject *parent = obj; (void)PyObject_GetOptionalAttr(parent, name, &obj); + Py_DECREF(parent); if (obj == NULL) { - Py_DECREF(parent); return NULL; } } - if (pparent != NULL) - *pparent = parent; - else - Py_XDECREF(parent); return obj; } @@ -1863,7 +1858,7 @@ getattribute(PyObject *obj, PyObject *name, int allow_qualname) dotted_path = get_dotted_path(obj, name); if (dotted_path == NULL) return NULL; - attr = get_deep_attribute(obj, dotted_path, NULL); + attr = get_deep_attribute(obj, dotted_path); Py_DECREF(dotted_path); } else { @@ -1888,7 +1883,7 @@ _checkmodule(PyObject *module_name, PyObject *module, return -1; } - PyObject *candidate = get_deep_attribute(module, dotted_path, NULL); + PyObject *candidate = get_deep_attribute(module, dotted_path); if (candidate == NULL) { return -1; } @@ -3590,9 +3585,7 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, PyObject *global_name = NULL; PyObject *module_name = NULL; PyObject *module = NULL; - PyObject *parent = NULL; PyObject *dotted_path = NULL; - PyObject *lastname = NULL; PyObject *cls; int status = 0; @@ -3633,10 +3626,7 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, obj, module_name); goto error; } - lastname = Py_NewRef(PyList_GET_ITEM(dotted_path, - PyList_GET_SIZE(dotted_path) - 1)); - cls = get_deep_attribute(module, dotted_path, &parent); - Py_CLEAR(dotted_path); + cls = get_deep_attribute(module, dotted_path); if (cls == NULL) { PyErr_Format(st->PicklingError, "Can't pickle %R: attribute lookup %S on %S failed", @@ -3723,9 +3713,6 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, } else { gen_global: - if (parent == module) { - Py_SETREF(global_name, Py_NewRef(lastname)); - } if (self->proto >= 4) { const char stack_global_op = STACK_GLOBAL; @@ -3737,17 +3724,22 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, if (_Pickler_Write(self, &stack_global_op, 1) < 0) goto error; } - else if (parent != module) { - PyObject *reduce_value = Py_BuildValue("(O(OO))", - st->getattr, parent, lastname); - if (reduce_value == NULL) - goto error; - status = save_reduce(st, self, reduce_value, NULL); - Py_DECREF(reduce_value); - if (status < 0) - goto error; - } else { + const char mark_op = MARK; + const char tupletwo_op = (self->proto < 2) ? TUPLE : TUPLE2; + const char reduce_op = REDUCE; + Py_ssize_t i; + if (PyList_GET_SIZE(dotted_path) > 1) { + Py_SETREF(global_name, Py_NewRef(PyList_GET_ITEM(dotted_path, 0))); + } + for (i = 1; i < PyList_GET_SIZE(dotted_path); i++) { + if (save(st, self, st->getattr, 0) < 0 || + (self->proto < 2 && _Pickler_Write(self, &mark_op, 1) < 0)) + { + goto error; + } + } + /* Generate a normal global opcode if we are using a pickle protocol < 4, or if the object is not registered in the extension registry. */ @@ -3812,6 +3804,15 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, Py_DECREF(encoded); if (_Pickler_Write(self, "\n", 1) < 0) goto error; + + for (i = 1; i < PyList_GET_SIZE(dotted_path); i++) { + if (save(st, self, PyList_GET_ITEM(dotted_path, i), 0) < 0 || + _Pickler_Write(self, &tupletwo_op, 1) < 0 || + _Pickler_Write(self, &reduce_op, 1) < 0) + { + goto error; + } + } } /* Memoize the object. */ if (memo_put(st, self, obj) < 0) @@ -3825,9 +3826,7 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, Py_XDECREF(module_name); Py_XDECREF(global_name); Py_XDECREF(module); - Py_XDECREF(parent); Py_XDECREF(dotted_path); - Py_XDECREF(lastname); return status; } From 730383a2c6375dc385f870f101c3b46df2b3269b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 25 Jul 2024 10:52:48 +0300 Subject: [PATCH 2/3] Polishing. --- Lib/pickle.py | 15 ++++++++------- Modules/_pickle.c | 9 ++++++--- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Lib/pickle.py b/Lib/pickle.py index 93da6bebce117b..2d764980cdf7b2 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -1111,15 +1111,17 @@ def save_global(self, obj, name=None): self.save(name) write(STACK_GLOBAL) elif '.' in name: + # In protocol < 4, objects with multi-part __qualname__ + # are represented as + # getattr(getattr(..., attrname1), attrname2). dotted_path = name.split('.') name = dotted_path.pop(0) - write = self.write save = self.save for attrname in dotted_path: save(getattr) if self.proto < 2: write(MARK) - self._save_by_name(module_name, name) + self._save_toplevel_by_name(module_name, name) for attrname in dotted_path: save(attrname) if self.proto < 2: @@ -1128,12 +1130,11 @@ def save_global(self, obj, name=None): write(TUPLE2) write(REDUCE) else: - self._save_by_name(module_name, name) + self._save_toplevel_by_name(module_name, name) self.memoize(obj) - def _save_by_name(self, module_name, name): - write = self.write + def _save_toplevel_by_name(self, module_name, name): if self.proto >= 3: # Non-ASCII identifiers are supported only with protocols >= 3. self.write(GLOBAL + bytes(module_name, "utf-8") + b'\n' + @@ -1147,8 +1148,8 @@ def _save_by_name(self, module_name, name): elif module_name in r_import_mapping: module_name = r_import_mapping[module_name] try: - write(GLOBAL + bytes(module_name, "ascii") + b'\n' + - bytes(name, "ascii") + b'\n') + self.write(GLOBAL + bytes(module_name, "ascii") + b'\n' + + bytes(name, "ascii") + b'\n') except UnicodeEncodeError: raise PicklingError( "can't pickle global identifier '%s.%s' using " diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 49d9b9783a395f..f48fa3c252b707 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -3725,6 +3725,12 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, goto error; } else { + /* Generate a normal global opcode if we are using a pickle + protocol < 4, or if the object is not registered in the + extension registry. + + Objects with multi-part __qualname__ are represented as + getattr(getattr(..., attrname1), attrname2). */ const char mark_op = MARK; const char tupletwo_op = (self->proto < 2) ? TUPLE : TUPLE2; const char reduce_op = REDUCE; @@ -3740,9 +3746,6 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, } } - /* Generate a normal global opcode if we are using a pickle - protocol < 4, or if the object is not registered in the - extension registry. */ PyObject *encoded; PyObject *(*unicode_encoder)(PyObject *); From 1c2aa227de24d0422ddece9b4a37b4d870cde9a3 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 25 Jul 2024 11:16:53 +0300 Subject: [PATCH 3/3] Restore dubious "optimization". --- Modules/_pickle.c | 57 ++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index f48fa3c252b707..861363b68c20c5 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1829,22 +1829,27 @@ get_dotted_path(PyObject *obj, PyObject *name) } static PyObject * -get_deep_attribute(PyObject *obj, PyObject *names) +get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent) { Py_ssize_t i, n; + PyObject *parent = NULL; assert(PyList_CheckExact(names)); Py_INCREF(obj); n = PyList_GET_SIZE(names); for (i = 0; i < n; i++) { PyObject *name = PyList_GET_ITEM(names, i); - PyObject *parent = obj; + Py_XSETREF(parent, obj); (void)PyObject_GetOptionalAttr(parent, name, &obj); - Py_DECREF(parent); if (obj == NULL) { + Py_DECREF(parent); return NULL; } } + if (pparent != NULL) + *pparent = parent; + else + Py_XDECREF(parent); return obj; } @@ -1858,7 +1863,7 @@ getattribute(PyObject *obj, PyObject *name, int allow_qualname) dotted_path = get_dotted_path(obj, name); if (dotted_path == NULL) return NULL; - attr = get_deep_attribute(obj, dotted_path); + attr = get_deep_attribute(obj, dotted_path, NULL); Py_DECREF(dotted_path); } else { @@ -1883,7 +1888,7 @@ _checkmodule(PyObject *module_name, PyObject *module, return -1; } - PyObject *candidate = get_deep_attribute(module, dotted_path); + PyObject *candidate = get_deep_attribute(module, dotted_path, NULL); if (candidate == NULL) { return -1; } @@ -3585,6 +3590,7 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, PyObject *global_name = NULL; PyObject *module_name = NULL; PyObject *module = NULL; + PyObject *parent = NULL; PyObject *dotted_path = NULL; PyObject *cls; int status = 0; @@ -3626,7 +3632,7 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, obj, module_name); goto error; } - cls = get_deep_attribute(module, dotted_path); + cls = get_deep_attribute(module, dotted_path, &parent); if (cls == NULL) { PyErr_Format(st->PicklingError, "Can't pickle %R: attribute lookup %S on %S failed", @@ -3713,6 +3719,12 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, } else { gen_global: + if (parent == module) { + Py_SETREF(global_name, + Py_NewRef(PyList_GET_ITEM(dotted_path, + PyList_GET_SIZE(dotted_path) - 1))); + Py_CLEAR(dotted_path); + } if (self->proto >= 4) { const char stack_global_op = STACK_GLOBAL; @@ -3735,14 +3747,16 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, const char tupletwo_op = (self->proto < 2) ? TUPLE : TUPLE2; const char reduce_op = REDUCE; Py_ssize_t i; - if (PyList_GET_SIZE(dotted_path) > 1) { - Py_SETREF(global_name, Py_NewRef(PyList_GET_ITEM(dotted_path, 0))); - } - for (i = 1; i < PyList_GET_SIZE(dotted_path); i++) { - if (save(st, self, st->getattr, 0) < 0 || - (self->proto < 2 && _Pickler_Write(self, &mark_op, 1) < 0)) - { - goto error; + if (dotted_path) { + if (PyList_GET_SIZE(dotted_path) > 1) { + Py_SETREF(global_name, Py_NewRef(PyList_GET_ITEM(dotted_path, 0))); + } + for (i = 1; i < PyList_GET_SIZE(dotted_path); i++) { + if (save(st, self, st->getattr, 0) < 0 || + (self->proto < 2 && _Pickler_Write(self, &mark_op, 1) < 0)) + { + goto error; + } } } @@ -3808,12 +3822,14 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, if (_Pickler_Write(self, "\n", 1) < 0) goto error; - for (i = 1; i < PyList_GET_SIZE(dotted_path); i++) { - if (save(st, self, PyList_GET_ITEM(dotted_path, i), 0) < 0 || - _Pickler_Write(self, &tupletwo_op, 1) < 0 || - _Pickler_Write(self, &reduce_op, 1) < 0) - { - goto error; + if (dotted_path) { + for (i = 1; i < PyList_GET_SIZE(dotted_path); i++) { + if (save(st, self, PyList_GET_ITEM(dotted_path, i), 0) < 0 || + _Pickler_Write(self, &tupletwo_op, 1) < 0 || + _Pickler_Write(self, &reduce_op, 1) < 0) + { + goto error; + } } } } @@ -3829,6 +3845,7 @@ save_global(PickleState *st, PicklerObject *self, PyObject *obj, Py_XDECREF(module_name); Py_XDECREF(global_name); Py_XDECREF(module); + Py_XDECREF(parent); Py_XDECREF(dotted_path); return status;