From 1e2ad4acb740e8674feba21084ae39bfcb279c16 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 9 Aug 2023 21:49:13 +0100 Subject: [PATCH 1/3] Handle dict subclasses correctly when dematerializing. Add tests. --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + Lib/test/test_opcache.py | 121 ++++++++++++++++++ Modules/_testinternalcapi.c | 36 ++++++ Objects/dictobject.c | 4 +- 6 files changed, 163 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 6d50ffd0a02f1f..ee9010583ff8b5 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -547,6 +547,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_lambda)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_listcomp)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_module)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_null)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_setcomp)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_string)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_unknown)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index bb1fb13f342fc6..b081c0e023fa4a 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -33,6 +33,7 @@ struct _Py_global_strings { STRUCT_FOR_STR(anon_lambda, "") STRUCT_FOR_STR(anon_listcomp, "") STRUCT_FOR_STR(anon_module, "") + STRUCT_FOR_STR(anon_null, "") STRUCT_FOR_STR(anon_setcomp, "") STRUCT_FOR_STR(anon_string, "") STRUCT_FOR_STR(anon_unknown, "") diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 2d66647438b193..8c9c7f753d8579 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -539,6 +539,7 @@ extern "C" { INIT_STR(anon_lambda, ""), \ INIT_STR(anon_listcomp, ""), \ INIT_STR(anon_module, ""), \ + INIT_STR(anon_null, ""), \ INIT_STR(anon_setcomp, ""), \ INIT_STR(anon_string, ""), \ INIT_STR(anon_unknown, ""), \ diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 7d317c012a304e..628ba4c60798bc 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1,8 +1,11 @@ +import copy +import pickle import dis import threading import types import unittest from test.support import threading_helper +import _testinternalcapi class TestLoadSuperAttrCache(unittest.TestCase): @@ -997,6 +1000,124 @@ def write(items): opname = "UNPACK_SEQUENCE_LIST" self.assert_races_do_not_crash(opname, get_items, read, write) +class C: + pass + +class TestInstanceDict(unittest.TestCase): + + def setUp(self): + c = C() + c.a, c.b, c.c = 0,0,0 + + def test_values_on_instance(self): + c = C() + c.a = 1 + C().b = 2 + c.c = 3 + self.assertEqual( + _testinternalcapi.get_objectvalues(c), + (1, '', 3) + ) + + def test_dict_materialization(self): + c = C() + c.a = 1 + c.b = 2 + c.__dict__ + self.assertIs( + _testinternalcapi.get_objectvalues(c), + None + ) + + def test_dict_dematerialization(self): + c = C() + c.a = 1 + c.b = 2 + c.__dict__ + self.assertIs( + _testinternalcapi.get_objectvalues(c), + None + ) + for _ in range(100): + c.a + self.assertEqual( + _testinternalcapi.get_objectvalues(c), + (1, 2, '') + ) + + def test_dict_dematerialization_multiple_refs(self): + c = C() + c.a = 1 + c.b = 2 + d = c.__dict__ + for _ in range(100): + c.a + self.assertIs( + _testinternalcapi.get_objectvalues(c), + None + ) + self.assertIs(c.__dict__, d) + + def test_dict_dematerialization_copy(self): + c = C() + c.a = 1 + c.b = 2 + c2 = copy.copy(c) + for _ in range(100): + c.a + c2.a + self.assertEqual( + _testinternalcapi.get_objectvalues(c), + (1, 2, '') + ) + self.assertEqual( + _testinternalcapi.get_objectvalues(c2), + (1, 2, '') + ) + c3 = copy.deepcopy(c) + for _ in range(100): + c.a + c3.a + self.assertEqual( + _testinternalcapi.get_objectvalues(c), + (1, 2, '') + ) + #NOTE -- c3.__dict__ does not de-materialize + + def test_dict_dematerialization_pickle(self): + c = C() + c.a = 1 + c.b = 2 + c2 = pickle.loads(pickle.dumps(c)) + for _ in range(100): + c.a + c2.a + self.assertEqual( + _testinternalcapi.get_objectvalues(c), + (1, 2, '') + ) + self.assertEqual( + _testinternalcapi.get_objectvalues(c2), + (1, 2, '') + ) + + def test_dict_dematerialization_subclass(self): + class D(dict): pass + c = C() + c.a = 1 + c.b = 2 + c.__dict__ = D(c.__dict__) + for _ in range(100): + c.a + self.assertIs( + _testinternalcapi.get_objectvalues(c), + None + ) + self.assertEqual( + c.__dict__, + {'a':1, 'b':2} + ) + if __name__ == "__main__": unittest.main() diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index f28b46dd9846c8..c31b55809e51a0 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -15,6 +15,7 @@ #include "pycore_bytesobject.h" // _PyBytes_Find() #include "pycore_compile.h" // _PyCompile_CodeGen, _PyCompile_OptimizeCfg, _PyCompile_Assemble, _PyCompile_CleanDoc #include "pycore_ceval.h" // _PyEval_AddPendingCall +#include "pycore_dict.h" // _PyDictOrValues_GetValues #include "pycore_fileutils.h" // _Py_normpath #include "pycore_frame.h" // _PyInterpreterFrame #include "pycore_gc.h" // PyGC_Head @@ -1530,6 +1531,40 @@ test_pymem_getallocatorsname(PyObject *self, PyObject *args) return PyUnicode_FromString(name); } +static PyObject * +get_objectvalues(PyObject *self, PyObject *obj) +{ + PyTypeObject *type = Py_TYPE(obj); + if (!_PyType_HasFeature(type, Py_TPFLAGS_MANAGED_DICT)) { + Py_RETURN_NONE; + } + PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(obj); + if (!_PyDictOrValues_IsValues(dorv)) { + Py_RETURN_NONE; + } + PyDictValues *values = _PyDictOrValues_GetValues(dorv); + PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys; + assert(keys != NULL); + int size = keys->dk_nentries; + assert(size >= 0); + PyObject *res = PyTuple_New(size); + if (res == NULL) { + return NULL; + } + _Py_DECLARE_STR(anon_null, ""); + for(int i = 0; i < size; i++) { + PyObject *item = values->values[i]; + if (item == NULL) { + item = &_Py_STR(anon_null); + } + else { + Py_INCREF(item); + } + PyTuple_SET_ITEM(res, i, item); + } + return res; +} + static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, @@ -1594,6 +1629,7 @@ static PyMethodDef module_functions[] = { {"check_pyobject_uninitialized_is_freed", check_pyobject_uninitialized_is_freed, METH_NOARGS}, {"pymem_getallocatorsname", test_pymem_getallocatorsname, METH_NOARGS}, + {"get_objectvalues", get_objectvalues, METH_O}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 931103cdc1a064..ea8c3b9a2df0dc 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -5476,7 +5476,9 @@ _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv) } // It's likely that this dict still shares its keys (if it was materialized // on request and not heavily modified): - assert(PyDict_CheckExact(dict)); + if (!PyDict_CheckExact(dict)) { + return 0; + } assert(_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_HEAPTYPE)); if (dict->ma_keys != CACHED_KEYS(Py_TYPE(obj)) || Py_REFCNT(dict) != 1) { return 0; From 0772effe6c4cd5fe305b3e0d0658d29fa21df528 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 9 Aug 2023 21:54:39 +0100 Subject: [PATCH 2/3] Avoid compiler warning. --- Modules/_testinternalcapi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index c31b55809e51a0..406807ef8b7952 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1545,7 +1545,7 @@ get_objectvalues(PyObject *self, PyObject *obj) PyDictValues *values = _PyDictOrValues_GetValues(dorv); PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys; assert(keys != NULL); - int size = keys->dk_nentries; + int size = (int)keys->dk_nentries; assert(size >= 0); PyObject *res = PyTuple_New(size); if (res == NULL) { From b27e4daba712ef19d65cdc4428ce3e3c3b7c0fb2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 9 Aug 2023 22:51:57 +0100 Subject: [PATCH 3/3] Address review comments --- Include/internal/pycore_dict.h | 2 +- Lib/test/test_opcache.py | 22 +++++++++++----------- Modules/_testinternalcapi.c | 4 ++-- Objects/dictobject.c | 12 ++++++------ 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 6ae81c033c43aa..2ad6ef0f7c04d5 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -199,7 +199,7 @@ _PyDict_NotifyEvent(PyInterpreterState *interp, } extern PyObject *_PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values); -extern int _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv); +extern bool _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv); extern PyObject *_PyDict_FromItems( PyObject *const *keys, Py_ssize_t keys_offset, PyObject *const *values, Py_ssize_t values_offset, diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 628ba4c60798bc..1baa10cbdfdef2 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1015,7 +1015,7 @@ def test_values_on_instance(self): C().b = 2 c.c = 3 self.assertEqual( - _testinternalcapi.get_objectvalues(c), + _testinternalcapi.get_object_dict_values(c), (1, '', 3) ) @@ -1025,7 +1025,7 @@ def test_dict_materialization(self): c.b = 2 c.__dict__ self.assertIs( - _testinternalcapi.get_objectvalues(c), + _testinternalcapi.get_object_dict_values(c), None ) @@ -1035,13 +1035,13 @@ def test_dict_dematerialization(self): c.b = 2 c.__dict__ self.assertIs( - _testinternalcapi.get_objectvalues(c), + _testinternalcapi.get_object_dict_values(c), None ) for _ in range(100): c.a self.assertEqual( - _testinternalcapi.get_objectvalues(c), + _testinternalcapi.get_object_dict_values(c), (1, 2, '') ) @@ -1053,7 +1053,7 @@ def test_dict_dematerialization_multiple_refs(self): for _ in range(100): c.a self.assertIs( - _testinternalcapi.get_objectvalues(c), + _testinternalcapi.get_object_dict_values(c), None ) self.assertIs(c.__dict__, d) @@ -1067,11 +1067,11 @@ def test_dict_dematerialization_copy(self): c.a c2.a self.assertEqual( - _testinternalcapi.get_objectvalues(c), + _testinternalcapi.get_object_dict_values(c), (1, 2, '') ) self.assertEqual( - _testinternalcapi.get_objectvalues(c2), + _testinternalcapi.get_object_dict_values(c2), (1, 2, '') ) c3 = copy.deepcopy(c) @@ -1079,7 +1079,7 @@ def test_dict_dematerialization_copy(self): c.a c3.a self.assertEqual( - _testinternalcapi.get_objectvalues(c), + _testinternalcapi.get_object_dict_values(c), (1, 2, '') ) #NOTE -- c3.__dict__ does not de-materialize @@ -1093,11 +1093,11 @@ def test_dict_dematerialization_pickle(self): c.a c2.a self.assertEqual( - _testinternalcapi.get_objectvalues(c), + _testinternalcapi.get_object_dict_values(c), (1, 2, '') ) self.assertEqual( - _testinternalcapi.get_objectvalues(c2), + _testinternalcapi.get_object_dict_values(c2), (1, 2, '') ) @@ -1110,7 +1110,7 @@ class D(dict): pass for _ in range(100): c.a self.assertIs( - _testinternalcapi.get_objectvalues(c), + _testinternalcapi.get_object_dict_values(c), None ) self.assertEqual( diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 406807ef8b7952..d1082c7dae8aee 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1532,7 +1532,7 @@ test_pymem_getallocatorsname(PyObject *self, PyObject *args) } static PyObject * -get_objectvalues(PyObject *self, PyObject *obj) +get_object_dict_values(PyObject *self, PyObject *obj) { PyTypeObject *type = Py_TYPE(obj); if (!_PyType_HasFeature(type, Py_TPFLAGS_MANAGED_DICT)) { @@ -1629,7 +1629,7 @@ static PyMethodDef module_functions[] = { {"check_pyobject_uninitialized_is_freed", check_pyobject_uninitialized_is_freed, METH_NOARGS}, {"pymem_getallocatorsname", test_pymem_getallocatorsname, METH_NOARGS}, - {"get_objectvalues", get_objectvalues, METH_O}, + {"get_object_dict_values", get_object_dict_values, METH_O}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index ea8c3b9a2df0dc..36375f50646fd4 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -5464,24 +5464,24 @@ _PyObject_MakeDictFromInstanceAttributes(PyObject *obj, PyDictValues *values) return make_dict_from_instance_attributes(interp, keys, values); } -// Return 1 if the dict was dematerialized, 0 otherwise. -int +// Return true if the dict was dematerialized, false otherwise. +bool _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv) { assert(_PyObject_DictOrValuesPointer(obj) == dorv); assert(!_PyDictOrValues_IsValues(*dorv)); PyDictObject *dict = (PyDictObject *)_PyDictOrValues_GetDict(*dorv); if (dict == NULL) { - return 0; + return false; } // It's likely that this dict still shares its keys (if it was materialized // on request and not heavily modified): if (!PyDict_CheckExact(dict)) { - return 0; + return false; } assert(_PyType_HasFeature(Py_TYPE(obj), Py_TPFLAGS_HEAPTYPE)); if (dict->ma_keys != CACHED_KEYS(Py_TYPE(obj)) || Py_REFCNT(dict) != 1) { - return 0; + return false; } assert(dict->ma_values); // We have an opportunity to do something *really* cool: dematerialize it! @@ -5492,7 +5492,7 @@ _PyObject_MakeInstanceAttributesFromDict(PyObject *obj, PyDictOrValues *dorv) dict->ma_keys = NULL; dict->ma_values = NULL; Py_DECREF(dict); - return 1; + return true; } int