From 574e48c060ff85c335bcca5035d8c23c8158c9e5 Mon Sep 17 00:00:00 2001 From: Brett Simmers Date: Wed, 27 Mar 2024 16:06:31 -0700 Subject: [PATCH 1/4] Make _codecs module thread-safe --- Include/internal/pycore_codecs.h | 28 ++++ Include/internal/pycore_interp.h | 6 +- .../internal/pycore_pyatomic_ft_wrappers.h | 7 + Python/codecs.c | 158 ++++++++++++------ Python/pystate.c | 6 +- 5 files changed, 146 insertions(+), 59 deletions(-) diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h index a2a7151d50ade7..258de217632d4c 100644 --- a/Include/internal/pycore_codecs.h +++ b/Include/internal/pycore_codecs.h @@ -8,6 +8,8 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +#include "pycore_lock.h" // PyMutex + extern PyObject* _PyCodec_Lookup(const char *encoding); /* Text codec specific encoding and decoding API. @@ -48,6 +50,32 @@ extern PyObject* _PyCodecInfo_GetIncrementalEncoder( PyObject *codec_info, const char *errors); +// Per-interpreter state used by codecs.c. +struct codecs_state { + // A list of callable objects used to search for codecs. + PyObject *search_path; + + // A dict mapping codec names to codecs returned from a callable in + // search_path. + PyObject *search_cache; + + // A dict mapping error handling strategies to functions to implement them. + PyObject *error_registry; + +#ifdef Py_GIL_DISABLED + // Used to safely delete a specific item from search_path. + PyMutex search_path_mutex; + + // Used to synchronize initialization of the PyObject* members above. + PyMutex init_mutex; +#endif + + // If an acquire load of initialized yields 1, all of the PyObject* members + // above will be set, and their values will not change until interpreter + // finalization. This allows common operations to freely read them without + // additional synchronization. + int initialized; +}; #ifdef __cplusplus } diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index b8d0fdcce11ba8..3f1ae78b7f7014 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -14,6 +14,7 @@ extern "C" { #include "pycore_atexit.h" // struct atexit_state #include "pycore_ceval_state.h" // struct _ceval_state #include "pycore_code.h" // struct callable_cache +#include "pycore_codecs.h" // struct codecs_state #include "pycore_context.h" // struct _Py_context_state #include "pycore_crossinterp.h" // struct _xidregistry #include "pycore_dict_state.h" // struct _Py_dict_state @@ -164,10 +165,7 @@ struct _is { possible to facilitate out-of-process observability tools. */ - PyObject *codec_search_path; - PyObject *codec_search_cache; - PyObject *codec_error_registry; - int codecs_initialized; + struct codecs_state codecs; PyConfig config; unsigned long feature_flags; diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index e441600d54e1aa..10d74958f86717 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -23,18 +23,25 @@ extern "C" { #define FT_ATOMIC_LOAD_SSIZE(value) _Py_atomic_load_ssize(&value) #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) \ _Py_atomic_load_ssize_relaxed(&value) +#define FT_ATOMIC_LOAD_INT_ACQUIRE(value) _Py_atomic_load_int_acquire(&value) + #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ _Py_atomic_store_ptr_release(&value, new_value) #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ _Py_atomic_store_ssize_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_INT_RELEASE(value, new_value) \ + _Py_atomic_store_int_release(&value, new_value) #else #define FT_ATOMIC_LOAD_SSIZE(value) value #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value +#define FT_ATOMIC_LOAD_INT_ACQUIRE(value) value + #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_INT_RELEASE(value, new_value) value = new_value #endif #ifdef __cplusplus diff --git a/Python/codecs.c b/Python/codecs.c index d8fe7b22063a80..9eda246d34c0c9 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -11,6 +11,8 @@ Copyright (c) Corporation for National Research Initiatives. #include "Python.h" #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_interp.h" // PyInterpreterState.codec_search_path +#include "pycore_lock.h" // PyMutex +#include "pycore_pyatomic_ft_wrappers.h" #include "pycore_pyerrors.h" // _PyErr_FormatNote() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI @@ -30,13 +32,15 @@ const char *Py_hexdigits = "0123456789abcdef"; */ -static int _PyCodecRegistry_Init(void); /* Forward */ +static int _PyCodecRegistry_EnsureInit(PyInterpreterState *); /* Forward */ int PyCodec_Register(PyObject *search_function) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) + if (_PyCodecRegistry_EnsureInit(interp) < 0) { goto onError; + } + PyObject *search_path = interp->codecs.search_path; if (search_function == NULL) { PyErr_BadArgument(); goto onError; @@ -45,7 +49,14 @@ int PyCodec_Register(PyObject *search_function) PyErr_SetString(PyExc_TypeError, "argument must be callable"); goto onError; } - return PyList_Append(interp->codec_search_path, search_function); +#ifdef Py_GIL_DISABLED + PyMutex_Lock(&interp->codecs.search_path_mutex); +#endif + int ret = PyList_Append(search_path, search_function); +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&interp->codecs.search_path_mutex); +#endif + return ret; onError: return -1; @@ -55,22 +66,33 @@ int PyCodec_Unregister(PyObject *search_function) { PyInterpreterState *interp = _PyInterpreterState_GET(); - PyObject *codec_search_path = interp->codec_search_path; - /* Do nothing if codec_search_path is not created yet or was cleared. */ - if (codec_search_path == NULL) { + /* Do nothing if codec data structures are not created yet. */ + if (FT_ATOMIC_LOAD_INT_ACQUIRE(interp->codecs.initialized) == 0) { return 0; } + PyObject *codec_search_path = interp->codecs.search_path; assert(PyList_CheckExact(codec_search_path)); - Py_ssize_t n = PyList_GET_SIZE(codec_search_path); - for (Py_ssize_t i = 0; i < n; i++) { - PyObject *item = PyList_GET_ITEM(codec_search_path, i); + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(codec_search_path); i++) { +#ifdef Py_GIL_DISABLED + PyMutex_Lock(&interp->codecs.search_path_mutex); +#endif + PyObject *item = PyList_GetItemRef(codec_search_path, i); + int ret = 1; if (item == search_function) { - if (interp->codec_search_cache != NULL) { - assert(PyDict_CheckExact(interp->codec_search_cache)); - PyDict_Clear(interp->codec_search_cache); - } - return PyList_SetSlice(codec_search_path, i, i+1, NULL); + // We hold a reference to the item, so its destructor can't run + // while we hold search_path_mutex. + ret = PyList_SetSlice(codec_search_path, i, i+1, NULL); + } +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&interp->codecs.search_path_mutex); +#endif + Py_DECREF(item); + if (ret != 1) { + assert(interp->codecs.search_cache != NULL); + assert(PyDict_CheckExact(interp->codecs.search_cache)); + PyDict_Clear(interp->codecs.search_cache); + return ret; } } return 0; @@ -132,7 +154,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) } PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) { + if (_PyCodecRegistry_EnsureInit(interp) < 0) { return NULL; } @@ -147,7 +169,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) /* First, try to lookup the name in the registry dictionary */ PyObject *result; - if (PyDict_GetItemRef(interp->codec_search_cache, v, &result) < 0) { + if (PyDict_GetItemRef(interp->codecs.search_cache, v, &result) < 0) { goto onError; } if (result != NULL) { @@ -156,7 +178,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) } /* Next, scan the search functions in order of registration */ - const Py_ssize_t len = PyList_Size(interp->codec_search_path); + const Py_ssize_t len = PyList_Size(interp->codecs.search_path); if (len < 0) goto onError; if (len == 0) { @@ -170,14 +192,15 @@ PyObject *_PyCodec_Lookup(const char *encoding) for (i = 0; i < len; i++) { PyObject *func; - func = PyList_GetItem(interp->codec_search_path, i); + func = PyList_GetItemRef(interp->codecs.search_path, i); if (func == NULL) goto onError; result = PyObject_CallOneArg(func, v); + Py_DECREF(func); if (result == NULL) goto onError; if (result == Py_None) { - Py_DECREF(result); + Py_CLEAR(result); continue; } if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) { @@ -188,7 +211,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) } break; } - if (i == len) { + if (result == NULL) { /* XXX Perhaps we should cache misses too ? */ PyErr_Format(PyExc_LookupError, "unknown encoding: %s", encoding); @@ -196,7 +219,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) } /* Cache and return the result */ - if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) { + if (PyDict_SetItem(interp->codecs.search_cache, v, result) < 0) { Py_DECREF(result); goto onError; } @@ -600,13 +623,14 @@ PyObject *_PyCodec_DecodeText(PyObject *object, int PyCodec_RegisterError(const char *name, PyObject *error) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) + if (_PyCodecRegistry_EnsureInit(interp) < 0) { return -1; + } if (!PyCallable_Check(error)) { PyErr_SetString(PyExc_TypeError, "handler must be callable"); return -1; } - return PyDict_SetItemString(interp->codec_error_registry, + return PyDict_SetItemString(interp->codecs.error_registry, name, error); } @@ -616,13 +640,14 @@ int PyCodec_RegisterError(const char *name, PyObject *error) PyObject *PyCodec_LookupError(const char *name) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) + if (_PyCodecRegistry_EnsureInit(interp) < 0) { return NULL; + } if (name==NULL) name = "strict"; PyObject *handler; - if (PyDict_GetItemStringRef(interp->codec_error_registry, name, &handler) < 0) { + if (PyDict_GetItemStringRef(interp->codecs.error_registry, name, &handler) < 0) { return NULL; } if (handler == NULL) { @@ -1375,7 +1400,7 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) return PyCodec_SurrogateEscapeErrors(exc); } -static int _PyCodecRegistry_Init(void) +static int _PyCodecRegistry_EnsureInit(PyInterpreterState *interp) { static struct { const char *name; @@ -1463,45 +1488,74 @@ static int _PyCodecRegistry_Init(void) } }; - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyObject *mod; - - if (interp->codec_search_path != NULL) + if (FT_ATOMIC_LOAD_INT_ACQUIRE(interp->codecs.initialized) == 1) { return 0; - - interp->codec_search_path = PyList_New(0); - if (interp->codec_search_path == NULL) { - return -1; } - interp->codec_search_cache = PyDict_New(); - if (interp->codec_search_cache == NULL) { - return -1; + PyObject *search_path = NULL, *search_cache = NULL, *error_registry = NULL; + search_path = PyList_New(0); + if (search_path == NULL) { + goto error; } - - interp->codec_error_registry = PyDict_New(); - if (interp->codec_error_registry == NULL) { - return -1; + search_cache = PyDict_New(); + if (search_cache == NULL) { + goto error; + } + error_registry = PyDict_New(); + if (error_registry == NULL) { + goto error; } - for (size_t i = 0; i < Py_ARRAY_LENGTH(methods); ++i) { PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL); - if (!func) { - return -1; + if (func == NULL) { + goto error; } - int res = PyCodec_RegisterError(methods[i].name, func); + int res = PyDict_SetItemString(error_registry, methods[i].name, func); Py_DECREF(func); - if (res) { - return -1; + if (res < 0) { + goto error; } } - mod = PyImport_ImportModule("encodings"); - if (mod == NULL) { - return -1; +#ifdef Py_GIL_DISABLED + PyMutex_Lock(&interp->codecs.init_mutex); +#endif + int do_import = 1; + if (interp->codecs.initialized == 0) { + interp->codecs.search_path = search_path; + interp->codecs.search_cache = search_cache; + interp->codecs.error_registry = error_registry; + FT_ATOMIC_STORE_INT_RELEASE(interp->codecs.initialized, 1); + } else { + // Another thread initialized everything while we were preparing. + Py_DECREF(search_path); + Py_DECREF(search_cache); + Py_DECREF(error_registry); + do_import = 0; + } + + // Importing `encodings' can execute arbitrary code and will call back into + // this module to register codec search functions. Do it once everything is + // initialized and we hold no locks. Other Python code may register other + // codecs before `encodings' is finished importing; this is true with or + // without the GIL. +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&interp->codecs.init_mutex); +#endif + + if (do_import) { + PyObject *mod = PyImport_ImportModule("encodings"); + if (mod == NULL) { + return -1; + } + Py_DECREF(mod); } - Py_DECREF(mod); - interp->codecs_initialized = 1; return 0; + + error: + Py_XDECREF(search_path); + Py_XDECREF(search_cache); + Py_XDECREF(error_registry); + return -1; } diff --git a/Python/pystate.c b/Python/pystate.c index 925d1cff866f18..dcc8cf40c8ac4d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -821,9 +821,9 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) } PyConfig_Clear(&interp->config); - Py_CLEAR(interp->codec_search_path); - Py_CLEAR(interp->codec_search_cache); - Py_CLEAR(interp->codec_error_registry); + Py_CLEAR(interp->codecs.search_path); + Py_CLEAR(interp->codecs.search_cache); + Py_CLEAR(interp->codecs.error_registry); assert(interp->imports.modules == NULL); assert(interp->imports.modules_by_index == NULL); From d8bcdd8f9bc2cac51b8477143a5b861d302aa00b Mon Sep 17 00:00:00 2001 From: Brett Simmers Date: Thu, 4 Apr 2024 10:39:26 -0700 Subject: [PATCH 2/4] Try to fix check-c-globals --- Tools/c-analyzer/cpython/ignored.tsv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 965346b9b04a32..7463eeffc1378b 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -343,7 +343,7 @@ Python/ceval.c - _PyEval_BinaryOps - Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - Python/codecs.c - Py_hexdigits - Python/codecs.c - ucnhash_capi - -Python/codecs.c _PyCodecRegistry_Init methods - +Python/codecs.c _PyCodecRegistry_EnsureInit methods - Python/compile.c - NO_LABEL - Python/compile.c - NO_LOCATION - Python/dynload_shlib.c - _PyImport_DynLoadFiletab - From 8076032203cd345ff60839b964898cb61df4f55b Mon Sep 17 00:00:00 2001 From: Brett Simmers Date: Wed, 10 Apr 2024 16:36:00 -0700 Subject: [PATCH 3/4] Initialize codec registry eagerly, greatly simplifying things --- Include/internal/pycore_codecs.h | 13 +-- .../internal/pycore_pyatomic_ft_wrappers.h | 7 -- Objects/object.c | 1 - Objects/unicodeobject.c | 6 +- Python/codecs.c | 105 ++++++------------ Python/pystate.c | 1 + Tools/c-analyzer/cpython/ignored.tsv | 2 +- 7 files changed, 44 insertions(+), 91 deletions(-) diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h index 258de217632d4c..dd4e175cf5eeda 100644 --- a/Include/internal/pycore_codecs.h +++ b/Include/internal/pycore_codecs.h @@ -10,6 +10,11 @@ extern "C" { #include "pycore_lock.h" // PyMutex +/* Initialize codecs-related state for the given interpreter. Must be called + before any other _PyCodec* functions, and while only one thread is + active. */ +extern PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp); + extern PyObject* _PyCodec_Lookup(const char *encoding); /* Text codec specific encoding and decoding API. @@ -65,15 +70,9 @@ struct codecs_state { #ifdef Py_GIL_DISABLED // Used to safely delete a specific item from search_path. PyMutex search_path_mutex; - - // Used to synchronize initialization of the PyObject* members above. - PyMutex init_mutex; #endif - // If an acquire load of initialized yields 1, all of the PyObject* members - // above will be set, and their values will not change until interpreter - // finalization. This allows common operations to freely read them without - // additional synchronization. + // Whether or not the rest of the state is initialized. int initialized; }; diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index 10d74958f86717..e441600d54e1aa 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -23,25 +23,18 @@ extern "C" { #define FT_ATOMIC_LOAD_SSIZE(value) _Py_atomic_load_ssize(&value) #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) \ _Py_atomic_load_ssize_relaxed(&value) -#define FT_ATOMIC_LOAD_INT_ACQUIRE(value) _Py_atomic_load_int_acquire(&value) - #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ _Py_atomic_store_ptr_release(&value, new_value) #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ _Py_atomic_store_ssize_relaxed(&value, new_value) -#define FT_ATOMIC_STORE_INT_RELEASE(value, new_value) \ - _Py_atomic_store_int_release(&value, new_value) #else #define FT_ATOMIC_LOAD_SSIZE(value) value #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value -#define FT_ATOMIC_LOAD_INT_ACQUIRE(value) value - #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value -#define FT_ATOMIC_STORE_INT_RELEASE(value, new_value) value = new_value #endif #ifdef __cplusplus diff --git a/Objects/object.c b/Objects/object.c index 60642d899bcafa..06cddf7077c51b 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -467,7 +467,6 @@ void PyObject_CallFinalizer(PyObject *self) { PyTypeObject *tp = Py_TYPE(self); - if (tp->tp_finalize == NULL) return; /* tp_finalize should only be called once. */ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e412af5f797e7a..7d94ee85be1317 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15452,7 +15452,11 @@ init_fs_encoding(PyThreadState *tstate) PyStatus _PyUnicode_InitEncodings(PyThreadState *tstate) { - PyStatus status = init_fs_encoding(tstate); + PyStatus status = _PyCodec_InitRegistry(tstate->interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + status = init_fs_encoding(tstate); if (_PyStatus_EXCEPTION(status)) { return status; } diff --git a/Python/codecs.c b/Python/codecs.c index 9eda246d34c0c9..a02e20f4683452 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -12,7 +12,6 @@ Copyright (c) Corporation for National Research Initiatives. #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_interp.h" // PyInterpreterState.codec_search_path #include "pycore_lock.h" // PyMutex -#include "pycore_pyatomic_ft_wrappers.h" #include "pycore_pyerrors.h" // _PyErr_FormatNote() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI @@ -32,15 +31,10 @@ const char *Py_hexdigits = "0123456789abcdef"; */ -static int _PyCodecRegistry_EnsureInit(PyInterpreterState *); /* Forward */ - int PyCodec_Register(PyObject *search_function) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyCodecRegistry_EnsureInit(interp) < 0) { - goto onError; - } - PyObject *search_path = interp->codecs.search_path; + assert(interp->codecs.initialized); if (search_function == NULL) { PyErr_BadArgument(); goto onError; @@ -52,7 +46,7 @@ int PyCodec_Register(PyObject *search_function) #ifdef Py_GIL_DISABLED PyMutex_Lock(&interp->codecs.search_path_mutex); #endif - int ret = PyList_Append(search_path, search_function); + int ret = PyList_Append(interp->codecs.search_path, search_function); #ifdef Py_GIL_DISABLED PyMutex_Unlock(&interp->codecs.search_path_mutex); #endif @@ -66,8 +60,9 @@ int PyCodec_Unregister(PyObject *search_function) { PyInterpreterState *interp = _PyInterpreterState_GET(); - /* Do nothing if codec data structures are not created yet. */ - if (FT_ATOMIC_LOAD_INT_ACQUIRE(interp->codecs.initialized) == 0) { + if (interp->codecs.initialized != 1) { + /* Do nothing if codecs state was cleared (only possible during + interpreter shutdown). */ return 0; } @@ -154,9 +149,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) } PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyCodecRegistry_EnsureInit(interp) < 0) { - return NULL; - } + assert(interp->codecs.initialized); /* Convert the encoding to a normalized Python string: all characters are converted to lower case, spaces and hyphens are @@ -623,9 +616,7 @@ PyObject *_PyCodec_DecodeText(PyObject *object, int PyCodec_RegisterError(const char *name, PyObject *error) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyCodecRegistry_EnsureInit(interp) < 0) { - return -1; - } + assert(interp->codecs.initialized); if (!PyCallable_Check(error)) { PyErr_SetString(PyExc_TypeError, "handler must be callable"); return -1; @@ -640,9 +631,7 @@ int PyCodec_RegisterError(const char *name, PyObject *error) PyObject *PyCodec_LookupError(const char *name) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyCodecRegistry_EnsureInit(interp) < 0) { - return NULL; - } + assert(interp->codecs.initialized); if (name==NULL) name = "strict"; @@ -1400,7 +1389,7 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) return PyCodec_SurrogateEscapeErrors(exc); } -static int _PyCodecRegistry_EnsureInit(PyInterpreterState *interp) +PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp) { static struct { const char *name; @@ -1488,74 +1477,42 @@ static int _PyCodecRegistry_EnsureInit(PyInterpreterState *interp) } }; - if (FT_ATOMIC_LOAD_INT_ACQUIRE(interp->codecs.initialized) == 1) { - return 0; + assert(interp->codecs.initialized == 0); + interp->codecs.search_path = PyList_New(0); + if (interp->codecs.search_path == NULL) { + return PyStatus_NoMemory(); } - - PyObject *search_path = NULL, *search_cache = NULL, *error_registry = NULL; - search_path = PyList_New(0); - if (search_path == NULL) { - goto error; + interp->codecs.search_cache = PyDict_New(); + if (interp->codecs.search_cache == NULL) { + return PyStatus_NoMemory(); } - search_cache = PyDict_New(); - if (search_cache == NULL) { - goto error; - } - error_registry = PyDict_New(); - if (error_registry == NULL) { - goto error; + interp->codecs.error_registry = PyDict_New(); + if (interp->codecs.error_registry == NULL) { + return PyStatus_NoMemory(); } for (size_t i = 0; i < Py_ARRAY_LENGTH(methods); ++i) { PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL); if (func == NULL) { - goto error; + return PyStatus_NoMemory(); } - int res = PyDict_SetItemString(error_registry, methods[i].name, func); + int res = PyDict_SetItemString(interp->codecs.error_registry, + methods[i].name, func); Py_DECREF(func); if (res < 0) { - goto error; + return PyStatus_Error("Failed to insert into codec error registry"); } } -#ifdef Py_GIL_DISABLED - PyMutex_Lock(&interp->codecs.init_mutex); -#endif - int do_import = 1; - if (interp->codecs.initialized == 0) { - interp->codecs.search_path = search_path; - interp->codecs.search_cache = search_cache; - interp->codecs.error_registry = error_registry; - FT_ATOMIC_STORE_INT_RELEASE(interp->codecs.initialized, 1); - } else { - // Another thread initialized everything while we were preparing. - Py_DECREF(search_path); - Py_DECREF(search_cache); - Py_DECREF(error_registry); - do_import = 0; - } - - // Importing `encodings' can execute arbitrary code and will call back into - // this module to register codec search functions. Do it once everything is - // initialized and we hold no locks. Other Python code may register other - // codecs before `encodings' is finished importing; this is true with or - // without the GIL. -#ifdef Py_GIL_DISABLED - PyMutex_Unlock(&interp->codecs.init_mutex); -#endif + interp->codecs.initialized = 1; - if (do_import) { - PyObject *mod = PyImport_ImportModule("encodings"); - if (mod == NULL) { - return -1; - } - Py_DECREF(mod); + // Importing `encodings' will call back into this module to register codec + // search functions, so this is done after everything else is initialized. + PyObject *mod = PyImport_ImportModule("encodings"); + if (mod == NULL) { + return PyStatus_Error("Failed to import encodings module"); } - return 0; + Py_DECREF(mod); - error: - Py_XDECREF(search_path); - Py_XDECREF(search_cache); - Py_XDECREF(error_registry); - return -1; + return PyStatus_Ok(); } diff --git a/Python/pystate.c b/Python/pystate.c index dcc8cf40c8ac4d..b438332cb38631 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -824,6 +824,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) Py_CLEAR(interp->codecs.search_path); Py_CLEAR(interp->codecs.search_cache); Py_CLEAR(interp->codecs.error_registry); + interp->codecs.initialized = 0; assert(interp->imports.modules == NULL); assert(interp->imports.modules_by_index == NULL); diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 7463eeffc1378b..11d91b01b8f956 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -343,7 +343,7 @@ Python/ceval.c - _PyEval_BinaryOps - Python/ceval.c - _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS - Python/codecs.c - Py_hexdigits - Python/codecs.c - ucnhash_capi - -Python/codecs.c _PyCodecRegistry_EnsureInit methods - +Python/codecs.c _PyCodec_InitRegistry methods - Python/compile.c - NO_LABEL - Python/compile.c - NO_LOCATION - Python/dynload_shlib.c - _PyImport_DynLoadFiletab - From 4d5bffc741c6c501fd11cd418b68b5c35ce8ef56 Mon Sep 17 00:00:00 2001 From: Brett Simmers Date: Mon, 15 Apr 2024 11:19:08 -0700 Subject: [PATCH 4/4] Final review comments, and add missing newline after _PyCodec_InitRegistry's return type --- Include/internal/pycore_codecs.h | 10 +++++++--- Objects/object.c | 1 + Python/codecs.c | 23 +++++++++++------------ Python/pystate.c | 5 +---- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h index dd4e175cf5eeda..5e2d5c5ce9d868 100644 --- a/Include/internal/pycore_codecs.h +++ b/Include/internal/pycore_codecs.h @@ -10,11 +10,15 @@ extern "C" { #include "pycore_lock.h" // PyMutex -/* Initialize codecs-related state for the given interpreter. Must be called - before any other _PyCodec* functions, and while only one thread is - active. */ +/* Initialize codecs-related state for the given interpreter, including + registering the first codec search function. Must be called before any other + PyCodec-related functions, and while only one thread is active. */ extern PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp); +/* Finalize codecs-related state for the given interpreter. No PyCodec-related + functions other than PyCodec_Unregister() may be called after this. */ +extern void _PyCodec_Fini(PyInterpreterState *interp); + extern PyObject* _PyCodec_Lookup(const char *encoding); /* Text codec specific encoding and decoding API. diff --git a/Objects/object.c b/Objects/object.c index 4c1676d77c471e..016d0e1ded92d8 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -473,6 +473,7 @@ void PyObject_CallFinalizer(PyObject *self) { PyTypeObject *tp = Py_TYPE(self); + if (tp->tp_finalize == NULL) return; /* tp_finalize should only be called once. */ diff --git a/Python/codecs.c b/Python/codecs.c index a02e20f4683452..bed245366f9234 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -20,17 +20,6 @@ const char *Py_hexdigits = "0123456789abcdef"; /* --- Codec Registry ----------------------------------------------------- */ -/* Import the standard encodings package which will register the first - codec search function. - - This is done in a lazy way so that the Unicode implementation does - not downgrade startup time of scripts not needing it. - - ImportErrors are silently ignored by this function. Only one try is - made. - -*/ - int PyCodec_Register(PyObject *search_function) { PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -1389,7 +1378,8 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) return PyCodec_SurrogateEscapeErrors(exc); } -PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp) +PyStatus +_PyCodec_InitRegistry(PyInterpreterState *interp) { static struct { const char *name; @@ -1516,3 +1506,12 @@ PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp) return PyStatus_Ok(); } + +void +_PyCodec_Fini(PyInterpreterState *interp) +{ + Py_CLEAR(interp->codecs.search_path); + Py_CLEAR(interp->codecs.search_cache); + Py_CLEAR(interp->codecs.error_registry); + interp->codecs.initialized = 0; +} diff --git a/Python/pystate.c b/Python/pystate.c index b1c83cf2daa5be..811d192820bdc8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -838,10 +838,7 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) } PyConfig_Clear(&interp->config); - Py_CLEAR(interp->codecs.search_path); - Py_CLEAR(interp->codecs.search_cache); - Py_CLEAR(interp->codecs.error_registry); - interp->codecs.initialized = 0; + _PyCodec_Fini(interp); assert(interp->imports.modules == NULL); assert(interp->imports.modules_by_index == NULL);