From 0fadd9fd205e4df105676359876a6d6684f215a3 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 26 Jun 2025 04:01:25 +0100 Subject: [PATCH 01/55] gh-125142: remove duplicated import in `Lib/pydoc.py` (gh-135215) --- Lib/pydoc.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 7528178fdcae97..d508fb70ea429e 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1812,7 +1812,6 @@ def writedocs(dir, pkgpath='', done=None): def _introdoc(): - import textwrap ver = '%d.%d' % sys.version_info[:2] if os.environ.get('PYTHON_BASIC_REPL'): pyrepl_keys = '' @@ -2170,7 +2169,6 @@ def showtopic(self, topic, more_xrefs=''): if more_xrefs: xrefs = (xrefs or '') + ' ' + more_xrefs if xrefs: - import textwrap text = 'Related help topics: ' + ', '.join(xrefs.split()) + '\n' wrapped_text = textwrap.wrap(text, 72) doc += '\n%s\n' % '\n'.join(wrapped_text) From 1f5e23fd7015a8f7b14d0181ec83efa95c5d5b68 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 25 Jun 2025 20:03:24 -0700 Subject: [PATCH 02/55] Add whatsnew text for warnings module changes. (gh-135869) --- Doc/whatsnew/3.14.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index cbca720b75e96c..a74d414ae4bb70 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -978,6 +978,23 @@ be specified by the build backend, as it will no longer be determined automatically by the C compiler. For a running interpreter, the setting that was used at compile time can be found using :func:`sysconfig.get_config_var`. +A new flag has been added, :data:`~sys.flags.context_aware_warnings`. This +flag defaults to true for the free-threaded build and false for the GIL-enabled +build. If the flag is true then the :class:`warnings.catch_warnings` context +manager uses a context variable for warning filters. 
This makes the context +manager behave predictably when used with multiple threads or asynchronous +tasks. + +A new flag has been added, :data:`~sys.flags.thread_inherit_context`. This flag +defaults to true for the free-threaded build and false for the GIL-enabled +build. If the flag is true then threads created with :class:`threading.Thread` +start with a copy of the :class:`~contextvars.Context()` of the caller of +:meth:`~threading.Thread.start`. Most significantly, this makes the warning +filtering context established by :class:`~warnings.catch_warnings` be +"inherited" by threads (or asyncio tasks) started within that context. It also +affects other modules that use context variables, such as the :mod:`decimal` +context manager. + .. _whatsnew314-pyrepl-highlighting: @@ -1028,6 +1045,18 @@ Please report any bugs or major performance regressions that you encounter! .. seealso:: :pep:`744` +Concurrent safe warnings control +-------------------------------- + +The :class:`warnings.catch_warnings` context manager will now optionally +use a context variable for warning filters. This is enabled by setting +the :data:`~sys.flags.context_aware_warnings` flag, either with the ``-X`` +command-line option or an environment variable. This gives predictable +warnings control when using :class:`~warnings.catch_warnings` combined with +multiple threads or asynchronous tasks. The flag defaults to true for the +free-threaded build and false for the GIL-enabled build. + +(Contributed by Neil Schemenauer and Kumar Aditya in :gh:`130010`.) Other language changes ====================== From e3ea6f2b3b084700a34ce392f5cf897407469b3a Mon Sep 17 00:00:00 2001 From: Terry Jan Reedy Date: Wed, 25 Jun 2025 23:44:08 -0400 Subject: [PATCH 03/55] gh-135956: Remove duplicate word in _pydatetime docstring (#135957) _pydatetime.isoformat docstring repeats 'giving'. 
--- Lib/_pydatetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index 71f619024e570d..bc35823f70144e 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -2164,7 +2164,7 @@ def isoformat(self, sep='T', timespec='auto'): By default, the fractional part is omitted if self.microsecond == 0. If self.tzinfo is not None, the UTC offset is also attached, giving - giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. + a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. Optional argument sep specifies the separator between date and time, default 'T'. From a1da208eec3028b1ecae804d4c0dc6b43cdddae9 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 26 Jun 2025 09:25:41 +0200 Subject: [PATCH 04/55] gh-131591: Add Py_ prefix to MAX_SCRIPT_PATH_SIZE; remove unprefixed struct tag (GH-135924) Names/macros defined in public headers should have `Py`/`_Py` prefixes. --- Include/cpython/pystate.h | 6 +++--- Include/internal/pycore_debug_offsets.h | 2 +- Modules/posixmodule.c | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 54d7e62292966e..be582122118e44 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -28,10 +28,10 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *); #define PyTrace_OPCODE 7 /* Remote debugger support */ -#define MAX_SCRIPT_PATH_SIZE 512 -typedef struct _remote_debugger_support { +#define Py_MAX_SCRIPT_PATH_SIZE 512 +typedef struct { int32_t debugger_pending_call; - char debugger_script_path[MAX_SCRIPT_PATH_SIZE]; + char debugger_script_path[Py_MAX_SCRIPT_PATH_SIZE]; } _PyRemoteDebuggerSupport; typedef struct _err_stackitem { diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index ce3fcb109f49f7..1b59fa2ef60014 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ 
-368,7 +368,7 @@ typedef struct _Py_DebugOffsets { .remote_debugging_enabled = offsetof(PyInterpreterState, config.remote_debug), \ .debugger_pending_call = offsetof(_PyRemoteDebuggerSupport, debugger_pending_call), \ .debugger_script_path = offsetof(_PyRemoteDebuggerSupport, debugger_script_path), \ - .debugger_script_path_size = MAX_SCRIPT_PATH_SIZE, \ + .debugger_script_path_size = Py_MAX_SCRIPT_PATH_SIZE, \ }, \ } diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 7dc5ef39a566e4..b570f81b7cf7c2 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -685,7 +685,8 @@ static void reset_remotedebug_data(PyThreadState *tstate) { tstate->remote_debugger_support.debugger_pending_call = 0; - memset(tstate->remote_debugger_support.debugger_script_path, 0, MAX_SCRIPT_PATH_SIZE); + memset(tstate->remote_debugger_support.debugger_script_path, 0, + Py_MAX_SCRIPT_PATH_SIZE); } From 10a3d431881bb9169abde97f85ea6a670e1ef3cc Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Thu, 26 Jun 2025 05:43:08 -0400 Subject: [PATCH 05/55] gh-135755: Move `PyFunction_GET_BUILTINS` to the private API (GH-135938) --- Include/cpython/funcobject.h | 5 ----- Include/internal/pycore_function.h | 5 +++++ Modules/_testinternalcapi.c | 5 +++-- Objects/funcobject.c | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Include/cpython/funcobject.h b/Include/cpython/funcobject.h index 18249b95befe65..598cd330bc9ca9 100644 --- a/Include/cpython/funcobject.h +++ b/Include/cpython/funcobject.h @@ -97,11 +97,6 @@ static inline PyObject* PyFunction_GET_GLOBALS(PyObject *func) { } #define PyFunction_GET_GLOBALS(func) PyFunction_GET_GLOBALS(_PyObject_CAST(func)) -static inline PyObject* PyFunction_GET_BUILTINS(PyObject *func) { - return _PyFunction_CAST(func)->func_builtins; -} -#define PyFunction_GET_BUILTINS(func) PyFunction_GET_BUILTINS(_PyObject_CAST(func)) - static inline PyObject* PyFunction_GET_MODULE(PyObject *func) { return _PyFunction_CAST(func)->func_module; 
} diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index a30d52d49bdc4d..6e1209659565a3 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -41,6 +41,11 @@ extern PyObject *_Py_set_function_type_params( PyAPI_FUNC(int) _PyFunction_VerifyStateless(PyThreadState *, PyObject *); +static inline PyObject* _PyFunction_GET_BUILTINS(PyObject *func) { + return _PyFunction_CAST(func)->func_builtins; +} +#define _PyFunction_GET_BUILTINS(func) _PyFunction_GET_BUILTINS(_PyObject_CAST(func)) + #ifdef __cplusplus } diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 804cb4e4d1c8ee..fdf22a0c994d3a 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -21,6 +21,7 @@ #include "pycore_fileutils.h" // _Py_normpath() #include "pycore_flowgraph.h" // _PyCompile_OptimizeCfg() #include "pycore_frame.h" // _PyInterpreterFrame +#include "pycore_function.h" // _PyFunction_GET_BUILTINS #include "pycore_gc.h" // PyGC_Head #include "pycore_hashtable.h" // _Py_hashtable_new() #include "pycore_import.h" // _PyImport_ClearExtension() @@ -1022,7 +1023,7 @@ get_code_var_counts(PyObject *self, PyObject *_args, PyObject *_kwargs) globalsns = PyFunction_GET_GLOBALS(codearg); } if (builtinsns == NULL) { - builtinsns = PyFunction_GET_BUILTINS(codearg); + builtinsns = _PyFunction_GET_BUILTINS(codearg); } codearg = PyFunction_GET_CODE(codearg); } @@ -1190,7 +1191,7 @@ verify_stateless_code(PyObject *self, PyObject *args, PyObject *kwargs) globalsns = PyFunction_GET_GLOBALS(codearg); } if (builtinsns == NULL) { - builtinsns = PyFunction_GET_BUILTINS(codearg); + builtinsns = _PyFunction_GET_BUILTINS(codearg); } codearg = PyFunction_GET_CODE(codearg); } diff --git a/Objects/funcobject.c b/Objects/funcobject.c index f87b0e5d8f1e47..f8dd10a346d613 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1256,7 +1256,7 @@ _PyFunction_VerifyStateless(PyThreadState *tstate, PyObject 
*func) return -1; } // Check the builtins. - PyObject *builtinsns = PyFunction_GET_BUILTINS(func); + PyObject *builtinsns = _PyFunction_GET_BUILTINS(func); if (builtinsns != NULL && !PyDict_Check(builtinsns)) { _PyErr_Format(tstate, PyExc_TypeError, "unsupported builtins %R", builtinsns); From 9193efdeab4596f987528ba278afa2eca93a9e8a Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 26 Jun 2025 11:48:37 +0200 Subject: [PATCH 06/55] gh-125206: Make _Py_FFI_SUPPORT_C_COMPLEX private (GH-135932) --- Modules/_ctypes/_ctypes_test.c | 4 ++-- Modules/_ctypes/cfield.c | 4 ++-- Modules/_ctypes/ctypes.h | 2 +- configure | 2 +- configure.ac | 2 +- pyconfig.h.in | 6 +++--- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Modules/_ctypes/_ctypes_test.c b/Modules/_ctypes/_ctypes_test.c index d28e5708b44933..66338805007853 100644 --- a/Modules/_ctypes/_ctypes_test.c +++ b/Modules/_ctypes/_ctypes_test.c @@ -23,7 +23,7 @@ # define _Py_thread_local __thread #endif -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) # include <complex.h> // csqrt() # undef I // for _ctypes_test_generated.c.h #endif @@ -457,7 +457,7 @@ EXPORT(double) my_sqrt(double a) return sqrt(a); } -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) EXPORT(double complex) my_csqrt(double complex a) { return csqrt(a); diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c index 163b92642615e5..547e2471a1cbc0 100644 --- a/Modules/_ctypes/cfield.c +++ b/Modules/_ctypes/cfield.c @@ -759,7 +759,7 @@ d_get(void *ptr, Py_ssize_t size) return PyFloat_FromDouble(val); } -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) /* We don't use _Complex types here, using arrays instead, as the C11+ standard says: "Each complex type has the same representation and alignment @@ -1599,7 +1599,7 @@ for base_code, base_c_type in [ /////////////////////////////////////////////////////////////////////////// TABLE_ENTRY_SW(d, 
&ffi_type_double); -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) if (Py_FFI_COMPLEX_AVAILABLE) { TABLE_ENTRY(D, &ffi_type_complex_double); TABLE_ENTRY(F, &ffi_type_complex_float); diff --git a/Modules/_ctypes/ctypes.h b/Modules/_ctypes/ctypes.h index 6a45c11e61af5c..5b4f97d43b8721 100644 --- a/Modules/_ctypes/ctypes.h +++ b/Modules/_ctypes/ctypes.h @@ -23,7 +23,7 @@ // Do we support C99 complex types in ffi? // For Apple's libffi, this must be determined at runtime (see gh-128156). -#if defined(Py_FFI_SUPPORT_C_COMPLEX) +#if defined(_Py_FFI_SUPPORT_C_COMPLEX) # if USING_APPLE_OS_LIBFFI && defined(__has_builtin) # if __has_builtin(__builtin_available) # define Py_FFI_COMPLEX_AVAILABLE __builtin_available(macOS 10.15, *) diff --git a/configure b/configure index 50223d81cd976e..43b36d9231e341 100755 --- a/configure +++ b/configure @@ -15742,7 +15742,7 @@ fi printf "%s\n" "$ac_cv_ffi_complex_double_supported" >&6; } if test "$ac_cv_ffi_complex_double_supported" = "yes"; then -printf "%s\n" "#define Py_FFI_SUPPORT_C_COMPLEX 1" >>confdefs.h +printf "%s\n" "#define _Py_FFI_SUPPORT_C_COMPLEX 1" >>confdefs.h fi diff --git a/configure.ac b/configure.ac index f58e16c774f4aa..e77696e3a4e025 100644 --- a/configure.ac +++ b/configure.ac @@ -4163,7 +4163,7 @@ int main(void) [ac_cv_ffi_complex_double_supported=no]) ])]) if test "$ac_cv_ffi_complex_double_supported" = "yes"; then - AC_DEFINE([Py_FFI_SUPPORT_C_COMPLEX], [1], + AC_DEFINE([_Py_FFI_SUPPORT_C_COMPLEX], [1], [Defined if _Complex C type can be used with libffi.]) fi diff --git a/pyconfig.h.in b/pyconfig.h.in index 65a2c55217c258..d4f1da7fb10776 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1736,9 +1736,6 @@ /* Defined if Python is built as a shared library. */ #undef Py_ENABLE_SHARED -/* Defined if _Complex C type can be used with libffi. 
*/ -#undef Py_FFI_SUPPORT_C_COMPLEX - /* Define if you want to disable the GIL */ #undef Py_GIL_DISABLED @@ -2026,6 +2023,9 @@ /* Maximum length in bytes of a thread name */ #undef _PYTHREAD_NAME_MAXLEN +/* Defined if _Complex C type can be used with libffi. */ +#undef _Py_FFI_SUPPORT_C_COMPLEX + /* Define to force use of thread-safe errno, h_errno, and other functions */ #undef _REENTRANT From 6be17baeb5bcfc78f0b7fcfe5221df0744c865e8 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 26 Jun 2025 13:05:01 +0200 Subject: [PATCH 07/55] gh-135755: Use private names (_Py*) for header file guards new in 3.14 (GH-135921) These are private API; let's name new ones accordingly. --- Include/audit.h | 10 +++++----- Include/cpython/audit.h | 2 +- Include/refcount.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Include/audit.h b/Include/audit.h index 793b7077e1027b..9be54ad4411096 100644 --- a/Include/audit.h +++ b/Include/audit.h @@ -1,5 +1,5 @@ -#ifndef Py_AUDIT_H -#define Py_AUDIT_H +#ifndef _Py_AUDIT_H +#define _Py_AUDIT_H #ifdef __cplusplus extern "C" { #endif @@ -18,13 +18,13 @@ PyAPI_FUNC(int) PySys_AuditTuple( #ifndef Py_LIMITED_API -# define Py_CPYTHON_AUDIT_H +# define _Py_CPYTHON_AUDIT_H # include "cpython/audit.h" -# undef Py_CPYTHON_AUDIT_H +# undef _Py_CPYTHON_AUDIT_H #endif #ifdef __cplusplus } #endif -#endif /* !Py_AUDIT_H */ +#endif /* !_Py_AUDIT_H */ diff --git a/Include/cpython/audit.h b/Include/cpython/audit.h index 3c5c7a8c06091d..536f9248632097 100644 --- a/Include/cpython/audit.h +++ b/Include/cpython/audit.h @@ -1,4 +1,4 @@ -#ifndef Py_CPYTHON_AUDIT_H +#ifndef _Py_CPYTHON_AUDIT_H # error "this header file must not be included directly" #endif diff --git a/Include/refcount.h b/Include/refcount.h index ebd1dba6d15e1a..034c453f449f5b 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -1,5 +1,5 @@ -#ifndef Py_REFCOUNT_H -#define Py_REFCOUNT_H +#ifndef _Py_REFCOUNT_H +#define _Py_REFCOUNT_H #ifdef __cplusplus extern 
"C" { #endif @@ -561,4 +561,4 @@ static inline PyObject* _Py_XNewRef(PyObject *obj) #ifdef __cplusplus } #endif -#endif // !Py_REFCOUNT_H +#endif // !_Py_REFCOUNT_H From ffb2a02f98d904505c8a82d8540c36dee4c67eed Mon Sep 17 00:00:00 2001 From: Weilin Du <108666168+LamentXU123@users.noreply.github.com> Date: Thu, 26 Jun 2025 19:41:41 +0800 Subject: [PATCH 08/55] gh-135965: Delete duplicate word in isolating-extensions howto (#135964) Change use use to use. --- Doc/howto/isolating-extensions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/howto/isolating-extensions.rst b/Doc/howto/isolating-extensions.rst index b2109b1503992b..fbc426ba1d7d9a 100644 --- a/Doc/howto/isolating-extensions.rst +++ b/Doc/howto/isolating-extensions.rst @@ -453,7 +453,7 @@ Avoiding ``PyObject_New`` GC-tracked objects need to be allocated using GC-aware functions. -If you use use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`: +If you use :c:func:`PyObject_New` or :c:func:`PyObject_NewVar`: - Get and call type's :c:member:`~PyTypeObject.tp_alloc` slot, if possible. That is, replace ``TYPE *o = PyObject_New(TYPE, typeobj)`` with:: From fb9e292919d82326acea456aa071c9af6aff5626 Mon Sep 17 00:00:00 2001 From: Dylan Date: Thu, 26 Jun 2025 08:02:50 -0500 Subject: [PATCH 09/55] gh-129958: New syntax error in format spec applies to both f-strings and t-strings (#135570) Co-authored-by: Tomas R. 
Co-authored-by: Lysandros Nikolaou --- Lib/test/test_tstring.py | 1 + .../2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst | 2 ++ Parser/lexer/lexer.c | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst diff --git a/Lib/test/test_tstring.py b/Lib/test/test_tstring.py index e72a1ea54176d5..aabae38556735b 100644 --- a/Lib/test/test_tstring.py +++ b/Lib/test/test_tstring.py @@ -219,6 +219,7 @@ def test_syntax_errors(self): ("t'{lambda:1}'", "t-string: lambda expressions are not allowed " "without parentheses"), ("t'{x:{;}}'", "t-string: expecting a valid expression after '{'"), + ("t'{1:d\n}'", "t-string: newlines are not allowed in format specifiers") ): with self.subTest(case), self.assertRaisesRegex(SyntaxError, err): eval(case) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst new file mode 100644 index 00000000000000..70b3e99425df14 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-06-41-47.gh-issue-129958.EaJuS0.rst @@ -0,0 +1,2 @@ +Differentiate between t-strings and f-strings in syntax error for newlines +in format specifiers of single-quoted interpolated strings. 
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 4d10bccf0a53f2..0a078dd594148c 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -1421,7 +1421,8 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct return MAKE_TOKEN( _PyTokenizer_syntaxerror( tok, - "f-string: newlines are not allowed in format specifiers for single quoted f-strings" + "%c-string: newlines are not allowed in format specifiers for single quoted %c-strings", + TOK_GET_STRING_PREFIX(tok), TOK_GET_STRING_PREFIX(tok) ) ); } From 0d76dccc3b4376ba075a1737f58809e3d83aaaa3 Mon Sep 17 00:00:00 2001 From: Connor Denihan <188690869+cdenihan@users.noreply.github.com> Date: Thu, 26 Jun 2025 09:27:25 -0400 Subject: [PATCH 10/55] gh-135110: Fix misleading `generator.close()` documentation (GH-135152) The documentation incorrectly stated that generator.close() 'raises' a GeneratorExit exception. This was misleading because the method doesn't raise the exception to the caller - it sends the exception internally to the generator and returns None. --- Doc/howto/functional.rst | 2 +- Doc/reference/expressions.rst | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst index b4f3463afee812..78e56e0c64fb10 100644 --- a/Doc/howto/functional.rst +++ b/Doc/howto/functional.rst @@ -602,7 +602,7 @@ generators: raise an exception inside the generator; the exception is raised by the ``yield`` expression where the generator's execution is paused. -* :meth:`~generator.close` raises a :exc:`GeneratorExit` exception inside the +* :meth:`~generator.close` sends a :exc:`GeneratorExit` exception to the generator to terminate the iteration. 
On receiving this exception, the generator's code must either raise :exc:`GeneratorExit` or :exc:`StopIteration`; catching the exception and doing anything else is diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 17f39aaf5f57cd..24544a055c3ed2 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -625,8 +625,10 @@ is already executing raises a :exc:`ValueError` exception. .. method:: generator.close() - Raises a :exc:`GeneratorExit` at the point where the generator function was - paused. If the generator function catches the exception and returns a + Raises a :exc:`GeneratorExit` exception at the point where the generator + function was paused (equivalent to calling ``throw(GeneratorExit)``). + The exception is raised by the yield expression where the generator was paused. + If the generator function catches the exception and returns a value, this value is returned from :meth:`close`. If the generator function is already closed, or raises :exc:`GeneratorExit` (by not catching the exception), :meth:`close` returns :const:`None`. If the generator yields a From a4625d597f9fc2d083fbb9c22d3ffcec73b2061a Mon Sep 17 00:00:00 2001 From: Duane Griffin Date: Fri, 27 Jun 2025 02:18:32 +1200 Subject: [PATCH 11/55] gh-91555: add warning to docs about possibility of deadlock/infinite recursion (GH-135954) * gh-91555: add warning to docs about possibility of deadlock/infinite recursion Attempt to clarify in the documentation that care must be taken when using multiprocessing classes to implement logging since they have builtin internal logging, and hence may cause deadlock/infinite recursion. * Update Doc/library/logging.handlers.rst Co-authored-by: Vinay Sajip * Change whitespace. 
--------- Co-authored-by: Vinay Sajip --- Doc/library/logging.handlers.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst index 8f3aa1dfdd0cde..d74ef73ee28497 100644 --- a/Doc/library/logging.handlers.rst +++ b/Doc/library/logging.handlers.rst @@ -1059,6 +1059,15 @@ possible, while any potentially slow operations (such as sending an email via .. note:: If you are using :mod:`multiprocessing`, you should avoid using :class:`~queue.SimpleQueue` and instead use :class:`multiprocessing.Queue`. + .. warning:: + + The :mod:`multiprocessing` module uses an internal logger created and + accessed via :meth:`~multiprocessing.get_logger`. + :class:`multiprocessing.Queue` will log ``DEBUG`` level messages upon + items being queued. If those log messages are processed by a + :class:`QueueHandler` using the same :class:`multiprocessing.Queue` instance, + it will cause a deadlock or infinite recursion. + .. method:: emit(record) Enqueues the result of preparing the LogRecord. Should an exception From 8594d2c03dccd9731e9fc83a9fe6a19a3a090cb7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 26 Jun 2025 18:11:49 +0200 Subject: [PATCH 12/55] gh-135927: Check _MSC_VER to define _Py_NULL macro (#135987) --- Include/pyport.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Include/pyport.h b/Include/pyport.h index 73f071c41a6687..0675294d5bc3b1 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -49,9 +49,9 @@ // Static inline functions should use _Py_NULL rather than using directly NULL // to prevent C++ compiler warnings. On C23 and newer and on C++11 and newer, // _Py_NULL is defined as nullptr. 
-#if (defined(__GNUC__) || defined(__clang__)) && \ - (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ - || (defined(__cplusplus) && __cplusplus >= 201103) +#if !defined(_MSC_VER) && \ + ((defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \ + || (defined(__cplusplus) && __cplusplus >= 201103)) # define _Py_NULL nullptr #else # define _Py_NULL NULL From 642e5dfc74310d15bb81f8e94167590380a5fbfb Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Thu, 26 Jun 2025 21:20:07 +0100 Subject: [PATCH 13/55] IDLE: Update NEWS2x.txt with 2.7.0 release date (#129908) --- Lib/idlelib/NEWS2x.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/idlelib/NEWS2x.txt b/Lib/idlelib/NEWS2x.txt index 6751ca5f111b18..3721193007e59b 100644 --- a/Lib/idlelib/NEWS2x.txt +++ b/Lib/idlelib/NEWS2x.txt @@ -1,6 +1,6 @@ What's New in IDLE 2.7? (Merged into 3.1 before 2.7 release.) ======================= -*Release date: XX-XXX-2010* +*Release date: 03-Jul-2010* - idle.py modified and simplified to better support developing experimental versions of IDLE which are not installed in the standard location. From 58a42dea97f4fa0df38ef4a95a2ede65e0549f71 Mon Sep 17 00:00:00 2001 From: Nathan Korth Date: Thu, 26 Jun 2025 18:35:45 -0400 Subject: [PATCH 14/55] gh-135995: Fix missing char in palmos encoding (#135990) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0x8b correctly encodes to ‹, but 0x9b was mistakenly marked as a control character instead of ›. 
--------- Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Brian Schubert Co-authored-by: Terry Jan Reedy --- Lib/encodings/palmos.py | 2 +- .../next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst diff --git a/Lib/encodings/palmos.py b/Lib/encodings/palmos.py index c506d654523496..df164ca5b9549c 100644 --- a/Lib/encodings/palmos.py +++ b/Lib/encodings/palmos.py @@ -201,7 +201,7 @@ def getregentry(): '\u02dc' # 0x98 -> SMALL TILDE '\u2122' # 0x99 -> TRADE MARK SIGN '\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - '\x9b' # 0x9B -> + '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK '\u0153' # 0x9C -> LATIN SMALL LIGATURE OE '\x9d' # 0x9D -> '\x9e' # 0x9E -> diff --git a/Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst b/Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst new file mode 100644 index 00000000000000..998b3cd85b1d5d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-26-17-28-49.gh-issue-135995.pPrDCt.rst @@ -0,0 +1 @@ +In the palmos encoding, make byte ``0x9b`` decode to ``›`` (U+203A - SINGLE RIGHT-POINTING ANGLE QUOTATION MARK). 
From 34ce1920ca33c11ca2c379ed0ef30a91010bef4f Mon Sep 17 00:00:00 2001 From: Brian Schubert Date: Thu, 26 Jun 2025 20:00:19 -0400 Subject: [PATCH 15/55] Docs: Fix duplicate word typos (GH-135958) --- Doc/c-api/init.rst | 2 +- Doc/c-api/long.rst | 2 +- Doc/extending/newtypes_tutorial.rst | 2 +- Doc/library/ctypes.rst | 2 +- Doc/library/email.header.rst | 4 ++-- Doc/library/exceptions.rst | 2 +- Doc/library/faulthandler.rst | 2 +- Doc/library/mmap.rst | 2 +- Doc/library/pathlib.rst | 2 +- Doc/library/socketserver.rst | 2 +- Doc/library/threading.rst | 2 +- Doc/using/cmdline.rst | 4 ++-- Doc/whatsnew/3.13.rst | 2 +- 13 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 3106bf9808f254..41fd4ea14ef12f 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -1250,7 +1250,7 @@ All of the following functions must be called after :c:func:`Py_Initialize`. .. c:function:: void PyInterpreterState_Clear(PyInterpreterState *interp) Reset all information in an interpreter state object. There must be - an :term:`attached thread state` for the the interpreter. + an :term:`attached thread state` for the interpreter. .. audit-event:: cpython.PyInterpreterState_Clear "" c.PyInterpreterState_Clear diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 25d9e62e387279..2d0bda76697e81 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -439,7 +439,7 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. All *n_bytes* of the buffer are written: large buffers are padded with zeroes. - If the returned value is greater than than *n_bytes*, the value was + If the returned value is greater than *n_bytes*, the value was truncated: as many of the lowest bits of the value as could fit are written, and the higher bits are ignored. This matches the typical behavior of a C-style downcast. 
diff --git a/Doc/extending/newtypes_tutorial.rst b/Doc/extending/newtypes_tutorial.rst index f14690de4f86e8..3bbee33bd50698 100644 --- a/Doc/extending/newtypes_tutorial.rst +++ b/Doc/extending/newtypes_tutorial.rst @@ -277,7 +277,7 @@ be an instance of a subclass. The explicit cast to ``CustomObject *`` above is needed because we defined ``Custom_dealloc`` to take a ``PyObject *`` argument, as the ``tp_dealloc`` function pointer expects to receive a ``PyObject *`` argument. - By assigning to the the ``tp_dealloc`` slot of a type, we declare + By assigning to the ``tp_dealloc`` slot of a type, we declare that it can only be called with instances of our ``CustomObject`` class, so the cast to ``(CustomObject *)`` is safe. This is object-oriented polymorphism, in C! diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index e00fe9c8145f12..846cece3761858 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -2965,7 +2965,7 @@ fields, or any other data types containing pointer type fields. .. attribute:: is_anonymous True if this field is anonymous, that is, it contains nested sub-fields - that should be be merged into a containing structure or union. + that should be merged into a containing structure or union. .. _ctypes-arrays-pointers: diff --git a/Doc/library/email.header.rst b/Doc/library/email.header.rst index c3392a62b8ee79..f49885b8785235 100644 --- a/Doc/library/email.header.rst +++ b/Doc/library/email.header.rst @@ -206,7 +206,7 @@ The :mod:`email.header` module also provides the following convenient functions. .. note:: - This function exists for for backwards compatibility only. For + This function exists for backwards compatibility only. For new code, we recommend using :class:`email.headerregistry.HeaderRegistry`. @@ -225,5 +225,5 @@ The :mod:`email.header` module also provides the following convenient functions. .. 
note:: - This function exists for for backwards compatibility only, and is + This function exists for backwards compatibility only, and is not recommended for use in new code. diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index bb72032891ea98..9806ae80905ca0 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -1048,7 +1048,7 @@ their subgroups based on the types of the contained exceptions. subclasses that need a different constructor signature need to override that rather than :meth:`~object.__init__`. For example, the following defines an exception group subclass which accepts an exit_code and - and constructs the group's message from it. :: + constructs the group's message from it. :: class Errors(ExceptionGroup): def __new__(cls, errors, exit_code): diff --git a/Doc/library/faulthandler.rst b/Doc/library/faulthandler.rst index 5058b85bffb15c..1977f4d3ba3916 100644 --- a/Doc/library/faulthandler.rst +++ b/Doc/library/faulthandler.rst @@ -90,7 +90,7 @@ An error will be printed instead of the stack. Additionally, some compilers do not support :term:`CPython's <CPython>` implementation of C stack dumps. As a result, a different error may be printed -instead of the stack, even if the the operating system supports dumping stacks. +instead of the stack, even if the operating system supports dumping stacks. .. note:: diff --git a/Doc/library/mmap.rst b/Doc/library/mmap.rst index 4e20c07331a220..8fca79b23e4e15 100644 --- a/Doc/library/mmap.rst +++ b/Doc/library/mmap.rst @@ -269,7 +269,7 @@ To map anonymous memory, -1 should be passed as the fileno along with the length Resizing a map created with *access* of :const:`ACCESS_READ` or :const:`ACCESS_COPY`, will raise a :exc:`TypeError` exception. - Resizing a map created with with *trackfd* set to ``False``, + Resizing a map created with *trackfd* set to ``False``, will raise a :exc:`ValueError` exception. 
**On Windows**: Resizing the map will raise an :exc:`OSError` if there are other diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 86351e65dc4ed6..47986a2d9602ee 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1985,7 +1985,7 @@ The :mod:`pathlib.types` module provides types for static type checking. If *follow_symlinks* is ``False``, return ``True`` only if the path is a file (without following symlinks); return ``False`` if the path - is a directory or other other non-file, or if it doesn't exist. + is a directory or other non-file, or if it doesn't exist. .. method:: is_symlink() diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst index 753f12460b824b..7fb629f7d2f256 100644 --- a/Doc/library/socketserver.rst +++ b/Doc/library/socketserver.rst @@ -543,7 +543,7 @@ objects that simplify communication by providing the standard file interface):: The difference is that the ``readline()`` call in the second handler will call ``recv()`` multiple times until it encounters a newline character, while the -the first handler had to use a ``recv()`` loop to accumulate data until a +first handler had to use a ``recv()`` loop to accumulate data until a newline itself. If it had just used a single ``recv()`` without the loop it would just have returned what has been received so far from the client. TCP is stream based: data arrives in the order it was sent, but there no diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index 52fefd590daf18..cabb41442f8419 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -621,7 +621,7 @@ since it is impossible to detect the termination of alien threads. an error to :meth:`~Thread.join` a thread before it has been started and attempts to do so raise the same exception. 
- If an attempt is made to join a running daemonic thread in in late stages + If an attempt is made to join a running daemonic thread in late stages of :term:`Python finalization <interpreter shutdown>` :meth:`!join` raises a :exc:`PythonFinalizationError`. diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index a5867b489e0053..cad49e2deeb46f 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -653,7 +653,7 @@ Miscellaneous options .. versionadded:: 3.13 * :samp:`-X thread_inherit_context={0,1}` causes :class:`~threading.Thread` - to, by default, use a copy of context of of the caller of + to, by default, use a copy of context of the caller of ``Thread.start()`` when starting. Otherwise, threads will start with an empty context. If unset, the value of this option defaults to ``1`` on free-threaded builds and to ``0`` otherwise. See also @@ -1284,7 +1284,7 @@ conflict. .. envvar:: PYTHON_THREAD_INHERIT_CONTEXT If this variable is set to ``1`` then :class:`~threading.Thread` will, - by default, use a copy of context of of the caller of ``Thread.start()`` + by default, use a copy of context of the caller of ``Thread.start()`` when starting. Otherwise, new threads will start with an empty context. If unset, this variable defaults to ``1`` on free-threaded builds and to ``0`` otherwise. See also :option:`-X thread_inherit_context<-X>`. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 580a3d8154dee1..ef7c36d8539dfd 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1996,7 +1996,7 @@ New Deprecations (Contributed by Alex Waygood in :gh:`105566` and :gh:`105570`.) * Deprecate the :func:`typing.no_type_check_decorator` decorator function, - to be removed in in Python 3.15. + to be removed in Python 3.15. After eight years in the :mod:`typing` module, it has yet to be supported by any major type checker. (Contributed by Alex Waygood in :gh:`106309`.)
From b38810bab76c11ea09260a817b3354aebc2af580 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Fri, 27 Jun 2025 12:46:49 +0800 Subject: [PATCH 16/55] gh-135966: Modify iOS testbed to make app_packages a site directory (#135967) The iOS testbed now treats the app_packages folder as a site folder. This ensures it is on the path, but also ensures any .pth files are processed on app startup. --- Doc/using/ios.rst | 11 ++-- ...-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst | 1 + iOS/testbed/iOSTestbedTests/iOSTestbedTests.m | 51 +++++++++++++++---- 3 files changed, 50 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst diff --git a/Doc/using/ios.rst b/Doc/using/ios.rst index 7d5c6331bef5ce..0fb28f8c866b02 100644 --- a/Doc/using/ios.rst +++ b/Doc/using/ios.rst @@ -298,9 +298,9 @@ To add Python to an iOS Xcode project: * Signal handlers (:c:member:`PyConfig.install_signal_handlers`) are *enabled*; * System logging (:c:member:`PyConfig.use_system_logger`) is *enabled* (optional, but strongly recommended; this is enabled by default); - * ``PYTHONHOME`` for the interpreter is configured to point at the + * :envvar:`PYTHONHOME` for the interpreter is configured to point at the ``python`` subfolder of your app's bundle; and - * The ``PYTHONPATH`` for the interpreter includes: + * The :envvar:`PYTHONPATH` for the interpreter includes: - the ``python/lib/python3.X`` subfolder of your app's bundle, - the ``python/lib/python3.X/lib-dynload`` subfolder of your app's bundle, and @@ -324,7 +324,12 @@ modules in your app, some additional steps will be required: the ``lib-dynload`` folder can be copied and adapted for this purpose. * If you're using a separate folder for third-party packages, ensure that folder - is included as part of the ``PYTHONPATH`` configuration in step 10. + is included as part of the :envvar:`PYTHONPATH` configuration in step 10. 
+ +* If any of the folders that contain third-party packages will contain ``.pth`` + files, you should add that folder as a *site directory* (using + :meth:`site.addsitedir`), rather than adding to :envvar:`PYTHONPATH` or + :attr:`sys.path` directly. Testing a Python package ------------------------ diff --git a/Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst b/Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst new file mode 100644 index 00000000000000..8dc007431f3919 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2025-06-26-15-15-35.gh-issue-135966.EBpF8Y.rst @@ -0,0 +1 @@ +The iOS testbed now handles the ``app_packages`` folder as a site directory. diff --git a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m index dd6e76f9496fe0..b502a6eb277b0b 100644 --- a/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m +++ b/iOS/testbed/iOSTestbedTests/iOSTestbedTests.m @@ -15,6 +15,11 @@ - (void)testPython { PyStatus status; PyPreConfig preconfig; PyConfig config; + PyObject *app_packages_path; + PyObject *method_args; + PyObject *result; + PyObject *site_module; + PyObject *site_addsitedir_attr; PyObject *sys_module; PyObject *sys_path_attr; NSArray *test_args; @@ -111,29 +116,55 @@ - (void)testPython { return; } - sys_module = PyImport_ImportModule("sys"); - if (sys_module == NULL) { - XCTFail(@"Could not import sys module"); + // Add app_packages as a site directory. This both adds to sys.path, + // and ensures that any .pth files in that directory will be executed. 
+ site_module = PyImport_ImportModule("site"); + if (site_module == NULL) { + XCTFail(@"Could not import site module"); return; } - sys_path_attr = PyObject_GetAttrString(sys_module, "path"); - if (sys_path_attr == NULL) { - XCTFail(@"Could not access sys.path"); + site_addsitedir_attr = PyObject_GetAttrString(site_module, "addsitedir"); + if (site_addsitedir_attr == NULL || !PyCallable_Check(site_addsitedir_attr)) { + XCTFail(@"Could not access site.addsitedir"); return; } - // Add the app packages path path = [NSString stringWithFormat:@"%@/app_packages", resourcePath, nil]; NSLog(@"App packages path: %@", path); wtmp_str = Py_DecodeLocale([path UTF8String], NULL); - failed = PyList_Insert(sys_path_attr, 0, PyUnicode_FromString([path UTF8String])); - if (failed) { - XCTFail(@"Unable to add app packages to sys.path"); + app_packages_path = PyUnicode_FromWideChar(wtmp_str, wcslen(wtmp_str)); + if (app_packages_path == NULL) { + XCTFail(@"Could not convert app_packages path to unicode"); return; } PyMem_RawFree(wtmp_str); + method_args = Py_BuildValue("(O)", app_packages_path); + if (method_args == NULL) { + XCTFail(@"Could not create arguments for site.addsitedir"); + return; + } + + result = PyObject_CallObject(site_addsitedir_attr, method_args); + if (result == NULL) { + XCTFail(@"Could not add app_packages directory using site.addsitedir"); + return; + } + + // Add test code to sys.path + sys_module = PyImport_ImportModule("sys"); + if (sys_module == NULL) { + XCTFail(@"Could not import sys module"); + return; + } + + sys_path_attr = PyObject_GetAttrString(sys_module, "path"); + if (sys_path_attr == NULL) { + XCTFail(@"Could not access sys.path"); + return; + } + path = [NSString stringWithFormat:@"%@/app", resourcePath, nil]; NSLog(@"App path: %@", path); wtmp_str = Py_DecodeLocale([path UTF8String], NULL); From 2fc68e180ffdb31886938203e89a75b220a58cec Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Thu, 26 Jun 2025 23:48:05 -0500 Subject: [PATCH 17/55] 
gh-135551: Change how sorting picks minimum run length (#135553) New scheme from Stefan Pochmann for picking minimum run lengths. By allowing them to change a little from one run to the next, it's possible to arrange for that all merges, at all levels, strongly tend to be as evenly balanced as possible, for randomly ordered data. Meaning the number of initial runs is a power of 2, and all merges involve runs whose lengths differ by no more than 1. --- Misc/ACKS | 1 + ...-06-16-03-56-15.gh-issue-135551.hRTQO-.rst | 1 + Objects/listobject.c | 48 +++-- Objects/listsort.txt | 175 ++++++++++++++++-- 4 files changed, 184 insertions(+), 41 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst diff --git a/Misc/ACKS b/Misc/ACKS index 74cf29cdbc552f..6ab50763feadd9 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1481,6 +1481,7 @@ Jean-François Piéronne Oleg Plakhotnyuk Anatoliy Platonov Marcel Plch +Stefan Pochmann Kirill Podoprigora Remi Pointel Jon Poler diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst new file mode 100644 index 00000000000000..22dda2a3e972a8 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-16-03-56-15.gh-issue-135551.hRTQO-.rst @@ -0,0 +1 @@ +Sorting randomly ordered lists will often run a bit faster, thanks to a new scheme for picking minimum run lengths from Stefan Pochmann, which arranges for the merge tree to be as evenly balanced as is possible. diff --git a/Objects/listobject.c b/Objects/listobject.c index 23d3472b6d4153..1b36f4c25abf4d 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -1685,10 +1685,7 @@ sortslice_advance(sortslice *slice, Py_ssize_t n) /* Avoid malloc for small temp arrays. */ #define MERGESTATE_TEMP_SIZE 256 -/* The largest value of minrun. 
This must be a power of 2, and >= 1, so that - * the compute_minrun() algorithm guarantees to return a result no larger than - * this, - */ +/* The largest value of minrun. This must be a power of 2, and >= 1 */ #define MAX_MINRUN 64 #if ((MAX_MINRUN) < 1) || ((MAX_MINRUN) & ((MAX_MINRUN) - 1)) #error "MAX_MINRUN must be a power of 2, and >= 1" @@ -1749,6 +1746,11 @@ struct s_MergeState { * of tuples. It may be set to safe_object_compare, but the idea is that hopefully * we can assume more, and use one of the special-case compares. */ int (*tuple_elem_compare)(PyObject *, PyObject *, MergeState *); + + /* Variables used for minrun computation. The "ideal" minrun length is + * the infinite precision listlen / 2**e. See listsort.txt. + */ + Py_ssize_t mr_current, mr_e, mr_mask; }; /* binarysort is the best method for sorting small arrays: it does few @@ -2210,6 +2212,14 @@ merge_init(MergeState *ms, Py_ssize_t list_size, int has_keyfunc, ms->min_gallop = MIN_GALLOP; ms->listlen = list_size; ms->basekeys = lo->keys; + + /* State for generating minrun values. See listsort.txt. */ + ms->mr_e = 0; + while (list_size >> ms->mr_e >= MAX_MINRUN) { + ++ms->mr_e; + } + ms->mr_mask = (1 << ms->mr_e) - 1; + ms->mr_current = 0; } /* Free all the temp memory owned by the MergeState. This must be called @@ -2687,27 +2697,15 @@ merge_force_collapse(MergeState *ms) return 0; } -/* Compute a good value for the minimum run length; natural runs shorter - * than this are boosted artificially via binary insertion. - * - * If n < MAX_MINRUN return n (it's too small to bother with fancy stuff). - * Else if n is an exact power of 2, return MAX_MINRUN / 2. - * Else return an int k, MAX_MINRUN / 2 <= k <= MAX_MINRUN, such that n/k is - * close to, but strictly less than, an exact power of 2. - * - * See listsort.txt for more info. - */ -static Py_ssize_t -merge_compute_minrun(Py_ssize_t n) +/* Return the next minrun value to use. See listsort.txt.
*/ +Py_LOCAL_INLINE(Py_ssize_t) +minrun_next(MergeState *ms) { - Py_ssize_t r = 0; /* becomes 1 if any 1 bits are shifted off */ - - assert(n >= 0); - while (n >= MAX_MINRUN) { - r |= n & 1; - n >>= 1; - } - return n + r; + ms->mr_current += ms->listlen; + assert(ms->mr_current >= 0); /* no overflow */ + Py_ssize_t result = ms->mr_current >> ms->mr_e; + ms->mr_current &= ms->mr_mask; + return result; } /* Here we define custom comparison functions to optimize for the cases one commonly @@ -3075,7 +3073,6 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) /* March over the array once, left to right, finding natural runs, * and extending short natural runs to minrun elements. */ - minrun = merge_compute_minrun(nremaining); do { Py_ssize_t n; @@ -3084,6 +3081,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) if (n < 0) goto fail; /* If short, extend to min(minrun, nremaining). */ + minrun = minrun_next(&ms); if (n < minrun) { const Py_ssize_t force = nremaining <= minrun ? nremaining : minrun; diff --git a/Objects/listsort.txt b/Objects/listsort.txt index f387d9c116e502..5b2fc7d50a25ca 100644 --- a/Objects/listsort.txt +++ b/Objects/listsort.txt @@ -270,8 +270,8 @@ result. This has two primary good effects: Computing minrun ---------------- -If N < MAX_MINRUN, minrun is N. IOW, binary insertion sort is used for the -whole array then; it's hard to beat that given the overheads of trying +If N < MAX_MINRUN, minrun is N. IOW, binary insertion sort is used for the +whole array then; it's hard to beat that given the overheads of trying something fancier (see note BINSORT). When N is a power of 2, testing on random data showed that minrun values of @@ -288,7 +288,6 @@ that 32 isn't a good choice for the general case! Consider N=2112: >>> divmod(2112, 32) (66, 0) ->>> If the data is randomly ordered, we're very likely to end up with 66 runs each of length 32. 
The first 64 of these trigger a sequence of perfectly @@ -301,22 +300,94 @@ to get 64 elements into place). If we take minrun=33 in this case, then we're very likely to end up with 64 runs each of length 33, and then all merges are perfectly balanced. Better! -What we want to avoid is picking minrun such that in +The original code used a cheap heuristic to pick a minrun that avoided the +very worst cases of imbalance for the final merge, but "pretty bad" cases +still existed. - q, r = divmod(N, minrun) +In 2025, Stefan Pochmann found a much better approach, based on letting minrun +vary a bit from one run to the next. Under his scheme, at _all_ levels of the +merge tree: -q is a power of 2 and r>0 (then the last merge only gets r elements into -place, and r < minrun is small compared to N), or q a little larger than a -power of 2 regardless of r (then we've got a case similar to "2112", again -leaving too little work for the last merge to do). +- The number of runs is a power of 2. +- At most two different run lengths appear. +- When two do appear, the smaller is one less than the larger. +- The lengths of run pairs merged never differ by more than one. -Instead we pick a minrun in range(MAX_MINRUN / 2, MAX_MINRUN + 1) such that -N/minrun is exactly a power of 2, or if that isn't possible, is close to, but -strictly less than, a power of 2. This is easier to do than it may sound: -take the first log2(MAX_MINRUN) bits of N, and add 1 if any of the remaining -bits are set. In fact, that rule covers every case in this section, including -small N and exact powers of 2; merge_compute_minrun() is a deceptively simple -function. +So, in all respects, as perfectly balanced as possible. + +For the 2112 case, that also keeps minrun at 33, but we were lucky there +that 2112 is 33 times a power of 2. The new approach doesn't rely on luck. + +For example, with 315 random elements, the old scheme uses fixed minrun=40 and +produces runs of length 40, except for the last. 
The new scheme produces a +mix of lengths 39 and 40: + +old: 40 40 40 40 40 40 40 35 +new: 39 39 40 39 39 40 39 40 + +Both schemes produce eight runs, a power of 2. That's good for a balanced +merge tree. But the new scheme allows merges where left and right length +never differ by more than 1: + +39 39 40 39 39 40 39 40 + 78 79 79 79 + 157 158 + 315 + +(This shows merges downward, e.g., two runs of length 39 are merged and +become a run of length 78.) + +With larger lists, the old scheme can get even more unbalanced. For example, +with 32769 elements (that's 2**15 + 1), it uses minrun=33 and produces 993 +runs (of length 33). That's not even a power of 2. The new scheme instead +produces 1024 runs, all with length 32 except for the last one with length 33. + +How does it work? Ideally, all runs would be exactly equally long. For the +above example, each run would have 315/8 = 39.375 elements. Which of course +doesn't work. But we can get close: + +For the first run, we'd like 39.375 elements. Since that's impossible, we +instead use 39 (the floor) and remember the current leftover fraction 0.375. +For the second run, we add 0.375 + 39.375 = 39.75. Again impossible, so we +instead use 39 and remember 0.75. For the third run, we add 0.75 + 39.375 = +40.125. This time we get 40 and remember 0.125. And so on. Here's a Python +generator doing that: + +def gen_minruns_with_floats(n): + mr = n + while mr >= MAX_MINRUN: + mr /= 2 + + mr_current = 0 + while True: + mr_current += mr + yield int(mr_current) + mr_current %= 1 + +But while all arithmetic here can be done exactly using binary floating point, +floats have less precision than a Py_ssize_t, and mixing floats with ints is +needlessly expensive anyway. + +So here's an integer version, where the internal numbers are scaled up by +2**e, or rather not divided by 2**e. Instead, only each yielded minrun gets +divided (by right-shifting).
For example instead of adding 39.375 and +reducing modulo 1, it just adds 315 and reduces modulo 8. And always divides +by 8 to get each actual minrun value: + +def gen_minruns_simpler(n): + e = 0 + while (n >> e) >= MAX_MINRUN: + e += 1 + mask = (1 << e) - 1 + + mr_current = 0 + while True: + mr_current += n + yield mr_current >> e + mr_current &= mask + +See note MINRUN CODE for a full implementation and a driver that exhaustively +verifies the claims above for all list lengths through 2 million. The Merge Pattern @@ -820,3 +891,75 @@ partially mitigated by pre-scanning the data to determine whether the data is homogeneous with respect to type. If so, it is sometimes possible to substitute faster type-specific comparisons for the slower, generic PyObject_RichCompareBool. + +MINRUN CODE +from itertools import accumulate +try: + from itertools import batched +except ImportError: + from itertools import islice + def batched(xs, k): + it = iter(xs) + while chunk := tuple(islice(it, k)): + yield chunk + +MAX_MINRUN = 64 + +def gen_minruns(n): + # In listobject.c, initialization is done in merge_init(), and + # the body of the loop in minrun_next(). 
+ mr_e = 0 + while (n >> mr_e) >= MAX_MINRUN: + mr_e += 1 + mr_mask = (1 << mr_e) - 1 + + mr_current = 0 + while True: + mr_current += n + yield mr_current >> mr_e + mr_current &= mr_mask + +def chew(n, show=False): + if n < 1: + return + + sizes = [] + tot = 0 + for size in gen_minruns(n): + sizes.append(size) + tot += size + if tot >= n: + break + assert tot == n + print(n, len(sizes)) + + small, large = MAX_MINRUN // 2, MAX_MINRUN + while len(sizes) > 1: + assert not len(sizes) & 1 + assert len(sizes).bit_count() == 1 # i.e., power of 2 + assert sum(sizes) == n + assert min(sizes) >= min(n, small) + assert max(sizes) <= large + + d = set(sizes) + assert len(d) <= 2 + if len(d) == 2: + lo, hi = sorted(d) + assert lo + 1 == hi + + mr = n / len(sizes) + for i, s in enumerate(accumulate(sizes, initial=0)): + assert int(mr * i) == s + + newsizes = [] + for a, b in batched(sizes, 2): + assert abs(a - b) <= 1 + newsizes.append(a + b) + sizes = newsizes + small = large + large *= 2 + + assert sizes[0] == n + +for n in range(2_000_001): + chew(n) \ No newline at end of file From 0c6c09b7377e10dcf80844c961b578fbdc6f5375 Mon Sep 17 00:00:00 2001 From: Russell Keith-Magee Date: Fri, 27 Jun 2025 12:58:20 +0800 Subject: [PATCH 18/55] gh-135968: Add iOS binary stubs for strip (#135970) Adds iOS binary stubs for invoking `strip` --- .../Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst | 1 + iOS/Resources/bin/arm64-apple-ios-simulator-strip | 2 ++ iOS/Resources/bin/arm64-apple-ios-strip | 2 ++ iOS/Resources/bin/x86_64-apple-ios-simulator-strip | 2 ++ 4 files changed, 7 insertions(+) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst create mode 100755 iOS/Resources/bin/arm64-apple-ios-simulator-strip create mode 100755 iOS/Resources/bin/arm64-apple-ios-strip create mode 100755 iOS/Resources/bin/x86_64-apple-ios-simulator-strip diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst
b/Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst new file mode 100644 index 00000000000000..1c0b3825c71c1d --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-06-26-15-58-13.gh-issue-135968.C4v_-W.rst @@ -0,0 +1 @@ +Stubs for ``strip`` are now provided as part of an iOS install. diff --git a/iOS/Resources/bin/arm64-apple-ios-simulator-strip b/iOS/Resources/bin/arm64-apple-ios-simulator-strip new file mode 100755 index 00000000000000..fd59d309b73a20 --- /dev/null +++ b/iOS/Resources/bin/arm64-apple-ios-simulator-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} strip -arch arm64 "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-strip b/iOS/Resources/bin/arm64-apple-ios-strip new file mode 100755 index 00000000000000..75e823a3d02d61 --- /dev/null +++ b/iOS/Resources/bin/arm64-apple-ios-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphoneos${IOS_SDK_VERSION} strip -arch arm64 "$@" diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-strip b/iOS/Resources/bin/x86_64-apple-ios-simulator-strip new file mode 100755 index 00000000000000..c5cfb28929195a --- /dev/null +++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-strip @@ -0,0 +1,2 @@ +#!/bin/sh +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} strip -arch x86_64 "$@" From 07183ebce36462aaaea4d20e0502b20821dd2682 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 27 Jun 2025 10:50:59 +0300 Subject: [PATCH 19/55] gh-53203: Fix strptime() for %c, %x and %X formats on some locales (#135971) * Add detection of decimal non-ASCII alt digits. * Add support of non-decimal alt digits on locale lzh_TW. * Accept only numbers in correct range if alt digits are known. * Fix bug in detecting the position of the week day name on locales byn_ER and wal_ET. * Fix support of single-digit hour on locales ar_SA and bg_BG. * Add support for %T, %R, %r, %C, %OC. * Prepare code to use nl_langinfo(). 
--- Lib/_strptime.py | 190 ++++++++++++++---- Lib/test/test_strptime.py | 28 ++- ...5-06-26-11-52-40.gh-issue-53203.TMigBr.rst | 2 + 3 files changed, 164 insertions(+), 56 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst diff --git a/Lib/_strptime.py b/Lib/_strptime.py index ae67949626d460..7ac6f36360cb69 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -14,6 +14,7 @@ import time import locale import calendar +import re from re import compile as re_compile from re import sub as re_sub from re import IGNORECASE @@ -41,6 +42,21 @@ def _findall(haystack, needle): yield i i += len(needle) + +lzh_TW_alt_digits = ( + # 〇:一:二:三:四:五:六:七:八:九 + '\u3007', '\u4e00', '\u4e8c', '\u4e09', '\u56db', + '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d', + # 十:十一:十二:十三:十四:十五:十六:十七:十八:十九 + '\u5341', '\u5341\u4e00', '\u5341\u4e8c', '\u5341\u4e09', '\u5341\u56db', + '\u5341\u4e94', '\u5341\u516d', '\u5341\u4e03', '\u5341\u516b', '\u5341\u4e5d', + # 廿:廿一:廿二:廿三:廿四:廿五:廿六:廿七:廿八:廿九 + '\u5eff', '\u5eff\u4e00', '\u5eff\u4e8c', '\u5eff\u4e09', '\u5eff\u56db', + '\u5eff\u4e94', '\u5eff\u516d', '\u5eff\u4e03', '\u5eff\u516b', '\u5eff\u4e5d', + # 卅:卅一 + '\u5345', '\u5345\u4e00') + + class LocaleTime(object): """Stores and handles locale-specific information related to time. @@ -84,6 +100,7 @@ def __init__(self): self.__calc_weekday() self.__calc_month() self.__calc_am_pm() + self.__calc_alt_digits() self.__calc_timezone() self.__calc_date_time() if _getlang() != self.lang: @@ -119,9 +136,43 @@ def __calc_am_pm(self): am_pm.append(time.strftime("%p", time_tuple).lower().strip()) self.am_pm = am_pm + def __calc_alt_digits(self): + # Set self.LC_alt_digits by using time.strftime(). + + # The magic data should contain all decimal digits. + time_tuple = time.struct_time((1998, 1, 27, 10, 43, 56, 1, 27, 0)) + s = time.strftime("%x%X", time_tuple) + if s.isascii(): + # Fast path -- all digits are ASCII. 
+ self.LC_alt_digits = () + return + + digits = ''.join(sorted(set(re.findall(r'\d', s)))) + if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9: + # All 10 decimal digits from the same set. + if digits.isascii(): + # All digits are ASCII. + self.LC_alt_digits = () + return + + self.LC_alt_digits = [a + b for a in digits for b in digits] + # Test whether the numbers contain leading zero. + time_tuple2 = time.struct_time((2000, 1, 1, 1, 1, 1, 5, 1, 0)) + if self.LC_alt_digits[1] not in time.strftime("%x %X", time_tuple2): + self.LC_alt_digits[:10] = digits + return + + # Either non-Gregorian calendar or non-decimal numbers. + if {'\u4e00', '\u4e03', '\u4e5d', '\u5341', '\u5eff'}.issubset(s): + # lzh_TW + self.LC_alt_digits = lzh_TW_alt_digits + return + + self.LC_alt_digits = None + def __calc_date_time(self): - # Set self.date_time, self.date, & self.time by using - # time.strftime(). + # Set self.LC_date_time, self.LC_date, self.LC_time and + # self.LC_time_ampm by using time.strftime(). # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of # overloaded numbers is minimized. The order in which searches for @@ -129,26 +180,32 @@ def __calc_date_time(self): # possible ambiguity for what something represents. 
time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) - replacement_pairs = [ + replacement_pairs = [] + + # Non-ASCII digits + if self.LC_alt_digits or self.LC_alt_digits is None: + for n, d in [(19, '%OC'), (99, '%Oy'), (22, '%OH'), + (44, '%OM'), (55, '%OS'), (17, '%Od'), + (3, '%Om'), (2, '%Ow'), (10, '%OI')]: + if self.LC_alt_digits is None: + s = chr(0x660 + n // 10) + chr(0x660 + n % 10) + replacement_pairs.append((s, d)) + if n < 10: + replacement_pairs.append((s[1], d)) + elif len(self.LC_alt_digits) > n: + replacement_pairs.append((self.LC_alt_digits[n], d)) + else: + replacement_pairs.append((time.strftime(d, time_tuple), d)) + replacement_pairs += [ ('1999', '%Y'), ('99', '%y'), ('22', '%H'), ('44', '%M'), ('55', '%S'), ('76', '%j'), ('17', '%d'), ('03', '%m'), ('3', '%m'), # '3' needed for when no leading zero. ('2', '%w'), ('10', '%I'), - # Non-ASCII digits - ('\u0661\u0669\u0669\u0669', '%Y'), - ('\u0669\u0669', '%Oy'), - ('\u0662\u0662', '%OH'), - ('\u0664\u0664', '%OM'), - ('\u0665\u0665', '%OS'), - ('\u0661\u0667', '%Od'), - ('\u0660\u0663', '%Om'), - ('\u0663', '%Om'), - ('\u0662', '%Ow'), - ('\u0661\u0660', '%OI'), ] + date_time = [] - for directive in ('%c', '%x', '%X'): + for directive in ('%c', '%x', '%X', '%r'): current_format = time.strftime(directive, time_tuple).lower() current_format = current_format.replace('%', '%%') # The month and the day of the week formats are treated specially @@ -172,9 +229,10 @@ def __calc_date_time(self): if tz: current_format = current_format.replace(tz, "%Z") # Transform all non-ASCII digits to digits in range U+0660 to U+0669. 
- current_format = re_sub(r'\d(?3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P[0-9]{1,6})", - 'H': r"(?P2[0-3]|[0-1]\d|\d)", + 'H': r"(?P2[0-3]|[0-1]\d|\d| \d)", + 'k': r"(?P2[0-3]|[0-1]\d|\d| \d)", 'I': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", + 'l': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", 'G': r"(?P\d\d\d\d)", 'j': r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", 'm': r"(?P1[0-2]|0[1-9]|[1-9])", @@ -312,16 +373,49 @@ def __init__(self, locale_time=None): for tz in tz_names), 'Z'), '%': '%'} - for d in 'dmyHIMS': - mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d - mapping['Ow'] = r'(?P\d)' + if self.locale_time.LC_alt_digits is None: + for d in 'dmyCHIMS': + mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d + mapping['Ow'] = r'(?P\d)' + else: + mapping.update({ + 'Od': self.__seqToRE(self.locale_time.LC_alt_digits[1:32], 'd', + '3[0-1]|[1-2][0-9]|0[1-9]|[1-9]'), + 'Om': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'm', + '1[0-2]|0[1-9]|[1-9]'), + 'Ow': self.__seqToRE(self.locale_time.LC_alt_digits[:7], 'w', + '[0-6]'), + 'Oy': self.__seqToRE(self.locale_time.LC_alt_digits, 'y', + '[0-9][0-9]'), + 'OC': self.__seqToRE(self.locale_time.LC_alt_digits, 'C', + '[0-9][0-9]'), + 'OH': self.__seqToRE(self.locale_time.LC_alt_digits[:24], 'H', + '2[0-3]|[0-1][0-9]|[0-9]'), + 'OI': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'I', + '1[0-2]|0[1-9]|[1-9]'), + 'OM': self.__seqToRE(self.locale_time.LC_alt_digits[:60], 'M', + '[0-5][0-9]|[0-9]'), + 'OS': self.__seqToRE(self.locale_time.LC_alt_digits[:62], 'S', + '6[0-1]|[0-5][0-9]|[0-9]'), + }) + mapping.update({ + 'e': mapping['d'], + 'Oe': mapping['Od'], + 'P': mapping['p'], + 'Op': mapping['p'], + 'W': mapping['U'].replace('U', 'W'), + }) mapping['W'] = mapping['U'].replace('U', 'W') + base.__init__(mapping) + base.__setitem__('T', self.pattern('%H:%M:%S')) + base.__setitem__('R', self.pattern('%H:%M')) + base.__setitem__('r', self.pattern(self.locale_time.LC_time_ampm)) base.__setitem__('X', 
self.pattern(self.locale_time.LC_time)) base.__setitem__('x', self.pattern(self.locale_time.LC_date)) base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - def __seqToRE(self, to_convert, directive): + def __seqToRE(self, to_convert, directive, altregex=None): """Convert a list to a regex string for matching a directive. Want possible matching values to be from longest to shortest. This @@ -337,8 +431,9 @@ def __seqToRE(self, to_convert, directive): else: return '' regex = '|'.join(re_escape(stuff) for stuff in to_convert) - regex = '(?P<%s>%s' % (directive, regex) - return '%s)' % regex + if altregex is not None: + regex += '|' + altregex + return '(?P<%s>%s)' % (directive, regex) def pattern(self, format): """Return regex pattern for the format string. @@ -365,7 +460,7 @@ def repl(m): nonlocal day_of_month_in_format day_of_month_in_format = True return self[format_char] - format = re_sub(r'%([OE]?\\?.?)', repl, format) + format = re_sub(r'%[-_0^#]*[0-9]*([OE]?\\?.?)', repl, format) if day_of_month_in_format and not year_in_format: import warnings warnings.warn("""\ @@ -467,6 +562,15 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # values weekday = julian = None found_dict = found.groupdict() + if locale_time.LC_alt_digits: + def parse_int(s): + try: + return locale_time.LC_alt_digits.index(s) + except ValueError: + return int(s) + else: + parse_int = int + for group_key in found_dict.keys(): # Directives not explicitly handled below: # c, x, X @@ -474,30 +578,34 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # U, W # worthless without day of the week if group_key == 'y': - year = int(found_dict['y']) - # Open Group specification for strptime() states that a %y - #value in the range of [00, 68] is in the century 2000, while - #[69,99] is in the century 1900 - if year <= 68: - year += 2000 + year = parse_int(found_dict['y']) + if 'C' in found_dict: + century = parse_int(found_dict['C']) + year += century * 100 else: - 
year += 1900 + # Open Group specification for strptime() states that a %y + #value in the range of [00, 68] is in the century 2000, while + #[69,99] is in the century 1900 + if year <= 68: + year += 2000 + else: + year += 1900 elif group_key == 'Y': year = int(found_dict['Y']) elif group_key == 'G': iso_year = int(found_dict['G']) elif group_key == 'm': - month = int(found_dict['m']) + month = parse_int(found_dict['m']) elif group_key == 'B': month = locale_time.f_month.index(found_dict['B'].lower()) elif group_key == 'b': month = locale_time.a_month.index(found_dict['b'].lower()) elif group_key == 'd': - day = int(found_dict['d']) + day = parse_int(found_dict['d']) elif group_key == 'H': - hour = int(found_dict['H']) + hour = parse_int(found_dict['H']) elif group_key == 'I': - hour = int(found_dict['I']) + hour = parse_int(found_dict['I']) ampm = found_dict.get('p', '').lower() # If there was no AM/PM indicator, we'll treat this like AM if ampm in ('', locale_time.am_pm[0]): @@ -513,9 +621,9 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): if hour != 12: hour += 12 elif group_key == 'M': - minute = int(found_dict['M']) + minute = parse_int(found_dict['M']) elif group_key == 'S': - second = int(found_dict['S']) + second = parse_int(found_dict['S']) elif group_key == 'f': s = found_dict['f'] # Pad to always return microseconds. 
diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 268230f6da78f8..e52c46f8c58cce 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -221,14 +221,16 @@ def test_ValueError(self): self.assertRaises(ValueError, _strptime._strptime_time, data_string="%d", format="%A") for bad_format in ("%", "% ", "%\n"): - with self.assertRaisesRegex(ValueError, "stray % in format "): + with (self.subTest(format=bad_format), + self.assertRaisesRegex(ValueError, "stray % in format ")): _strptime._strptime_time("2005", bad_format) - for bad_format in ("%e", "%Oe", "%O", "%O ", "%Ee", "%E", "%E ", - "%.", "%+", "%_", "%~", "%\\", + for bad_format in ("%i", "%Oi", "%O", "%O ", "%Ee", "%E", "%E ", + "%.", "%+", "%~", "%\\", "%O.", "%O+", "%O_", "%O~", "%O\\"): directive = bad_format[1:].rstrip() - with self.assertRaisesRegex(ValueError, - f"'{re.escape(directive)}' is a bad directive in format "): + with (self.subTest(format=bad_format), + self.assertRaisesRegex(ValueError, + f"'{re.escape(directive)}' is a bad directive in format ")): _strptime._strptime_time("2005", bad_format) msg_week_no_year_or_weekday = r"ISO week directive '%V' must be used with " \ @@ -480,13 +482,11 @@ def test_bad_timezone(self): # * Year is not included: ha_NG. # * Use non-Gregorian calendar: lo_LA, thai, th_TH. # On Windows: ar_IN, ar_SA, fa_IR, ps_AF. - # - # BUG: Generates regexp that does not match the current date and time - # for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN', - 'my_MM', 'or_IN', 'shn_MM', 'az_IR') + 'my_MM', 'or_IN', 'shn_MM', 'az_IR', + 'byn_ER', 'wal_ET', 'lzh_TW') def test_date_time_locale(self): # Test %c directive loc = locale.getlocale(locale.LC_TIME)[0] @@ -525,11 +525,9 @@ def test_date_time_locale2(self): # NB: Does not roundtrip because use non-Gregorian calendar: # lo_LA, thai, th_TH. 
On Windows: ar_IN, ar_SA, fa_IR, ps_AF. - # BUG: Generates regexp that does not match the current date - # for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', - 'az_IR', 'my_MM', 'or_IN', 'shn_MM') + 'az_IR', 'my_MM', 'or_IN', 'shn_MM', 'lzh_TW') def test_date_locale(self): # Test %x directive now = time.time() @@ -546,7 +544,7 @@ def test_date_locale(self): # NB: Dates before 1969 do not roundtrip on many locales, including C. @unittest.skipIf(support.linked_to_musl(), "musl libc issue, bpo-46390") @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', - 'eu_ES', 'ar_AE', 'my_MM', 'shn_MM') + 'eu_ES', 'ar_AE', 'my_MM', 'shn_MM', 'lzh_TW') def test_date_locale2(self): # Test %x directive loc = locale.getlocale(locale.LC_TIME)[0] @@ -562,11 +560,11 @@ def test_date_locale2(self): # norwegian, nynorsk. # * Hours are in 12-hour notation without AM/PM indication: hy_AM, # ms_MY, sm_WS. - # BUG: Generates regexp that does not match the current time for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'aa_ET', 'am_ET', 'az_IR', 'byn_ER', 'fa_IR', 'gez_ET', 'my_MM', 'om_ET', 'or_IN', 'shn_MM', 'sid_ET', 'so_SO', - 'ti_ET', 'tig_ER', 'wal_ET') + 'ti_ET', 'tig_ER', 'wal_ET', 'lzh_TW', + 'ar_SA', 'bg_BG') def test_time_locale(self): # Test %X directive loc = locale.getlocale(locale.LC_TIME)[0] diff --git a/Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst b/Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst new file mode 100644 index 00000000000000..ba2fae49fdc933 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-26-11-52-40.gh-issue-53203.TMigBr.rst @@ -0,0 +1,2 @@ +Fix :func:`time.strptime` for ``%c`` and ``%x`` formats on locales byn_ER, +wal_ET and lzh_TW, and for ``%X`` format on locales ar_SA, bg_BG and lzh_TW. 
From e23518fa96583d0190d457adb807b19545df26cf Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Fri, 27 Jun 2025 11:01:51 +0200 Subject: [PATCH 20/55] gh-136017: avoid decref in rich compare for bool objects (#136018) --- Objects/object.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Objects/object.c b/Objects/object.c index 4d60128b092c22..1223983753ac46 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -1131,11 +1131,14 @@ PyObject_RichCompareBool(PyObject *v, PyObject *w, int op) res = PyObject_RichCompare(v, w, op); if (res == NULL) return -1; - if (PyBool_Check(res)) + if (PyBool_Check(res)) { ok = (res == Py_True); - else + assert(_Py_IsImmortal(res)); + } + else { ok = PyObject_IsTrue(res); - Py_DECREF(res); + Py_DECREF(res); + } return ok; } From f3aec60d7a01c5f085a3ef2d6670d46b42b8ddd3 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 27 Jun 2025 13:00:25 +0300 Subject: [PATCH 21/55] gh-128051: Fix tests if sys.float_repr_style is 'legacy' (#135908) Co-authored-by: Victor Stinner --- Lib/difflib.py | 4 ++-- Lib/test/test_builtin.py | 3 ++- Lib/test/test_configparser.py | 12 ++++++------ Lib/test/test_ctypes/test_parameters.py | 4 +++- Lib/test/test_enum.py | 2 +- Lib/test/test_float.py | 2 ++ Lib/test/test_format.py | 10 +++++----- Lib/test/test_fstring.py | 8 ++++---- Lib/test/test_json/test_tool.py | 2 +- Lib/test/test_optparse.py | 4 ++-- Lib/test/test_peepholer.py | 6 +++--- Lib/test/test_pprint.py | 2 +- Lib/test/test_reprlib.py | 20 ++++++++++---------- Lib/test/test_statistics.py | 3 ++- Lib/test/test_str.py | 8 ++++---- Lib/test/test_types.py | 4 ++-- 16 files changed, 50 insertions(+), 44 deletions(-) diff --git a/Lib/difflib.py b/Lib/difflib.py index 18801a9b19eb9d..487936dbf47cdc 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -78,8 +78,8 @@ class SequenceMatcher: sequences. 
As a rule of thumb, a .ratio() value over 0.6 means the sequences are close matches: - >>> print(round(s.ratio(), 3)) - 0.866 + >>> print(round(s.ratio(), 2)) + 0.87 >>> If you're only interested in where the sequences match, diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index d221aa5e1d999f..14fe3355239615 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -2991,7 +2991,8 @@ def test_namespace_order(self): def load_tests(loader, tests, pattern): from doctest import DocTestSuite - tests.addTest(DocTestSuite(builtins)) + if sys.float_repr_style == 'short': + tests.addTest(DocTestSuite(builtins)) return tests if __name__ == "__main__": diff --git a/Lib/test/test_configparser.py b/Lib/test/test_configparser.py index 23904d17d326d8..e7364e18742c16 100644 --- a/Lib/test/test_configparser.py +++ b/Lib/test/test_configparser.py @@ -986,12 +986,12 @@ def test_add_section_default(self): def test_defaults_keyword(self): """bpo-23835 fix for ConfigParser""" - cf = self.newconfig(defaults={1: 2.4}) - self.assertEqual(cf[self.default_section]['1'], '2.4') - self.assertAlmostEqual(cf[self.default_section].getfloat('1'), 2.4) - cf = self.newconfig(defaults={"A": 5.2}) - self.assertEqual(cf[self.default_section]['a'], '5.2') - self.assertAlmostEqual(cf[self.default_section].getfloat('a'), 5.2) + cf = self.newconfig(defaults={1: 2.5}) + self.assertEqual(cf[self.default_section]['1'], '2.5') + self.assertAlmostEqual(cf[self.default_section].getfloat('1'), 2.5) + cf = self.newconfig(defaults={"A": 5.25}) + self.assertEqual(cf[self.default_section]['a'], '5.25') + self.assertAlmostEqual(cf[self.default_section].getfloat('a'), 5.25) class ConfigParserTestCaseNoInterpolation(BasicTestCase, unittest.TestCase): diff --git a/Lib/test/test_ctypes/test_parameters.py b/Lib/test/test_ctypes/test_parameters.py index f89521cf8b3a67..46f8ff93efa915 100644 --- a/Lib/test/test_ctypes/test_parameters.py +++ b/Lib/test/test_ctypes/test_parameters.py @@ -1,3 
+1,4 @@ +import sys import unittest import test.support from ctypes import (CDLL, PyDLL, ArgumentError, @@ -240,7 +241,8 @@ def test_parameter_repr(self): self.assertRegex(repr(c_ulonglong.from_param(20000)), r"^$") self.assertEqual(repr(c_float.from_param(1.5)), "") self.assertEqual(repr(c_double.from_param(1.5)), "") - self.assertEqual(repr(c_double.from_param(1e300)), "") + if sys.float_repr_style == 'short': + self.assertEqual(repr(c_double.from_param(1e300)), "") self.assertRegex(repr(c_longdouble.from_param(1.5)), r"^$") self.assertRegex(repr(c_char_p.from_param(b'hihi')), r"^$") self.assertRegex(repr(c_wchar_p.from_param('hihi')), r"^$") diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 221f9db7763764..bbc7630fa83f45 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -36,7 +36,7 @@ def load_tests(loader, tests, ignore): optionflags=doctest.ELLIPSIS|doctest.NORMALIZE_WHITESPACE, )) howto_tests = os.path.join(REPO_ROOT, 'Doc/howto/enum.rst') - if os.path.exists(howto_tests): + if os.path.exists(howto_tests) and sys.float_repr_style == 'short': tests.addTests(doctest.DocFileSuite( howto_tests, module_relative=False, diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 237d7b5d35edd7..00518abcb11b46 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -795,6 +795,8 @@ def test_format(self): self.assertRaises(ValueError, format, x, '.6,n') @support.requires_IEEE_754 + @unittest.skipUnless(sys.float_repr_style == 'short', + "applies only when using short float repr style") def test_format_testfile(self): with open(format_testfile, encoding="utf-8") as testfile: for line in testfile: diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py index c7cc32e09490b2..1f626d87fa6c7a 100644 --- a/Lib/test/test_format.py +++ b/Lib/test/test_format.py @@ -346,12 +346,12 @@ def __bytes__(self): testcommon(b"%s", memoryview(b"abc"), b"abc") # %a will give the equivalent of # repr(some_obj).encode('ascii', 
'backslashreplace') - testcommon(b"%a", 3.14, b"3.14") + testcommon(b"%a", 3.25, b"3.25") testcommon(b"%a", b"ghi", b"b'ghi'") testcommon(b"%a", "jkl", b"'jkl'") testcommon(b"%a", "\u0544", b"'\\u0544'") # %r is an alias for %a - testcommon(b"%r", 3.14, b"3.14") + testcommon(b"%r", 3.25, b"3.25") testcommon(b"%r", b"ghi", b"b'ghi'") testcommon(b"%r", "jkl", b"'jkl'") testcommon(b"%r", "\u0544", b"'\\u0544'") @@ -407,19 +407,19 @@ def test_non_ascii(self): self.assertEqual(format("abc", "\u2007<5"), "abc\u2007\u2007") self.assertEqual(format(123, "\u2007<5"), "123\u2007\u2007") - self.assertEqual(format(12.3, "\u2007<6"), "12.3\u2007\u2007") + self.assertEqual(format(12.5, "\u2007<6"), "12.5\u2007\u2007") self.assertEqual(format(0j, "\u2007<4"), "0j\u2007\u2007") self.assertEqual(format(1+2j, "\u2007<8"), "(1+2j)\u2007\u2007") self.assertEqual(format("abc", "\u2007>5"), "\u2007\u2007abc") self.assertEqual(format(123, "\u2007>5"), "\u2007\u2007123") - self.assertEqual(format(12.3, "\u2007>6"), "\u2007\u200712.3") + self.assertEqual(format(12.5, "\u2007>6"), "\u2007\u200712.5") self.assertEqual(format(1+2j, "\u2007>8"), "\u2007\u2007(1+2j)") self.assertEqual(format(0j, "\u2007>4"), "\u2007\u20070j") self.assertEqual(format("abc", "\u2007^5"), "\u2007abc\u2007") self.assertEqual(format(123, "\u2007^5"), "\u2007123\u2007") - self.assertEqual(format(12.3, "\u2007^6"), "\u200712.3\u2007") + self.assertEqual(format(12.5, "\u2007^6"), "\u200712.5\u2007") self.assertEqual(format(1+2j, "\u2007^8"), "\u2007(1+2j)\u2007") self.assertEqual(format(0j, "\u2007^4"), "\u20070j\u2007") diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index f54557056784f2..58a30c8e6ac447 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1336,9 +1336,9 @@ def test_equal_equal(self): def test_conversions(self): self.assertEqual(f'{3.14:10.10}', ' 3.14') - self.assertEqual(f'{3.14!s:10.10}', '3.14 ') - self.assertEqual(f'{3.14!r:10.10}', '3.14 ') - 
self.assertEqual(f'{3.14!a:10.10}', '3.14 ') + self.assertEqual(f'{1.25!s:10.10}', '1.25 ') + self.assertEqual(f'{1.25!r:10.10}', '1.25 ') + self.assertEqual(f'{1.25!a:10.10}', '1.25 ') self.assertEqual(f'{"a"}', 'a') self.assertEqual(f'{"a"!r}', "'a'") @@ -1347,7 +1347,7 @@ def test_conversions(self): # Conversions can have trailing whitespace after them since it # does not provide any significance self.assertEqual(f"{3!s }", "3") - self.assertEqual(f'{3.14!s :10.10}', '3.14 ') + self.assertEqual(f'{1.25!s :10.10}', '1.25 ') # Not a conversion. self.assertEqual(f'{"a!r"}', "a!r") diff --git a/Lib/test/test_json/test_tool.py b/Lib/test/test_json/test_tool.py index 9ea2679c77ec17..30f9bb3331605c 100644 --- a/Lib/test/test_json/test_tool.py +++ b/Lib/test/test_json/test_tool.py @@ -270,7 +270,7 @@ def test_colors(self): (r'" \"foo\" "', f'{t.string}" \\"foo\\" "{t.reset}'), ('"α"', f'{t.string}"\\u03b1"{t.reset}'), ('123', f'{t.number}123{t.reset}'), - ('-1.2345e+23', f'{t.number}-1.2345e+23{t.reset}'), + ('-1.25e+23', f'{t.number}-1.25e+23{t.reset}'), (r'{"\\": ""}', f'''\ {ob} diff --git a/Lib/test/test_optparse.py b/Lib/test/test_optparse.py index e6ffd2b0ffeb0e..e476e4727803e5 100644 --- a/Lib/test/test_optparse.py +++ b/Lib/test/test_optparse.py @@ -615,9 +615,9 @@ def test_float_default(self): self.parser.add_option( "-p", "--prob", help="blow up with probability PROB [default: %default]") - self.parser.set_defaults(prob=0.43) + self.parser.set_defaults(prob=0.25) expected_help = self.help_prefix + \ - " -p PROB, --prob=PROB blow up with probability PROB [default: 0.43]\n" + " -p PROB, --prob=PROB blow up with probability PROB [default: 0.25]\n" self.assertHelp(self.parser, expected_help) def test_alt_expand(self): diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index ef596630b930f7..3d7300e1480256 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -718,9 +718,9 @@ def format(fmt, *values): 
self.assertEqual(format('x = %d!', 1234), 'x = 1234!') self.assertEqual(format('x = %x!', 1234), 'x = 4d2!') self.assertEqual(format('x = %f!', 1234), 'x = 1234.000000!') - self.assertEqual(format('x = %s!', 1234.5678901), 'x = 1234.5678901!') - self.assertEqual(format('x = %f!', 1234.5678901), 'x = 1234.567890!') - self.assertEqual(format('x = %d!', 1234.5678901), 'x = 1234!') + self.assertEqual(format('x = %s!', 1234.0000625), 'x = 1234.0000625!') + self.assertEqual(format('x = %f!', 1234.0000625), 'x = 1234.000063!') + self.assertEqual(format('x = %d!', 1234.0000625), 'x = 1234!') self.assertEqual(format('x = %s%% %%%%', 1234), 'x = 1234% %%') self.assertEqual(format('x = %s!', '%% %s'), 'x = %% %s!') self.assertEqual(format('x = %s, y = %d', 12, 34), 'x = 12, y = 34') diff --git a/Lib/test/test_pprint.py b/Lib/test/test_pprint.py index 0c84d3d3bfd17a..41c337ade7eca1 100644 --- a/Lib/test/test_pprint.py +++ b/Lib/test/test_pprint.py @@ -458,7 +458,7 @@ def __new__(cls, celsius_degrees): return super().__new__(Temperature, celsius_degrees) def __repr__(self): kelvin_degrees = self + 273.15 - return f"{kelvin_degrees}°K" + return f"{kelvin_degrees:.2f}°K" self.assertEqual(pprint.pformat(Temperature(1000)), '1273.15°K') def test_sorted_dict(self): diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py index d5631efcdb75b7..22a55b57c076eb 100644 --- a/Lib/test/test_reprlib.py +++ b/Lib/test/test_reprlib.py @@ -397,20 +397,20 @@ def test_valid_indent(self): 'object': { 1: 'two', b'three': [ - (4.5, 6.7), + (4.5, 6.25), [set((8, 9)), frozenset((10, 11))], ], }, 'tests': ( (dict(indent=None), '''\ - {1: 'two', b'three': [(4.5, 6.7), [{8, 9}, frozenset({10, 11})]]}'''), + {1: 'two', b'three': [(4.5, 6.25), [{8, 9}, frozenset({10, 11})]]}'''), (dict(indent=False), '''\ { 1: 'two', b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -430,7 +430,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -450,7 +450,7 @@ def test_valid_indent(self): 
b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -470,7 +470,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -490,7 +490,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -518,7 +518,7 @@ def test_valid_indent(self): b'three': [ ( 4.5, - 6.7, + 6.25, ), [ { @@ -538,7 +538,7 @@ def test_valid_indent(self): -->b'three': [ -->-->( -->-->-->4.5, - -->-->-->6.7, + -->-->-->6.25, -->-->), -->-->[ -->-->-->{ @@ -558,7 +558,7 @@ def test_valid_indent(self): ....b'three': [ ........( ............4.5, - ............6.7, + ............6.25, ........), ........[ ............{ diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 0dd619dd7c8ceb..8250b0aef09aec 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -3319,7 +3319,8 @@ def tearDown(self): def load_tests(loader, tests, ignore): """Used for doctest/unittest integration.""" tests.addTests(doctest.DocTestSuite()) - tests.addTests(doctest.DocTestSuite(statistics)) + if sys.float_repr_style == 'short': + tests.addTests(doctest.DocTestSuite(statistics)) return tests diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index d6a7bd0da59910..2584fbf72d3fa6 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -1231,10 +1231,10 @@ def __repr__(self): self.assertEqual('{0:\x00^6}'.format(3), '\x00\x003\x00\x00\x00') self.assertEqual('{0:<6}'.format(3), '3 ') - self.assertEqual('{0:\x00<6}'.format(3.14), '3.14\x00\x00') - self.assertEqual('{0:\x01<6}'.format(3.14), '3.14\x01\x01') - self.assertEqual('{0:\x00^6}'.format(3.14), '\x003.14\x00') - self.assertEqual('{0:^6}'.format(3.14), ' 3.14 ') + self.assertEqual('{0:\x00<6}'.format(3.25), '3.25\x00\x00') + self.assertEqual('{0:\x01<6}'.format(3.25), '3.25\x01\x01') + self.assertEqual('{0:\x00^6}'.format(3.25), '\x003.25\x00') + self.assertEqual('{0:^6}'.format(3.25), ' 3.25 ') self.assertEqual('{0:\x00<12}'.format(3+2.0j), '(3+2j)\x00\x00\x00\x00\x00\x00') 
self.assertEqual('{0:\x01<12}'.format(3+2.0j), '(3+2j)\x01\x01\x01\x01\x01\x01') diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index a117413301bebe..02592ea5eb21a1 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -517,8 +517,8 @@ def test(f, format_spec, result): # and a number after the decimal. This is tricky, because # a totally empty format specifier means something else. # So, just use a sign flag - test(1e200, '+g', '+1e+200') - test(1e200, '+', '+1e+200') + test(1.25e200, '+g', '+1.25e+200') + test(1.25e200, '+', '+1.25e+200') test(1.1e200, '+g', '+1.1e+200') test(1.1e200, '+', '+1.1e+200') From c45f4f3ebe34529a8db3a7918e8dd2e9f7ce8e86 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 27 Jun 2025 14:35:55 +0300 Subject: [PATCH 22/55] gh-78465: Fix error message for cls.__new__(cls, ...) where cls is not instantiable (GH-135981) Previous error message suggested to use cls.__new__(), which obviously does not work. Now the error message is the same as for cls(...). --- Lib/test/support/__init__.py | 1 + Lib/test/test_sys.py | 7 +------ Lib/test/test_types.py | 5 ++--- .../2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst | 2 ++ Objects/typeobject.c | 5 +++++ 5 files changed, 11 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 51c0ce11e8269d..fd39d3f7c95368 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2333,6 +2333,7 @@ def check_disallow_instantiation(testcase, tp, *args, **kwds): qualname = f"{name}" msg = f"cannot create '{re.escape(qualname)}' instances" testcase.assertRaisesRegex(TypeError, msg, tp, *args, **kwds) + testcase.assertRaisesRegex(TypeError, msg, tp.__new__, tp, *args, **kwds) def get_recursion_depth(): """Get the recursion depth of the caller function. 
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 27524d86355b9c..486bf10a0b5647 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -869,12 +869,7 @@ def test_sys_flags(self): def assert_raise_on_new_sys_type(self, sys_attr): # Users are intentionally prevented from creating new instances of # sys.flags, sys.version_info, and sys.getwindowsversion. - arg = sys_attr - attr_type = type(sys_attr) - with self.assertRaises(TypeError): - attr_type(arg) - with self.assertRaises(TypeError): - attr_type.__new__(attr_type, arg) + support.check_disallow_instantiation(self, type(sys_attr), sys_attr) def test_sys_flags_no_instantiation(self): self.assert_raise_on_new_sys_type(sys.flags) diff --git a/Lib/test/test_types.py b/Lib/test/test_types.py index 02592ea5eb21a1..fc26e71ffcb67b 100644 --- a/Lib/test/test_types.py +++ b/Lib/test/test_types.py @@ -2,7 +2,7 @@ from test.support import ( run_with_locale, cpython_only, no_rerun, - MISSING_C_DOCSTRINGS, EqualToForwardRef, + MISSING_C_DOCSTRINGS, EqualToForwardRef, check_disallow_instantiation, ) from test.support.script_helper import assert_python_ok from test.support.import_helper import import_fresh_module @@ -1148,8 +1148,7 @@ def test_or_type_operator_reference_cycle(self): msg='Check for union reference leak.') def test_instantiation(self): - with self.assertRaises(TypeError): - types.UnionType() + check_disallow_instantiation(self, types.UnionType) self.assertIs(int, types.UnionType[int]) self.assertIs(int, types.UnionType[int, int]) self.assertEqual(int | str, types.UnionType[int, str]) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst new file mode 100644 index 00000000000000..99734d63c5d87e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-26-15-25-51.gh-issue-78465.MbDN8X.rst @@ -0,0 +1,2 @@ +Fix error message for ``cls.__new__(cls, ...)`` where ``cls`` is 
a +non-instantiable builtin or extension type (with ``tp_new`` set to ``NULL``). diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b9d549610693c1..6e7471cb5941a7 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -10020,6 +10020,11 @@ tp_new_wrapper(PyObject *self, PyObject *args, PyObject *kwds) /* If staticbase is NULL now, it is a really weird type. In the spirit of backwards compatibility (?), just shut up. */ if (staticbase && staticbase->tp_new != type->tp_new) { + if (staticbase->tp_new == NULL) { + PyErr_Format(PyExc_TypeError, + "cannot create '%s' instances", subtype->tp_name); + return NULL; + } PyErr_Format(PyExc_TypeError, "%s.__new__(%s) is not safe, use %s.__new__()", type->tp_name, From 695ab61351c019a7fcd731eebd77c172c90bf9e0 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Fri, 27 Jun 2025 19:37:44 +0800 Subject: [PATCH 23/55] gh-132732: Automatically constant evaluate pure operations (GH-132733) This adds a "macro" to the optimizer DSL called "REPLACE_OPCODE_IF_EVALUATES_PURE", which allows automatically constant evaluating a bytecode body if certain inputs have no side effects upon evaluations (such as ints, strings, and floats). Co-authored-by: Tomas R. 
--- Include/internal/pycore_optimizer.h | 5 +- Include/internal/pycore_uop_metadata.h | 2 +- Lib/test/test_generated_cases.py | 197 ++++++++++++ ...-04-19-16-22-47.gh-issue-132732.jgqhlF.rst | 1 + Python/bytecodes.c | 2 +- Python/optimizer_analysis.c | 7 + Python/optimizer_bytecodes.c | 77 +---- Python/optimizer_cases.c.h | 304 ++++++++++++++---- Python/optimizer_symbols.c | 39 +++ Tools/cases_generator/optimizer_generator.py | 194 ++++++++++- 10 files changed, 706 insertions(+), 122 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 576c27947824b4..8b7f12bf03d624 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -10,7 +10,7 @@ extern "C" { #include "pycore_typedefs.h" // _PyInterpreterFrame #include "pycore_uop_ids.h" -#include "pycore_stackref.h" +#include "pycore_stackref.h" // _PyStackRef #include @@ -316,6 +316,9 @@ extern JitOptRef _Py_uop_sym_new_type( JitOptContext *ctx, PyTypeObject *typ); extern JitOptRef _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val); +extern JitOptRef _Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val); +bool _Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym); +_PyStackRef _Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym); extern JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx); extern bool _Py_uop_sym_has_type(JitOptRef sym); extern bool _Py_uop_sym_matches_type(JitOptRef sym, PyTypeObject *typ); diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 52cbc2fffe484e..ff7e800aa9bb1a 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -106,7 +106,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = 
HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_BINARY_OP_EXTEND] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_EXTEND] = HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 9e0fd1218f2534..eb01328b6ea946 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -2224,5 +2224,202 @@ def test_validate_uop_unused_size_mismatch(self): "Inputs must have equal sizes"): self.run_cases_test(input, input2, output) + def test_pure_uop_body_copied_in(self): + # Note: any non-escaping call works. + # In this case, we use PyStackRef_IsNone. + input = """ + pure op(OP, (foo -- res)) { + res = PyStackRef_IsNone(foo); + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = sym_new_known(ctx, foo); + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + res_stackref = PyStackRef_IsNone(foo); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = sym_new_known(ctx, foo); + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_pure_uop_body_copied_in_deopt(self): + # Note: any non-escaping call works. + # In this case, we use PyStackRef_IsNull. 
+ input = """ + pure op(OP, (foo -- res)) { + DEOPT_IF(PyStackRef_IsNull(foo)); + res = foo; + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = foo; + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + if (PyStackRef_IsNull(foo)) { + ctx->done = true; + break; + } + res_stackref = foo; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = foo; + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_pure_uop_body_copied_in_error_if(self): + # Note: any non-escaping call works. + # In this case, we use PyStackRef_IsNull. 
+ input = """ + pure op(OP, (foo -- res)) { + ERROR_IF(PyStackRef_IsNull(foo)); + res = foo; + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = foo; + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + if (PyStackRef_IsNull(foo)) { + goto error; + } + res_stackref = foo; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = foo; + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + + + def test_replace_opcode_uop_body_copied_in_complex(self): + input = """ + pure op(OP, (foo -- res)) { + if (foo) { + res = PyStackRef_IsNone(foo); + } + else { + res = 1; + } + } + """ + input2 = """ + op(OP, (foo -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = sym_new_known(ctx, foo); + } + """ + output = """ + case OP: { + JitOptRef foo; + JitOptRef res; + foo = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, foo) + ) { + JitOptRef foo_sym = foo; + _PyStackRef foo = sym_get_const_as_stackref(ctx, foo_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + if (foo) { + res_stackref = PyStackRef_IsNone(foo); + } + else { + res_stackref = 1; + } + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } + res = sym_new_known(ctx, foo); + stack_pointer[-1] = res; + break; + } + """ + self.run_cases_test(input, input2, output) + + def test_replace_opocode_uop_reject_array_effects(self): + input = """ + pure op(OP, 
(foo[2] -- res)) { + if (foo) { + res = PyStackRef_IsNone(foo); + } + else { + res = 1; + } + } + """ + input2 = """ + op(OP, (foo[2] -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(foo); + res = sym_new_unknown(ctx); + } + """ + output = """ + """ + with self.assertRaisesRegex(SyntaxError, + "Pure evaluation cannot take array-like inputs"): + self.run_cases_test(input, input2, output) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst new file mode 100644 index 00000000000000..aadaf2169fd01a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-04-19-16-22-47.gh-issue-132732.jgqhlF.rst @@ -0,0 +1 @@ +Automatically constant evaluate bytecode operations marked as pure in the JIT optimizer. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 535e552e047475..1a5a9ff13a23a5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -850,7 +850,7 @@ dummy_func( DEOPT_IF(!res); } - pure op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { + op(_BINARY_OP_EXTEND, (descr/4, left, right -- res)) { PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 145a8c118d3612..fab6fef5ccda10 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -26,6 +26,8 @@ #include "pycore_function.h" #include "pycore_uop_ids.h" #include "pycore_range.h" +#include "pycore_unicodeobject.h" +#include "pycore_ceval.h" #include #include @@ -321,7 +323,10 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, /* Shortened forms for convenience, used in optimizer_bytecodes.c */ #define sym_is_not_null _Py_uop_sym_is_not_null #define sym_is_const _Py_uop_sym_is_const +#define sym_is_safe_const 
_Py_uop_sym_is_safe_const #define sym_get_const _Py_uop_sym_get_const +#define sym_new_const_steal _Py_uop_sym_new_const_steal +#define sym_get_const_as_stackref _Py_uop_sym_get_const_as_stackref #define sym_new_unknown _Py_uop_sym_new_unknown #define sym_new_not_null _Py_uop_sym_new_not_null #define sym_new_type _Py_uop_sym_new_type @@ -350,6 +355,8 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness +#define JUMP_TO_LABEL(label) goto label; + static int optimize_to_bool( _PyUOpInstruction *this_instr, diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index f8a0484bdc2b04..3182e8b3b70144 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -181,6 +181,7 @@ dummy_func(void) { } op(_BINARY_OP, (lhs, rhs -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(lhs, rhs); bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); @@ -235,35 +236,23 @@ dummy_func(void) { } op(_BINARY_OP_ADD_INT, (left, right -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); res = sym_new_compact_int(ctx); } op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); res = sym_new_compact_int(ctx); } op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); res = sym_new_compact_int(ctx); } op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) + - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - 
Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyFloat_Type); // TODO (gh-134584): Refactor this to use another uop if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); @@ -271,23 +260,8 @@ dummy_func(void) { } op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) - - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyFloat_Type); // TODO (gh-134584): Refactor this to use another uop if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); @@ -295,23 +269,8 @@ dummy_func(void) { } op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) * - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! 
- } - else { - res = sym_new_type(ctx, &PyFloat_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyFloat_Type); // TODO (gh-134584): Refactor this to use another uop if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); @@ -319,19 +278,8 @@ dummy_func(void) { } op(_BINARY_OP_ADD_UNICODE, (left, right -- res)) { - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); - assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyUnicode_Concat(sym_get_const(ctx, left), sym_get_const(ctx, right)); - if (temp == NULL) { - goto error; - } - res = sym_new_const(ctx, temp); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyUnicode_Type); - } + REPLACE_OPCODE_IF_EVALUATES_PURE(left, right); + res = sym_new_type(ctx, &PyUnicode_Type); } op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right -- )) { @@ -443,6 +391,7 @@ dummy_func(void) { } op(_UNARY_NOT, (value -- res)) { + REPLACE_OPCODE_IF_EVALUATES_PURE(value); sym_set_type(value, &PyBool_Type); res = sym_new_truthiness(ctx, value, false); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 10767ccdbd57f5..8d30df3aa7d429 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -206,6 +206,21 @@ JitOptRef value; JitOptRef res; value = stack_pointer[-1]; + if ( + sym_is_safe_const(ctx, value) + ) { + JitOptRef value_sym = value; + _PyStackRef value = sym_get_const_as_stackref(ctx, value_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + assert(PyStackRef_BoolCheck(value)); + res_stackref = PyStackRef_IsFalse(value) + ? 
PyStackRef_True : PyStackRef_False; + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-1] = res; + break; + } sym_set_type(value, &PyBool_Type); res = sym_new_truthiness(ctx, value, false); stack_pointer[-1] = res; @@ -391,7 +406,41 @@ } case _BINARY_OP_MULTIPLY_INT: { + JitOptRef right; + JitOptRef left; JitOptRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res_stackref = _PyCompactLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res_stackref )) { + ctx->done = true; + break; + } + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } res = sym_new_compact_int(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -400,7 +449,41 @@ } case _BINARY_OP_ADD_INT: { + JitOptRef right; + JitOptRef left; JitOptRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef 
left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + res_stackref = _PyCompactLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res_stackref )) { + ctx->done = true; + break; + } + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } res = sym_new_compact_int(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -409,7 +492,41 @@ } case _BINARY_OP_SUBTRACT_INT: { + JitOptRef right; + JitOptRef left; JitOptRef res; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyLong_CheckExact(left_o)); + assert(PyLong_CheckExact(right_o)); + assert(_PyLong_BothAreCompact((PyLongObject *)left_o, (PyLongObject *)right_o)); + STAT_INC(BINARY_OP, hit); + 
res_stackref = _PyCompactLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); + if (PyStackRef_IsNull(res_stackref )) { + ctx->done = true; + break; + } + PyStackRef_CLOSE_SPECIALIZED(right, _PyLong_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyLong_ExactDealloc); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } res = sym_new_compact_int(ctx); stack_pointer[-2] = res; stack_pointer += -1; @@ -443,29 +560,42 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) * - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval * + ((PyFloatObject *)right_o)->ob_fval; + res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res_stackref )) { goto error; } - res = sym_new_const(ctx, temp); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); 
stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - stack_pointer += -1; + break; } + res = sym_new_type(ctx, &PyFloat_Type); if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } - stack_pointer[-1] = res; + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -475,29 +605,42 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) + - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval + + ((PyFloatObject *)right_o)->ob_fval; + res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res_stackref )) { goto error; } - res = sym_new_const(ctx, temp); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = 
sym_new_type(ctx, &PyFloat_Type); - stack_pointer += -1; + break; } + res = sym_new_type(ctx, &PyFloat_Type); if (PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } - stack_pointer[-1] = res; + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -507,29 +650,42 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyFloat_CheckExact(sym_get_const(ctx, left))); - assert(PyFloat_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyFloat_FromDouble( - PyFloat_AS_DOUBLE(sym_get_const(ctx, left)) - - PyFloat_AS_DOUBLE(sym_get_const(ctx, right))); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyFloat_CheckExact(left_o)); + assert(PyFloat_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + double dres = + ((PyFloatObject *)left_o)->ob_fval - + ((PyFloatObject *)right_o)->ob_fval; + res_stackref = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (PyStackRef_IsNull(res_stackref )) { goto error; } - res = sym_new_const(ctx, temp); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyFloat_Type); - stack_pointer += -1; + break; } + res = sym_new_type(ctx, &PyFloat_Type); if 
(PyJitRef_IsBorrowed(left) && PyJitRef_IsBorrowed(right)) { REPLACE_OP(this_instr, op_without_decref_inputs[opcode], oparg, 0); } - stack_pointer[-1] = res; + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -566,24 +722,39 @@ JitOptRef res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(ctx, left) && sym_is_const(ctx, right)) { - assert(PyUnicode_CheckExact(sym_get_const(ctx, left))); - assert(PyUnicode_CheckExact(sym_get_const(ctx, right))); - PyObject *temp = PyUnicode_Concat(sym_get_const(ctx, left), sym_get_const(ctx, right)); - if (temp == NULL) { + if ( + sym_is_safe_const(ctx, left) && + sym_is_safe_const(ctx, right) + ) { + JitOptRef left_sym = left; + JitOptRef right_sym = right; + _PyStackRef left = sym_get_const_as_stackref(ctx, left_sym); + _PyStackRef right = sym_get_const_as_stackref(ctx, right_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *left_o = PyStackRef_AsPyObjectBorrow(left); + PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); + assert(PyUnicode_CheckExact(left_o)); + assert(PyUnicode_CheckExact(right_o)); + STAT_INC(BINARY_OP, hit); + PyObject *res_o = PyUnicode_Concat(left_o, right_o); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + if (res_o == NULL) { goto error; } - res = sym_new_const(ctx, temp); + res_stackref = PyStackRef_FromPyObjectSteal(res_o); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); stack_pointer[-2] = res; stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - Py_DECREF(temp); - } - else { - res = sym_new_type(ctx, &PyUnicode_Type); - stack_pointer += -1; + break; } - stack_pointer[-1] = res; + res = sym_new_type(ctx, &PyUnicode_Type); + stack_pointer[-2] = res; + stack_pointer += -1; + 
assert(WITHIN_STACK_BOUNDS()); break; } @@ -2539,6 +2710,31 @@ JitOptRef res; rhs = stack_pointer[-1]; lhs = stack_pointer[-2]; + if ( + sym_is_safe_const(ctx, lhs) && + sym_is_safe_const(ctx, rhs) + ) { + JitOptRef lhs_sym = lhs; + JitOptRef rhs_sym = rhs; + _PyStackRef lhs = sym_get_const_as_stackref(ctx, lhs_sym); + _PyStackRef rhs = sym_get_const_as_stackref(ctx, rhs_sym); + _PyStackRef res_stackref; + /* Start of uop copied from bytecodes for constant evaluation */ + PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); + PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs); + assert(_PyEval_BinaryOps[oparg]); + stack_pointer[-2] = res; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + PyObject *res_o = _PyEval_BinaryOps[oparg](lhs_o, rhs_o); + if (res_o == NULL) { + JUMP_TO_LABEL(error); + } + res_stackref = PyStackRef_FromPyObjectSteal(res_o); + /* End of uop copied from bytecodes for constant evaluation */ + res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); + break; + } bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index c3d9e0e778bf55..e4dbca8362f4ce 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -185,6 +185,35 @@ _Py_uop_sym_get_const(JitOptContext *ctx, JitOptRef ref) return NULL; } +_PyStackRef +_Py_uop_sym_get_const_as_stackref(JitOptContext *ctx, JitOptRef sym) +{ + PyObject *const_val = _Py_uop_sym_get_const(ctx, sym); + if (const_val == NULL) { + return PyStackRef_NULL; + } + return PyStackRef_FromPyObjectBorrow(const_val); +} + +/* + Indicates whether the constant is safe to constant evaluate + (without side effects). 
+ */ +bool +_Py_uop_sym_is_safe_const(JitOptContext *ctx, JitOptRef sym) +{ + PyObject *const_val = _Py_uop_sym_get_const(ctx, sym); + if (const_val == NULL) { + return false; + } + PyTypeObject *typ = Py_TYPE(const_val); + return (typ == &PyLong_Type) || + (typ == &PyUnicode_Type) || + (typ == &PyFloat_Type) || + (typ == &PyTuple_Type) || + (typ == &PyBool_Type); +} + void _Py_uop_sym_set_type(JitOptContext *ctx, JitOptRef ref, PyTypeObject *typ) { @@ -467,6 +496,16 @@ _Py_uop_sym_new_const(JitOptContext *ctx, PyObject *const_val) return ref; } +JitOptRef +_Py_uop_sym_new_const_steal(JitOptContext *ctx, PyObject *const_val) +{ + assert(const_val != NULL); + JitOptRef res = _Py_uop_sym_new_const(ctx, const_val); + // Decref once because sym_new_const increfs it. + Py_DECREF(const_val); + return res; +} + JitOptRef _Py_uop_sym_new_null(JitOptContext *ctx) { diff --git a/Tools/cases_generator/optimizer_generator.py b/Tools/cases_generator/optimizer_generator.py index 3b4fe64b02a807..4556b6d5a74f37 100644 --- a/Tools/cases_generator/optimizer_generator.py +++ b/Tools/cases_generator/optimizer_generator.py @@ -12,6 +12,8 @@ analyze_files, StackItem, analysis_error, + CodeSection, + Label, ) from generators_common import ( DEFAULT_INPUT, @@ -19,6 +21,7 @@ write_header, Emitter, TokenIterator, + always_true, ) from cwriter import CWriter from typing import TextIO @@ -75,6 +78,9 @@ def type_name(var: StackItem) -> str: return "JitOptRef *" return "JitOptRef " +def stackref_type_name(var: StackItem) -> str: + assert not var.is_array(), "Unsafe to convert a symbol to an array-like StackRef." 
+ return "_PyStackRef " def declare_variables(uop: Uop, out: CWriter, skip_inputs: bool) -> None: variables = {"unused"} @@ -135,6 +141,12 @@ def emit_default(out: CWriter, uop: Uop, stack: Stack) -> None: class OptimizerEmitter(Emitter): + def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack): + super().__init__(out, labels) + self._replacers["REPLACE_OPCODE_IF_EVALUATES_PURE"] = self.replace_opcode_if_evaluates_pure + self.original_uop = original_uop + self.stack = stack + def emit_save(self, storage: Storage) -> None: storage.flush(self.out) @@ -145,6 +157,185 @@ def goto_label(self, goto: Token, label: Token, storage: Storage) -> None: self.out.emit(goto) self.out.emit(label) + def replace_opcode_if_evaluates_pure( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + assert isinstance(uop, Uop) + input_identifiers = [] + for token in tkn_iter: + if token.kind == "IDENTIFIER": + input_identifiers.append(token) + if token.kind == "SEMI": + break + + if len(input_identifiers) == 0: + raise analysis_error( + "To evaluate an operation as pure, it must have at least 1 input", + tkn + ) + # Check that the input identifiers belong to the uop's + # input stack effect + uop_stack_effect_input_identifers = {inp.name for inp in uop.stack.inputs} + for input_tkn in input_identifiers: + if input_tkn.text not in uop_stack_effect_input_identifers: + raise analysis_error(f"{input_tkn.text} referenced in " + f"REPLACE_OPCODE_IF_EVALUATES_PURE but does not " + f"exist in the base uop's input stack effects", + input_tkn) + input_identifiers_as_str = {tkn.text for tkn in input_identifiers} + used_stack_inputs = [inp for inp in uop.stack.inputs if inp.name in input_identifiers_as_str] + assert len(used_stack_inputs) > 0 + emitter = OptimizerConstantEmitter(self.out, {}, self.original_uop, self.stack.copy()) + emitter.emit("if (\n") + for inp in used_stack_inputs[:-1]: 
+ emitter.emit(f"sym_is_safe_const(ctx, {inp.name}) &&\n") + emitter.emit(f"sym_is_safe_const(ctx, {used_stack_inputs[-1].name})\n") + emitter.emit(') {\n') + # Declare variables, before they are shadowed. + for inp in used_stack_inputs: + if inp.used: + emitter.emit(f"{type_name(inp)}{inp.name}_sym = {inp.name};\n") + # Shadow the symbolic variables with stackrefs. + for inp in used_stack_inputs: + if inp.is_array(): + raise analysis_error("Pure evaluation cannot take array-like inputs.", tkn) + if inp.used: + emitter.emit(f"{stackref_type_name(inp)}{inp.name} = sym_get_const_as_stackref(ctx, {inp.name}_sym);\n") + # Rename all output variables to stackref variant. + for outp in self.original_uop.stack.outputs: + if outp.is_array(): + raise analysis_error( + "Array output StackRefs not supported for evaluating pure ops.", + self.original_uop.body.open + ) + emitter.emit(f"_PyStackRef {outp.name}_stackref;\n") + + + storage = Storage.for_uop(self.stack, self.original_uop, CWriter.null(), check_liveness=False) + # No reference management of outputs needed. + for var in storage.outputs: + var.in_local = True + emitter.emit("/* Start of uop copied from bytecodes for constant evaluation */\n") + emitter.emit_tokens(self.original_uop, storage, inst=None, emit_braces=False) + self.out.start_line() + emitter.emit("/* End of uop copied from bytecodes for constant evaluation */\n") + # Finally, assign back the output stackrefs to symbolics. + for outp in self.original_uop.stack.outputs: + # All new stackrefs are created from new references. + # That's how the stackref contract works. 
+ if not outp.peek: + emitter.emit(f"{outp.name} = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal({outp.name}_stackref));\n") + else: + emitter.emit(f"{outp.name} = sym_new_const(ctx, PyStackRef_AsPyObjectBorrow({outp.name}_stackref));\n") + storage.flush(self.out) + emitter.emit("break;\n") + emitter.emit("}\n") + return True + +class OptimizerConstantEmitter(OptimizerEmitter): + def __init__(self, out: CWriter, labels: dict[str, Label], original_uop: Uop, stack: Stack): + super().__init__(out, labels, original_uop, stack) + # Replace all outputs to point to their stackref versions. + overrides = { + outp.name: self.emit_stackref_override for outp in self.original_uop.stack.outputs + } + self._replacers = {**self._replacers, **overrides} + + def emit_to_with_replacement( + self, + out: CWriter, + tkn_iter: TokenIterator, + end: str, + uop: CodeSection, + storage: Storage, + inst: Instruction | None + ) -> Token: + parens = 0 + for tkn in tkn_iter: + if tkn.kind == end and parens == 0: + return tkn + if tkn.kind == "LPAREN": + parens += 1 + if tkn.kind == "RPAREN": + parens -= 1 + if tkn.text in self._replacers: + self._replacers[tkn.text](tkn, tkn_iter, uop, storage, inst) + else: + out.emit(tkn) + raise analysis_error(f"Expecting {end}. 
Reached end of file", tkn) + + def emit_stackref_override( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + self.out.emit(tkn) + self.out.emit("_stackref ") + return True + + def deopt_if( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + self.out.start_line() + self.out.emit("if (") + lparen = next(tkn_iter) + assert lparen.kind == "LPAREN" + first_tkn = tkn_iter.peek() + self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst) + self.emit(") {\n") + next(tkn_iter) # Semi colon + # We guarantee this will deopt in real-world code + # via constants analysis. So just bail. + self.emit("ctx->done = true;\n") + self.emit("break;\n") + self.emit("}\n") + return not always_true(first_tkn) + + exit_if = deopt_if + + def error_if( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: CodeSection, + storage: Storage, + inst: Instruction | None, + ) -> bool: + lparen = next(tkn_iter) + assert lparen.kind == "LPAREN" + first_tkn = tkn_iter.peek() + unconditional = always_true(first_tkn) + if unconditional: + next(tkn_iter) + next(tkn_iter) # RPAREN + self.out.start_line() + else: + self.out.emit_at("if ", tkn) + self.emit(lparen) + self.emit_to_with_replacement(self.out, tkn_iter, "RPAREN", uop, storage, inst) + self.out.emit(") {\n") + next(tkn_iter) # Semi colon + storage.clear_inputs("at ERROR_IF") + + self.out.emit("goto error;\n") + if not unconditional: + self.out.emit("}\n") + return not unconditional + + def write_uop( override: Uop | None, uop: Uop, @@ -175,13 +366,14 @@ def write_uop( cast = f"uint{cache.size*16}_t" out.emit(f"{type}{cache.name} = ({cast})this_instr->operand0;\n") if override: - emitter = OptimizerEmitter(out, {}) + emitter = OptimizerEmitter(out, {}, uop, stack.copy()) # No reference management of inputs needed. 
for var in storage.inputs: # type: ignore[possibly-undefined] var.in_local = False _, storage = emitter.emit_tokens(override, storage, None, False) out.start_line() storage.flush(out) + out.start_line() else: emit_default(out, uop, stack) out.start_line() From de0d014815667982c683adb2b2cc16ae2bfb3c82 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Fri, 27 Jun 2025 16:23:33 +0300 Subject: [PATCH 24/55] gh-92266: Replace tabs with four spaces in Python files (#135983) --- .pre-commit-config.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 822a8a9f4e5076..86410c46d1d707 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -34,6 +34,13 @@ repos: name: Run Black on Tools/jit/ files: ^Tools/jit/ + - repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.5.5 + hooks: + - id: remove-tabs + types: [python] + exclude: ^Tools/c-analyzer/cpython/_parser.py + - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: From 731f5b8ab3970e344bfbc4ff86df767a0795f0fc Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 27 Jun 2025 16:47:03 +0300 Subject: [PATCH 25/55] =?UTF-8?q?gh-136028:=20Fix=20parsing=20month=20name?= =?UTF-8?q?s=20containing=20"=C4=B0"=20(U+0130)=20in=20strptime()=20(GH-13?= =?UTF-8?q?6029)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This affects locales az_AZ, ber_DZ, ber_MA and crh_UA. 
--- Lib/_strptime.py | 12 ++++++++++-- Lib/test/test_strptime.py | 9 +++++++++ .../2025-06-27-13-34-28.gh-issue-136028.RY727g.rst | 3 +++ 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 7ac6f36360cb69..cdc55e8daaffa6 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -42,6 +42,14 @@ def _findall(haystack, needle): yield i i += len(needle) +def _fixmonths(months): + yield from months + # The lower case of 'İ' ('\u0130') is 'i\u0307'. + # The re module only supports 1-to-1 character matching in + # case-insensitive mode. + for s in months: + if 'i\u0307' in s: + yield s.replace('i\u0307', '\u0130') lzh_TW_alt_digits = ( # 〇:一:二:三:四:五:六:七:八:九 @@ -366,8 +374,8 @@ def __init__(self, locale_time=None): 'z': r"(?P([+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?)|(?-i:Z))?", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), - 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), - 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), + 'B': self.__seqToRE(_fixmonths(self.locale_time.f_month[1:]), 'B'), + 'b': self.__seqToRE(_fixmonths(self.locale_time.a_month[1:]), 'b'), 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), 'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone for tz in tz_names), diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index e52c46f8c58cce..0241e543cd7dde 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -337,6 +337,15 @@ def test_month_locale(self): self.roundtrip('%B', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0)) self.roundtrip('%b', 1, (1900, m, 1, 0, 0, 0, 0, 1, 0)) + @run_with_locales('LC_TIME', 'az_AZ', 'ber_DZ', 'ber_MA', 'crh_UA') + def test_month_locale2(self): + # Test for month directives + # Month name contains 'İ' ('\u0130') + self.roundtrip('%B', 1, (2025, 6, 1, 0, 0, 0, 6, 152, 0)) + 
self.roundtrip('%b', 1, (2025, 6, 1, 0, 0, 0, 6, 152, 0)) + self.roundtrip('%B', 1, (2025, 7, 1, 0, 0, 0, 1, 182, 0)) + self.roundtrip('%b', 1, (2025, 7, 1, 0, 0, 0, 1, 182, 0)) + def test_day(self): # Test for day directives self.roundtrip('%d %Y', 2) diff --git a/Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst b/Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst new file mode 100644 index 00000000000000..9859df7cf6a69c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-06-27-13-34-28.gh-issue-136028.RY727g.rst @@ -0,0 +1,3 @@ +Fix parsing month names containing "İ" (U+0130, LATIN CAPITAL LETTER I WITH +DOT ABOVE) in :func:`time.strptime`. This affects locales az_AZ, ber_DZ, +ber_MA and crh_UA. From 065194c1a971b59547f1bb2cc64760c4bf0ee674 Mon Sep 17 00:00:00 2001 From: Will Childs-Klein Date: Fri, 27 Jun 2025 11:01:16 -0400 Subject: [PATCH 26/55] gh-135571: Guard `_hashlib` usage in `test_hashlib.py` (#135572) --- Lib/test/test_hashlib.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 7b378c45e71563..5bad483ae9dafc 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -279,7 +279,10 @@ def test_clinic_signature(self): with self.assertWarnsRegex(DeprecationWarning, DEPRECATED_STRING_PARAMETER): hashlib.new(digest_name, string=b'') - if self._hashlib: + # Make sure that _hashlib contains the constructor + # to test when using a combination of libcrypto and + # interned hash implementations. 
+ if self._hashlib and digest_name in self._hashlib._constructors: self._hashlib.new(digest_name, b'') self._hashlib.new(digest_name, data=b'') with self.assertWarnsRegex(DeprecationWarning, @@ -333,7 +336,8 @@ def test_clinic_signature_errors(self): with self.subTest(digest_name, args=args, kwds=kwds): with self.assertRaisesRegex(TypeError, errmsg): hashlib.new(digest_name, *args, **kwds) - if self._hashlib: + if (self._hashlib and + digest_name in self._hashlib._constructors): with self.assertRaisesRegex(TypeError, errmsg): self._hashlib.new(digest_name, *args, **kwds) From 1e975aee28924afbd956183918cef278e09ce8f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 27 Jun 2025 17:12:21 +0200 Subject: [PATCH 27/55] gh-135755: rename undocumented `HACL_CAN_COMPILE_SIMD{128,256}` macros (#135847) Rename undocumented `HACL_CAN_COMPILE_SIMD{128,256}` macros to `_Py_HACL_CAN_COMPILE_VEC{128,256}`. These macros are private. --- Modules/blake2module.c | 66 +++++++++++++++++++------------------- Modules/hmacmodule.c | 21 ++++++------ PCbuild/pythoncore.vcxproj | 8 +++-- configure | 4 +-- configure.ac | 6 ++-- pyconfig.h.in | 12 +++---- 6 files changed, 62 insertions(+), 55 deletions(-) diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 9e279e11b518d2..163f238a4268d0 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -43,25 +43,25 @@ // SIMD256 can't be compiled on macOS ARM64, and performance of SIMD128 isn't // great; but when compiling a universal2 binary, autoconf will set -// HACL_CAN_COMPILE_SIMD128 and HACL_CAN_COMPILE_SIMD256 because they *can* be -// compiled on x86_64. If we're on macOS ARM64, disable these preprocessor -// symbols. +// _Py_HACL_CAN_COMPILE_VEC{128,256} because they *can* be compiled on x86_64. +// If we're on macOS ARM64, we however disable these preprocessor symbols. 
#if defined(__APPLE__) && defined(__arm64__) -# undef HACL_CAN_COMPILE_SIMD128 -# undef HACL_CAN_COMPILE_SIMD256 +# undef _Py_HACL_CAN_COMPILE_VEC128 +# undef _Py_HACL_CAN_COMPILE_VEC256 #endif -// Small mismatch between the variable names Python defines as part of configure -// at the ones HACL* expects to be set in order to enable those headers. -#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 -#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 +// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable +// the corresponding SIMD instructions so we need to "forward" the values +// we just deduced above. +#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128 +#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_Hash_Blake2s.h" #include "_hacl/Hacl_Hash_Blake2b.h" -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 #include "_hacl/Hacl_Hash_Blake2s_Simd128.h" #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_Hash_Blake2b_Simd256.h" #endif @@ -88,7 +88,7 @@ blake2_get_state(PyObject *module) return (Blake2State *)state; } -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) +#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256) static inline Blake2State * blake2_get_state_from_type(PyTypeObject *module) { @@ -181,7 +181,7 @@ blake2module_init_cpu_features(Blake2State *state) #undef ECX_SSE3 #undef EBX_AVX2 -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; #else @@ -191,7 +191,7 @@ blake2module_init_cpu_features(Blake2State *state) state->can_run_simd128 = false; #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd256 = 
state->can_run_simd128 && avx && avx2; #else @@ -332,18 +332,18 @@ is_blake2s(blake2_impl impl) static inline blake2_impl type_to_impl(PyTypeObject *type) { -#if defined(HACL_CAN_COMPILE_SIMD128) || defined(HACL_CAN_COMPILE_SIMD256) +#if defined(_Py_HACL_CAN_COMPILE_VEC128) || defined(_Py_HACL_CAN_COMPILE_VEC256) Blake2State *st = blake2_get_state_from_type(type); #endif if (!strcmp(type->tp_name, blake2b_type_spec.name)) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 return st->can_run_simd256 ? Blake2b_256 : Blake2b; #else return Blake2b; #endif } else if (!strcmp(type->tp_name, blake2s_type_spec.name)) { -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 return st->can_run_simd128 ? Blake2s_128 : Blake2s; #else return Blake2s; @@ -357,10 +357,10 @@ typedef struct { union { Hacl_Hash_Blake2s_state_t *blake2s_state; Hacl_Hash_Blake2b_state_t *blake2b_state; -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 Hacl_Hash_Blake2s_Simd128_state_t *blake2s_128_state; #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 Hacl_Hash_Blake2b_Simd256_state_t *blake2b_256_state; #endif }; @@ -429,13 +429,13 @@ blake2_update_unlocked(Blake2Object *self, uint8_t *buf, Py_ssize_t len) switch (self->impl) { // blake2b_256_state and blake2s_128_state must be if'd since // otherwise this results in an unresolved symbol at link-time. -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: HACL_UPDATE(Hacl_Hash_Blake2b_Simd256_update, self->blake2b_256_state, buf, len); return; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: HACL_UPDATE(Hacl_Hash_Blake2s_Simd128_update, self->blake2s_128_state, buf, len); @@ -555,12 +555,12 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, // Ensure that the states are NULL-initialized in case of an error. // See: py_blake2_clear() for more details. 
switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: self->blake2b_256_state = NULL; break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: self->blake2s_128_state = NULL; break; @@ -623,12 +623,12 @@ py_blake2_new(PyTypeObject *type, PyObject *data, int digest_size, } while (0) switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: BLAKE2_MALLOC(Blake2b_Simd256, self->blake2b_256_state); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: BLAKE2_MALLOC(Blake2s_Simd128, self->blake2s_128_state); break; @@ -756,12 +756,12 @@ blake2_blake2b_copy_unlocked(Blake2Object *self, Blake2Object *cpy) } while (0) switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: BLAKE2_COPY(Blake2b_Simd256, blake2b_256_state); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: BLAKE2_COPY(Blake2s_Simd128, blake2s_128_state); break; @@ -840,12 +840,12 @@ static uint8_t blake2_blake2b_compute_digest(Blake2Object *self, uint8_t *digest) { switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: return Hacl_Hash_Blake2b_Simd256_digest( self->blake2b_256_state, digest); #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: return Hacl_Hash_Blake2s_Simd128_digest( self->blake2s_128_state, digest); @@ -923,11 +923,11 @@ static Hacl_Hash_Blake2b_index hacl_get_blake2_info(Blake2Object *self) { switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: return Hacl_Hash_Blake2b_Simd256_info(self->blake2b_256_state); #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: return Hacl_Hash_Blake2s_Simd128_info(self->blake2s_128_state); #endif @@ -975,12 +975,12 @@ 
py_blake2_clear(PyObject *op) } while (0) switch (self->impl) { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 case Blake2b_256: BLAKE2_FREE(Blake2b_Simd256, self->blake2b_256_state); break; #endif -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 case Blake2s_128: BLAKE2_FREE(Blake2s_Simd128, self->blake2s_128_state); break; diff --git a/Modules/hmacmodule.c b/Modules/hmacmodule.c index e7a5ccbb19b45c..95e400231bb65c 100644 --- a/Modules/hmacmodule.c +++ b/Modules/hmacmodule.c @@ -31,14 +31,15 @@ #endif #if defined(__APPLE__) && defined(__arm64__) -# undef HACL_CAN_COMPILE_SIMD128 -# undef HACL_CAN_COMPILE_SIMD256 +# undef _Py_HACL_CAN_COMPILE_VEC128 +# undef _Py_HACL_CAN_COMPILE_VEC256 #endif -// Small mismatch between the variable names Python defines as part of configure -// at the ones HACL* expects to be set in order to enable those headers. -#define HACL_CAN_COMPILE_VEC128 HACL_CAN_COMPILE_SIMD128 -#define HACL_CAN_COMPILE_VEC256 HACL_CAN_COMPILE_SIMD256 +// HACL* expects HACL_CAN_COMPILE_VEC* macros to be set in order to enable +// the corresponding SIMD instructions so we need to "forward" the values +// we just deduced above. 
+#define HACL_CAN_COMPILE_VEC128 _Py_HACL_CAN_COMPILE_VEC128 +#define HACL_CAN_COMPILE_VEC256 _Py_HACL_CAN_COMPILE_VEC256 #include "_hacl/Hacl_HMAC.h" #include "_hacl/Hacl_Streaming_HMAC.h" // Hacl_Agile_Hash_* identifiers @@ -361,7 +362,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) { switch (kind) { case Py_hmac_kind_hmac_blake2s_32: { -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 if (state->can_run_simd128) { return Py_hmac_kind_hmac_vectorized_blake2s_32; } @@ -369,7 +370,7 @@ narrow_hmac_hash_kind(hmacmodule_state *state, HMAC_Hash_Kind kind) return kind; } case Py_hmac_kind_hmac_blake2b_32: { -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 if (state->can_run_simd256) { return Py_hmac_kind_hmac_vectorized_blake2b_32; } @@ -1601,7 +1602,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) #undef ECX_SSE3 #undef EBX_AVX2 -#if HACL_CAN_COMPILE_SIMD128 +#if _Py_HACL_CAN_COMPILE_VEC128 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd128 = sse && sse2 && sse3 && sse41 && sse42 && cmov; #else @@ -1611,7 +1612,7 @@ hmacmodule_init_cpu_features(hmacmodule_state *state) state->can_run_simd128 = false; #endif -#if HACL_CAN_COMPILE_SIMD256 +#if _Py_HACL_CAN_COMPILE_VEC256 // TODO(picnixz): use py_cpuid_features (gh-125022) to improve detection state->can_run_simd256 = state->can_run_simd128 && avx && avx2; #else diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 32a8f2dbad3d5e..b911c9385634d7 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -419,8 +419,12 @@ - HACL_CAN_COMPILE_SIMD128;%(PreprocessorDefinitions) - HACL_CAN_COMPILE_SIMD256;%(PreprocessorDefinitions) + + _Py_HACL_CAN_COMPILE_VEC128;%(PreprocessorDefinitions) + + + _Py_HACL_CAN_COMPILE_VEC256;%(PreprocessorDefinitions) + diff --git a/configure b/configure index 43b36d9231e341..75ae1699a8e451 100755 --- a/configure +++ b/configure @@ -32633,7 +32633,7 @@ 
then : LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2" -printf "%s\n" "#define HACL_CAN_COMPILE_SIMD128 1" >>confdefs.h +printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC128 1" >>confdefs.h # macOS universal2 builds *support* the -msse etc flags because they're @@ -32709,7 +32709,7 @@ then : LIBHACL_SIMD256_FLAGS="-mavx2" -printf "%s\n" "#define HACL_CAN_COMPILE_SIMD256 1" >>confdefs.h +printf "%s\n" "#define _Py_HACL_CAN_COMPILE_VEC256 1" >>confdefs.h # macOS universal2 builds *support* the -mavx2 compiler flag because it's diff --git a/configure.ac b/configure.ac index e77696e3a4e025..4da1ba78b54b0d 100644 --- a/configure.ac +++ b/configure.ac @@ -8026,7 +8026,8 @@ then AX_CHECK_COMPILE_FLAG([-msse -msse2 -msse3 -msse4.1 -msse4.2],[ [LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"] - AC_DEFINE([HACL_CAN_COMPILE_SIMD128], [1], [HACL* library can compile SIMD128 implementations]) + AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC128], [1], [ + HACL* library can compile SIMD128 implementations]) # macOS universal2 builds *support* the -msse etc flags because they're # available on x86_64. However, performance of the HACL SIMD128 implementation @@ -8057,7 +8058,8 @@ if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || \ then AX_CHECK_COMPILE_FLAG([-mavx2],[ [LIBHACL_SIMD256_FLAGS="-mavx2"] - AC_DEFINE([HACL_CAN_COMPILE_SIMD256], [1], [HACL* library can compile SIMD256 implementations]) + AC_DEFINE([_Py_HACL_CAN_COMPILE_VEC256], [1], [ + HACL* library can compile SIMD256 implementations]) # macOS universal2 builds *support* the -mavx2 compiler flag because it's # available on x86_64; but the HACL SIMD256 build then fails because the diff --git a/pyconfig.h.in b/pyconfig.h.in index d4f1da7fb10776..d7c496fccc682c 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -50,12 +50,6 @@ /* Define if getpgrp() must be called as getpgrp(0). 
*/ #undef GETPGRP_HAVE_ARG -/* HACL* library can compile SIMD128 implementations */ -#undef HACL_CAN_COMPILE_SIMD128 - -/* HACL* library can compile SIMD256 implementations */ -#undef HACL_CAN_COMPILE_SIMD256 - /* Define if you have the 'accept' function. */ #undef HAVE_ACCEPT @@ -2026,6 +2020,12 @@ /* Defined if _Complex C type can be used with libffi. */ #undef _Py_FFI_SUPPORT_C_COMPLEX +/* HACL* library can compile SIMD128 implementations */ +#undef _Py_HACL_CAN_COMPILE_VEC128 + +/* HACL* library can compile SIMD256 implementations */ +#undef _Py_HACL_CAN_COMPILE_VEC256 + /* Define to force use of thread-safe errno, h_errno, and other functions */ #undef _REENTRANT From 0141e7f9e66e5e5ac3949fb018dd6811cd630dca Mon Sep 17 00:00:00 2001 From: Lee Dogeon Date: Sat, 28 Jun 2025 00:15:11 +0900 Subject: [PATCH 28/55] gh-108765: fix comment about macro definitions in `_stat.c` post GH-108854 (#136027) --- Modules/_stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/_stat.c b/Modules/_stat.c index f11ca7d23b440d..1dabf2f6d5b07f 100644 --- a/Modules/_stat.c +++ b/Modules/_stat.c @@ -57,7 +57,7 @@ typedef unsigned short mode_t; * Only the names are defined by POSIX but not their value. All common file * types seems to have the same numeric value on all platforms, though. 
* - * pyport.h guarantees S_IFMT, S_IFDIR, S_IFCHR, S_IFREG and S_IFLNK + * fileutils.h guarantees S_IFMT, S_IFDIR, S_IFCHR, S_IFREG and S_IFLNK */ #ifndef S_IFBLK @@ -86,7 +86,7 @@ typedef unsigned short mode_t; /* S_ISXXX() - * pyport.h defines S_ISDIR(), S_ISREG() and S_ISCHR() + * fileutils.h defines S_ISDIR(), S_ISREG() and S_ISCHR() */ #ifndef S_ISBLK From 0e5d09613094f2331a6b1cdb83f98998702d4469 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Fri, 27 Jun 2025 08:20:51 -0700 Subject: [PATCH 29/55] GH-135904: Optimize the JIT's assembly control flow (GH-135905) --- ...-06-24-16-46-34.gh-issue-135904.78xfon.rst | 2 + Tools/jit/_optimizers.py | 319 ++++++++++++++++++ Tools/jit/_stencils.py | 67 +--- Tools/jit/_targets.py | 58 ++-- 4 files changed, 352 insertions(+), 94 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst create mode 100644 Tools/jit/_optimizers.py diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst new file mode 100644 index 00000000000000..ecbd8fda9a5e9d --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-24-16-46-34.gh-issue-135904.78xfon.rst @@ -0,0 +1,2 @@ +Perform more aggressive control-flow optimizations on the machine code +templates emitted by the experimental JIT compiler. diff --git a/Tools/jit/_optimizers.py b/Tools/jit/_optimizers.py new file mode 100644 index 00000000000000..1077e4106fdfbd --- /dev/null +++ b/Tools/jit/_optimizers.py @@ -0,0 +1,319 @@ +"""Low-level optimization of textual assembly.""" + +import dataclasses +import pathlib +import re +import typing + +# Same as saying "not string.startswith('')": +_RE_NEVER_MATCH = re.compile(r"(?!)") +# Dictionary mapping branch instructions to their inverted branch instructions. 
+# If a branch cannot be inverted, the value is None: +_X86_BRANCHES = { + # https://www.felixcloutier.com/x86/jcc + "ja": "jna", + "jae": "jnae", + "jb": "jnb", + "jbe": "jnbe", + "jc": "jnc", + "jcxz": None, + "je": "jne", + "jecxz": None, + "jg": "jng", + "jge": "jnge", + "jl": "jnl", + "jle": "jnle", + "jo": "jno", + "jp": "jnp", + "jpe": "jpo", + "jrcxz": None, + "js": "jns", + "jz": "jnz", + # https://www.felixcloutier.com/x86/loop:loopcc + "loop": None, + "loope": None, + "loopne": None, + "loopnz": None, + "loopz": None, +} +# Update with all of the inverted branches, too: +_X86_BRANCHES |= {v: k for k, v in _X86_BRANCHES.items() if v} + + +@dataclasses.dataclass +class _Block: + label: str | None = None + # Non-instruction lines like labels, directives, and comments: + noninstructions: list[str] = dataclasses.field(default_factory=list) + # Instruction lines: + instructions: list[str] = dataclasses.field(default_factory=list) + # If this block ends in a jump, where to? + target: typing.Self | None = None + # The next block in the linked list: + link: typing.Self | None = None + # Whether control flow can fall through to the linked block above: + fallthrough: bool = True + # Whether this block can eventually reach the next uop (_JIT_CONTINUE): + hot: bool = False + + def resolve(self) -> typing.Self: + """Find the first non-empty block reachable from this one.""" + block = self + while block.link and not block.instructions: + block = block.link + return block + + +@dataclasses.dataclass +class Optimizer: + """Several passes of analysis and optimization for textual assembly.""" + + path: pathlib.Path + _: dataclasses.KW_ONLY + # prefix used to mangle symbols on some platforms: + prefix: str = "" + # The first block in the linked list: + _root: _Block = dataclasses.field(init=False, default_factory=_Block) + _labels: dict[str, _Block] = dataclasses.field(init=False, default_factory=dict) + # No groups: + _re_noninstructions: typing.ClassVar[re.Pattern[str]] = 
re.compile( + r"\s*(?:\.|#|//|$)" + ) + # One group (label): + _re_label: typing.ClassVar[re.Pattern[str]] = re.compile( + r'\s*(?P