From 38c64c24124faaa98593e7e3c89ddb08b78bf8ee Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Mon, 10 Feb 2025 17:06:57 +0100
Subject: [PATCH 1/5] Move deprecated PyUnicode API docs to new section

Move Py_UNICODE to a new "Deprecated API" section.

Formally soft-deprecate PyUnicode_READY, and move it to the
"Deprecated API" section.

Document and soft-deprecate PyUnicode_IS_READY, and move it to the
"Deprecated API" section.
---
 Doc/c-api/unicode.rst           | 81 +++++++++++++++++++++------------
 Include/cpython/unicodeobject.h |  4 +-
 2 files changed, 54 insertions(+), 31 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 94110d48ed7d85..c561e148fa4075 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -31,6 +31,12 @@ Unicode Type
 These are the basic Unicode object types used for the Unicode implementation in
 Python:
 
+.. c:var:: PyTypeObject PyUnicode_Type
+
+   This instance of :c:type:`PyTypeObject` represents the Python Unicode type.  It
+   is exposed to Python code as ``str``.
+
+
 .. c:type:: Py_UCS4
             Py_UCS2
             Py_UCS1
@@ -42,19 +48,6 @@ Python:
    .. versionadded:: 3.3
 
 
-.. c:type:: Py_UNICODE
-
-   This is a typedef of :c:type:`wchar_t`, which is a 16-bit type or 32-bit type
-   depending on the platform.
-
-   .. versionchanged:: 3.3
-      In previous versions, this was a 16-bit type or a 32-bit type depending on
-      whether you selected a "narrow" or "wide" Unicode version of Python at
-      build time.
-
-   .. deprecated-removed:: 3.13 3.15
-
-
 .. c:type:: PyASCIIObject
             PyCompactUnicodeObject
             PyUnicodeObject
@@ -66,12 +59,6 @@ Python:
    .. versionadded:: 3.3
 
 
-.. c:var:: PyTypeObject PyUnicode_Type
-
-   This instance of :c:type:`PyTypeObject` represents the Python Unicode type.  It
-   is exposed to Python code as ``str``.
-
-
 The following APIs are C macros and static inlined functions for fast checks and
 access to internal read-only data of Unicode objects:
 
@@ -87,16 +74,6 @@ access to internal read-only data of Unicode objects:
    subtype.  This function always succeeds.
 
 
-.. c:function:: int PyUnicode_READY(PyObject *unicode)
-
-   Returns ``0``. This API is kept only for backward compatibility.
-
-   .. versionadded:: 3.3
-
-   .. deprecated:: 3.10
-      This API does nothing since Python 3.12.
-
-
 .. c:function:: Py_ssize_t PyUnicode_GET_LENGTH(PyObject *unicode)
 
    Return the length of the Unicode string, in code points.  *unicode* has to be a
@@ -1729,3 +1706,49 @@ object.
    On error, set an exception, leave the writer unchanged, and return ``-1``.
 
    See also :c:func:`PyUnicodeWriter_WriteUTF8`.
+
+Deprecated API
+^^^^^^^^^^^^^^
+
+The following API is deprecated.
+
+.. c:type:: Py_UNICODE
+
+   This is a typedef of :c:type:`wchar_t`, which is a 16-bit type or 32-bit type
+   depending on the platform.
+   Please use :c:type:`wchar_t` directly instead.
+
+   .. versionchanged:: 3.3
+      In previous versions, this was a 16-bit type or a 32-bit type depending on
+      whether you selected a "narrow" or "wide" Unicode version of Python at
+      build time.
+
+   .. deprecated-removed:: 3.13 3.15
+
+
+.. c:function:: int PyUnicode_READY(PyObject *unicode)
+
+   Do nothing and return ``0``.
+   This API is kept only for backward compatibility, but there are no plans
+   to remove it.
+
+   .. versionadded:: 3.3
+
+   .. deprecated:: 3.10
+      This API does nothing since Python 3.12.
+      Previously, this needed to be called for each string created using
+      the old API (``PyUnicode_FromUnicode`` or similar).
+
+
+.. c:function:: unsigned int PyUnicode_IS_READY(PyObject *unicode)
+
+   Do nothing and return ``1``.
+   This API is kept only for backward compatibility, but there are no plans
+   to remove it.
+
+   .. versionadded:: 3.3
+
+   .. deprecated:: next
+      This API does nothing since Python 3.12.
+      Previously, this could be called to check if
+      :c:func:`PyUnicode_READY` is necessary.
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index cea69dd1280999..ec71d16e7a8617 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -205,7 +205,7 @@ static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) {
 }
 #define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op))
 
-/* For backward compatibility */
+/* For backward compatibility. Soft-deprecated. */
 static inline unsigned int PyUnicode_IS_READY(PyObject* Py_UNUSED(op)) {
     return 1;
 }
@@ -398,7 +398,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_New(
     Py_UCS4 maxchar             /* maximum code point value in the string */
     );
 
-/* For backward compatibility */
+/* For backward compatibility. Soft-deprecated. */
 static inline int PyUnicode_READY(PyObject* Py_UNUSED(op))
 {
     return 0;

From 1a756554c678f9f4c3802e9320992a861836814e Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Mon, 10 Feb 2025 17:09:54 +0100
Subject: [PATCH 2/5] Document PyUnicode_IS_ASCII, PyUnicode_CHECK_INTERNED

---
 Doc/c-api/unicode.rst | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index c561e148fa4075..f0b6703ca1207b 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -173,6 +173,14 @@ access to internal read-only data of Unicode objects:
       is not ready.
 
 
+.. c:function:: unsigned int PyUnicode_IS_ASCII(PyObject *unicode)
+
+   Return true if the string only contains ASCII characters.
+   Equivalent to :py:meth:`str.isascii`.
+
+   .. versionadded:: 3.3
+
+
 Unicode Character Properties
 """"""""""""""""""""""""""""
 
@@ -1574,6 +1582,20 @@ They all return ``NULL`` or ``-1`` if an exception occurs.
       Strings interned this way are made :term:`immortal`.
 
 
+.. c:function:: unsigned int PyUnicode_CHECK_INTERNED(PyObject *str)
+
+   Return a non-zero value if *str* is interned, zero if not.
+   The *str* argument must be a string; this is not checked.
+   This function always succeeds.
+
+   .. impl-detail::
+
+      A non-zero return value may carry additional information
+      about *how* the string is interned.
+      The meaning of such non-zero values, as well as each specific string's
+      intern-related details, may change between CPython versions.
+
+
 PyUnicodeWriter
 ^^^^^^^^^^^^^^^
 

From fc5322c107d6958a662d4f492b343de47ccd549d Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Mon, 10 Feb 2025 17:11:33 +0100
Subject: [PATCH 3/5] PyUnicode_New docs: Clarify requirements for "fresh"
 strings

---
 Doc/c-api/unicode.rst | 60 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 45 insertions(+), 15 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index f0b6703ca1207b..0fb580de24d432 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -126,12 +126,16 @@ access to internal read-only data of Unicode objects:
 .. c:function:: void PyUnicode_WRITE(int kind, void *data, \
                                      Py_ssize_t index, Py_UCS4 value)
 
-   Write into a canonical representation *data* (as obtained with
-   :c:func:`PyUnicode_DATA`).  This function performs no sanity checks, and is
-   intended for usage in loops.  The caller should cache the *kind* value and
-   *data* pointer as obtained from other calls.  *index* is the index in
-   the string (starts at 0) and *value* is the new code point value which should
-   be written to that location.
+   Write the code point *value* to the given zero-based *index* in a string.
+
+   The *kind* value and *data* pointer must have been obtained from a
+   string using :c:func:`PyUnicode_KIND` and :c:func:`PyUnicode_DATA`
+   respectively. You must hold a reference to that string while calling
+   :c:func:`!PyUnicode_WRITE`. All requirements of
+   :c:func:`PyUnicode_WriteChar` also apply.
+
+   The function performs no checks for any of its requirements,
+   and is intended for usage in loops.
 
    .. versionadded:: 3.3
 
@@ -320,11 +324,30 @@ APIs:
    to be placed in the string.  As an approximation, it can be rounded up to the
    nearest value in the sequence 127, 255, 65535, 1114111.
 
-   This is the recommended way to allocate a new Unicode object.  Objects
-   created using this function are not resizable.
-
    On error, set an exception and return ``NULL``.
 
+   After creation, the string can be filled by :c:func:`PyUnicode_WriteChar`,
+   :c:func:`PyUnicode_CopyCharacters`, :c:func:`PyUnicode_Fill`,
+   :c:func:`PyUnicode_WRITE` or similar.
+   Since strings are supposed to be immutable, take care to not “use” the
+   result while it is being modified. In particular, before it's filled
+   with its final contents, a string:
+
+   - must not be hashed,
+   - must not be :c:func:`converted to UTF-8 <PyUnicode_AsUTF8AndSize>`,
+     or another non-"canonical" representation,
+   - must not have its reference count changed,
+   - must not be shared with code that might do one of the above.
+
+   This list is not exhaustive. Avoiding these uses is your responsibility;
+   Python does not always check these requirements.
+
+   To avoid accidentally exposing a partially-written string object, prefer
+   using the :c:type:`PyUnicodeWriter` API, or one of the ``PyUnicode_From*``
+   functions below.
+
+   Objects created using this function are not resizable.
+
    .. versionadded:: 3.3
 
 
@@ -617,6 +640,9 @@ APIs:
    possible.  Returns ``-1`` and sets an exception on error, otherwise returns
    the number of copied characters.
 
+   The string must not have been “used” yet.
+   See :c:func:`PyUnicode_New` for details.
+
    .. versionadded:: 3.3
 
 
@@ -629,6 +655,9 @@ APIs:
    Fail if *fill_char* is bigger than the string maximum character, or if the
    string has more than 1 reference.
 
+   The string must not have been “used” yet.
+   See :c:func:`PyUnicode_New` for details.
+
    Return the number of written character, or return ``-1`` and raise an
    exception on error.
 
@@ -638,15 +667,16 @@ APIs:
 .. c:function:: int PyUnicode_WriteChar(PyObject *unicode, Py_ssize_t index, \
                                         Py_UCS4 character)
 
-   Write a character to a string.  The string must have been created through
-   :c:func:`PyUnicode_New`.  Since Unicode strings are supposed to be immutable,
-   the string must not be shared, or have been hashed yet.
+   Write a *character* to the string *unicode* at the zero-based *index*.
+   Return ``0`` on success, ``-1`` on error with an exception set.
 
    This function checks that *unicode* is a Unicode object, that the index is
-   not out of bounds, and that the object can be modified safely (i.e. that it
-   its reference count is one).
+   not out of bounds, and that the object's reference count is one.
+   See :c:func:`PyUnicode_WRITE` for a version that skips these checks,
+   making them your responsibility.
 
-   Return ``0`` on success, ``-1`` on error with an exception set.
+   The string must not have been “used” yet.
+   See :c:func:`PyUnicode_New` for details.
 
    .. versionadded:: 3.3
 

From e12ad3a53cb44c3ae5a0ec3c8becc972652785f4 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Mon, 10 Feb 2025 17:11:56 +0100
Subject: [PATCH 4/5] PyUnicodeWriter_DecodeUTF8Stateful: Link "error-handlers"

---
 Doc/c-api/unicode.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 0fb580de24d432..40b614b1d698a6 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -1746,8 +1746,8 @@ object.
    *size* is the string length in bytes. If *size* is equal to ``-1``, call
    ``strlen(str)`` to get the string length.
 
-   *errors* is an error handler name, such as ``"replace"``. If *errors* is
-   ``NULL``, use the strict error handler.
+   *errors* is an :ref:`error handler <error-handlers>` name, such as
+   ``"replace"``. If *errors* is ``NULL``, use the strict error handler.
 
    If *consumed* is not ``NULL``, set *\*consumed* to the number of decoded
    bytes on success.

From e73652eab2f6cf45987902da66c4992ddeaa55b9 Mon Sep 17 00:00:00 2001
From: Petr Viktorin <encukou@gmail.com>
Date: Thu, 27 Feb 2025 15:56:50 +0100
Subject: [PATCH 5/5] Apply suggestions from code review

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
---
 Doc/c-api/unicode.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 40b614b1d698a6..9d651c47df3519 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -34,7 +34,7 @@ Python:
 .. c:var:: PyTypeObject PyUnicode_Type
 
    This instance of :c:type:`PyTypeObject` represents the Python Unicode type.  It
-   is exposed to Python code as ``str``.
+   is exposed to Python code as :py:class:`str`.
 
 
 .. c:type:: Py_UCS4
@@ -346,7 +346,6 @@ APIs:
    using the :c:type:`PyUnicodeWriter` API, or one of the ``PyUnicode_From*``
    functions below.
 
-   Objects created using this function are not resizable.
 
    .. versionadded:: 3.3
 
@@ -1789,7 +1788,7 @@ The following API is deprecated.
    .. deprecated:: 3.10
       This API does nothing since Python 3.12.
       Previously, this needed to be called for each string created using
-      the old API (``PyUnicode_FromUnicode`` or similar).
+      the old API (:c:func:`!PyUnicode_FromUnicode` or similar).
 
 
 .. c:function:: unsigned int PyUnicode_IS_READY(PyObject *unicode)