python · methane · Apr 4, 2022 · Mar 20, 2022 · Mar 20, 2022 · Mar 20, 2022
diff --git a/Doc/library/io.rst b/Doc/library/io.rst
@@ -198,12 +198,13 @@ High-level Module Interface
    This is a helper function for callables that use :func:`open` or
    :class:`TextIOWrapper` and have an ``encoding=None`` parameter.
 
-   This function returns *encoding* if it is not ``None`` and ``"locale"`` if
-   *encoding* is ``None``.
+   This function returns *encoding* if it is not ``None``.
+   Otherwise, it returns ``"locale"`` or ``"utf-8"`` depending on
+   :ref:`UTF-8 Mode <utf8-mode>`.
 
    This function emits an :class:`EncodingWarning` if
    :data:`sys.flags.warn_default_encoding <sys.flags>` is true and *encoding*
-   is None. *stacklevel* specifies where the warning is emitted.
+   is ``None``. *stacklevel* specifies where the warning is emitted.
    For example::
 
       def read_text(path, encoding=None):
@@ -218,6 +219,10 @@ High-level Module Interface
 
    .. versionadded:: 3.10
 
+   .. versionchanged:: 3.11
+      :func:`text_encoding` returns ``"utf-8"`` when UTF-8 mode is enabled and
+      *encoding* is ``None``.
+
 
 .. exception:: BlockingIOError
 

diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
@@ -48,6 +48,7 @@ struct _Py_global_strings {
         STRUCT_FOR_STR(newline, "\n")
         STRUCT_FOR_STR(open_br, "{")
         STRUCT_FOR_STR(percent, "%")
+        STRUCT_FOR_STR(utf_8, "utf-8")
     } literals;
 
     struct {

diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h
@@ -672,6 +672,7 @@ extern "C" {
                 INIT_STR(newline, "\n"), \
                 INIT_STR(open_br, "{"), \
                 INIT_STR(percent, "%"), \
+                INIT_STR(utf_8, "utf-8"), \
             }, \
             .identifiers = { \
                 INIT_ID(False), \

diff --git a/Lib/_pyio.py b/Lib/_pyio.py
@@ -44,8 +44,9 @@ def text_encoding(encoding, stacklevel=2):
     """
     A helper function to choose the text encoding.
 
-    When encoding is not None, just return it.
-    Otherwise, return the default text encoding (i.e. "locale").
+    When encoding is not None, this function returns it.
+    Otherwise, this function returns the default text encoding
+    (i.e. "locale" or "utf-8", depending on UTF-8 mode).
 
     This function emits an EncodingWarning if *encoding* is None and
     sys.flags.warn_default_encoding is true.
@@ -55,7 +56,10 @@ def text_encoding(encoding, stacklevel=2):
     However, please consider using encoding="utf-8" for new APIs.
     """
     if encoding is None:
-        encoding = "locale"
+        if sys.flags.utf8_mode:
+            encoding = "utf-8"
+        else:
+            encoding = "locale"
         if sys.flags.warn_default_encoding:
             import warnings
             warnings.warn("'encoding' argument not specified.",

diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
@@ -4289,6 +4289,17 @@ def test_check_encoding_warning(self):
         self.assertTrue(
             warnings[1].startswith(b"<string>:8: EncodingWarning: "))
 
+    def test_text_encoding(self):
+        # PEP 597, bpo-47000. io.text_encoding() returns "locale" or "utf-8"
+        # based on sys.flags.utf8_mode
+        code = "import io; print(io.text_encoding(None))"
+
+        proc = assert_python_ok('-X', 'utf8=0', '-c', code)
+        self.assertEqual(b"locale", proc.out.strip())
+
+        proc = assert_python_ok('-X', 'utf8=1', '-c', code)
+        self.assertEqual(b"utf-8", proc.out.strip())
+
     @support.cpython_only
     # Depending if OpenWrapper was already created or not, the warning is
     # emitted or not. For example, the attribute is already created when this

diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
@@ -161,7 +161,7 @@ def test_io(self):
         filename = __file__
 
         out = self.get_output('-c', code, filename, PYTHONUTF8='1')
-        self.assertEqual(out, 'UTF-8/strict')
+        self.assertEqual(out.lower(), 'utf-8/strict')
 
     def _check_io_encoding(self, module, encoding=None, errors=None):
         filename = __file__
@@ -183,10 +183,10 @@ def _check_io_encoding(self, module, encoding=None, errors=None):
                               PYTHONUTF8='1')
 
         if not encoding:
-            encoding = 'UTF-8'
+            encoding = 'utf-8'
         if not errors:
             errors = 'strict'
-        self.assertEqual(out, f'{encoding}/{errors}')
+        self.assertEqual(out.lower(), f'{encoding}/{errors}')
 
     def check_io_encoding(self, module):
         self._check_io_encoding(module, encoding="latin1")

diff --git a/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst b/Misc/NEWS.d/next/Library/2022-03-20-13-00-08.bpo-47000.p8HpG0.rst
@@ -0,0 +1 @@
+Make :func:`io.text_encoding` return "utf-8" when UTF-8 mode is enabled.
diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c
@@ -457,8 +457,9 @@ _io.text_encoding
 
 A helper function to choose the text encoding.
 
-When encoding is not None, just return it.
-Otherwise, return the default text encoding (i.e. "locale").
+When encoding is not None, this function returns it.
+Otherwise, this function returns the default text encoding
+(i.e. "locale" or "utf-8" depends on UTF-8 mode).
 
 This function emits an EncodingWarning if encoding is None and
 sys.flags.warn_default_encoding is true.
@@ -469,7 +470,7 @@ However, please consider using encoding="utf-8" for new APIs.
 
 static PyObject *
 _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
-/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/
+/*[clinic end generated code: output=91b2cfea6934cc0c input=4999aa8b3d90f3d4]*/
 {
     if (encoding == NULL || encoding == Py_None) {
         PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -479,7 +480,14 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel)
                 return NULL;
             }
         }
-        return &_Py_ID(locale);
+        const PyPreConfig *preconfig = &_PyRuntime.preconfig;
+        if (preconfig->utf8_mode) {
+            _Py_DECLARE_STR(utf_8, "utf-8");
+            encoding = &_Py_STR(utf_8);
+        }
+        else {
+            encoding = &_Py_ID(locale);
+        }
     }
     Py_INCREF(encoding);
     return encoding;

diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
@@ -841,7 +841,10 @@ static PyObject *
 sys_getdefaultencoding_impl(PyObject *module)
 /*[clinic end generated code: output=256d19dfcc0711e6 input=d416856ddbef6909]*/
 {
-    return PyUnicode_FromString(PyUnicode_GetDefaultEncoding());
+    _Py_DECLARE_STR(utf_8, "utf-8");
+    PyObject *ret = &_Py_STR(utf_8);
+    Py_INCREF(ret);
+    return ret;
 }
 
 /*[clinic input]
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Make :func:`io.text_encoding` return "utf-8" when UTF-8 mode is enabled.