diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 64c16056ece9df..31a801f412850c 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -256,13 +256,8 @@ the Python configuration. .. c:function:: int Py_UNICODE_ISPRINTABLE(Py_UCS4 ch) - Return ``1`` or ``0`` depending on whether *ch* is a printable character. - Nonprintable characters are those characters defined in the Unicode character - database as "Other" or "Separator", excepting the ASCII space (0x20) which is - considered printable. (Note that printable characters in this context are - those which should not be escaped when :func:`repr` is invoked on a string. - It has no bearing on the handling of strings written to :data:`sys.stdout` or - :data:`sys.stderr`.) + Return ``1`` or ``0`` depending on whether *ch* is a printable character, + in the sense of :meth:`str.isprintable`. These APIs can be used for fast direct character conversions: diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index b5ba8060cb45a1..cb9624ab757330 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1876,13 +1876,19 @@ expression support in the :mod:`re` module). .. method:: str.isprintable() - Return ``True`` if all characters in the string are printable or the string is - empty, ``False`` otherwise. Nonprintable characters are those characters defined - in the Unicode character database as "Other" or "Separator", excepting the - ASCII space (0x20) which is considered printable. (Note that printable - characters in this context are those which should not be escaped when - :func:`repr` is invoked on a string. It has no bearing on the handling of - strings written to :data:`sys.stdout` or :data:`sys.stderr`.) + Return true if all characters in the string are printable, false if it + contains at least one non-printable character. 
+ + Here "printable" means the character is suitable for :func:`repr` to use in + its output; "non-printable" means that :func:`repr` on built-in types will + hex-escape the character. It has no bearing on the handling of strings + written to :data:`sys.stdout` or :data:`sys.stderr`. + + The printable characters are those which in the Unicode character database + (see :mod:`unicodedata`) have a general category in group Letter, Mark, + Number, Punctuation, or Symbol (L, M, N, P, or S); plus the ASCII space 0x20. + Non-printable characters are those in group Separator or Other (Z or C), + except the ASCII space. .. method:: str.isspace() diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index c4f59224a6fe6f..46673cc56adb35 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -853,6 +853,15 @@ def test_isprintable(self): self.assertTrue('\U0001F46F'.isprintable()) self.assertFalse('\U000E0020'.isprintable()) + @support.requires_resource('cpu') + def test_isprintable_invariant(self): + for codepoint in range(sys.maxunicode + 1): + char = chr(codepoint) + category = unicodedata.category(char) + self.assertEqual(char.isprintable(), + category[0] not in ('C', 'Z') + or char == ' ') + def test_surrogates(self): for s in ('a\uD800b\uDFFF', 'a\uDFFFb\uD800', 'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a'): diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 78e14b0021d006..1db304e7063d4c 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -703,10 +703,9 @@ PyDoc_STRVAR(unicode_isprintable__doc__, "isprintable($self, /)\n" "--\n" "\n" -"Return True if the string is printable, False otherwise.\n" +"Return True if all characters in the string are printable, False otherwise.\n" "\n" -"A string is printable if all of its characters are considered printable in\n" -"repr() or if it is empty."); +"A character is printable if repr() may use it in its output."); #define UNICODE_ISPRINTABLE_METHODDEF \ 
{"isprintable", (PyCFunction)unicode_isprintable, METH_NOARGS, unicode_isprintable__doc__}, @@ -1888,4 +1887,4 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=9fee62bd337f809b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b7d75c4898e8198d input=a9049054013a1b77]*/ diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c index aa5c5b2a4ad2eb..7cd0dca3d13545 100644 --- a/Objects/unicodectype.c +++ b/Objects/unicodectype.c @@ -142,18 +142,10 @@ int _PyUnicode_IsNumeric(Py_UCS4 ch) return (ctype->flags & NUMERIC_MASK) != 0; } -/* Returns 1 for Unicode characters to be hex-escaped when repr()ed, - 0 otherwise. - All characters except those characters defined in the Unicode character - database as following categories are considered printable. - * Cc (Other, Control) - * Cf (Other, Format) - * Cs (Other, Surrogate) - * Co (Other, Private Use) - * Cn (Other, Not Assigned) - * Zl Separator, Line ('\u2028', LINE SEPARATOR) - * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR) - * Zs (Separator, Space) other than ASCII space('\x20'). +/* Returns 1 for Unicode characters that repr() may use in its output, + and 0 for characters to be hex-escaped. + + See documentation of `str.isprintable` for details. */ int _PyUnicode_IsPrintable(Py_UCS4 ch) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 434cb5ffb61c0e..a00125345b2dd5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12016,15 +12016,14 @@ unicode_isidentifier_impl(PyObject *self) /*[clinic input] str.isprintable as unicode_isprintable -Return True if the string is printable, False otherwise. +Return True if all characters in the string are printable, False otherwise. -A string is printable if all of its characters are considered printable in -repr() or if it is empty. +A character is printable if repr() may use it in its output. 
[clinic start generated code]*/ static PyObject * unicode_isprintable_impl(PyObject *self) -/*[clinic end generated code: output=3ab9626cd32dd1a0 input=98a0e1c2c1813209]*/ +/*[clinic end generated code: output=3ab9626cd32dd1a0 input=4e56bcc6b06ca18c]*/ { Py_ssize_t i, length; int kind;