diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 97c7062178ac0b..8f383cd6c4015f 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -256,13 +256,8 @@ the Python configuration. .. c:function:: int Py_UNICODE_ISPRINTABLE(Py_UCS4 ch) - Return ``1`` or ``0`` depending on whether *ch* is a printable character. - Nonprintable characters are those characters defined in the Unicode character - database as "Other" or "Separator", excepting the ASCII space (0x20) which is - considered printable. (Note that printable characters in this context are - those which should not be escaped when :func:`repr` is invoked on a string. - It has no bearing on the handling of strings written to :data:`sys.stdout` or - :data:`sys.stderr`.) + Return ``1`` or ``0`` depending on whether *ch* is a printable character, + in the sense of :meth:`str.isprintable`. These APIs can be used for fast direct character conversions: diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 0985edf8d0f720..143fc6508ebc42 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -2012,13 +2012,19 @@ expression support in the :mod:`re` module). .. method:: str.isprintable() - Return ``True`` if all characters in the string are printable or the string is - empty, ``False`` otherwise. Nonprintable characters are those characters defined - in the Unicode character database as "Other" or "Separator", excepting the - ASCII space (0x20) which is considered printable. (Note that printable - characters in this context are those which should not be escaped when - :func:`repr` is invoked on a string. It has no bearing on the handling of - strings written to :data:`sys.stdout` or :data:`sys.stderr`.) + Return true if all characters in the string are printable, false if it + contains at least one non-printable character. + + Here "printable" means the character is suitable for :func:`repr` to use in + its output; "non-printable" means that :func:`repr` on built-in types will + hex-escape the character. It has no bearing on the handling of strings + written to :data:`sys.stdout` or :data:`sys.stderr`. + + The printable characters are those which in the Unicode character database + (see :mod:`unicodedata`) have a general category in group Letter, Mark, + Number, Punctuation, or Symbol (L, M, N, P, or S); plus the ASCII space 0x20. + Nonprintable characters are those in group Separator or Other (Z or C), + except the ASCII space. .. method:: str.isspace() diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py index d1c9542c7d1317..2694f5d45c7ebf 100644 --- a/Lib/test/test_str.py +++ b/Lib/test/test_str.py @@ -853,6 +853,15 @@ def test_isprintable(self): self.assertTrue('\U0001F46F'.isprintable()) self.assertFalse('\U000E0020'.isprintable()) + @support.requires_resource('cpu') + def test_isprintable_invariant(self): + for codepoint in range(sys.maxunicode + 1): + char = chr(codepoint) + category = unicodedata.category(char) + self.assertEqual(char.isprintable(), + category[0] not in ('C', 'Z') + or char == ' ') + def test_surrogates(self): for s in ('a\uD800b\uDFFF', 'a\uDFFFb\uD800', 'a\uD800b\uDFFFa', 'a\uDFFFb\uD800a'): diff --git a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 5c6a425b0f803a..99651f3c64bc5a 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -705,10 +705,9 @@ PyDoc_STRVAR(unicode_isprintable__doc__, "isprintable($self, /)\n" "--\n" "\n" -"Return True if the string is printable, False otherwise.\n" +"Return True if all characters in the string are printable, False otherwise.\n" "\n" -"A string is printable if all of its characters are considered printable in\n" -"repr() or if it is empty."); +"A character is printable if repr() may use it in its output."); #define UNICODE_ISPRINTABLE_METHODDEF \ {"isprintable", (PyCFunction)unicode_isprintable, METH_NOARGS, unicode_isprintable__doc__}, @@ -1895,4 +1894,4 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=4d1cecd6d08498a4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=db37497bf38a2c17 input=a9049054013a1b77]*/ diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c index aa5c5b2a4ad2eb..7cd0dca3d13545 100644 --- a/Objects/unicodectype.c +++ b/Objects/unicodectype.c @@ -142,18 +142,10 @@ int _PyUnicode_IsNumeric(Py_UCS4 ch) return (ctype->flags & NUMERIC_MASK) != 0; } -/* Returns 1 for Unicode characters to be hex-escaped when repr()ed, - 0 otherwise. - All characters except those characters defined in the Unicode character - database as following categories are considered printable. - * Cc (Other, Control) - * Cf (Other, Format) - * Cs (Other, Surrogate) - * Co (Other, Private Use) - * Cn (Other, Not Assigned) - * Zl Separator, Line ('\u2028', LINE SEPARATOR) - * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR) - * Zs (Separator, Space) other than ASCII space('\x20'). +/* Returns 1 for Unicode characters that repr() may use in its output, + and 0 for characters to be hex-escaped. + + See documentation of `str.isprintable` for details. */ int _PyUnicode_IsPrintable(Py_UCS4 ch) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 75967d69ed374d..371c358a4950c2 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -12452,15 +12452,14 @@ unicode_isidentifier_impl(PyObject *self) /*[clinic input] str.isprintable as unicode_isprintable -Return True if the string is printable, False otherwise. +Return True if all characters in the string are printable, False otherwise. -A string is printable if all of its characters are considered printable in -repr() or if it is empty. +A character is printable if repr() may use it in its output. [clinic start generated code]*/ static PyObject * unicode_isprintable_impl(PyObject *self) -/*[clinic end generated code: output=3ab9626cd32dd1a0 input=98a0e1c2c1813209]*/ +/*[clinic end generated code: output=3ab9626cd32dd1a0 input=4e56bcc6b06ca18c]*/ { Py_ssize_t i, length; int kind;