@@ -224,7 +224,7 @@ typedef struct {
224224 PyUnicode_4BYTE_KIND
225225 * compact = 1
226226 * ready = 1
227- * ( ascii = 0)
227+ * ascii = 0
228228
229229 - string created by the legacy API (not ready):
230230
@@ -236,7 +236,7 @@ typedef struct {
236236 * data.any is NULL
237237 * utf8 is NULL
238238 * interned = SSTATE_NOT_INTERNED
239- * ( ascii = 0)
239+ * ascii = 0
240240
241241 - string created by the legacy API, ready:
242242
@@ -246,7 +246,6 @@ typedef struct {
246246 * compact = 0
247247 * ready = 1
248248 * data.any is not NULL
249- * (ascii = 0)
250249
251250 String created by the legacy API becomes ready when calling
252251 PyUnicode_READY().
@@ -278,8 +277,9 @@ typedef struct {
278277 one block for the PyUnicodeObject struct and another for its data
279278 buffer. */
280279 unsigned int compact:1 ;
281- /* Compact objects which are ASCII-only also have the state.compact
282- flag set, and use the PyASCIIObject struct. */
280+ /* Set when kind is PyUnicode_1BYTE_KIND and the data contains only
281+ ASCII characters. If ascii is 1 and compact is 1, the object uses
282+ the PyASCIIObject structure. */
283283 unsigned int ascii:1 ;
284284 /* The ready flag indicates whether the object layout is initialized
285285 completely. This means that this is either a compact object, or
@@ -304,7 +304,7 @@ typedef struct {
304304
305305/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
306306 PyUnicodeObject structure. The actual string data is initially in the wstr
307- block, and copied into the data block using PyUnicode_Ready . */
307+ block, and copied into the data block using _PyUnicode_Ready(). */
308308typedef struct {
309309 PyCompactUnicodeObject _base;
310310 union {
@@ -327,7 +327,7 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
327327#ifndef Py_LIMITED_API
328328
329329#define PyUnicode_WSTR_LENGTH (op ) \
330- (((PyASCIIObject*) op)->state.ascii ? \
330+ (PyUnicode_IS_COMPACT_ASCII( op) ? \
331331 ((PyASCIIObject*)op)->length : \
332332 ((PyCompactUnicodeObject*)op)->wstr_length)
333333
@@ -369,10 +369,24 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
369369#define SSTATE_INTERNED_MORTAL 1
370370#define SSTATE_INTERNED_IMMORTAL 2
371371
372- #define PyUnicode_IS_COMPACT_ASCII (op ) (((PyASCIIObject*)op)->state.ascii)
372+ /* Return 1 if the string contains only ASCII characters, or 0 if not. The
373+ string may or may not be compact; use PyUnicode_IS_COMPACT_ASCII to test
374+ for a compact ASCII string. No type checks or Ready calls are performed. */
375+ #define PyUnicode_IS_ASCII (op ) \
376+ (((PyASCIIObject*)op)->state.ascii)
377+
378+ /* Return true if the string is compact or 0 if not.
379+ No type checks or Ready calls are performed. */
380+ #define PyUnicode_IS_COMPACT (op ) \
381+ (((PyASCIIObject*)(op))->state.compact)
382+
383+ /* Return true if the string is a compact ASCII string (use PyASCIIObject
384+ structure), or 0 if not. No type checks or Ready calls are performed. */
385+ #define PyUnicode_IS_COMPACT_ASCII (op ) \
386+ (PyUnicode_IS_ASCII(op) && PyUnicode_IS_COMPACT(op))
373387
374388/* String contains only wstr byte characters. This is only possible
375- when the string was created with a legacy API and PyUnicode_Ready ()
389+ when the string was created with a legacy API and _PyUnicode_Ready()
376390 has not been called yet. */
377391#define PyUnicode_WCHAR_KIND 0
378392
@@ -399,11 +413,6 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
399413#define PyUnicode_2BYTE_DATA (op ) ((Py_UCS2*)PyUnicode_DATA(op))
400414#define PyUnicode_4BYTE_DATA (op ) ((Py_UCS4*)PyUnicode_DATA(op))
401415
402- /* Return true if the string is compact or 0 if not.
403- No type checks or Ready calls are performed. */
404- #define PyUnicode_IS_COMPACT (op ) \
405- (((PyASCIIObject*)(op))->state.compact)
406-
407416/* Return one of the PyUnicode_*_KIND values defined above. */
408417#define PyUnicode_KIND (op ) \
409418 (assert (PyUnicode_Check(op)), \
@@ -500,9 +509,9 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
500509
501510#define PyUnicode_IS_READY (op ) (((PyASCIIObject*)op)->state.ready)
502511
503- /* PyUnicode_READY() does less work than PyUnicode_Ready () in the best
512+ /* PyUnicode_READY() does less work than _PyUnicode_Ready () in the best
504513 case. If the canonical representation is not yet set, it will still call
505- PyUnicode_Ready ().
514+ _PyUnicode_Ready ().
506515 Returns 0 on success and -1 on errors. */
507516#define PyUnicode_READY (op ) \
508517 (assert (PyUnicode_Check(op)), \
0 commit comments