@@ -89,33 +89,24 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
8989extern "C" {
9090#endif
9191
92- /* Generic helper macro to convert characters of different types.
93- from_type and to_type have to be valid type names, begin and end
94- are pointers to the source characters which should be of type
95- "from_type *". to is a pointer of type "to_type *" and points to the
96- buffer where the result characters are written to. */
97- #define _PyUnicode_CONVERT_BYTES (from_type , to_type , begin , end , to ) \
98- do { \
99- const from_type *iter_; to_type *to_; \
100- for (iter_ = (begin), to_ = (to_type *)(to); \
101- iter_ < (end); \
102- ++iter_, ++to_) { \
103- *to_ = (to_type)*iter_; \
104- } \
105- } while (0)
92+ #ifdef Py_DEBUG /* select the check that the macros and functions below wrap in assert() */
93+ # define _PyUnicode_CHECK (op ) _PyUnicode_CheckConsistency(op) /* debug build: type check plus assertions on the object's state fields */
94+ #else
95+ # define _PyUnicode_CHECK (op ) PyUnicode_Check(op) /* otherwise: type check only */
96+ #endif
10697
10798#define _PyUnicode_UTF8 (op ) \
10899 (((PyCompactUnicodeObject*)(op))->utf8)
109100#define PyUnicode_UTF8 (op ) \
110- (assert(PyUnicode_Check (op)), \
101+ (assert(_PyUnicode_CHECK (op)), \
111102 assert(PyUnicode_IS_READY(op)), \
112103 PyUnicode_IS_COMPACT_ASCII(op) ? \
113104 ((char*)((PyASCIIObject*)(op) + 1)) : \
114105 _PyUnicode_UTF8(op))
115106#define _PyUnicode_UTF8_LENGTH (op ) \
116107 (((PyCompactUnicodeObject*)(op))->utf8_length)
117108#define PyUnicode_UTF8_LENGTH (op ) \
118- (assert(PyUnicode_Check (op)), \
109+ (assert(_PyUnicode_CHECK (op)), \
119110 assert(PyUnicode_IS_READY(op)), \
120111 PyUnicode_IS_COMPACT_ASCII(op) ? \
121112 ((PyASCIIObject*)(op))->length : \
@@ -125,22 +116,42 @@ extern "C" {
125116#define _PyUnicode_LENGTH (op ) (((PyASCIIObject *)(op))->length)
126117#define _PyUnicode_STATE (op ) (((PyASCIIObject *)(op))->state)
127118#define _PyUnicode_HASH (op ) (((PyASCIIObject *)(op))->hash)
128- #define _PyUnicode_KIND (op ) \
129- (assert(PyUnicode_Check (op)), \
119+ #define _PyUnicode_KIND (op ) \
120+ (assert(_PyUnicode_CHECK (op)), \
130121 ((PyASCIIObject *)(op))->state.kind)
131- #define _PyUnicode_GET_LENGTH (op ) \
132- (assert(PyUnicode_Check (op)), \
122+ #define _PyUnicode_GET_LENGTH (op ) \
123+ (assert(_PyUnicode_CHECK (op)), \
133124 ((PyASCIIObject *)(op))->length)
134125#define _PyUnicode_DATA_ANY (op ) (((PyUnicodeObject*)(op))->data.any)
135126
127+ #undef PyUnicode_READY /* drop any earlier definition so the version below is used from here on (presumably replaces a header definition -- TODO confirm) */
128+ #define PyUnicode_READY (op ) \
129+ (assert(_PyUnicode_CHECK(op)), /* op is expanded several times: pass an expression without side effects */ \
130+ (PyUnicode_IS_READY(op) ? \
131+ 0 : _PyUnicode_Ready((PyObject *)(op)))) /* 0 when already ready, else whatever _PyUnicode_Ready() returns */
132+
136133/* true if the Unicode object has an allocated UTF-8 memory block
137134 (not shared with other data) */
138- #define _PyUnicode_HAS_UTF8_MEMORY (op ) \
139- (assert(PyUnicode_Check (op)), \
140- (!PyUnicode_IS_COMPACT_ASCII(op) \
141- && _PyUnicode_UTF8(op) \
135+ #define _PyUnicode_HAS_UTF8_MEMORY (op ) \
136+ (assert(_PyUnicode_CHECK (op)), \
137+ (!PyUnicode_IS_COMPACT_ASCII(op) \
138+ && _PyUnicode_UTF8(op) \
142139 && _PyUnicode_UTF8(op) != PyUnicode_DATA(op)))
143140
141+ /* Generic helper macro to convert characters of different types.
142+ from_type and to_type have to be valid type names, begin and end
143+ are pointers to the source characters which should be of type
144+ "from_type *". to is a pointer of type "to_type *" and points to the
145+ buffer where the result characters are written to. */
146+ #define _PyUnicode_CONVERT_BYTES (from_type , to_type , begin , end , to ) \
147+ do { \
148+ const from_type *iter_; to_type *to_; \
149+ for (iter_ = (begin), to_ = (to_type *)(to); /* begin and to are each evaluated once */ \
150+ iter_ < (end); /* end is re-evaluated on every iteration */ \
151+ ++iter_, ++to_) { \
152+ *to_ = (to_type)*iter_; /* plain cast per element; the value is not range-checked here */ \
153+ } \
154+ } while (0) /* do/while(0) wrapper so the expansion behaves as one statement */
144155
145156/* The Unicode string has been modified: reset the hash */
146157#define _PyUnicode_DIRTY (op ) do { _PyUnicode_HASH(op) = -1; } while (0)
@@ -250,6 +261,57 @@ PyUnicode_GetMax(void)
250261#endif
251262}
252263
264+ #ifdef Py_DEBUG /* compiled in debug builds only */
265+ static int /* always 1, so the call can sit inside assert() through _PyUnicode_CHECK */
266+ _PyUnicode_CheckConsistency (void * op ) /* asserts that the state flags and pointers of op agree with each other */
267+ {
268+ PyASCIIObject * ascii ;
269+ unsigned int kind ;
270+
271+ assert (PyUnicode_Check (op )); /* the casts below are only meaningful for a unicode object */
272+
273+ ascii = (PyASCIIObject * )op ;
274+ kind = ascii -> state .kind ;
275+
276+ if (ascii -> state .ascii == 1 ) { /* case 1: ascii flag set -> 1-byte kind, compact, ready */
277+ assert (kind == PyUnicode_1BYTE_KIND );
278+ assert (ascii -> state .compact == 1 );
279+ assert (ascii -> state .ready == 1 );
280+ }
281+ else if (ascii -> state .compact == 1 ) { /* case 2: compact without the ascii flag -> any of the 1/2/4-byte kinds, ready */
282+ assert (kind == PyUnicode_1BYTE_KIND
283+ || kind == PyUnicode_2BYTE_KIND
284+ || kind == PyUnicode_4BYTE_KIND );
285+ assert (ascii -> state .compact == 1 ); /* NOTE(review): redundant, repeats the else-if condition above */
286+ assert (ascii -> state .ascii == 0 );
287+ assert (ascii -> state .ready == 1 );
288+ } else { /* case 3: neither of the above; the object is viewed through the two larger struct types */
289+ PyCompactUnicodeObject * compact = (PyCompactUnicodeObject * )op ;
290+ PyUnicodeObject * unicode = (PyUnicodeObject * )op ;
291+
292+ if (kind == PyUnicode_WCHAR_KIND ) { /* case 3a: not ready -> wstr set, data.any and utf8 NULL, not interned */
293+ assert (!ascii -> state .compact == 1 ); /* parses as (!compact) == 1, i.e. compact == 0 */
294+ assert (ascii -> state .ascii == 0 );
295+ assert (!ascii -> state .ready == 1 ); /* likewise: ready == 0 */
296+ assert (ascii -> wstr != NULL );
297+ assert (unicode -> data .any == NULL );
298+ assert (compact -> utf8 == NULL );
299+ assert (ascii -> state .interned == SSTATE_NOT_INTERNED );
300+ }
301+ else { /* case 3b: 1/2/4-byte kind -> not compact, ready, data.any set, ascii flag clear */
302+ assert (kind == PyUnicode_1BYTE_KIND
303+ || kind == PyUnicode_2BYTE_KIND
304+ || kind == PyUnicode_4BYTE_KIND );
305+ assert (!ascii -> state .compact == 1 ); /* (!compact) == 1, i.e. compact == 0 */
306+ assert (ascii -> state .ready == 1 );
307+ assert (unicode -> data .any != NULL );
308+ assert (ascii -> state .ascii == 0 );
309+ }
310+ }
311+ return 1 ; /* reached only when no assertion above fired */
312+ }
313+ #endif
314+
253315/* --- Bloom Filters ----------------------------------------------------- */
254316
255317/* stuff to implement simple "bloom filters" for Unicode characters.
@@ -542,7 +604,7 @@ _PyUnicode_New(Py_ssize_t length)
542604static const char *
543605unicode_kind_name (PyObject * unicode )
544606{
545- assert (PyUnicode_Check (unicode ));
607+ assert (_PyUnicode_CHECK (unicode ));
546608 if (!PyUnicode_IS_COMPACT (unicode ))
547609 {
548610 if (!PyUnicode_IS_READY (unicode ))
@@ -744,7 +806,8 @@ unicode_convert_wchar_to_ucs4(const wchar_t *begin, const wchar_t *end,
744806 const wchar_t * iter ;
745807 Py_UCS4 * ucs4_out ;
746808
747- assert (unicode && PyUnicode_Check (unicode ));
809+ assert (unicode != NULL );
810+ assert (_PyUnicode_CHECK (unicode ));
748811 assert (_PyUnicode_KIND (unicode ) == PyUnicode_4BYTE_KIND );
749812 ucs4_out = PyUnicode_4BYTE_DATA (unicode );
750813
@@ -771,7 +834,7 @@ unicode_convert_wchar_to_ucs4(const wchar_t *begin, const wchar_t *end,
771834static int
772835_PyUnicode_Dirty (PyObject * unicode )
773836{
774- assert (PyUnicode_Check (unicode ));
837+ assert (_PyUnicode_CHECK (unicode ));
775838 if (Py_REFCNT (unicode ) != 1 ) {
776839 PyErr_SetString (PyExc_ValueError ,
777840 "Cannot modify a string having more than 1 reference" );
@@ -966,10 +1029,8 @@ _PyUnicode_Ready(PyObject *obj)
9661029 strings were created using _PyObject_New() and where no canonical
9671030 representation (the str field) has been set yet aka strings
9681031 which are not yet ready. */
969- assert (PyUnicode_Check (obj ));
970- assert (!PyUnicode_IS_READY (obj ));
971- assert (!PyUnicode_IS_COMPACT (obj ));
972- assert (_PyUnicode_KIND (obj ) == PyUnicode_WCHAR_KIND );
1032+ assert (_PyUnicode_CHECK (unicode ));
1033+ assert (_PyUnicode_KIND (unicode ) == PyUnicode_WCHAR_KIND );
9731034 assert (_PyUnicode_WSTR (unicode ) != NULL );
9741035 assert (_PyUnicode_DATA_ANY (unicode ) == NULL );
9751036 assert (_PyUnicode_UTF8 (unicode ) == NULL );
@@ -1154,7 +1215,7 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
11541215 assert (PyUnicode_Check (unicode ));
11551216 assert (0 <= length );
11561217
1157- if (! PyUnicode_IS_COMPACT (unicode ) && ! PyUnicode_IS_READY ( unicode ) )
1218+ if (_PyUnicode_KIND (unicode ) == PyUnicode_WCHAR_KIND )
11581219 old_length = PyUnicode_WSTR_LENGTH (unicode );
11591220 else
11601221 old_length = PyUnicode_GET_LENGTH (unicode );
@@ -1907,7 +1968,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
19071968 case 'U' :
19081969 {
19091970 PyObject * obj = va_arg (count , PyObject * );
1910- assert (obj && PyUnicode_Check (obj ));
1971+ assert (obj && _PyUnicode_CHECK (obj ));
19111972 if (PyUnicode_READY (obj ) == -1 )
19121973 goto fail ;
19131974 argmaxchar = PyUnicode_MAX_CHAR_VALUE (obj );
@@ -1921,7 +1982,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
19211982 const char * str = va_arg (count , const char * );
19221983 PyObject * str_obj ;
19231984 assert (obj || str );
1924- assert (!obj || PyUnicode_Check (obj ));
1985+ assert (!obj || _PyUnicode_CHECK (obj ));
19251986 if (obj ) {
19261987 if (PyUnicode_READY (obj ) == -1 )
19271988 goto fail ;
@@ -9570,7 +9631,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
95709631 void * data ;
95719632 Py_UCS4 chr ;
95729633
9573- assert (PyUnicode_Check (uni ));
9634+ assert (_PyUnicode_CHECK (uni ));
95749635 if (PyUnicode_READY (uni ) == -1 )
95759636 return -1 ;
95769637 kind = PyUnicode_KIND (uni );
@@ -12698,7 +12759,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1269812759 unicode = (PyUnicodeObject * )unicode_new (& PyUnicode_Type , args , kwds );
1269912760 if (unicode == NULL )
1270012761 return NULL ;
12701- assert (PyUnicode_Check (unicode ));
12762+ assert (_PyUnicode_CHECK (unicode ));
1270212763 if (PyUnicode_READY (unicode ))
1270312764 return NULL ;
1270412765
@@ -13054,7 +13115,7 @@ unicodeiter_next(unicodeiterobject *it)
1305413115 seq = it -> it_seq ;
1305513116 if (seq == NULL )
1305613117 return NULL ;
13057- assert (PyUnicode_Check (seq ));
13118+ assert (_PyUnicode_CHECK (seq ));
1305813119
1305913120 if (it -> it_index < PyUnicode_GET_LENGTH (seq )) {
1306013121 int kind = PyUnicode_KIND (seq );
0 commit comments