@@ -57,8 +57,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
5757
5858/* --- Globals ------------------------------------------------------------
5959
60- The globals are initialized by the _PyUnicode_Init() API and should
61- not be used before calling that API.
60+ NOTE: In the interpreter's initialization phase, some globals are currently
61+ initialized dynamically as needed. In the process Unicode objects may
62+ be created before the Unicode type is ready.
6263
6364*/
6465
@@ -179,17 +180,36 @@ extern "C" {
179180 Another way to look at this is to say that the actual reference
180181 count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
181182*/
182- static PyObject * interned ;
183+ static PyObject * interned = NULL ;
183184
184185/* The empty Unicode object is shared to improve performance. */
185- static PyObject * unicode_empty ;
186+ static PyObject * unicode_empty = NULL ;
187+
/* Take a new reference to the shared empty string, creating it on first use.
   When unicode_empty is still NULL it is allocated with PyUnicode_New(0, 0)
   and then INCREF'ed once more, so that one reference stays with the global
   and one belongs to the caller.  If that allocation fails, unicode_empty is
   left NULL and no reference is taken: callers must test unicode_empty after
   invoking this macro. */
188+ #define _Py_INCREF_UNICODE_EMPTY () \
189+ do { \
190+ if (unicode_empty != NULL) \
191+ Py_INCREF(unicode_empty); \
192+ else { \
193+ unicode_empty = PyUnicode_New(0, 0); \
194+ if (unicode_empty != NULL) { \
195+ Py_INCREF(unicode_empty); \
196+ assert(_PyUnicode_CheckConsistency(unicode_empty, 1)); \
197+ } \
198+ } \
199+ } while (0)
200+
/* Return a new reference to the shared empty string from the enclosing
   function.  This expands to a `return` statement, so it is only usable in
   functions whose return type is PyObject *, and any cleanup must be done
   before invoking it.  The value returned is NULL when the empty string
   could not be created. */
201+ #define _Py_RETURN_UNICODE_EMPTY () \
202+ do { \
203+ _Py_INCREF_UNICODE_EMPTY(); \
204+ return unicode_empty; \
205+ } while (0)
186206
187207/* List of static strings. */
188- static _Py_Identifier * static_strings ;
208+ static _Py_Identifier * static_strings = NULL ;
189209
190210/* Single character Unicode strings in the Latin-1 range are being
191211 shared as well. */
192- static PyObject * unicode_latin1 [256 ];
212+ static PyObject * unicode_latin1 [256 ] = { NULL } ;
193213
194214/* Fast detection of the most frequent whitespace characters */
195215const unsigned char _Py_ascii_whitespace [] = {
@@ -416,9 +436,8 @@ unicode_result_wchar(PyObject *unicode)
416436
417437 len = _PyUnicode_WSTR_LENGTH (unicode );
418438 if (len == 0 ) {
419- Py_INCREF (unicode_empty );
420439 Py_DECREF (unicode );
421- return unicode_empty ;
440+ _Py_RETURN_UNICODE_EMPTY () ;
422441 }
423442
424443 if (len == 1 ) {
@@ -450,8 +469,8 @@ unicode_result_ready(PyObject *unicode)
450469 length = PyUnicode_GET_LENGTH (unicode );
451470 if (length == 0 ) {
452471 if (unicode != unicode_empty ) {
453- Py_INCREF (unicode_empty );
454472 Py_DECREF (unicode );
473+ _Py_RETURN_UNICODE_EMPTY ();
455474 }
456475 return unicode_empty ;
457476 }
@@ -528,7 +547,7 @@ static OSVERSIONINFOEX winver;
528547
/* Integer type that holds a bloom mask. */
529548#define BLOOM_MASK unsigned long
530549
531- static BLOOM_MASK bloom_linebreak ;
/* Starts with every bit set, so BLOOM(bloom_linebreak, ch) is non-zero for
   any ch: the filter never rejects a character while it holds this value.
   NOTE(review): presumably _PyUnicode_Init() later replaces it with the mask
   built from its linebreak table -- that assignment is outside this hunk;
   confirm. */
550+ static BLOOM_MASK bloom_linebreak = ~( BLOOM_MASK ) 0 ;
532551
/* BLOOM_ADD sets, and BLOOM tests, the single bit of `mask` selected by
   ch & (BLOOM_WIDTH - 1).  BLOOM_ADD modifies `mask` in place. */
533552#define BLOOM_ADD (mask , ch ) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
534553#define BLOOM (mask , ch ) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
@@ -1582,9 +1601,11 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
15821601 return 0 ;
15831602
15841603 if (length == 0 ) {
1604+ _Py_INCREF_UNICODE_EMPTY ();
1605+ if (!unicode_empty )
1606+ return -1 ;
15851607 Py_DECREF (* p_unicode );
15861608 * p_unicode = unicode_empty ;
1587- Py_INCREF (* p_unicode );
15881609 return 0 ;
15891610 }
15901611
@@ -1731,10 +1752,8 @@ PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
17311752 some optimizations which share commonly used objects. */
17321753
17331754 /* Optimization for empty strings */
1734- if (size == 0 && unicode_empty != NULL ) {
1735- Py_INCREF (unicode_empty );
1736- return unicode_empty ;
1737- }
1755+ if (size == 0 )
1756+ _Py_RETURN_UNICODE_EMPTY ();
17381757
17391758 /* Single character Unicode objects in the Latin-1 range are
17401759 shared when using this constructor */
@@ -1893,10 +1912,8 @@ _PyUnicode_FromUCS1(const unsigned char* u, Py_ssize_t size)
18931912 PyObject * res ;
18941913 unsigned char max_char ;
18951914
1896- if (size == 0 ) {
1897- Py_INCREF (unicode_empty );
1898- return unicode_empty ;
1899- }
1915+ if (size == 0 )
1916+ _Py_RETURN_UNICODE_EMPTY ();
19001917 assert (size > 0 );
19011918 if (size == 1 )
19021919 return get_latin1_char (u [0 ]);
@@ -1916,10 +1933,8 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
19161933 PyObject * res ;
19171934 Py_UCS2 max_char ;
19181935
1919- if (size == 0 ) {
1920- Py_INCREF (unicode_empty );
1921- return unicode_empty ;
1922- }
1936+ if (size == 0 )
1937+ _Py_RETURN_UNICODE_EMPTY ();
19231938 assert (size > 0 );
19241939 if (size == 1 ) {
19251940 Py_UCS4 ch = u [0 ];
@@ -1954,10 +1969,8 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
19541969 PyObject * res ;
19551970 Py_UCS4 max_char ;
19561971
1957- if (size == 0 ) {
1958- Py_INCREF (unicode_empty );
1959- return unicode_empty ;
1960- }
1972+ if (size == 0 )
1973+ _Py_RETURN_UNICODE_EMPTY ();
19611974 assert (size > 0 );
19621975 if (size == 1 ) {
19631976 Py_UCS4 ch = u [0 ];
@@ -2249,10 +2262,8 @@ PyObject *
22492262PyUnicode_FromWideChar (register const wchar_t * w , Py_ssize_t size )
22502263{
22512264 if (w == NULL ) {
2252- if (size == 0 ) {
2253- Py_INCREF (unicode_empty );
2254- return unicode_empty ;
2255- }
2265+ if (size == 0 )
2266+ _Py_RETURN_UNICODE_EMPTY ();
22562267 PyErr_BadInternalCall ();
22572268 return NULL ;
22582269 }
@@ -3007,15 +3018,11 @@ PyUnicode_FromEncodedObject(register PyObject *obj,
30073018
30083019 /* Decoding bytes objects is the most common case and should be fast */
30093020 if (PyBytes_Check (obj )) {
3010- if (PyBytes_GET_SIZE (obj ) == 0 ) {
3011- Py_INCREF (unicode_empty );
3012- v = unicode_empty ;
3013- }
3014- else {
3015- v = PyUnicode_Decode (
3016- PyBytes_AS_STRING (obj ), PyBytes_GET_SIZE (obj ),
3017- encoding , errors );
3018- }
3021+ if (PyBytes_GET_SIZE (obj ) == 0 )
3022+ _Py_RETURN_UNICODE_EMPTY ();
3023+ v = PyUnicode_Decode (
3024+ PyBytes_AS_STRING (obj ), PyBytes_GET_SIZE (obj ),
3025+ encoding , errors );
30193026 return v ;
30203027 }
30213028
@@ -3035,12 +3042,11 @@ PyUnicode_FromEncodedObject(register PyObject *obj,
30353042 }
30363043
30373044 if (buffer .len == 0 ) {
3038- Py_INCREF ( unicode_empty );
3039- v = unicode_empty ;
3045+ PyBuffer_Release ( & buffer );
3046+ _Py_RETURN_UNICODE_EMPTY () ;
30403047 }
3041- else
3042- v = PyUnicode_Decode ((char * ) buffer .buf , buffer .len , encoding , errors );
30433048
3049+ v = PyUnicode_Decode ((char * ) buffer .buf , buffer .len , encoding , errors );
30443050 PyBuffer_Release (& buffer );
30453051 return v ;
30463052}
@@ -4720,8 +4726,7 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
47204726 if (size == 0 ) {
47214727 if (consumed )
47224728 * consumed = 0 ;
4723- Py_INCREF (unicode_empty );
4724- return unicode_empty ;
4729+ _Py_RETURN_UNICODE_EMPTY ();
47254730 }
47264731
47274732 /* ASCII is equivalent to the first 128 ordinals in Unicode. */
@@ -5232,8 +5237,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
52325237 if (q == e ) {
52335238 if (consumed )
52345239 * consumed = size ;
5235- Py_INCREF (unicode_empty );
5236- return unicode_empty ;
5240+ _Py_RETURN_UNICODE_EMPTY ();
52375241 }
52385242
52395243#ifdef BYTEORDER_IS_LITTLE_ENDIAN
@@ -6558,10 +6562,8 @@ PyUnicode_DecodeASCII(const char *s,
65586562 PyObject * errorHandler = NULL ;
65596563 PyObject * exc = NULL ;
65606564
6561- if (size == 0 ) {
6562- Py_INCREF (unicode_empty );
6563- return unicode_empty ;
6564- }
6565+ if (size == 0 )
6566+ _Py_RETURN_UNICODE_EMPTY ();
65656567
65666568 /* ASCII is equivalent to the first 128 ordinals in Unicode. */
65676569 if (size == 1 && (unsigned char )s [0 ] < 128 )
@@ -6940,8 +6942,7 @@ decode_code_page_stateful(int code_page,
69406942 if (chunk_size == 0 && done ) {
69416943 if (v != NULL )
69426944 break ;
6943- Py_INCREF (unicode_empty );
6944- return unicode_empty ;
6945+ _Py_RETURN_UNICODE_EMPTY ();
69456946 }
69466947
69476948
@@ -9503,9 +9504,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
95039504 /* If empty sequence, return u"". */
95049505 if (seqlen == 0 ) {
95059506 Py_DECREF (fseq );
9506- Py_INCREF (unicode_empty );
9507- res = unicode_empty ;
9508- return res ;
9507+ _Py_RETURN_UNICODE_EMPTY ();
95099508 }
95109509
95119510 /* If singleton sequence with an exact Unicode, return that. */
@@ -10205,7 +10204,9 @@ replace(PyObject *self, PyObject *str1,
1020510204 }
1020610205 new_size = slen + n * (len2 - len1 );
1020710206 if (new_size == 0 ) {
10208- Py_INCREF (unicode_empty );
10207+ _Py_INCREF_UNICODE_EMPTY ();
10208+ if (!unicode_empty )
10209+ goto error ;
1020910210 u = unicode_empty ;
1021010211 goto done ;
1021110212 }
@@ -11672,10 +11673,8 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
1167211673 PyErr_SetString (PyExc_IndexError , "string index out of range" );
1167311674 return NULL ;
1167411675 }
11675- if (start >= length || end < start ) {
11676- Py_INCREF (unicode_empty );
11677- return unicode_empty ;
11678- }
11676+ if (start >= length || end < start )
11677+ _Py_RETURN_UNICODE_EMPTY ();
1167911678
1168011679 length = end - start ;
1168111680 if (PyUnicode_IS_ASCII (self )) {
@@ -11802,10 +11801,8 @@ unicode_repeat(PyObject *str, Py_ssize_t len)
1180211801 PyObject * u ;
1180311802 Py_ssize_t nchars , n ;
1180411803
11805- if (len < 1 ) {
11806- Py_INCREF (unicode_empty );
11807- return unicode_empty ;
11808- }
11804+ if (len < 1 )
11805+ _Py_RETURN_UNICODE_EMPTY ();
1180911806
1181011807 /* no repeat, return original string */
1181111808 if (len == 1 )
@@ -12924,8 +12921,7 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
1292412921{
1292512922 if (writer -> pos == 0 ) {
1292612923 Py_XDECREF (writer -> buffer );
12927- Py_INCREF (unicode_empty );
12928- return unicode_empty ;
12924+ _Py_RETURN_UNICODE_EMPTY ();
1292912925 }
1293012926 if (writer -> readonly ) {
1293112927 assert (PyUnicode_GET_LENGTH (writer -> buffer ) == writer -> pos );
@@ -13143,8 +13139,7 @@ unicode_subscript(PyObject* self, PyObject* item)
1314313139 }
1314413140
1314513141 if (slicelength <= 0 ) {
13146- Py_INCREF (unicode_empty );
13147- return unicode_empty ;
13142+ _Py_RETURN_UNICODE_EMPTY ();
1314813143 } else if (start == 0 && step == 1 &&
1314913144 slicelength == PyUnicode_GET_LENGTH (self )) {
1315013145 return unicode_result_unchanged (self );
@@ -13974,10 +13969,8 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1397413969 if (!PyArg_ParseTupleAndKeywords (args , kwds , "|Oss:str" ,
1397513970 kwlist , & x , & encoding , & errors ))
1397613971 return NULL ;
13977- if (x == NULL ) {
13978- Py_INCREF (unicode_empty );
13979- return unicode_empty ;
13980- }
13972+ if (x == NULL )
13973+ _Py_RETURN_UNICODE_EMPTY ();
1398113974 if (encoding == NULL && errors == NULL )
1398213975 return PyObject_Str (x );
1398313976 else
@@ -14146,8 +14139,6 @@ PyTypeObject PyUnicode_Type = {
1414614139
1414714140int _PyUnicode_Init (void )
1414814141{
14149- int i ;
14150-
1415114142 /* XXX - move this array to unicodectype.c ? */
1415214143 Py_UCS2 linebreak [] = {
1415314144 0x000A , /* LINE FEED */
@@ -14161,13 +14152,11 @@ int _PyUnicode_Init(void)
1416114152 };
1416214153
1416314154 /* Init the implementation */
14164- unicode_empty = PyUnicode_New ( 0 , 0 );
14155+ _Py_INCREF_UNICODE_EMPTY ( );
1416514156 if (!unicode_empty )
1416614157 Py_FatalError ("Can't create empty string" );
14167- assert ( _PyUnicode_CheckConsistency ( unicode_empty , 1 ) );
14158+ Py_DECREF ( unicode_empty );
1416814159
14169- for (i = 0 ; i < 256 ; i ++ )
14170- unicode_latin1 [i ] = NULL ;
1417114160 if (PyType_Ready (& PyUnicode_Type ) < 0 )
1417214161 Py_FatalError ("Can't initialize 'unicode'" );
1417314162
@@ -14207,15 +14196,10 @@ _PyUnicode_Fini(void)
1420714196{
1420814197 int i ;
1420914198
14210- Py_XDECREF (unicode_empty );
14211- unicode_empty = NULL ;
14199+ Py_CLEAR (unicode_empty );
1421214200
14213- for (i = 0 ; i < 256 ; i ++ ) {
14214- if (unicode_latin1 [i ]) {
14215- Py_DECREF (unicode_latin1 [i ]);
14216- unicode_latin1 [i ] = NULL ;
14217- }
14218- }
14201+ for (i = 0 ; i < 256 ; i ++ )
14202+ Py_CLEAR (unicode_latin1 [i ]);
1421914203 _PyUnicode_ClearStaticStrings ();
1422014204 (void )PyUnicode_ClearFreeList ();
1422114205}
@@ -14344,8 +14328,7 @@ _Py_ReleaseInternedUnicodeStrings(void)
1434414328 "mortal/immortal\n" , mortal_size , immortal_size );
1434514329 Py_DECREF (keys );
1434614330 PyDict_Clear (interned );
14347- Py_DECREF (interned );
14348- interned = NULL ;
14331+ Py_CLEAR (interned );
1434914332}
1435014333
1435114334
0 commit comments