@@ -231,28 +231,19 @@ get_unicode_state(void)
231
231
232
232
233
233
// Return a borrowed reference to the empty string singleton.
// This function itself performs no Py_INCREF() on the object it returns.
// Diff note: the "-" comment line below is removed by this change, and the
// "+" lines add an assertion that state->empty is already set, so with
// assertions enabled the function never returns NULL.
234
- // Return NULL if the singleton was not created yet.
235
234
static inline PyObject * unicode_get_empty (void )
236
235
{
237
236
struct _Py_unicode_state * state = get_unicode_state ();
237
+ // unicode_get_empty() must not be called before _PyUnicode_Init()
238
+ // or after _PyUnicode_Fini()
239
+ assert (state -> empty != NULL );
238
240
return state -> empty ;
239
241
}
240
242
241
243
// Return the empty string singleton after calling Py_INCREF() on it.
// Diff note: the "+" lines obtain the singleton from unicode_get_empty().
// The "-" lines are the removed path, which read state->empty directly and,
// when it was NULL, created the object with PyUnicode_New(0, 0) and stored it
// in state->empty; that removed path returned NULL when PyUnicode_New()
// returned NULL.
static inline PyObject * unicode_new_empty (void )
242
244
{
243
- struct _Py_unicode_state * state = get_unicode_state ();
244
- PyObject * empty = state -> empty ;
245
- if (empty != NULL ) {
246
- Py_INCREF (empty );
247
- }
248
- else {
249
- empty = PyUnicode_New (0 , 0 );
250
- if (empty != NULL ) {
251
- Py_INCREF (empty );
252
- assert (_PyUnicode_CheckConsistency (empty , 1 ));
253
- state -> empty = empty ;
254
- }
255
- }
245
+ PyObject * empty = unicode_get_empty ();
246
+ Py_INCREF (empty );
256
247
return empty ;
257
248
}
258
249
@@ -696,12 +687,9 @@ unicode_result_ready(PyObject *unicode)
696
687
PyObject * empty = unicode_get_empty ();
697
688
if (unicode != empty ) {
698
689
Py_DECREF (unicode );
699
-
700
690
Py_INCREF (empty );
701
- return empty ;
702
691
}
703
- // unicode is the empty string singleton
704
- return unicode ;
692
+ return empty ;
705
693
}
706
694
707
695
#ifdef LATIN1_SINGLETONS
@@ -959,7 +947,7 @@ ensure_unicode(PyObject *obj)
959
947
960
948
/* Compilation of templated routines */
961
949
962
- #define STRINGLIB_GET_EMPTY () unicode_get_empty()
950
+ #define STRINGLIB_GET_EMPTY () unicode_get_empty()
963
951
964
952
#include "stringlib/asciilib.h"
965
953
#include "stringlib/fastsearch.h"
@@ -1260,11 +1248,7 @@ _PyUnicode_New(Py_ssize_t length)
1260
1248
1261
1249
/* Optimization for empty strings */
1262
1250
if (length == 0 ) {
1263
- PyObject * empty = unicode_get_empty ();
1264
- if (empty != NULL ) {
1265
- Py_INCREF (empty );
1266
- return (PyUnicodeObject * )empty ;
1267
- }
1251
+ return (PyUnicodeObject * )unicode_new_empty ();
1268
1252
}
1269
1253
1270
1254
/* Ensure we won't overflow the size. */
@@ -1416,11 +1400,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
1416
1400
{
1417
1401
/* Optimization for empty strings */
1418
1402
if (size == 0 ) {
1419
- PyObject * empty = unicode_get_empty ();
1420
- if (empty != NULL ) {
1421
- Py_INCREF (empty );
1422
- return empty ;
1423
- }
1403
+ return unicode_new_empty ();
1424
1404
}
1425
1405
1426
1406
PyObject * obj ;
@@ -2001,8 +1981,7 @@ unicode_dealloc(PyObject *unicode)
2001
1981
static int
2002
1982
unicode_is_singleton (PyObject * unicode )
2003
1983
{
2004
- struct _Py_unicode_state * state = get_unicode_state ();
2005
- if (unicode == state -> empty ) {
1984
+ if (unicode == unicode_get_empty ()) {
2006
1985
return 1 ;
2007
1986
}
2008
1987
#ifdef LATIN1_SINGLETONS
@@ -2059,8 +2038,6 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
2059
2038
2060
2039
if (length == 0 ) {
2061
2040
PyObject * empty = unicode_new_empty ();
2062
- if (!empty )
2063
- return -1 ;
2064
2041
Py_SETREF (* p_unicode , empty );
2065
2042
return 0 ;
2066
2043
}
@@ -10868,10 +10845,7 @@ replace(PyObject *self, PyObject *str1,
10868
10845
}
10869
10846
new_size = slen + n * (len2 - len1 );
10870
10847
if (new_size == 0 ) {
10871
- PyObject * empty = unicode_new_empty ();
10872
- if (!empty )
10873
- goto error ;
10874
- u = empty ;
10848
+ u = unicode_new_empty ();
10875
10849
goto done ;
10876
10850
}
10877
10851
if (new_size > (PY_SSIZE_T_MAX / rkind )) {
@@ -13293,13 +13267,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
13293
13267
len2 = PyUnicode_GET_LENGTH (sep_obj );
13294
13268
if (kind1 < kind2 || len1 < len2 ) {
13295
13269
PyObject * empty = unicode_get_empty (); // Borrowed reference
13296
- if (!empty ) {
13297
- out = NULL ;
13298
- }
13299
- else {
13300
- out = PyTuple_Pack (3 , str_obj , empty , empty );
13301
- }
13302
- return out ;
13270
+ return PyTuple_Pack (3 , str_obj , empty , empty );
13303
13271
}
13304
13272
buf1 = PyUnicode_DATA (str_obj );
13305
13273
buf2 = PyUnicode_DATA (sep_obj );
@@ -13351,13 +13319,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
13351
13319
len2 = PyUnicode_GET_LENGTH (sep_obj );
13352
13320
if (kind1 < kind2 || len1 < len2 ) {
13353
13321
PyObject * empty = unicode_get_empty (); // Borrowed reference
13354
- if (!empty ) {
13355
- out = NULL ;
13356
- }
13357
- else {
13358
- out = PyTuple_Pack (3 , empty , empty , str_obj );
13359
- }
13360
- return out ;
13322
+ return PyTuple_Pack (3 , empty , empty , str_obj );
13361
13323
}
13362
13324
buf1 = PyUnicode_DATA (str_obj );
13363
13325
buf2 = PyUnicode_DATA (sep_obj );
@@ -15589,12 +15551,20 @@ _PyUnicode_Init(PyThreadState *tstate)
15589
15551
0x2029 , /* PARAGRAPH SEPARATOR */
15590
15552
};
15591
15553
15592
- /* Init the implementation */
15593
- PyObject * empty = unicode_new_empty ();
15594
- if (!empty ) {
15554
+ // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
15555
+ // optimized to always use state->empty without having to check if it is
15556
+ // NULL or not.
15557
+ PyObject * empty = PyUnicode_New (1 , 0 );
15558
+ if (empty == NULL ) {
15595
15559
return _PyStatus_NO_MEMORY ();
15596
15560
}
15597
- Py_DECREF (empty );
15561
+ PyUnicode_1BYTE_DATA (empty )[0 ] = 0 ;
15562
+ _PyUnicode_LENGTH (empty ) = 0 ;
15563
+ assert (_PyUnicode_CheckConsistency (empty , 1 ));
15564
+
15565
+ struct _Py_unicode_state * state = & tstate -> interp -> unicode ;
15566
+ assert (state -> empty == NULL );
15567
+ state -> empty = empty ;
15598
15568
15599
15569
if (_Py_IsMainInterpreter (tstate )) {
15600
15570
/* initialize the linebreak bloom filter */
0 commit comments