Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 90ed8a6

Browse files
authored
bpo-40521: Optimize PyUnicode_New(0, maxchar) (GH-21099)
Functions of unicodeobject.c, like PyUnicode_New(), no longer check whether the empty Unicode singleton has been initialized: it is now assumed to always be initialized. The Unicode API must not be used before _PyUnicode_Init() or after _PyUnicode_Fini().
1 parent f363d0a commit 90ed8a6

File tree

1 file changed

+25
-55
lines changed

1 file changed

+25
-55
lines changed

Objects/unicodeobject.c

+25-55
Original file line numberDiff line numberDiff line change
@@ -231,28 +231,19 @@ get_unicode_state(void)
231231

232232

233233
// Return a borrowed reference to the empty string singleton.
234-
// Return NULL if the singleton was not created yet.
235234
static inline PyObject* unicode_get_empty(void)
236235
{
237236
struct _Py_unicode_state *state = get_unicode_state();
237+
// unicode_get_empty() must not be called before _PyUnicode_Init()
238+
// or after _PyUnicode_Fini()
239+
assert(state->empty != NULL);
238240
return state->empty;
239241
}
240242

241243
static inline PyObject* unicode_new_empty(void)
242244
{
243-
struct _Py_unicode_state *state = get_unicode_state();
244-
PyObject *empty = state->empty;
245-
if (empty != NULL) {
246-
Py_INCREF(empty);
247-
}
248-
else {
249-
empty = PyUnicode_New(0, 0);
250-
if (empty != NULL) {
251-
Py_INCREF(empty);
252-
assert(_PyUnicode_CheckConsistency(empty, 1));
253-
state->empty = empty;
254-
}
255-
}
245+
PyObject *empty = unicode_get_empty();
246+
Py_INCREF(empty);
256247
return empty;
257248
}
258249

@@ -696,12 +687,9 @@ unicode_result_ready(PyObject *unicode)
696687
PyObject *empty = unicode_get_empty();
697688
if (unicode != empty) {
698689
Py_DECREF(unicode);
699-
700690
Py_INCREF(empty);
701-
return empty;
702691
}
703-
// unicode is the empty string singleton
704-
return unicode;
692+
return empty;
705693
}
706694

707695
#ifdef LATIN1_SINGLETONS
@@ -959,7 +947,7 @@ ensure_unicode(PyObject *obj)
959947

960948
/* Compilation of templated routines */
961949

962-
#define STRINGLIB_GET_EMPTY() unicode_get_empty()
950+
#define STRINGLIB_GET_EMPTY() unicode_get_empty()
963951

964952
#include "stringlib/asciilib.h"
965953
#include "stringlib/fastsearch.h"
@@ -1260,11 +1248,7 @@ _PyUnicode_New(Py_ssize_t length)
12601248

12611249
/* Optimization for empty strings */
12621250
if (length == 0) {
1263-
PyObject *empty = unicode_get_empty();
1264-
if (empty != NULL) {
1265-
Py_INCREF(empty);
1266-
return (PyUnicodeObject *)empty;
1267-
}
1251+
return (PyUnicodeObject *)unicode_new_empty();
12681252
}
12691253

12701254
/* Ensure we won't overflow the size. */
@@ -1416,11 +1400,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
14161400
{
14171401
/* Optimization for empty strings */
14181402
if (size == 0) {
1419-
PyObject *empty = unicode_get_empty();
1420-
if (empty != NULL) {
1421-
Py_INCREF(empty);
1422-
return empty;
1423-
}
1403+
return unicode_new_empty();
14241404
}
14251405

14261406
PyObject *obj;
@@ -2001,8 +1981,7 @@ unicode_dealloc(PyObject *unicode)
20011981
static int
20021982
unicode_is_singleton(PyObject *unicode)
20031983
{
2004-
struct _Py_unicode_state *state = get_unicode_state();
2005-
if (unicode == state->empty) {
1984+
if (unicode == unicode_get_empty()) {
20061985
return 1;
20071986
}
20081987
#ifdef LATIN1_SINGLETONS
@@ -2059,8 +2038,6 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
20592038

20602039
if (length == 0) {
20612040
PyObject *empty = unicode_new_empty();
2062-
if (!empty)
2063-
return -1;
20642041
Py_SETREF(*p_unicode, empty);
20652042
return 0;
20662043
}
@@ -10868,10 +10845,7 @@ replace(PyObject *self, PyObject *str1,
1086810845
}
1086910846
new_size = slen + n * (len2 - len1);
1087010847
if (new_size == 0) {
10871-
PyObject *empty = unicode_new_empty();
10872-
if (!empty)
10873-
goto error;
10874-
u = empty;
10848+
u = unicode_new_empty();
1087510849
goto done;
1087610850
}
1087710851
if (new_size > (PY_SSIZE_T_MAX / rkind)) {
@@ -13293,13 +13267,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
1329313267
len2 = PyUnicode_GET_LENGTH(sep_obj);
1329413268
if (kind1 < kind2 || len1 < len2) {
1329513269
PyObject *empty = unicode_get_empty(); // Borrowed reference
13296-
if (!empty) {
13297-
out = NULL;
13298-
}
13299-
else {
13300-
out = PyTuple_Pack(3, str_obj, empty, empty);
13301-
}
13302-
return out;
13270+
return PyTuple_Pack(3, str_obj, empty, empty);
1330313271
}
1330413272
buf1 = PyUnicode_DATA(str_obj);
1330513273
buf2 = PyUnicode_DATA(sep_obj);
@@ -13351,13 +13319,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
1335113319
len2 = PyUnicode_GET_LENGTH(sep_obj);
1335213320
if (kind1 < kind2 || len1 < len2) {
1335313321
PyObject *empty = unicode_get_empty(); // Borrowed reference
13354-
if (!empty) {
13355-
out = NULL;
13356-
}
13357-
else {
13358-
out = PyTuple_Pack(3, empty, empty, str_obj);
13359-
}
13360-
return out;
13322+
return PyTuple_Pack(3, empty, empty, str_obj);
1336113323
}
1336213324
buf1 = PyUnicode_DATA(str_obj);
1336313325
buf2 = PyUnicode_DATA(sep_obj);
@@ -15589,12 +15551,20 @@ _PyUnicode_Init(PyThreadState *tstate)
1558915551
0x2029, /* PARAGRAPH SEPARATOR */
1559015552
};
1559115553

15592-
/* Init the implementation */
15593-
PyObject *empty = unicode_new_empty();
15594-
if (!empty) {
15554+
// Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
15555+
// optimized to always use state->empty without having to check if it is
15556+
// NULL or not.
15557+
PyObject *empty = PyUnicode_New(1, 0);
15558+
if (empty == NULL) {
1559515559
return _PyStatus_NO_MEMORY();
1559615560
}
15597-
Py_DECREF(empty);
15561+
PyUnicode_1BYTE_DATA(empty)[0] = 0;
15562+
_PyUnicode_LENGTH(empty) = 0;
15563+
assert(_PyUnicode_CheckConsistency(empty, 1));
15564+
15565+
struct _Py_unicode_state *state = &tstate->interp->unicode;
15566+
assert(state->empty == NULL);
15567+
state->empty = empty;
1559815568

1559915569
if (_Py_IsMainInterpreter(tstate)) {
1560015570
/* initialize the linebreak bloom filter */

0 commit comments

Comments
 (0)