Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 678db84

Browse files
Issue #10156: In the interpreter's initialization phase, unicode globals
are now initialized dynamically as needed.
2 parents 1c7181d + 0599725 commit 678db84

2 files changed

Lines changed: 76 additions & 90 deletions

File tree

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ What's New in Python 3.3.1?
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue #10156: In the interpreter's initialization phase, unicode globals
16+
are now initialized dynamically as needed.
17+
1518
- Issue #16980: Fix processing of escaped non-ascii bytes in the
1619
unicode-escape-decode decoder.
1720

Objects/unicodeobject.c

Lines changed: 73 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,9 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
5757

5858
/* --- Globals ------------------------------------------------------------
5959
60-
The globals are initialized by the _PyUnicode_Init() API and should
61-
not be used before calling that API.
60+
NOTE: In the interpreter's initialization phase, some globals are currently
61+
initialized dynamically as needed. In the process Unicode objects may
62+
be created before the Unicode type is ready.
6263
6364
*/
6465

@@ -179,17 +180,36 @@ extern "C" {
179180
Another way to look at this is to say that the actual reference
180181
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
181182
*/
182-
static PyObject *interned;
183+
static PyObject *interned = NULL;
183184

184185
/* The empty Unicode object is shared to improve performance. */
185-
static PyObject *unicode_empty;
186+
static PyObject *unicode_empty = NULL;
187+
188+
#define _Py_INCREF_UNICODE_EMPTY() \
189+
do { \
190+
if (unicode_empty != NULL) \
191+
Py_INCREF(unicode_empty); \
192+
else { \
193+
unicode_empty = PyUnicode_New(0, 0); \
194+
if (unicode_empty != NULL) { \
195+
Py_INCREF(unicode_empty); \
196+
assert(_PyUnicode_CheckConsistency(unicode_empty, 1)); \
197+
} \
198+
} \
199+
} while (0)
200+
201+
#define _Py_RETURN_UNICODE_EMPTY() \
202+
do { \
203+
_Py_INCREF_UNICODE_EMPTY(); \
204+
return unicode_empty; \
205+
} while (0)
186206

187207
/* List of static strings. */
188-
static _Py_Identifier *static_strings;
208+
static _Py_Identifier *static_strings = NULL;
189209

190210
/* Single character Unicode strings in the Latin-1 range are being
191211
shared as well. */
192-
static PyObject *unicode_latin1[256];
212+
static PyObject *unicode_latin1[256] = {NULL};
193213

194214
/* Fast detection of the most frequent whitespace characters */
195215
const unsigned char _Py_ascii_whitespace[] = {
@@ -416,9 +436,8 @@ unicode_result_wchar(PyObject *unicode)
416436

417437
len = _PyUnicode_WSTR_LENGTH(unicode);
418438
if (len == 0) {
419-
Py_INCREF(unicode_empty);
420439
Py_DECREF(unicode);
421-
return unicode_empty;
440+
_Py_RETURN_UNICODE_EMPTY();
422441
}
423442

424443
if (len == 1) {
@@ -450,8 +469,8 @@ unicode_result_ready(PyObject *unicode)
450469
length = PyUnicode_GET_LENGTH(unicode);
451470
if (length == 0) {
452471
if (unicode != unicode_empty) {
453-
Py_INCREF(unicode_empty);
454472
Py_DECREF(unicode);
473+
_Py_RETURN_UNICODE_EMPTY();
455474
}
456475
return unicode_empty;
457476
}
@@ -528,7 +547,7 @@ static OSVERSIONINFOEX winver;
528547

529548
#define BLOOM_MASK unsigned long
530549

531-
static BLOOM_MASK bloom_linebreak;
550+
static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
532551

533552
#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
534553
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
@@ -1582,9 +1601,11 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
15821601
return 0;
15831602

15841603
if (length == 0) {
1604+
_Py_INCREF_UNICODE_EMPTY();
1605+
if (!unicode_empty)
1606+
return -1;
15851607
Py_DECREF(*p_unicode);
15861608
*p_unicode = unicode_empty;
1587-
Py_INCREF(*p_unicode);
15881609
return 0;
15891610
}
15901611

@@ -1731,10 +1752,8 @@ PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size)
17311752
some optimizations which share commonly used objects. */
17321753

17331754
/* Optimization for empty strings */
1734-
if (size == 0 && unicode_empty != NULL) {
1735-
Py_INCREF(unicode_empty);
1736-
return unicode_empty;
1737-
}
1755+
if (size == 0)
1756+
_Py_RETURN_UNICODE_EMPTY();
17381757

17391758
/* Single character Unicode objects in the Latin-1 range are
17401759
shared when using this constructor */
@@ -1893,10 +1912,8 @@ _PyUnicode_FromUCS1(const unsigned char* u, Py_ssize_t size)
18931912
PyObject *res;
18941913
unsigned char max_char;
18951914

1896-
if (size == 0) {
1897-
Py_INCREF(unicode_empty);
1898-
return unicode_empty;
1899-
}
1915+
if (size == 0)
1916+
_Py_RETURN_UNICODE_EMPTY();
19001917
assert(size > 0);
19011918
if (size == 1)
19021919
return get_latin1_char(u[0]);
@@ -1916,10 +1933,8 @@ _PyUnicode_FromUCS2(const Py_UCS2 *u, Py_ssize_t size)
19161933
PyObject *res;
19171934
Py_UCS2 max_char;
19181935

1919-
if (size == 0) {
1920-
Py_INCREF(unicode_empty);
1921-
return unicode_empty;
1922-
}
1936+
if (size == 0)
1937+
_Py_RETURN_UNICODE_EMPTY();
19231938
assert(size > 0);
19241939
if (size == 1) {
19251940
Py_UCS4 ch = u[0];
@@ -1954,10 +1969,8 @@ _PyUnicode_FromUCS4(const Py_UCS4 *u, Py_ssize_t size)
19541969
PyObject *res;
19551970
Py_UCS4 max_char;
19561971

1957-
if (size == 0) {
1958-
Py_INCREF(unicode_empty);
1959-
return unicode_empty;
1960-
}
1972+
if (size == 0)
1973+
_Py_RETURN_UNICODE_EMPTY();
19611974
assert(size > 0);
19621975
if (size == 1) {
19631976
Py_UCS4 ch = u[0];
@@ -2249,10 +2262,8 @@ PyObject *
22492262
PyUnicode_FromWideChar(register const wchar_t *w, Py_ssize_t size)
22502263
{
22512264
if (w == NULL) {
2252-
if (size == 0) {
2253-
Py_INCREF(unicode_empty);
2254-
return unicode_empty;
2255-
}
2265+
if (size == 0)
2266+
_Py_RETURN_UNICODE_EMPTY();
22562267
PyErr_BadInternalCall();
22572268
return NULL;
22582269
}
@@ -3007,15 +3018,11 @@ PyUnicode_FromEncodedObject(register PyObject *obj,
30073018

30083019
/* Decoding bytes objects is the most common case and should be fast */
30093020
if (PyBytes_Check(obj)) {
3010-
if (PyBytes_GET_SIZE(obj) == 0) {
3011-
Py_INCREF(unicode_empty);
3012-
v = unicode_empty;
3013-
}
3014-
else {
3015-
v = PyUnicode_Decode(
3016-
PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
3017-
encoding, errors);
3018-
}
3021+
if (PyBytes_GET_SIZE(obj) == 0)
3022+
_Py_RETURN_UNICODE_EMPTY();
3023+
v = PyUnicode_Decode(
3024+
PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
3025+
encoding, errors);
30193026
return v;
30203027
}
30213028

@@ -3035,12 +3042,11 @@ PyUnicode_FromEncodedObject(register PyObject *obj,
30353042
}
30363043

30373044
if (buffer.len == 0) {
3038-
Py_INCREF(unicode_empty);
3039-
v = unicode_empty;
3045+
PyBuffer_Release(&buffer);
3046+
_Py_RETURN_UNICODE_EMPTY();
30403047
}
3041-
else
3042-
v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
30433048

3049+
v = PyUnicode_Decode((char*) buffer.buf, buffer.len, encoding, errors);
30443050
PyBuffer_Release(&buffer);
30453051
return v;
30463052
}
@@ -4720,8 +4726,7 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
47204726
if (size == 0) {
47214727
if (consumed)
47224728
*consumed = 0;
4723-
Py_INCREF(unicode_empty);
4724-
return unicode_empty;
4729+
_Py_RETURN_UNICODE_EMPTY();
47254730
}
47264731

47274732
/* ASCII is equivalent to the first 128 ordinals in Unicode. */
@@ -5232,8 +5237,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
52325237
if (q == e) {
52335238
if (consumed)
52345239
*consumed = size;
5235-
Py_INCREF(unicode_empty);
5236-
return unicode_empty;
5240+
_Py_RETURN_UNICODE_EMPTY();
52375241
}
52385242

52395243
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
@@ -6558,10 +6562,8 @@ PyUnicode_DecodeASCII(const char *s,
65586562
PyObject *errorHandler = NULL;
65596563
PyObject *exc = NULL;
65606564

6561-
if (size == 0) {
6562-
Py_INCREF(unicode_empty);
6563-
return unicode_empty;
6564-
}
6565+
if (size == 0)
6566+
_Py_RETURN_UNICODE_EMPTY();
65656567

65666568
/* ASCII is equivalent to the first 128 ordinals in Unicode. */
65676569
if (size == 1 && (unsigned char)s[0] < 128)
@@ -6940,8 +6942,7 @@ decode_code_page_stateful(int code_page,
69406942
if (chunk_size == 0 && done) {
69416943
if (v != NULL)
69426944
break;
6943-
Py_INCREF(unicode_empty);
6944-
return unicode_empty;
6945+
_Py_RETURN_UNICODE_EMPTY();
69456946
}
69466947

69476948

@@ -9503,9 +9504,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
95039504
/* If empty sequence, return u"". */
95049505
if (seqlen == 0) {
95059506
Py_DECREF(fseq);
9506-
Py_INCREF(unicode_empty);
9507-
res = unicode_empty;
9508-
return res;
9507+
_Py_RETURN_UNICODE_EMPTY();
95099508
}
95109509

95119510
/* If singleton sequence with an exact Unicode, return that. */
@@ -10205,7 +10204,9 @@ replace(PyObject *self, PyObject *str1,
1020510204
}
1020610205
new_size = slen + n * (len2 - len1);
1020710206
if (new_size == 0) {
10208-
Py_INCREF(unicode_empty);
10207+
_Py_INCREF_UNICODE_EMPTY();
10208+
if (!unicode_empty)
10209+
goto error;
1020910210
u = unicode_empty;
1021010211
goto done;
1021110212
}
@@ -11672,10 +11673,8 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
1167211673
PyErr_SetString(PyExc_IndexError, "string index out of range");
1167311674
return NULL;
1167411675
}
11675-
if (start >= length || end < start) {
11676-
Py_INCREF(unicode_empty);
11677-
return unicode_empty;
11678-
}
11676+
if (start >= length || end < start)
11677+
_Py_RETURN_UNICODE_EMPTY();
1167911678

1168011679
length = end - start;
1168111680
if (PyUnicode_IS_ASCII(self)) {
@@ -11802,10 +11801,8 @@ unicode_repeat(PyObject *str, Py_ssize_t len)
1180211801
PyObject *u;
1180311802
Py_ssize_t nchars, n;
1180411803

11805-
if (len < 1) {
11806-
Py_INCREF(unicode_empty);
11807-
return unicode_empty;
11808-
}
11804+
if (len < 1)
11805+
_Py_RETURN_UNICODE_EMPTY();
1180911806

1181011807
/* no repeat, return original string */
1181111808
if (len == 1)
@@ -12924,8 +12921,7 @@ _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
1292412921
{
1292512922
if (writer->pos == 0) {
1292612923
Py_XDECREF(writer->buffer);
12927-
Py_INCREF(unicode_empty);
12928-
return unicode_empty;
12924+
_Py_RETURN_UNICODE_EMPTY();
1292912925
}
1293012926
if (writer->readonly) {
1293112927
assert(PyUnicode_GET_LENGTH(writer->buffer) == writer->pos);
@@ -13143,8 +13139,7 @@ unicode_subscript(PyObject* self, PyObject* item)
1314313139
}
1314413140

1314513141
if (slicelength <= 0) {
13146-
Py_INCREF(unicode_empty);
13147-
return unicode_empty;
13142+
_Py_RETURN_UNICODE_EMPTY();
1314813143
} else if (start == 0 && step == 1 &&
1314913144
slicelength == PyUnicode_GET_LENGTH(self)) {
1315013145
return unicode_result_unchanged(self);
@@ -13974,10 +13969,8 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1397413969
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str",
1397513970
kwlist, &x, &encoding, &errors))
1397613971
return NULL;
13977-
if (x == NULL) {
13978-
Py_INCREF(unicode_empty);
13979-
return unicode_empty;
13980-
}
13972+
if (x == NULL)
13973+
_Py_RETURN_UNICODE_EMPTY();
1398113974
if (encoding == NULL && errors == NULL)
1398213975
return PyObject_Str(x);
1398313976
else
@@ -14146,8 +14139,6 @@ PyTypeObject PyUnicode_Type = {
1414614139

1414714140
int _PyUnicode_Init(void)
1414814141
{
14149-
int i;
14150-
1415114142
/* XXX - move this array to unicodectype.c ? */
1415214143
Py_UCS2 linebreak[] = {
1415314144
0x000A, /* LINE FEED */
@@ -14161,13 +14152,11 @@ int _PyUnicode_Init(void)
1416114152
};
1416214153

1416314154
/* Init the implementation */
14164-
unicode_empty = PyUnicode_New(0, 0);
14155+
_Py_INCREF_UNICODE_EMPTY();
1416514156
if (!unicode_empty)
1416614157
Py_FatalError("Can't create empty string");
14167-
assert(_PyUnicode_CheckConsistency(unicode_empty, 1));
14158+
Py_DECREF(unicode_empty);
1416814159

14169-
for (i = 0; i < 256; i++)
14170-
unicode_latin1[i] = NULL;
1417114160
if (PyType_Ready(&PyUnicode_Type) < 0)
1417214161
Py_FatalError("Can't initialize 'unicode'");
1417314162

@@ -14207,15 +14196,10 @@ _PyUnicode_Fini(void)
1420714196
{
1420814197
int i;
1420914198

14210-
Py_XDECREF(unicode_empty);
14211-
unicode_empty = NULL;
14199+
Py_CLEAR(unicode_empty);
1421214200

14213-
for (i = 0; i < 256; i++) {
14214-
if (unicode_latin1[i]) {
14215-
Py_DECREF(unicode_latin1[i]);
14216-
unicode_latin1[i] = NULL;
14217-
}
14218-
}
14201+
for (i = 0; i < 256; i++)
14202+
Py_CLEAR(unicode_latin1[i]);
1421914203
_PyUnicode_ClearStaticStrings();
1422014204
(void)PyUnicode_ClearFreeList();
1422114205
}
@@ -14344,8 +14328,7 @@ _Py_ReleaseInternedUnicodeStrings(void)
1434414328
"mortal/immortal\n", mortal_size, immortal_size);
1434514329
Py_DECREF(keys);
1434614330
PyDict_Clear(interned);
14347-
Py_DECREF(interned);
14348-
interned = NULL;
14331+
Py_CLEAR(interned);
1434914332
}
1435014333

1435114334

0 commit comments

Comments
 (0)