@@ -41,6 +41,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
4141#define PY_SSIZE_T_CLEAN
4242#include "Python.h"
4343#include "pycore_abstract.h" // _PyIndex_Check()
44+ #include "pycore_atomic_funcs.h" // _Py_atomic_size_get()
4445#include "pycore_bytes_methods.h" // _Py_bytes_lower()
4546#include "pycore_format.h" // F_LJUST
4647#include "pycore_initconfig.h" // _PyStatus_OK()
@@ -302,9 +303,6 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
302303 _Py_error_handler error_handler , const char * errors ,
303304 Py_ssize_t * consumed );
304305
305- /* List of static strings. */
306- static _Py_Identifier * static_strings = NULL ;
307-
308306/* Fast detection of the most frequent whitespace characters */
309307const unsigned char _Py_ascii_whitespace [] = {
310308 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
@@ -2312,42 +2310,85 @@ PyUnicode_FromString(const char *u)
23122310 return PyUnicode_DecodeUTF8Stateful (u , (Py_ssize_t )size , NULL , NULL );
23132311}
23142312
2313+
23152314PyObject *
23162315_PyUnicode_FromId (_Py_Identifier * id )
23172316{
2318- if (id -> object ) {
2319- return id -> object ;
2317+ PyInterpreterState * interp = _PyInterpreterState_GET ();
2318+ struct _Py_unicode_ids * ids = & interp -> unicode .ids ;
2319+
2320+ int index = _Py_atomic_size_get (& id -> index );
2321+ if (index < 0 ) {
2322+ struct _Py_unicode_runtime_ids * rt_ids = & interp -> runtime -> unicode_ids ;
2323+
2324+ PyThread_acquire_lock (rt_ids -> lock , WAIT_LOCK );
2325+ // Check again to detect concurrent access. Another thread can have
2326+ // initialized the index while this thread waited for the lock.
2327+ index = _Py_atomic_size_get (& id -> index );
2328+ if (index < 0 ) {
2329+ assert (rt_ids -> next_index < PY_SSIZE_T_MAX );
2330+ index = rt_ids -> next_index ;
2331+ rt_ids -> next_index ++ ;
2332+ _Py_atomic_size_set (& id -> index , index );
2333+ }
2334+ PyThread_release_lock (rt_ids -> lock );
23202335 }
2336+ assert (index >= 0 );
23212337
23222338 PyObject * obj ;
2323- obj = PyUnicode_DecodeUTF8Stateful (id -> string ,
2324- strlen (id -> string ),
2339+ if (index < ids -> size ) {
2340+ obj = ids -> array [index ];
2341+ if (obj ) {
2342+ // Return a borrowed reference
2343+ return obj ;
2344+ }
2345+ }
2346+
2347+ obj = PyUnicode_DecodeUTF8Stateful (id -> string , strlen (id -> string ),
23252348 NULL , NULL );
23262349 if (!obj ) {
23272350 return NULL ;
23282351 }
23292352 PyUnicode_InternInPlace (& obj );
23302353
2331- assert (!id -> next );
2332- id -> object = obj ;
2333- id -> next = static_strings ;
2334- static_strings = id ;
2335- return id -> object ;
2354+ if (index >= ids -> size ) {
2355+ // Overallocate to reduce the number of realloc
2356+ Py_ssize_t new_size = Py_MAX (index * 2 , 16 );
2357+ Py_ssize_t item_size = sizeof (ids -> array [0 ]);
2358+ PyObject * * new_array = PyMem_Realloc (ids -> array , new_size * item_size );
2359+ if (new_array == NULL ) {
2360+ PyErr_NoMemory ();
2361+ return NULL ;
2362+ }
2363+ memset (& new_array [ids -> size ], 0 , (new_size - ids -> size ) * item_size );
2364+ ids -> array = new_array ;
2365+ ids -> size = new_size ;
2366+ }
2367+
2368+ // The array stores a strong reference
2369+ ids -> array [index ] = obj ;
2370+
2371+ // Return a borrowed reference
2372+ return obj ;
23362373}
23372374
2375+
23382376static void
2339- unicode_clear_static_strings ( void )
2377+ unicode_clear_identifiers ( PyThreadState * tstate )
23402378{
2341- _Py_Identifier * tmp , * s = static_strings ;
2342- while (s ) {
2343- Py_CLEAR (s -> object );
2344- tmp = s -> next ;
2345- s -> next = NULL ;
2346- s = tmp ;
2379+ PyInterpreterState * interp = _PyInterpreterState_GET ();
2380+ struct _Py_unicode_ids * ids = & interp -> unicode .ids ;
2381+ for (Py_ssize_t i = 0 ; i < ids -> size ; i ++ ) {
2382+ Py_XDECREF (ids -> array [i ]);
23472383 }
2348- static_strings = NULL ;
2384+ ids -> size = 0 ;
2385+ PyMem_Free (ids -> array );
2386+ ids -> array = NULL ;
2387+ // Don't reset _PyRuntime next_index: _Py_Identifier.id remains valid
2388+ // after Py_Finalize().
23492389}
23502390
2391+
23512392/* Internal function, doesn't check maximum character */
23522393
23532394PyObject *
@@ -16238,9 +16279,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
1623816279 Py_CLEAR (state -> latin1 [i ]);
1623916280 }
1624016281
16241- if (_Py_IsMainInterpreter (tstate )) {
16242- unicode_clear_static_strings ();
16243- }
16282+ unicode_clear_identifiers (tstate );
1624416283
1624516284 _PyUnicode_FiniEncodings (& tstate -> interp -> unicode .fs_codec );
1624616285}
0 commit comments