Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4629c22

Browse files
authored
gh-113956: Make intern_common thread-safe in free-threaded build (gh-148886)
Avoid racing with the owning thread's refcount operations when immortalizing an interned string: if we don't own it and its refcount isn't merged, intern a copy we own instead. Use atomic stores in _Py_SetImmortalUntracked so concurrent atomic reads are race-free.
1 parent 42d645a commit 4629c22

4 files changed

Lines changed: 77 additions & 11 deletions

File tree

Lib/test/test_free_threading/test_str.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import sys
2+
import threading
13
import unittest
24

35
from itertools import cycle
4-
from threading import Event, Thread
6+
from threading import Barrier, Event, Thread
57
from unittest import TestCase
68

79
from test.support import threading_helper
@@ -69,6 +71,24 @@ def reader_func():
6971
for reader in readers:
7072
reader.join()
7173

74+
def test_intern_unowned_string(self):
75+
# Test interning strings owned by various threads.
76+
strings = [f"intern_race_owner_{i}" for i in range(50)]
77+
78+
NUM_THREADS = 5
79+
b = Barrier(NUM_THREADS)
80+
81+
def interner():
82+
tid = threading.get_ident()
83+
for i in range(20):
84+
strings.append(f"intern_{tid}_{i}")
85+
b.wait()
86+
for s in strings:
87+
r = sys.intern(s)
88+
self.assertTrue(sys._is_interned(r))
89+
90+
threading_helper.run_concurrently(interner, nthreads=NUM_THREADS)
91+
7292
def test_maketrans_dict_concurrent_modification(self):
7393
for _ in range(5):
7494
d = {2000: 'a'}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix a data race in :func:`sys.intern` in the free-threaded build when
2+
interning a string owned by another thread. An interned copy owned by the
3+
current thread is used instead when it is not safe to immortalize the
4+
original.

Objects/object.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2768,9 +2768,9 @@ _Py_SetImmortalUntracked(PyObject *op)
27682768
return;
27692769
}
27702770
#ifdef Py_GIL_DISABLED
2771-
op->ob_tid = _Py_UNOWNED_TID;
2772-
op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
2773-
op->ob_ref_shared = 0;
2771+
_Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_UNOWNED_TID);
2772+
_Py_atomic_store_uint32_relaxed(&op->ob_ref_local, _Py_IMMORTAL_REFCNT_LOCAL);
2773+
_Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0);
27742774
_Py_atomic_or_uint8(&op->ob_gc_bits, _PyGC_BITS_DEFERRED);
27752775
#elif SIZEOF_VOID_P > 4
27762776
op->ob_flags = _Py_IMMORTAL_FLAGS;

Objects/unicodeobject.c

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
589589
{
590590
#define CHECK(expr) \
591591
do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
592+
#ifdef Py_GIL_DISABLED
593+
# define CHECK_IF_GIL(expr) (void)(expr)
594+
# define CHECK_IF_FT(expr) CHECK(expr)
595+
#else
596+
# define CHECK_IF_GIL(expr) CHECK(expr)
597+
# define CHECK_IF_FT(expr) (void)(expr)
598+
#endif
599+
592600

593601
assert(op != NULL);
594602
CHECK(PyUnicode_Check(op));
@@ -669,11 +677,9 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
669677

670678
/* Check interning state */
671679
#ifdef Py_DEBUG
672-
// Note that we do not check `_Py_IsImmortal(op)`, since stable ABI
673-
// extensions can make immortal strings mortal (but with a high enough
674-
// refcount).
675-
// The other way is extremely unlikely (worth a potential failed assertion
676-
// in a debug build), so we do check `!_Py_IsImmortal(op)`.
680+
// Note that we do not check `_Py_IsImmortal(op)` in the GIL-enabled build
681+
// since stable ABI extensions can make immortal strings mortal (but with a
682+
// high enough refcount).
677683
switch (PyUnicode_CHECK_INTERNED(op)) {
678684
case SSTATE_NOT_INTERNED:
679685
if (ascii->state.statically_allocated) {
@@ -683,18 +689,20 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
683689
// are static but use SSTATE_NOT_INTERNED
684690
}
685691
else {
686-
CHECK(!_Py_IsImmortal(op));
692+
CHECK_IF_GIL(!_Py_IsImmortal(op));
687693
}
688694
break;
689695
case SSTATE_INTERNED_MORTAL:
690696
CHECK(!ascii->state.statically_allocated);
691-
CHECK(!_Py_IsImmortal(op));
697+
CHECK_IF_GIL(!_Py_IsImmortal(op));
692698
break;
693699
case SSTATE_INTERNED_IMMORTAL:
694700
CHECK(!ascii->state.statically_allocated);
701+
CHECK_IF_FT(_Py_IsImmortal(op));
695702
break;
696703
case SSTATE_INTERNED_IMMORTAL_STATIC:
697704
CHECK(ascii->state.statically_allocated);
705+
CHECK_IF_FT(_Py_IsImmortal(op));
698706
break;
699707
default:
700708
Py_UNREACHABLE();
@@ -14208,6 +14216,18 @@ immortalize_interned(PyObject *s)
1420814216
FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
1420914217
}
1421014218

14219+
#ifdef Py_GIL_DISABLED
14220+
static bool
14221+
can_immortalize_safely(PyObject *s)
14222+
{
14223+
if (_Py_IsOwnedByCurrentThread(s) || _Py_IsImmortal(s)) {
14224+
return true;
14225+
}
14226+
Py_ssize_t shared = _Py_atomic_load_ssize(&s->ob_ref_shared);
14227+
return _Py_REF_IS_MERGED(shared);
14228+
}
14229+
#endif
14230+
1421114231
static /* non-null */ PyObject*
1421214232
intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1421314233
bool immortalize)
@@ -14236,11 +14256,16 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1423614256
// no, go on
1423714257
break;
1423814258
case SSTATE_INTERNED_MORTAL:
14259+
#ifndef Py_GIL_DISABLED
1423914260
// yes but we might need to make it immortal
1424014261
if (immortalize) {
1424114262
immortalize_interned(s);
1424214263
}
1424314264
return s;
14265+
#else
14266+
// not fully interned yet; fall through to the locking path
14267+
break;
14268+
#endif
1424414269
default:
1424514270
// all done
1424614271
return s;
@@ -14305,6 +14330,23 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1430514330
Py_DECREF(r);
1430614331
}
1430714332
#endif
14333+
14334+
#ifdef Py_GIL_DISABLED
14335+
// Immortalization writes to the refcount fields non-atomically. That
14336+
// races with Py_INCREF / Py_DECREF on the thread that owns `s`. If we
14337+
// don't own it (and its refcount hasn't been merged), intern a copy
14338+
// we own instead.
14339+
if (!can_immortalize_safely(s)) {
14340+
PyObject *copy = _PyUnicode_Copy(s);
14341+
if (copy == NULL) {
14342+
PyErr_Clear();
14343+
return s;
14344+
}
14345+
Py_DECREF(s);
14346+
s = copy;
14347+
}
14348+
#endif
14349+
1430814350
FT_MUTEX_LOCK(INTERN_MUTEX);
1430914351
PyObject *t;
1431014352
{

0 commit comments

Comments
 (0)