Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 26c6e3d

Browse files
authored
[3.14] gh-113956: Make intern_common thread-safe in free-threaded build (gh-148886) (#148927)
Avoid racing with the owning thread's refcount operations when immortalizing an interned string: if we don't own it and its refcount isn't merged, intern a copy we own instead. Use atomic stores in _Py_SetImmortalUntracked so concurrent atomic reads are race-free. (cherry picked from commit 4629c22)
1 parent 31ba91a commit 26c6e3d

4 files changed

Lines changed: 77 additions & 11 deletions

File tree

Lib/test/test_free_threading/test_str.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import sys
2+
import threading
13
import unittest
24

35
from itertools import cycle
4-
from threading import Event, Thread
6+
from threading import Barrier, Event, Thread
57
from unittest import TestCase
68

79
from test.support import threading_helper
@@ -69,6 +71,24 @@ def reader_func():
6971
for reader in readers:
7072
reader.join()
7173

74+
def test_intern_unowned_string(self):
75+
# Test interning strings owned by various threads.
76+
strings = [f"intern_race_owner_{i}" for i in range(50)]
77+
78+
NUM_THREADS = 5
79+
b = Barrier(NUM_THREADS)
80+
81+
def interner():
82+
tid = threading.get_ident()
83+
for i in range(20):
84+
strings.append(f"intern_{tid}_{i}")
85+
b.wait()
86+
for s in strings:
87+
r = sys.intern(s)
88+
self.assertTrue(sys._is_interned(r))
89+
90+
threading_helper.run_concurrently(interner, nthreads=NUM_THREADS)
91+
7292
def test_maketrans_dict_concurrent_modification(self):
7393
for _ in range(5):
7494
d = {2000: 'a'}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fix a data race in :func:`sys.intern` in the free-threaded build when
2+
interning a string owned by another thread. An interned copy owned by the
3+
current thread is used instead when it is not safe to immortalize the
4+
original.

Objects/object.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2685,9 +2685,9 @@ _Py_SetImmortalUntracked(PyObject *op)
26852685
return;
26862686
}
26872687
#ifdef Py_GIL_DISABLED
2688-
op->ob_tid = _Py_UNOWNED_TID;
2689-
op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
2690-
op->ob_ref_shared = 0;
2688+
_Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_UNOWNED_TID);
2689+
_Py_atomic_store_uint32_relaxed(&op->ob_ref_local, _Py_IMMORTAL_REFCNT_LOCAL);
2690+
_Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0);
26912691
_Py_atomic_or_uint8(&op->ob_gc_bits, _PyGC_BITS_DEFERRED);
26922692
#elif SIZEOF_VOID_P > 4
26932693
op->ob_flags = _Py_IMMORTAL_FLAGS;

Objects/unicodeobject.c

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
676676
{
677677
#define CHECK(expr) \
678678
do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
679+
#ifdef Py_GIL_DISABLED
680+
# define CHECK_IF_GIL(expr) (void)(expr)
681+
# define CHECK_IF_FT(expr) CHECK(expr)
682+
#else
683+
# define CHECK_IF_GIL(expr) CHECK(expr)
684+
# define CHECK_IF_FT(expr) (void)(expr)
685+
#endif
686+
679687

680688
assert(op != NULL);
681689
CHECK(PyUnicode_Check(op));
@@ -756,11 +764,9 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
756764

757765
/* Check interning state */
758766
#ifdef Py_DEBUG
759-
// Note that we do not check `_Py_IsImmortal(op)`, since stable ABI
760-
// extensions can make immortal strings mortal (but with a high enough
761-
// refcount).
762-
// The other way is extremely unlikely (worth a potential failed assertion
763-
// in a debug build), so we do check `!_Py_IsImmortal(op)`.
767+
// Note that we do not check `_Py_IsImmortal(op)` in the GIL-enabled build
768+
// since stable ABI extensions can make immortal strings mortal (but with a
769+
// high enough refcount).
764770
switch (PyUnicode_CHECK_INTERNED(op)) {
765771
case SSTATE_NOT_INTERNED:
766772
if (ascii->state.statically_allocated) {
@@ -770,18 +776,20 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
770776
// are static but use SSTATE_NOT_INTERNED
771777
}
772778
else {
773-
CHECK(!_Py_IsImmortal(op));
779+
CHECK_IF_GIL(!_Py_IsImmortal(op));
774780
}
775781
break;
776782
case SSTATE_INTERNED_MORTAL:
777783
CHECK(!ascii->state.statically_allocated);
778-
CHECK(!_Py_IsImmortal(op));
784+
CHECK_IF_GIL(!_Py_IsImmortal(op));
779785
break;
780786
case SSTATE_INTERNED_IMMORTAL:
781787
CHECK(!ascii->state.statically_allocated);
788+
CHECK_IF_FT(_Py_IsImmortal(op));
782789
break;
783790
case SSTATE_INTERNED_IMMORTAL_STATIC:
784791
CHECK(ascii->state.statically_allocated);
792+
CHECK_IF_FT(_Py_IsImmortal(op));
785793
break;
786794
default:
787795
Py_UNREACHABLE();
@@ -15961,6 +15969,18 @@ immortalize_interned(PyObject *s)
1596115969
FT_ATOMIC_STORE_UINT8(_PyUnicode_STATE(s).interned, SSTATE_INTERNED_IMMORTAL);
1596215970
}
1596315971

15972+
#ifdef Py_GIL_DISABLED
15973+
static bool
15974+
can_immortalize_safely(PyObject *s)
15975+
{
15976+
if (_Py_IsOwnedByCurrentThread(s) || _Py_IsImmortal(s)) {
15977+
return true;
15978+
}
15979+
Py_ssize_t shared = _Py_atomic_load_ssize(&s->ob_ref_shared);
15980+
return _Py_REF_IS_MERGED(shared);
15981+
}
15982+
#endif
15983+
1596415984
static /* non-null */ PyObject*
1596515985
intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1596615986
bool immortalize)
@@ -15989,11 +16009,16 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1598916009
// no, go on
1599016010
break;
1599116011
case SSTATE_INTERNED_MORTAL:
16012+
#ifndef Py_GIL_DISABLED
1599216013
// yes but we might need to make it immortal
1599316014
if (immortalize) {
1599416015
immortalize_interned(s);
1599516016
}
1599616017
return s;
16018+
#else
16019+
// not fully interned yet; fall through to the locking path
16020+
break;
16021+
#endif
1599716022
default:
1599816023
// all done
1599916024
return s;
@@ -16057,6 +16082,23 @@ intern_common(PyInterpreterState *interp, PyObject *s /* stolen */,
1605716082
Py_DECREF(r);
1605816083
}
1605916084
#endif
16085+
16086+
#ifdef Py_GIL_DISABLED
16087+
// Immortalization writes to the refcount fields non-atomically. That
16088+
// races with Py_INCREF / Py_DECREF on the thread that owns `s`. If we
16089+
// don't own it (and its refcount hasn't been merged), intern a copy
16090+
// we own instead.
16091+
if (!can_immortalize_safely(s)) {
16092+
PyObject *copy = _PyUnicode_Copy(s);
16093+
if (copy == NULL) {
16094+
PyErr_Clear();
16095+
return s;
16096+
}
16097+
Py_DECREF(s);
16098+
s = copy;
16099+
}
16100+
#endif
16101+
1606016102
LOCK_INTERNED(interp);
1606116103
PyObject *t;
1606216104
{

0 commit comments

Comments
 (0)