Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit dc09301

Browse files
gh-122417: Implement per-thread heap type refcounts (#122418)
The free-threaded build partially stores heap type reference counts in a distributed manner in per-thread arrays. This avoids reference count contention when creating or destroying instances. Co-authored-by: Ken Jin <[email protected]>
1 parent 1429651 commit dc09301

18 files changed

+427
-69
lines changed

Include/cpython/object.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,9 @@ typedef struct _heaptypeobject {
270270
PyObject *ht_module;
271271
char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec
272272
struct _specialization_cache _spec_cache; // For use by the specializer.
273+
#ifdef Py_GIL_DISABLED
274+
Py_ssize_t unique_id; // ID used for thread-local refcounting
275+
#endif
273276
/* here are optional user slots, followed by the members. */
274277
} PyHeapTypeObject;
275278

Include/internal/pycore_gc.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -381,10 +381,6 @@ extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp);
381381
extern void _Py_ScheduleGC(PyThreadState *tstate);
382382
extern void _Py_RunGC(PyThreadState *tstate);
383383

384-
#ifdef Py_GIL_DISABLED
385-
// gh-117783: Immortalize objects that use deferred reference counting
386-
extern void _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp);
387-
#endif
388384

389385
#ifdef __cplusplus
390386
}

Include/internal/pycore_interp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ extern "C" {
3535
#include "pycore_qsbr.h" // struct _qsbr_state
3636
#include "pycore_tstate.h" // _PyThreadStateImpl
3737
#include "pycore_tuple.h" // struct _Py_tuple_state
38+
#include "pycore_typeid.h" // struct _Py_type_id_pool
3839
#include "pycore_typeobject.h" // struct types_state
3940
#include "pycore_unicodeobject.h" // struct _Py_unicode_state
4041
#include "pycore_warnings.h" // struct _warnings_runtime_state
@@ -220,6 +221,7 @@ struct _is {
220221
#if defined(Py_GIL_DISABLED)
221222
struct _mimalloc_interp_state mimalloc;
222223
struct _brc_state brc; // biased reference counting state
224+
struct _Py_type_id_pool type_ids;
223225
PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS];
224226
#endif
225227

Include/internal/pycore_object.h

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,19 @@ extern "C" {
1414
#include "pycore_interp.h" // PyInterpreterState.gc
1515
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED
1616
#include "pycore_pystate.h" // _PyInterpreterState_GET()
17+
#include "pycore_typeid.h" // _PyType_IncrefSlow
1718

1819

1920
#define _Py_IMMORTAL_REFCNT_LOOSE ((_Py_IMMORTAL_REFCNT >> 1) + 1)
2021

22+
// This value is added to `ob_ref_shared` for objects that use deferred
23+
// reference counting so that they are not immediately deallocated when the
24+
// non-deferred reference count drops to zero.
25+
//
26+
// The value is half the maximum shared refcount because the low two bits of
27+
// `ob_ref_shared` are used for flags.
28+
#define _Py_REF_DEFERRED (PY_SSIZE_T_MAX / 8)
29+
2130
// gh-121528, gh-118997: Similar to _Py_IsImmortal() but be more loose when
2231
// comparing the reference count to stay compatible with C extensions built
2332
// with the stable ABI 3.11 or older. Such extensions implement INCREF/DECREF
@@ -280,6 +289,67 @@ extern PyStatus _PyObject_InitState(PyInterpreterState *interp);
280289
extern void _PyObject_FiniState(PyInterpreterState *interp);
281290
extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj);
282291

292+
#ifndef Py_GIL_DISABLED
293+
# define _Py_INCREF_TYPE Py_INCREF
294+
# define _Py_DECREF_TYPE Py_DECREF
295+
#else
296+
static inline void
297+
_Py_INCREF_TYPE(PyTypeObject *type)
298+
{
299+
if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
300+
assert(_Py_IsImmortal(type));
301+
return;
302+
}
303+
304+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
305+
PyHeapTypeObject *ht = (PyHeapTypeObject *)type;
306+
307+
// Unsigned comparison so that `unique_id=-1`, which indicates that
308+
// per-thread refcounting has been disabled on this type, is handled by
309+
// the "else".
310+
if ((size_t)ht->unique_id < (size_t)tstate->types.size) {
311+
# ifdef Py_REF_DEBUG
312+
_Py_INCREF_IncRefTotal();
313+
# endif
314+
_Py_INCREF_STAT_INC();
315+
tstate->types.refcounts[ht->unique_id]++;
316+
}
317+
else {
318+
// The slow path resizes the thread-local refcount array if necessary.
319+
// It handles the unique_id=-1 case to keep the inlinable function smaller.
320+
_PyType_IncrefSlow(ht);
321+
}
322+
}
323+
324+
static inline void
325+
_Py_DECREF_TYPE(PyTypeObject *type)
326+
{
327+
if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) {
328+
assert(_Py_IsImmortal(type));
329+
return;
330+
}
331+
332+
_PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
333+
PyHeapTypeObject *ht = (PyHeapTypeObject *)type;
334+
335+
// Unsigned comparison so that `unique_id=-1`, which indicates that
336+
// per-thread refcounting has been disabled on this type, is handled by
337+
// the "else".
338+
if ((size_t)ht->unique_id < (size_t)tstate->types.size) {
339+
# ifdef Py_REF_DEBUG
340+
_Py_DECREF_DecRefTotal();
341+
# endif
342+
_Py_DECREF_STAT_INC();
343+
tstate->types.refcounts[ht->unique_id]--;
344+
}
345+
else {
346+
// Directly decref the type if the type id is not assigned or if
347+
// per-thread refcounting has been disabled on this type.
348+
Py_DECREF(type);
349+
}
350+
}
351+
#endif
352+
283353
/* Inline functions trading binary compatibility for speed:
284354
_PyObject_Init() is the fast version of PyObject_Init(), and
285355
_PyObject_InitVar() is the fast version of PyObject_InitVar().
@@ -291,7 +361,7 @@ _PyObject_Init(PyObject *op, PyTypeObject *typeobj)
291361
assert(op != NULL);
292362
Py_SET_TYPE(op, typeobj);
293363
assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortalLoose(typeobj));
294-
Py_INCREF(typeobj);
364+
_Py_INCREF_TYPE(typeobj);
295365
_Py_NewReference(op);
296366
}
297367

Include/internal/pycore_tstate.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,16 @@ typedef struct _PyThreadStateImpl {
3131
struct _mimalloc_thread_state mimalloc;
3232
struct _Py_freelists freelists;
3333
struct _brc_thread_state brc;
34+
struct {
35+
// The thread-local refcounts for heap type objects
36+
Py_ssize_t *refcounts;
37+
38+
// Size of the refcounts array.
39+
Py_ssize_t size;
40+
41+
// If set, don't use thread-local refcounts
42+
int is_finalized;
43+
} types;
3444
#endif
3545

3646
#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)

Include/internal/pycore_typeid.h

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#ifndef Py_INTERNAL_TYPEID_H
2+
#define Py_INTERNAL_TYPEID_H
3+
#ifdef __cplusplus
4+
extern "C" {
5+
#endif
6+
7+
#ifndef Py_BUILD_CORE
8+
# error "this header requires Py_BUILD_CORE define"
9+
#endif
10+
11+
#ifdef Py_GIL_DISABLED
12+
13+
// This contains code for allocating unique ids to heap type objects
14+
// and re-using those ids when the type is deallocated.
15+
//
16+
// The type ids are used to implement per-thread reference counts of
17+
// heap type objects to avoid contention on the reference count fields
18+
// of heap type objects. Static type objects are immortal, so contention
19+
// is not an issue for those types.
20+
//
21+
// Type id of -1 is used to indicate a type doesn't use thread-local
22+
// refcounting. This value is used when a type object is finalized by the GC
23+
// and during interpreter shutdown to allow the type object to be
24+
// deallocated promptly when the object's refcount reaches zero.
25+
//
26+
// Each entry implicitly represents a type id based on its offset in the
27+
// table. Non-allocated entries form a free-list via the 'next' pointer.
28+
// Allocated entries store the corresponding PyTypeObject.
29+
typedef union _Py_type_id_entry {
30+
// Points to the next free type id, when part of the freelist
31+
union _Py_type_id_entry *next;
32+
33+
// Stores the type object when the id is assigned
34+
PyHeapTypeObject *type;
35+
} _Py_type_id_entry;
36+
37+
struct _Py_type_id_pool {
38+
PyMutex mutex;
39+
40+
// combined table of types with allocated type ids and unallocated
41+
// type ids.
42+
_Py_type_id_entry *table;
43+
44+
// Next entry to allocate inside 'table' or NULL
45+
_Py_type_id_entry *freelist;
46+
47+
// size of 'table'
48+
Py_ssize_t size;
49+
};
50+
51+
// Assigns the next id from the pool of type ids.
52+
extern void _PyType_AssignId(PyHeapTypeObject *type);
53+
54+
// Releases the allocated type id back to the pool.
55+
extern void _PyType_ReleaseId(PyHeapTypeObject *type);
56+
57+
// Merges the thread-local reference counts into the corresponding types.
58+
extern void _PyType_MergeThreadLocalRefcounts(_PyThreadStateImpl *tstate);
59+
60+
// Like _PyType_MergeThreadLocalRefcounts, but also frees the thread-local
61+
// array of refcounts.
62+
extern void _PyType_FinalizeThreadLocalRefcounts(_PyThreadStateImpl *tstate);
63+
64+
// Frees the interpreter's pool of type ids.
65+
extern void _PyType_FinalizeIdPool(PyInterpreterState *interp);
66+
67+
// Increfs the type, resizing the thread-local refcount array if necessary.
68+
PyAPI_FUNC(void) _PyType_IncrefSlow(PyHeapTypeObject *type);
69+
70+
#endif /* Py_GIL_DISABLED */
71+
72+
#ifdef __cplusplus
73+
}
74+
#endif
75+
#endif /* !Py_INTERNAL_TYPEID_H */

Lib/test/test_sys.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1710,6 +1710,7 @@ def delx(self): del self.__x
17101710
fmt = 'P2nPI13Pl4Pn9Pn12PIPc'
17111711
s = vsize(fmt)
17121712
check(int, s)
1713+
typeid = 'n' if support.Py_GIL_DISABLED else ''
17131714
# class
17141715
s = vsize(fmt + # PyTypeObject
17151716
'4P' # PyAsyncMethods
@@ -1718,7 +1719,8 @@ def delx(self): del self.__x
17181719
'10P' # PySequenceMethods
17191720
'2P' # PyBufferProcs
17201721
'6P'
1721-
'1PIP' # Specializer cache
1722+
'1PIP' # Specializer cache
1723+
+ typeid # heap type id (free-threaded only)
17221724
)
17231725
class newstyleclass(object): pass
17241726
# Separate block for PyDictKeysObject with 8 keys and 5 entries

Makefile.pre.in

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,7 @@ PYTHON_OBJS= \
483483
Python/thread.o \
484484
Python/traceback.o \
485485
Python/tracemalloc.o \
486+
Python/typeid.o \
486487
Python/getopt.o \
487488
Python/pystrcmp.o \
488489
Python/pystrtod.o \
@@ -1257,6 +1258,7 @@ PYTHON_HEADERS= \
12571258
$(srcdir)/Include/internal/pycore_tracemalloc.h \
12581259
$(srcdir)/Include/internal/pycore_tstate.h \
12591260
$(srcdir)/Include/internal/pycore_tuple.h \
1261+
$(srcdir)/Include/internal/pycore_typeid.h \
12601262
$(srcdir)/Include/internal/pycore_typeobject.h \
12611263
$(srcdir)/Include/internal/pycore_typevarobject.h \
12621264
$(srcdir)/Include/internal/pycore_ucnhash.h \
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
In the free-threaded build, the reference counts for heap type objects are now
2+
partially stored in a distributed manner in per-thread arrays. This reduces
3+
contention on the heap type's reference count fields when creating or
4+
destroying instances of the same type from multiple threads concurrently.

Objects/object.c

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2477,15 +2477,7 @@ _PyObject_SetDeferredRefcount(PyObject *op)
24772477
assert(_Py_IsOwnedByCurrentThread(op));
24782478
assert(op->ob_ref_shared == 0);
24792479
_PyObject_SET_GC_BITS(op, _PyGC_BITS_DEFERRED);
2480-
PyInterpreterState *interp = _PyInterpreterState_GET();
2481-
if (_Py_atomic_load_int_relaxed(&interp->gc.immortalize) == 1) {
2482-
// gh-117696: immortalize objects instead of using deferred reference
2483-
// counting for now.
2484-
_Py_SetImmortal(op);
2485-
return;
2486-
}
2487-
op->ob_ref_local += 1;
2488-
op->ob_ref_shared = _Py_REF_QUEUED;
2480+
op->ob_ref_shared = _Py_REF_SHARED(_Py_REF_DEFERRED, 0);
24892481
#endif
24902482
}
24912483

Objects/typeobject.c

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2452,7 +2452,7 @@ subtype_dealloc(PyObject *self)
24522452
reference counting. Only decref if the base type is not already a heap
24532453
allocated type. Otherwise, basedealloc should have decref'd it already */
24542454
if (type_needs_decref) {
2455-
Py_DECREF(type);
2455+
_Py_DECREF_TYPE(type);
24562456
}
24572457

24582458
/* Done */
@@ -2562,7 +2562,7 @@ subtype_dealloc(PyObject *self)
25622562
reference counting. Only decref if the base type is not already a heap
25632563
allocated type. Otherwise, basedealloc should have decref'd it already */
25642564
if (type_needs_decref) {
2565-
Py_DECREF(type);
2565+
_Py_DECREF_TYPE(type);
25662566
}
25672567

25682568
endlabel:
@@ -3913,7 +3913,9 @@ type_new_alloc(type_new_ctx *ctx)
39133913
et->ht_module = NULL;
39143914
et->_ht_tpname = NULL;
39153915

3916-
_PyObject_SetDeferredRefcount((PyObject *)et);
3916+
#ifdef Py_GIL_DISABLED
3917+
_PyType_AssignId(et);
3918+
#endif
39173919

39183920
return type;
39193921
}
@@ -4965,6 +4967,11 @@ _PyType_FromMetaclass_impl(
49654967
type->tp_weaklistoffset = weaklistoffset;
49664968
type->tp_dictoffset = dictoffset;
49674969

4970+
#ifdef Py_GIL_DISABLED
4971+
// Assign a type id to enable thread-local refcounting
4972+
_PyType_AssignId(res);
4973+
#endif
4974+
49684975
/* Ready the type (which includes inheritance).
49694976
*
49704977
* After this call we should generally only touch up what's
@@ -5914,6 +5921,9 @@ type_dealloc(PyObject *self)
59145921
}
59155922
Py_XDECREF(et->ht_module);
59165923
PyMem_Free(et->_ht_tpname);
5924+
#ifdef Py_GIL_DISABLED
5925+
_PyType_ReleaseId(et);
5926+
#endif
59175927
Py_TYPE(type)->tp_free((PyObject *)type);
59185928
}
59195929

PCbuild/_freeze_module.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@
267267
<ClCompile Include="..\Python\thread.c" />
268268
<ClCompile Include="..\Python\traceback.c" />
269269
<ClCompile Include="..\Python\tracemalloc.c" />
270+
<ClCompile Include="..\Python\typeid.c" />
270271
</ItemGroup>
271272
<ItemGroup>
272273
<ClInclude Include="..\PC\pyconfig.h.in" />

PCbuild/_freeze_module.vcxproj.filters

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,9 @@
464464
<ClCompile Include="..\Python\tracemalloc.c">
465465
<Filter>Source Files</Filter>
466466
</ClCompile>
467+
<ClCompile Include="..\Python\typeid.c">
468+
<Filter>Source Files</Filter>
469+
</ClCompile>
467470
<ClCompile Include="..\Objects\tupleobject.c">
468471
<Filter>Source Files</Filter>
469472
</ClCompile>

PCbuild/pythoncore.vcxproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@
304304
<ClInclude Include="..\Include\internal\pycore_tracemalloc.h" />
305305
<ClInclude Include="..\Include\internal\pycore_tstate.h" />
306306
<ClInclude Include="..\Include\internal\pycore_tuple.h" />
307+
<ClInclude Include="..\Include\internal\pycore_typeid.h" />
307308
<ClInclude Include="..\Include\internal\pycore_typeobject.h" />
308309
<ClInclude Include="..\Include\internal\pycore_typevarobject.h" />
309310
<ClInclude Include="..\Include\internal\pycore_ucnhash.h" />
@@ -643,6 +644,7 @@
643644
<ClCompile Include="..\Python\thread.c" />
644645
<ClCompile Include="..\Python\traceback.c" />
645646
<ClCompile Include="..\Python\tracemalloc.c" />
647+
<ClCompile Include="..\Python\typeid.c" />
646648
</ItemGroup>
647649
<ItemGroup Condition="$(IncludeExternals)">
648650
<ClCompile Include="..\Modules\zlibmodule.c" />

PCbuild/pythoncore.vcxproj.filters

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,9 @@
831831
<ClInclude Include="..\Include\internal\pycore_tuple.h">
832832
<Filter>Include\internal</Filter>
833833
</ClInclude>
834+
<ClInclude Include="..\Include\internal\pycore_typeid.h">
835+
<Filter>Include\internal</Filter>
836+
</ClInclude>
834837
<ClInclude Include="..\Include\internal\pycore_typeobject.h">
835838
<Filter>Include\internal</Filter>
836839
</ClInclude>
@@ -1493,6 +1496,9 @@
14931496
<ClCompile Include="..\Python\tracemalloc.c">
14941497
<Filter>Python</Filter>
14951498
</ClCompile>
1499+
<ClCompile Include="..\Python\typeid.c">
1500+
<Filter>Python</Filter>
1501+
</ClCompile>
14961502
<ClCompile Include="..\Python\bootstrap_hash.c">
14971503
<Filter>Python</Filter>
14981504
</ClCompile>

0 commit comments

Comments
 (0)