From 2b0a2c5beffaee60023b17dc9c94109aa8a5741c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 19 Nov 2024 17:15:34 +0000 Subject: [PATCH 01/18] Change the young generation to 'not visited'. WIP --- Include/internal/pycore_object.h | 4 ++-- Python/gc.c | 25 ++++++++++++++----------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index cafc02f892499c..1ba021b9d573a5 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -470,8 +470,8 @@ static inline void _PyObject_GC_TRACK( PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); _PyGCHead_SET_NEXT(last, gc); _PyGCHead_SET_PREV(gc, last); - /* Young objects will be moved into the visited space during GC, so set the bit here */ - gc->_gc_next = ((uintptr_t)generation0) | (uintptr_t)interp->gc.visited_space; + uintptr_t not_visited = 1 ^ interp->gc.visited_space; + gc->_gc_next = ((uintptr_t)generation0) | not_visited; generation0->_gc_prev = (uintptr_t)gc; #endif } diff --git a/Python/gc.c b/Python/gc.c index 63adecf0e05114..0501ad34b7d29d 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -24,7 +24,7 @@ typedef struct _gc_runtime_state GCState; #endif // Define this when debugging the GC -// #define GC_EXTRA_DEBUG +#define GC_EXTRA_DEBUG #define GC_NEXT _PyGCHead_NEXT @@ -1308,6 +1308,7 @@ gc_collect_young(PyThreadState *tstate, PyGC_Head survivors; gc_list_init(&survivors); + gc_list_set_space(young, gcstate->visited_space); gc_collect_region(tstate, young, &survivors, stats); Py_ssize_t survivor_count = 0; if (gcstate->visited_space) { @@ -1385,6 +1386,7 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat * have been marked as visited */ assert(IS_IN_VISITED(gc, gcstate->visited_space)); PyObject *op = FROM_GC(gc); + assert(_PyObject_GC_IS_TRACKED(op)); if (_Py_IsImmortal(op)) { PyGC_Head *next = GC_NEXT(gc); gc_list_move(gc, 
&get_gc_state()->permanent_generation.head); @@ -1404,17 +1406,16 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat static void completed_cycle(GCState *gcstate) { -#ifdef Py_DEBUG - PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; - assert(gc_list_is_empty(not_visited)); -#endif + int not_visited = flip_old_space(gcstate->visited_space); + assert(gc_list_is_empty(&gcstate->old[not_visited].head)); + gcstate->visited_space = not_visited; gcstate->visited_space = flip_old_space(gcstate->visited_space); /* Make sure all young objects have old space bit set correctly */ PyGC_Head *young = &gcstate->young.head; PyGC_Head *gc = GC_NEXT(young); while (gc != young) { PyGC_Head *next = GC_NEXT(gc); - gc_set_old_space(gc, gcstate->visited_space); + gc_set_old_space(gc, not_visited); gc = next; } gcstate->work_to_do = 0; @@ -1433,6 +1434,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (scale_factor < 1) { scale_factor = 1; } + gc_list_set_space(&gcstate->young.head, gcstate->visited_space); gc_list_merge(&gcstate->young.head, &increment); gcstate->young.count = 0; gc_list_validate_space(&increment, gcstate->visited_space); @@ -1444,6 +1446,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) PyGC_Head *gc = _PyGCHead_NEXT(not_visited); gc_list_move(gc, &increment); increment_size++; + assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); } @@ -1465,7 +1468,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) } } - static void gc_collect_full(PyThreadState *tstate, struct gc_collection_stats *stats) @@ -1477,9 +1479,9 @@ gc_collect_full(PyThreadState *tstate, PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; /* merge all generations 
into visited */ - gc_list_validate_space(young, gcstate->visited_space); - gc_list_set_space(pending, gcstate->visited_space); gc_list_merge(young, pending); + gc_list_validate_space(pending, 1-gcstate->visited_space); + gc_list_set_space(pending, gcstate->visited_space); gcstate->young.count = 0; gc_list_merge(pending, visited); @@ -1488,7 +1490,7 @@ gc_collect_full(PyThreadState *tstate, gcstate->young.count = 0; gcstate->old[0].count = 0; gcstate->old[1].count = 0; - + completed_cycle(gcstate); gcstate->work_to_do = - gcstate->young.threshold * 2; _PyGC_ClearAllFreeLists(tstate->interp); validate_old(gcstate); @@ -1734,9 +1736,10 @@ _PyGC_Freeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; /* The permanent_generation has its old space bit set to zero */ - if (gcstate->visited_space) { + if (gcstate->visited_space == 0) { gc_list_set_space(&gcstate->young.head, 0); } + gc_list_validate_space(&gcstate->young.head, 0); gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head); gcstate->young.count = 0; PyGC_Head*old0 = &gcstate->old[0].head; From 754bb686566afe161c40dfb5b8a4f454633636df Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 20 Nov 2024 14:06:45 +0000 Subject: [PATCH 02/18] Tidy up visited tracking and add consistency asserts --- Python/gc.c | 93 +++++++++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 45 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 0501ad34b7d29d..977863ae3f22d1 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -106,7 +106,7 @@ gc_old_space(PyGC_Head *g) } static inline int -flip_old_space(int space) +other_space(int space) { assert(space == 0 || space == 1); return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1; @@ -430,18 +430,27 @@ validate_list(PyGC_Head *head, enum flagstates flags) #endif #ifdef GC_EXTRA_DEBUG + + static void -validate_old(GCState *gcstate) +gc_list_validate_space(PyGC_Head *head, int space) { + PyGC_Head *gc = GC_NEXT(head); + while (gc != head) { 
+ assert(gc_old_space(gc) == space); + gc = GC_NEXT(gc); + } +} + +static void +validate_spaces(GCState *gcstate) { + int visited = gcstate->visited_space; + int not_visited = other_space(visited); + gc_list_validate_space(&gcstate->young.head, not_visited); for (int space = 0; space < 2; space++) { - PyGC_Head *head = &gcstate->old[space].head; - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - PyGC_Head *next = GC_NEXT(gc); - assert(gc_old_space(gc) == space); - gc = next; - } + gc_list_validate_space(&gcstate->old[space].head, space); } + gc_list_validate_space(&gcstate->permanent_generation.head, visited); } static void @@ -463,14 +472,6 @@ validate_consistent_old_space(PyGC_Head *head) assert(prev == GC_PREV(head)); } -static void -gc_list_validate_space(PyGC_Head *head, int space) { - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - assert(gc_old_space(gc) == space); - gc = GC_NEXT(gc); - } -} #else #define validate_old(g) do{}while(0) @@ -494,7 +495,7 @@ update_refs(PyGC_Head *containers) next = GC_NEXT(gc); PyObject *op = FROM_GC(gc); if (_Py_IsImmortal(op)) { - gc_list_move(gc, &get_gc_state()->permanent_generation.head); + _PyObject_GC_UNTRACK(op); gc = next; continue; } @@ -1293,6 +1294,7 @@ gc_collect_young(PyThreadState *tstate, struct gc_collection_stats *stats) { GCState *gcstate = &tstate->interp->gc; + validate_spaces(gcstate); PyGC_Head *young = &gcstate->young.head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; GC_STAT_ADD(0, collections, 1); @@ -1326,7 +1328,7 @@ gc_collect_young(PyThreadState *tstate, } (void)survivor_count; // Silence compiler warning gc_list_merge(&survivors, visited); - validate_old(gcstate); + validate_spaces(gcstate); gcstate->young.count = 0; gcstate->old[gcstate->visited_space].count++; Py_ssize_t scale_factor = gcstate->old[0].threshold; @@ -1335,13 +1337,14 @@ gc_collect_young(PyThreadState *tstate, } gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; 
add_stats(gcstate, 0, stats); + validate_spaces(gcstate); } #ifndef NDEBUG static inline int IS_IN_VISITED(PyGC_Head *gc, int visited_space) { - assert(visited_space == 0 || flip_old_space(visited_space) == 0); + assert(visited_space == 0 || other_space(visited_space) == 0); return gc_old_space(gc) == visited_space; } #endif @@ -1406,19 +1409,15 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat static void completed_cycle(GCState *gcstate) { - int not_visited = flip_old_space(gcstate->visited_space); - assert(gc_list_is_empty(&gcstate->old[not_visited].head)); - gcstate->visited_space = not_visited; - gcstate->visited_space = flip_old_space(gcstate->visited_space); - /* Make sure all young objects have old space bit set correctly */ - PyGC_Head *young = &gcstate->young.head; - PyGC_Head *gc = GC_NEXT(young); - while (gc != young) { - PyGC_Head *next = GC_NEXT(gc); - gc_set_old_space(gc, not_visited); - gc = next; - } + /* Flip spaces */ + int not_visited = gcstate->visited_space; + int visited = other_space(not_visited); + gcstate->visited_space = visited; + /* Make sure all objects have visited bit set correctly */ + gc_list_set_space(&gcstate->young.head, not_visited); + gc_list_set_space(&gcstate->permanent_generation.head, visited); gcstate->work_to_do = 0; + assert(gc_list_is_empty(&gcstate->old[visited].head)); } static void @@ -1461,11 +1460,11 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; - validate_old(gcstate); add_stats(gcstate, 1, stats); if (gc_list_is_empty(not_visited)) { completed_cycle(gcstate); } + validate_spaces(gcstate); } static void @@ -1474,7 +1473,7 @@ gc_collect_full(PyThreadState *tstate, { GC_STAT_ADD(2, collections, 1); GCState *gcstate = &tstate->interp->gc; - validate_old(gcstate); + validate_spaces(gcstate); PyGC_Head *young = &gcstate->young.head; 
PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; @@ -1484,16 +1483,18 @@ gc_collect_full(PyThreadState *tstate, gc_list_set_space(pending, gcstate->visited_space); gcstate->young.count = 0; gc_list_merge(pending, visited); + validate_spaces(gcstate); gc_collect_region(tstate, visited, visited, stats); + validate_spaces(gcstate); gcstate->young.count = 0; gcstate->old[0].count = 0; gcstate->old[1].count = 0; completed_cycle(gcstate); gcstate->work_to_do = - gcstate->young.threshold * 2; _PyGC_ClearAllFreeLists(tstate->interp); - validate_old(gcstate); + validate_spaces(gcstate); add_stats(gcstate, 2, stats); } @@ -1735,21 +1736,23 @@ void _PyGC_Freeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; - /* The permanent_generation has its old space bit set to zero */ - if (gcstate->visited_space == 0) { - gc_list_set_space(&gcstate->young.head, 0); - } - gc_list_validate_space(&gcstate->young.head, 0); + /* The permanent_generation must be visited */ + gc_list_set_space(&gcstate->young.head, gcstate->visited_space); gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head); gcstate->young.count = 0; PyGC_Head*old0 = &gcstate->old[0].head; PyGC_Head*old1 = &gcstate->old[1].head; + if (gcstate->visited_space) { + gc_list_set_space(old0, 1); + } + else { + gc_list_set_space(old1, 0); + } gc_list_merge(old0, &gcstate->permanent_generation.head); gcstate->old[0].count = 0; - gc_list_set_space(old1, 0); gc_list_merge(old1, &gcstate->permanent_generation.head); gcstate->old[1].count = 0; - validate_old(gcstate); + validate_spaces(gcstate); } void @@ -1757,8 +1760,8 @@ _PyGC_Unfreeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; gc_list_merge(&gcstate->permanent_generation.head, - &gcstate->old[0].head); - validate_old(gcstate); + &gcstate->old[gcstate->visited_space].head); + validate_spaces(gcstate); } Py_ssize_t @@ -1863,7 +1866,7 @@ 
_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) _Py_stats->object_stats.object_visits = 0; } #endif - validate_old(gcstate); + validate_spaces(gcstate); _Py_atomic_store_int(&gcstate->collecting, 0); return stats.uncollectable + stats.collected; } From ac3ad26ad9a1f9668c1a8921e56358eb3c616221 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 21 Nov 2024 09:17:17 +0000 Subject: [PATCH 03/18] Partially apply mark-first-gc patch --- Include/cpython/pystats.h | 2 ++ Include/internal/pycore_frame.h | 3 ++ Include/internal/pycore_gc.h | 10 +++++-- Include/internal/pycore_runtime_init.h | 1 + InternalDocs/garbage_collector.md | 39 ++++++++++++++++++++++++++ Lib/test/test_gc.py | 24 +++++++++------- Modules/_testinternalcapi.c | 6 ++++ 7 files changed, 73 insertions(+), 12 deletions(-) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index f1ca54839fbc38..2ae48002d720e9 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -99,6 +99,8 @@ typedef struct _gc_stats { uint64_t collections; uint64_t object_visits; uint64_t objects_collected; + uint64_t objects_transitively_reachable; + uint64_t objects_not_transitively_reachable; } GCStats; typedef struct _uop_stats { diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 8c0100390d036e..b786c5f49e9831 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -75,6 +75,7 @@ typedef struct _PyInterpreterFrame { _PyStackRef *stackpointer; uint16_t return_offset; /* Only relevant during a function call */ char owner; + char visited; /* Locals and stack */ _PyStackRef localsplus[1]; } _PyInterpreterFrame; @@ -207,6 +208,7 @@ _PyFrame_Initialize( #endif frame->return_offset = 0; frame->owner = FRAME_OWNED_BY_THREAD; + frame->visited = 0; for (int i = null_locals_from; i < code->co_nlocalsplus; i++) { frame->localsplus[i] = PyStackRef_NULL; @@ -389,6 +391,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState 
*tstate, PyCodeObject *code, int frame->instr_ptr = _PyCode_CODE(code); #endif frame->owner = FRAME_OWNED_BY_THREAD; + frame->visited = 0; frame->return_offset = 0; #ifdef Py_GIL_DISABLED diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 38a1c56c09d9db..ea1b5ec4ede68c 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -10,11 +10,11 @@ extern "C" { /* GC information is stored BEFORE the object structure. */ typedef struct { - // Pointer to next object in the list. + // Tagged pointer to next object in the list. // 0 means the object is not tracked uintptr_t _gc_next; - // Pointer to previous object in the list. + // Tagged pointer to previous object in the list. // Lowest two bits are used for flags documented later. uintptr_t _gc_prev; } PyGC_Head; @@ -302,6 +302,11 @@ struct gc_generation_stats { Py_ssize_t uncollectable; }; +enum _GCPhase { + GC_PHASE_MARK = 0, + GC_PHASE_COLLECT = 1 +}; + struct _gc_runtime_state { /* List of objects that still need to be cleaned up, singly linked * via their gc headers' gc_prev pointers. 
*/ @@ -329,6 +334,7 @@ struct _gc_runtime_state { Py_ssize_t work_to_do; /* Which of the old spaces is the visited space */ int visited_space; + int phase; #ifdef Py_GIL_DISABLED /* This is the number of objects that survived the last full diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 9f6748945bab36..1260b957ce9482 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -137,6 +137,7 @@ extern PyTypeObject _PyExc_MemoryError; { .threshold = 0, }, \ }, \ .work_to_do = -5000, \ + .phase = GC_PHASE_MARK, \ }, \ .qsbr = { \ .wr_seq = QSBR_INITIAL, \ diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 5de4aa05398b55..14186fc3a90eec 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -484,6 +484,45 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. ``` +Optimization: visiting reachable objects +======================================== + +An object cannot be garbage if it can be reached. + +To avoid having to identify reference cycles across the whole heap, we can +reduce the amount of work done considerably by first moving most reachable objects +to the `visited` space. Empirically, most reachable objects can be reached from a +small set of global objects and local variables. +This step does much less work per object, so reduces the time spent +performing garbage collection by at least half. + +> [!NOTE] +> Objects that are not determined to be reachable by this pass are not necessarily +> unreachable. We still need to perform the main algorithm to determine which objects +> are actually unreachable. +We use the same technique of forming a transitive closure as the incremental +collector does to find reachable objects, seeding the list with some global +objects and the currently executing frames. + +This phase moves objects to the `visited` space, as follows: + +1. 
All objects directly referred to by any builtin class, the `sys` module, the `builtins` +module and all objects directly referred to from stack frames are added to a working +set of reachable objects. +2. Until this working set is empty: + 1. Pop an object from the set and move it to the `visited` space + 2. For each object directly reachable from that object: + * If it is not already in `visited` space and it is a GC object, + add it to the working set + + +Before each increment of collection is performed, the stacks are scanned +to check for any new stack frames that have been created since the last +increment. All objects directly referred to from those stack frames are +added to the working set. +Then the above algorithm is repeated, starting from step 2. + + Optimization: reusing fields to save memory =========================================== diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 0372815b9bfd27..2de4459bfe5ca0 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -31,6 +31,11 @@ def __new__(cls, *args, **kwargs): return C ContainerNoGC = None +try: + import _testinternalcapi +except ImportError: + _testinternalcapi = None + ### Support code ############################################################################### @@ -1130,6 +1135,7 @@ def setUp(self): def tearDown(self): gc.disable() + @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") @requires_gil_enabled("Free threading does not support incremental GC") # Use small increments to emulate longer running process in a shorter time @gc_threshold(200, 10) @@ -1167,20 +1173,15 @@ def make_ll(depth): enabled = gc.isenabled() gc.enable() olds = [] + initial_heap_size = _testinternalcapi.get_heap_size() for i in range(20_000): newhead = make_ll(20) count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: - stats = gc.get_stats() - young = stats[0] - incremental = stats[1] - old = stats[2] - collected = young['collected'] + 
incremental['collected'] + old['collected'] - count += CORRECTION - live = count - collected - self.assertLess(live, 25000) + new_objects = _testinternalcapi.get_heap_size() - initial_heap_size + self.assertLess(new_objects, 25_000) del olds[:] if not enabled: gc.disable() @@ -1322,7 +1323,8 @@ def test_refcount_errors(self): from test.support import gc_collect, SuppressCrashReport a = [1, 2, 3] - b = [a] + b = [a, a] + a.append(b) # Avoid coredump when Py_FatalError() calls abort() SuppressCrashReport().__enter__() @@ -1332,6 +1334,8 @@ def test_refcount_errors(self): # (to avoid deallocating it): import ctypes ctypes.pythonapi.Py_DecRef(ctypes.py_object(a)) + del a + del b # The garbage collector should now have a fatal error # when it reaches the broken object @@ -1360,7 +1364,7 @@ def test_refcount_errors(self): self.assertRegex(stderr, br'object type name: list') self.assertRegex(stderr, - br'object repr : \[1, 2, 3\]') + br'object repr : \[1, 2, 3, \[\[...\], \[...\]\]\]') class GCTogglingTests(unittest.TestCase): diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index a925191d479bd6..0ef4902ea0756b 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2076,6 +2076,11 @@ has_deferred_refcount(PyObject *self, PyObject *op) return PyBool_FromLong(_PyObject_HasDeferredRefcount(op)); } +static PyObject * +get_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size); +} static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, @@ -2174,6 +2179,7 @@ static PyMethodDef module_functions[] = { {"get_static_builtin_types", get_static_builtin_types, METH_NOARGS}, {"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS}, {"has_deferred_refcount", has_deferred_refcount, METH_O}, + {"get_heap_size", get_heap_size, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; From 5c3a5effc10718131a9da5b494cabfb7a5fcee29 Mon Sep 17 
00:00:00 2001 From: Mark Shannon Date: Thu, 21 Nov 2024 09:19:52 +0000 Subject: [PATCH 04/18] Rename get_heap_size --- Lib/test/test_gc.py | 4 ++-- Modules/_testinternalcapi.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 2de4459bfe5ca0..a68b92d5903969 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1173,14 +1173,14 @@ def make_ll(depth): enabled = gc.isenabled() gc.enable() olds = [] - initial_heap_size = _testinternalcapi.get_heap_size() + initial_heap_size = _testinternalcapi.get_tracked_heap_size() for i in range(20_000): newhead = make_ll(20) count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: - new_objects = _testinternalcapi.get_heap_size() - initial_heap_size + new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size self.assertLess(new_objects, 25_000) del olds[:] if not enabled: diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 0ef4902ea0756b..1bb71a3e80b39d 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2077,7 +2077,7 @@ has_deferred_refcount(PyObject *self, PyObject *op) } static PyObject * -get_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored)) +get_tracked_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored)) { return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size); } @@ -2179,7 +2179,7 @@ static PyMethodDef module_functions[] = { {"get_static_builtin_types", get_static_builtin_types, METH_NOARGS}, {"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS}, {"has_deferred_refcount", has_deferred_refcount, METH_O}, - {"get_heap_size", get_heap_size, METH_NOARGS}, + {"get_tracked_heap_size", get_tracked_heap_size, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; From 0ac1c07febd92b4e6aea135aaac1b35558e8f4ff Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 21 Nov 2024 09:26:09 +0000 Subject: [PATCH 05/18] Set visited on entry frame 
--- Python/ceval.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/ceval.c b/Python/ceval.c index 892dc5f7b58ff8..ad086aa540e741 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -817,6 +817,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1; entry_frame.stackpointer = entry_frame.localsplus; entry_frame.owner = FRAME_OWNED_BY_CSTACK; + entry_frame.visited = 0; entry_frame.return_offset = 0; /* Push frame */ entry_frame.previous = tstate->current_frame; From bb51b19ff3ea8ffa911a7bc24449919a254aefda Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 21 Nov 2024 11:54:13 +0000 Subject: [PATCH 06/18] Apply more diff, but comment out marking step --- Python/gc.c | 210 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 181 insertions(+), 29 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 977863ae3f22d1..48ffdb59eb5678 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1297,6 +1297,7 @@ gc_collect_young(PyThreadState *tstate, validate_spaces(gcstate); PyGC_Head *young = &gcstate->young.head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + untrack_tuples(young); GC_STAT_ADD(0, collections, 1); #ifdef Py_STATS { @@ -1312,30 +1313,10 @@ gc_collect_young(PyThreadState *tstate, gc_list_init(&survivors); gc_list_set_space(young, gcstate->visited_space); gc_collect_region(tstate, young, &survivors, stats); - Py_ssize_t survivor_count = 0; - if (gcstate->visited_space) { - /* objects in visited space have bit set, so we set it here */ - survivor_count = gc_list_set_space(&survivors, 1); - } - else { - PyGC_Head *gc; - for (gc = GC_NEXT(&survivors); gc != &survivors; gc = GC_NEXT(gc)) { -#ifdef GC_DEBUG - assert(gc_old_space(gc) == 0); -#endif - survivor_count++; - } - } - (void)survivor_count; // Silence compiler warning gc_list_merge(&survivors, visited); validate_spaces(gcstate); gcstate->young.count = 
0; gcstate->old[gcstate->visited_space].count++; - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; add_stats(gcstate, 0, stats); validate_spaces(gcstate); } @@ -1352,7 +1333,7 @@ IS_IN_VISITED(PyGC_Head *gc, int visited_space) struct container_and_flag { PyGC_Head *container; int visited_space; - uintptr_t size; + intptr_t size; }; /* A traversal callback for adding to container) */ @@ -1375,7 +1356,7 @@ visit_add_to_container(PyObject *op, void *arg) return 0; } -static uintptr_t +static intptr_t expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) { struct container_and_flag arg = { @@ -1416,8 +1397,168 @@ completed_cycle(GCState *gcstate) /* Make sure all objects have visited bit set correctly */ gc_list_set_space(&gcstate->young.head, not_visited); gc_list_set_space(&gcstate->permanent_generation.head, visited); - gcstate->work_to_do = 0; assert(gc_list_is_empty(&gcstate->old[visited].head)); + gcstate->work_to_do = 0; + gcstate->phase = GC_PHASE_MARK; +} + +static intptr_t +move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + gc_list_move(gc, reachable); + return 1; + } + } + return 0; +} + +static intptr_t +mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +{ + // Transitively traverse all objects from reachable, until empty + struct container_and_flag arg = { + .container = reachable, + .visited_space = visited_space, + .size = 0 + }; + while (!gc_list_is_empty(reachable)) { + PyGC_Head *gc = _PyGCHead_NEXT(reachable); + assert(gc_old_space(gc) == visited_space); + gc_list_move(gc, visited); + PyObject *op = FROM_GC(gc); + traverseproc 
traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + visit_add_to_container, + &arg); + } + gc_list_validate_space(visited, visited_space); + return arg.size; +} + +static intptr_t +mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) +{ + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + // Move all objects on stacks to reachable + _PyRuntimeState *runtime = &_PyRuntime; + HEAD_LOCK(runtime); + PyThreadState* ts = PyInterpreterState_ThreadHead(interp); + HEAD_UNLOCK(runtime); + while (ts) { + _PyInterpreterFrame *frame = ts->current_frame; + while (frame) { + if (frame->owner == FRAME_OWNED_BY_CSTACK) { + frame = frame->previous; + continue; + } + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + objects_marked += move_to_reachable(func, &reachable, visited_space); + while (sp > locals) { + sp--; + if (PyStackRef_IsNull(*sp)) { + continue; + } + PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); + if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + objects_marked++; + gc_list_move(gc, &reachable); + } + } + } + if (!start && frame->visited) { + // If this frame has already been visited, then the lower frames + // will have already been visited and will not have changed + break; + } + frame->visited = 1; + frame = frame->previous; + } + HEAD_LOCK(runtime); + ts = PyThreadState_Next(ts); + HEAD_UNLOCK(runtime); + } + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); + return objects_marked; +} + +static intptr_t +mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) +{ + PyGC_Head 
reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); + objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); + objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + struct types_state *types = &interp->types; + for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + } + for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + } + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); + return objects_marked; +} + +static intptr_t +mark_at_start(PyThreadState *tstate) +{ + // TO DO -- Make this incremental + GCState *gcstate = &tstate->interp->gc; + PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); + objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); + gcstate->work_to_do -= objects_marked; + gcstate->phase = GC_PHASE_COLLECT; + validate_spaces(gcstate); + return objects_marked; +} + +static intptr_t +assess_work_to_do(GCState *gcstate) +{ + /* The amount of work we want to do depends on three things. + * 1. The number of new objects created + * 2. The growth in heap size since the last collection + * 3. 
The heap size (up to the number of new objects, to avoid quadratic effects) + * + * For a steady state heap, the amount of work to do is three times the number + * of new objects added to the heap. This ensures that we stay ahead in the + * worst case of all new objects being garbage. + * + * This could be improved by tracking survival rates, but it is still a + * large improvement on the non-marking approach. + */ + Py_ssize_t scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; + } + Py_ssize_t new_objects = gcstate->young.count; + Py_ssize_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_fraction > new_objects*2) { + heap_fraction = new_objects*2; + } + gcstate->young.count = 0; + return new_objects + heap_fraction; } static void @@ -1425,19 +1566,29 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) { GC_STAT_ADD(1, collections, 1); GCState *gcstate = &tstate->interp->gc; + gcstate->work_to_do += assess_work_to_do(gcstate); + untrack_tuples(&gcstate->young.head); +// if (gcstate->phase == GC_PHASE_MARK) { +// Py_ssize_t objects_marked = mark_at_start(tstate); +// GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); +// gcstate->work_to_do -= objects_marked; +// return; +// } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; PyGC_Head increment; gc_list_init(&increment); - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; + int scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; } + intptr_t objects_marked = 0; // mark_stacks(tstate->interp, visited, gcstate->visited_space, false); + GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); + gcstate->work_to_do -= objects_marked; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); 
gc_list_merge(&gcstate->young.head, &increment); - gcstate->young.count = 0; gc_list_validate_space(&increment, gcstate->visited_space); - Py_ssize_t increment_size = 0; + Py_ssize_t increment_size = gc_list_size(&increment); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; @@ -1449,12 +1600,12 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_set_old_space(gc, gcstate->visited_space); increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); } + GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); PyGC_Head survivors; gc_list_init(&survivors); gc_collect_region(tstate, &increment, &survivors, stats); - gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; @@ -1477,6 +1628,7 @@ gc_collect_full(PyThreadState *tstate, PyGC_Head *young = &gcstate->young.head; PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + untrack_tuples(young); /* merge all generations into visited */ gc_list_merge(young, pending); gc_list_validate_space(pending, 1-gcstate->visited_space); From be636c95476b1475671175129f668e9f569de0db Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 21 Nov 2024 12:08:23 +0000 Subject: [PATCH 07/18] Refactor work to do calculation --- Python/gc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 48ffdb59eb5678..16c9167bf1c71e 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1548,14 +1548,15 @@ assess_work_to_do(GCState *gcstate) * This could be improved by tracking survival rates, but it is still a * large improvement on the 
non-marking approach. */ - Py_ssize_t scale_factor = gcstate->old[0].threshold; + intptr_t scale_factor = gcstate->old[0].threshold; if (scale_factor < 2) { scale_factor = 2; } - Py_ssize_t new_objects = gcstate->young.count; - Py_ssize_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; - if (heap_fraction > new_objects*2) { - heap_fraction = new_objects*2; + intptr_t new_objects = gcstate->young.count; + intptr_t max_heap_fraction = new_objects * 3/2; + intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_fraction > max_heap_fraction) { + heap_fraction = max_heap_fraction; } gcstate->young.count = 0; return new_objects + heap_fraction; From 287f3811036e96a59ed3e0ec8cf3b6b269a2e8c2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 21 Nov 2024 16:09:51 +0000 Subject: [PATCH 08/18] Turn on marking. Explain issue with tuple untracking --- Lib/test/libregrtest/refleak.py | 2 +- Python/gc.c | 34 ++++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/Lib/test/libregrtest/refleak.py b/Lib/test/libregrtest/refleak.py index e783475cc7a36b..d0d1c8cdc9a11b 100644 --- a/Lib/test/libregrtest/refleak.py +++ b/Lib/test/libregrtest/refleak.py @@ -123,9 +123,9 @@ def get_pooled_int(value): xml_filename = 'refleak-xml.tmp' result = None dash_R_cleanup(fs, ps, pic, zdc, abcs) - support.gc_collect() for i in rep_range: + support.gc_collect() current = refleak_helper._hunting_for_refleaks refleak_helper._hunting_for_refleaks = True try: diff --git a/Python/gc.c b/Python/gc.c index 16c9167bf1c71e..2b8a2abfaba6f8 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -734,13 +734,25 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) unreachable->_gc_next &= _PyGC_PREV_MASK; } +/* In theory, all tuples should be younger than the +* objects they refer to, as tuples are immutable.
+* Therefore, untracking tuples in oldest-first order in the +* young generation before promoting them should have untracked +* all the tuples that can be untracked. +* +* Unfortunately, the C API allows tuples to be created +* and then filled in. So this won't untrack all tuples +* that can be untracked. It should untrack most of them +* and is much faster than a more complex approach that +* would untrack all relevant tuples. +*/ static void untrack_tuples(PyGC_Head *head) { - PyGC_Head *next, *gc = GC_NEXT(head); + PyGC_Head *gc = GC_NEXT(head); while (gc != head) { PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); + PyGC_Head *next = GC_NEXT(gc); if (PyTuple_CheckExact(op)) { _PyTuple_MaybeUntrack(op); } @@ -1553,7 +1565,7 @@ assess_work_to_do(GCState *gcstate) scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_fraction = new_objects * 3/2; + intptr_t max_heap_fraction = new_objects*3/2; intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; if (heap_fraction > max_heap_fraction) { heap_fraction = max_heap_fraction; @@ -1569,12 +1581,13 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) GCState *gcstate = &tstate->interp->gc; gcstate->work_to_do += assess_work_to_do(gcstate); untrack_tuples(&gcstate->young.head); -// if (gcstate->phase == GC_PHASE_MARK) { -// Py_ssize_t objects_marked = mark_at_start(tstate); -// GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); -// gcstate->work_to_do -= objects_marked; -// return; -// } + if (gcstate->phase == GC_PHASE_MARK) { + Py_ssize_t objects_marked = mark_at_start(tstate); + GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); + gcstate->work_to_do -= objects_marked; + validate_spaces(gcstate); + return; + } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; PyGC_Head increment; @@ -1583,7 +1596,7 @@
gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (scale_factor < 2) { scale_factor = 2; } - intptr_t objects_marked = 0; // mark_stacks(tstate->interp, visited, gcstate->visited_space, false); + intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); gcstate->work_to_do -= objects_marked; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); @@ -1645,7 +1658,6 @@ gc_collect_full(PyThreadState *tstate, gcstate->old[0].count = 0; gcstate->old[1].count = 0; completed_cycle(gcstate); - gcstate->work_to_do = - gcstate->young.threshold * 2; _PyGC_ClearAllFreeLists(tstate->interp); validate_spaces(gcstate); add_stats(gcstate, 2, stats); From 7e920d66f5156f5bf98079d3858cf5af612d5151 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 21 Nov 2024 16:13:59 +0000 Subject: [PATCH 09/18] Add news --- .../2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst new file mode 100644 index 00000000000000..9ef2b8dc33ed0f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst @@ -0,0 +1,4 @@ +Add a marking phase to the GC. All objects that can be transitively reached +from builtin modules or the stacks are marked as reachable before cycle +detection. This reduces the amount of work done by the GC by approximately +half. 
From 0b46494423badd5aeb63005455c2c3cc2feea813 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 21 Nov 2024 16:19:20 +0000 Subject: [PATCH 10/18] Add stats for marking --- Python/specialize.c | 2 ++ Tools/scripts/summarize_stats.py | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index ad41dfc39c0147..c7eeb8c670e475 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -231,6 +231,8 @@ print_gc_stats(FILE *out, GCStats *stats) fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections); fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits); fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected); + fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable); + fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable); } } diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 5793e5c649d6b3..296a31d5916bd8 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -1118,6 +1118,8 @@ def calc_gc_stats(stats: Stats) -> Rows: Count(gen["collections"]), Count(gen["objects collected"]), Count(gen["object visits"]), + Count(gen["objects reachable from roots"]), + Count(gen["objects not reachable from roots"]), ) for (i, gen) in enumerate(gc_stats) ] @@ -1127,7 +1129,8 @@ def calc_gc_stats(stats: Stats) -> Rows: "GC collections and effectiveness", [ Table( - ("Generation:", "Collections:", "Objects collected:", "Object visits:"), + ("Generation:", "Collections:", "Objects collected:", "Object visits:", + "Reachable from roots:", "Not reachable from roots:"), calc_gc_stats, ) ], From 698abb384fd1d156ed78bbad723680ed03d2ca61 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 21 Nov 2024 16:36:44 +0000 Subject: [PATCH 11/18] Do not touch permanent 
generation --- Python/gc.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 2b8a2abfaba6f8..d7b2fa5fc37da7 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1400,15 +1400,28 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat /* Do bookkeeping for a completed GC cycle */ static void -completed_cycle(GCState *gcstate) -{ - /* Flip spaces */ - int not_visited = gcstate->visited_space; - int visited = other_space(not_visited); - gcstate->visited_space = visited; - /* Make sure all objects have visited bit set correctly */ - gc_list_set_space(&gcstate->young.head, not_visited); - gc_list_set_space(&gcstate->permanent_generation.head, visited); +completed_scavenge(GCState *gcstate) +{ + /* We must observe two invariants: + * 1. Members of the permanent generation must be marked visited. + * 2. We cannot touch members of the permanent generation. */ + int visited; + if (gc_list_is_empty(&gcstate->permanent_generation.head)) { + /* Permanent generation is empty so we can flip spaces bit */ + int not_visited = gcstate->visited_space; + visited = other_space(not_visited); + gcstate->visited_space = visited; + /* Make sure all objects have visited bit set correctly */ + gc_list_set_space(&gcstate->young.head, not_visited); + } + else { + /* We must move the objects from visited to pending space. 
*/ + visited = gcstate->visited_space; + int not_visited = other_space(visited); + assert(gc_list_is_empty(&gcstate->old[not_visited].head)); + gc_list_merge(&gcstate->old[visited].head, &gcstate->old[not_visited].head); + gc_list_set_space(&gcstate->old[not_visited].head, not_visited); + } assert(gc_list_is_empty(&gcstate->old[visited].head)); gcstate->work_to_do = 0; gcstate->phase = GC_PHASE_MARK; @@ -1627,7 +1640,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) add_stats(gcstate, 1, stats); if (gc_list_is_empty(not_visited)) { - completed_cycle(gcstate); + completed_scavenge(gcstate); } validate_spaces(gcstate); } @@ -1657,7 +1670,7 @@ gc_collect_full(PyThreadState *tstate, gcstate->young.count = 0; gcstate->old[0].count = 0; gcstate->old[1].count = 0; - completed_cycle(gcstate); + completed_scavenge(gcstate); _PyGC_ClearAllFreeLists(tstate->interp); validate_spaces(gcstate); add_stats(gcstate, 2, stats); From 6d8a0d46481ee011a28ce36a81ac146710b37525 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 22 Nov 2024 09:13:56 +0000 Subject: [PATCH 12/18] Move informative failure message in test --- Lib/test/test_gc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index a68b92d5903969..36517ba82f69a4 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1181,7 +1181,7 @@ def make_ll(depth): olds.append(newhead) if len(olds) == 20: new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size - self.assertLess(new_objects, 25_000) + self.assertLess(new_objects, 25_000, f"Heap growing. 
Reached limit after {i} iterations") del olds[:] if not enabled: gc.disable() From d9632c6266919ce6de0a02bc8b3bee6af7c60622 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 22 Nov 2024 10:56:28 +0000 Subject: [PATCH 13/18] Increase threshold a bit --- Lib/test/test_gc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 36517ba82f69a4..b5140057a69d36 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1181,7 +1181,7 @@ def make_ll(depth): olds.append(newhead) if len(olds) == 20: new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size - self.assertLess(new_objects, 25_000, f"Heap growing. Reached limit after {i} iterations") + self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations") del olds[:] if not enabled: gc.disable() From 0702959f539a05e6c266308613a1038d288ffcdd Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 22 Nov 2024 11:21:47 +0000 Subject: [PATCH 14/18] Increase number of iterations --- Lib/test/test_gc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b5140057a69d36..68e88f1a9a7329 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1174,7 +1174,7 @@ def make_ll(depth): gc.enable() olds = [] initial_heap_size = _testinternalcapi.get_tracked_heap_size() - for i in range(20_000): + for i in range(100_000): newhead = make_ll(20) count += 20 newhead.surprise = head From 6063dc82990eb4706a24c65417cd4ec4c8fa5e9c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 22 Nov 2024 11:53:35 +0000 Subject: [PATCH 15/18] Return number of iterations to 20k --- Lib/test/test_gc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 68e88f1a9a7329..b5140057a69d36 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1174,7 +1174,7 @@ def make_ll(depth): gc.enable() olds = [] 
initial_heap_size = _testinternalcapi.get_tracked_heap_size() - for i in range(100_000): + for i in range(20_000): newhead = make_ll(20) count += 20 newhead.surprise = head From 67b1e05df7b6627acc83a20b552ec5153e734d49 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 22 Nov 2024 11:54:59 +0000 Subject: [PATCH 16/18] Fix compiler warning --- Python/gc.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index d7b2fa5fc37da7..e15ab0c6e16150 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -456,7 +456,6 @@ validate_spaces(GCState *gcstate) static void validate_consistent_old_space(PyGC_Head *head) { - PyGC_Head *prev = head; PyGC_Head *gc = GC_NEXT(head); if (gc == head) { return; @@ -466,10 +465,8 @@ validate_consistent_old_space(PyGC_Head *head) PyGC_Head *truenext = GC_NEXT(gc); assert(truenext != NULL); assert(gc_old_space(gc) == old_space); - prev = gc; gc = truenext; } - assert(prev == GC_PREV(head)); } From eaea41e20bef53ab60ca7e0dc9dc826f75c96175 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 22 Nov 2024 12:06:32 +0000 Subject: [PATCH 17/18] Turn off extra debugging --- Python/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/gc.c b/Python/gc.c index e15ab0c6e16150..928ec9f8a7f23b 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -24,7 +24,7 @@ typedef struct _gc_runtime_state GCState; #endif // Define this when debugging the GC -#define GC_EXTRA_DEBUG +// #define GC_EXTRA_DEBUG #define GC_NEXT _PyGCHead_NEXT From 79ab26c850aad5be9755f9abce34fd5d32c3fb47 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 22 Nov 2024 12:16:26 +0000 Subject: [PATCH 18/18] Add missing macro --- Python/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/gc.c b/Python/gc.c index 928ec9f8a7f23b..5b9588c8741b97 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -471,7 +471,7 @@ validate_consistent_old_space(PyGC_Head *head) #else -#define validate_old(g) do{}while(0) +#define validate_spaces(g) 
do{}while(0) #define validate_consistent_old_space(l) do{}while(0) #define gc_list_validate_space(l, s) do{}while(0) #endif