Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 19b74c7

Browse files
committed
OK, I couldn't stand it <0.5 wink>: removed all uncertainty about what's
in gc_refs, even at the cost of putting back a test+branch in visit_decref. The good news: since gc_refs became utterly tame then, it became clear that another special value could be useful. The move_roots() and move_root_reachable() passes have now been replaced by a single move_unreachable() pass. Besides saving a pass over the generation, this has a better effect: most of the time everything turns out to be reachable, so we were breaking the generation list apart and moving it into the reachable list, one element at a time. Now the reachable stuff stays in the generation list, and the unreachable stuff is moved instead. This isn't quite as good as it sounds, since sometimes we guess wrongly that a thing is unreachable, and have to move it back again. Still, overall, it yields a significant (but not dramatic) boost in collection speed.
1 parent 93cd83e commit 19b74c7

1 file changed

Lines changed: 163 additions & 96 deletions

File tree

Modules/gcmodule.c

Lines changed: 163 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -74,17 +74,20 @@ static int debug;
7474

7575
/* When a collection begins, gc_refs is set to ob_refcnt for, and only for,
7676
* the objects in the generation being collected, called the "young"
77-
* generation at that point. As collection proceeds, when it's determined
78-
* that one of these can't be collected (e.g., because it's reachable from
79-
* outside, or has a __del__ method), the object is moved out of young, and
80-
* gc_refs is set to a negative value. The latter is so we can distinguish
81-
* collection candidates from non-candidates just by looking at the object.
77+
* generation at that point. As collection proceeds, the gc_refs members
78+
* of young objects are set to GC_REACHABLE when it becomes known that they're
79+
* uncollectable, and to GC_TENTATIVELY_UNREACHABLE when the evidence
80+
* suggests they are collectable (this can't be known for certain until all
81+
* of the young generation is scanned).
8282
*/
83-
/* Special gc_refs value, although any negative value means "moved". */
84-
#define GC_MOVED -123
8583

86-
/* True iff an object is still a candidate for collection. */
87-
#define STILL_A_CANDIDATE(o) ((AS_GC(o))->gc.gc_refs >= 0)
84+
/* Special gc_refs values. */
85+
#define GC_REACHABLE -123
86+
#define GC_TENTATIVELY_UNREACHABLE -42
87+
88+
#define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE)
89+
#define IS_TENTATIVELY_UNREACHABLE(o) ( \
90+
(AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE)
8891

8992
/* list of uncollectable objects */
9093
static PyObject *garbage;
@@ -168,41 +171,40 @@ gc_list_size(PyGC_Head *list)
168171
/*** end of list stuff ***/
169172

170173

171-
172-
/* Set all gc_refs = ob_refcnt. After this, STILL_A_CANDIDATE(o) is true
173-
* for all objects in containers, and false for all tracked gc objects not
174-
* in containers (although see the comment in visit_decref).
174+
/* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 for all objects
175+
* in containers, and is GC_REACHABLE for all tracked gc objects not in
176+
* containers.
175177
*/
176178
static void
177179
update_refs(PyGC_Head *containers)
178180
{
179181
PyGC_Head *gc = containers->gc.gc_next;
180-
for (; gc != containers; gc=gc->gc.gc_next) {
182+
for (; gc != containers; gc = gc->gc.gc_next)
181183
gc->gc.gc_refs = FROM_GC(gc)->ob_refcnt;
182-
}
183184
}
184185

186+
/* A traversal callback for subtract_refs. */
185187
static int
186188
visit_decref(PyObject *op, void *data)
187189
{
188-
/* There's no point to decrementing gc_refs unless
189-
* STILL_A_CANDIDATE(op) is true. It would take extra cycles to
190-
* check that, though. If STILL_A_CANDIDATE(op) is false,
191-
* decrementing gc_refs almost always makes it "even more negative",
192-
* so doesn't change that STILL_A_CANDIDATE is false, and no harm is
193-
* done. However, it's possible that, after many collections, this
194-
* could underflow gc_refs in a long-lived old object. In that case,
195-
* visit_move() may move the old object back to the generation
196-
* getting collected. That would be a waste of time, but wouldn't
197-
* cause an error.
198-
*/
199190
assert(op != NULL);
200-
if (PyObject_IS_GC(op))
201-
AS_GC(op)->gc.gc_refs--;
191+
if (PyObject_IS_GC(op)) {
192+
PyGC_Head *gc = AS_GC(op);
193+
/* We're only interested in gc_refs for objects in the
194+
* generation being collected, which can be recognized
195+
* because only they have positive gc_refs.
196+
*/
197+
if (gc->gc.gc_refs > 0)
198+
gc->gc.gc_refs--;
199+
}
202200
return 0;
203201
}
204202

205-
/* Subtract internal references from gc_refs */
203+
/* Subtract internal references from gc_refs. After this, gc_refs is >= 0
204+
* for all objects in containers, and is GC_REACHABLE for all tracked gc
205+
* objects not in containers. The ones with gc_refs > 0 are directly
206+
* reachable from outside containers, and so can't be collected.
207+
*/
206208
static void
207209
subtract_refs(PyGC_Head *containers)
208210
{
@@ -216,52 +218,100 @@ subtract_refs(PyGC_Head *containers)
216218
}
217219
}
218220

219-
/* Move objects with gc_refs > 0 to roots list. They can't be collected. */
220-
static void
221-
move_roots(PyGC_Head *containers, PyGC_Head *roots)
222-
{
223-
PyGC_Head *next;
224-
PyGC_Head *gc = containers->gc.gc_next;
225-
while (gc != containers) {
226-
next = gc->gc.gc_next;
227-
if (gc->gc.gc_refs > 0) {
228-
gc_list_remove(gc);
229-
gc_list_append(gc, roots);
230-
gc->gc.gc_refs = GC_MOVED;
231-
}
232-
gc = next;
233-
}
234-
}
235-
221+
/* A traversal callback for move_unreachable. */
236222
static int
237-
visit_move(PyObject *op, PyGC_Head *tolist)
223+
visit_reachable(PyObject *op, PyGC_Head *reachable)
238224
{
239-
if (PyObject_IS_GC(op)) {
240-
if (IS_TRACKED(op) && STILL_A_CANDIDATE(op)) {
241-
PyGC_Head *gc = AS_GC(op);
225+
if (PyObject_IS_GC(op) && IS_TRACKED(op)) {
226+
PyGC_Head *gc = AS_GC(op);
227+
const int gc_refs = gc->gc.gc_refs;
228+
229+
if (gc_refs == 0) {
230+
/* This is in move_unreachable's 'young' list, but
231+
* the traversal hasn't yet gotten to it. All
232+
* we need to do is tell move_unreachable that it's
233+
* reachable.
234+
*/
235+
gc->gc.gc_refs = 1;
236+
}
237+
else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) {
238+
/* This had gc_refs = 0 when move_unreachable got
239+
* to it, but turns out it's reachable after all.
240+
* Move it back to move_unreachable's 'young' list,
241+
* and move_unreachable will eventually get to it
242+
* again.
243+
*/
242244
gc_list_remove(gc);
243-
gc_list_append(gc, tolist);
244-
gc->gc.gc_refs = GC_MOVED;
245+
gc_list_append(gc, reachable);
246+
gc->gc.gc_refs = 1;
245247
}
248+
/* Else there's nothing to do.
249+
* If gc_refs > 0, it must be in move_unreachable's 'young'
250+
* list, and move_unreachable will eventually get to it.
251+
* If gc_refs == GC_REACHABLE, it's either in some other
252+
* generation so we don't care about it, or move_unreachable
253+
* already dealt with it.
254+
*/
246255
}
247256
return 0;
248257
}
249258

250-
/* Move candidates referenced from reachable to reachable set (they're no
251-
* longer candidates).
259+
/* Move the unreachable objects from young to unreachable. After this,
260+
* all objects in young have gc_refs = GC_REACHABLE, and all objects in
261+
* unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE. All tracked
262+
* gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
263+
* All objects in young after this are directly or indirectly reachable
264+
* from outside the original young; and all objects in unreachable are
265+
* not.
252266
*/
253267
static void
254-
move_root_reachable(PyGC_Head *reachable)
268+
move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
255269
{
256-
traverseproc traverse;
257-
PyGC_Head *gc = reachable->gc.gc_next;
258-
for (; gc != reachable; gc=gc->gc.gc_next) {
259-
/* careful, reachable list is growing here */
260-
PyObject *op = FROM_GC(gc);
261-
traverse = op->ob_type->tp_traverse;
262-
(void) traverse(op,
263-
(visitproc)visit_move,
264-
(void *)reachable);
270+
PyGC_Head *gc = young->gc.gc_next;
271+
272+
/* Invariants: all objects "to the left" of us in young have gc_refs
273+
* = GC_REACHABLE, and are indeed reachable (directly or indirectly)
274+
* from outside the young list as it was at entry. All other objects
275+
* from the original young "to the left" of us are in unreachable now,
276+
* and have gc_refs = GC_TENTATIVELY_UNREACHABLE. All objects to the
277+
* left of us in 'young' now have been scanned, and no objects here
278+
* or to the right have been scanned yet.
279+
*/
280+
281+
while (gc != young) {
282+
PyGC_Head *next;
283+
284+
if (gc->gc.gc_refs == 0) {
285+
/* This *may* be unreachable. To make progress,
286+
* assume it is. gc isn't directly reachable from
287+
* any object we've already traversed, but may be
288+
* reachable from an object we haven't gotten to yet.
289+
* visit_reachable will eventually move gc back into
290+
* young if that's so, and we'll see it again.
291+
*/
292+
next = gc->gc.gc_next;
293+
gc_list_remove(gc);
294+
gc_list_append(gc, unreachable);
295+
gc->gc.gc_refs = GC_TENTATIVELY_UNREACHABLE;
296+
}
297+
else {
298+
/* gc is definitely reachable from outside the
299+
* original 'young'. Mark it as such, and traverse
300+
* its pointers to find any other objects that may
301+
* be directly reachable from it. Note that the
302+
* call to tp_traverse may append objects to young,
303+
* so we have to wait until it returns to determine
304+
* the next object to visit.
305+
*/
306+
PyObject *op = FROM_GC(gc);
307+
traverseproc traverse = op->ob_type->tp_traverse;
308+
gc->gc.gc_refs = GC_REACHABLE;
309+
(void) traverse(op,
310+
(visitproc)visit_reachable,
311+
(void *)young);
312+
next = gc->gc.gc_next;
313+
}
314+
gc = next;
265315
}
266316
}
267317

@@ -292,12 +342,29 @@ move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
292342
if (has_finalizer(op)) {
293343
gc_list_remove(gc);
294344
gc_list_append(gc, finalizers);
295-
gc->gc.gc_refs = GC_MOVED;
345+
gc->gc.gc_refs = GC_REACHABLE;
346+
}
347+
}
348+
}
349+
350+
/* A traversal callback for move_finalizer_reachable. */
351+
static int
352+
visit_move(PyObject *op, PyGC_Head *tolist)
353+
{
354+
if (PyObject_IS_GC(op)) {
355+
if (IS_TRACKED(op) && IS_TENTATIVELY_UNREACHABLE(op)) {
356+
PyGC_Head *gc = AS_GC(op);
357+
gc_list_remove(gc);
358+
gc_list_append(gc, tolist);
359+
gc->gc.gc_refs = GC_REACHABLE;
296360
}
297361
}
362+
return 0;
298363
}
299364

300-
/* Move objects referenced from roots to roots */
365+
/* Move objects that are reachable from finalizers, from the unreachable set
366+
* into the finalizers set.
367+
*/
301368
static void
302369
move_finalizer_reachable(PyGC_Head *finalizers)
303370
{
@@ -353,19 +420,21 @@ handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
353420
/* If SAVEALL is not set then just append objects with
354421
* finalizers to the list of garbage. All objects in
355422
* the finalizers list are reachable from those
356-
* objects. */
423+
* objects.
424+
*/
357425
PyList_Append(garbage, op);
358426
}
359427
/* object is now reachable again */
360-
assert(!STILL_A_CANDIDATE(op));
428+
assert(IS_REACHABLE(op));
361429
gc_list_remove(gc);
362430
gc_list_append(gc, old);
363431
}
364432
}
365433

366434
/* Break reference cycles by clearing the containers involved. This is
367435
* tricky business as the lists can be changing and we don't know which
368-
* objects may be freed. It is possible I screwed something up here. */
436+
* objects may be freed. It is possible I screwed something up here.
437+
*/
369438
static void
370439
delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
371440
{
@@ -375,7 +444,7 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
375444
PyGC_Head *gc = unreachable->gc.gc_next;
376445
PyObject *op = FROM_GC(gc);
377446

378-
assert(STILL_A_CANDIDATE(op));
447+
assert(IS_TENTATIVELY_UNREACHABLE(op));
379448
if (debug & DEBUG_SAVEALL) {
380449
PyList_Append(garbage, op);
381450
}
@@ -390,7 +459,7 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
390459
/* object is still alive, move it, it may die later */
391460
gc_list_remove(gc);
392461
gc_list_append(gc, old);
393-
gc->gc.gc_refs = GC_MOVED;
462+
gc->gc.gc_refs = GC_REACHABLE;
394463
}
395464
}
396465
}
@@ -401,11 +470,10 @@ static long
401470
collect(int generation)
402471
{
403472
int i;
404-
long n = 0;
405-
long m = 0;
473+
long m = 0; /* # objects collected */
474+
long n = 0; /* # unreachable objects that couldn't be collected */
406475
PyGC_Head *young; /* the generation we are examining */
407476
PyGC_Head *old; /* next older generation */
408-
PyGC_Head reachable;
409477
PyGC_Head unreachable;
410478
PyGC_Head finalizers;
411479
PyGC_Head *gc;
@@ -433,38 +501,37 @@ collect(int generation)
433501

434502
/* handy references */
435503
young = GEN_HEAD(generation);
436-
if (generation < NUM_GENERATIONS-1) {
504+
if (generation < NUM_GENERATIONS-1)
437505
old = GEN_HEAD(generation+1);
438-
} else {
439-
old = GEN_HEAD(NUM_GENERATIONS-1);
440-
}
506+
else
507+
old = young;
441508

442509
/* Using ob_refcnt and gc_refs, calculate which objects in the
443510
* container set are reachable from outside the set (ie. have a
444511
* refcount greater than 0 when all the references within the
445-
* set are taken into account */
512+
* set are taken into account).
513+
*/
446514
update_refs(young);
447515
subtract_refs(young);
448516

449-
/* Move everything reachable from outside the set into the
450-
* reachable set (ie. gc_refs > 0). Next, move everything
451-
* reachable from objects in the reachable set. */
452-
gc_list_init(&reachable);
453-
move_roots(young, &reachable);
454-
move_root_reachable(&reachable);
455-
456-
/* move unreachable objects to a temporary list, new objects can be
457-
* allocated after this point */
517+
/* Leave everything reachable from outside young in young, and move
518+
* everything else (in young) to unreachable.
519+
* NOTE: This used to move the reachable objects into a reachable
520+
* set instead. But most things usually turn out to be reachable,
521+
* so it's more efficient to move the unreachable things.
522+
*/
458523
gc_list_init(&unreachable);
459-
gc_list_move(young, &unreachable);
524+
move_unreachable(young, &unreachable);
460525

461-
/* move reachable objects to next generation */
462-
gc_list_merge(&reachable, old);
526+
/* Move reachable objects to next generation. */
527+
if (young != old)
528+
gc_list_merge(young, old);
463529

464-
/* Move objects reachable from finalizers, we can't safely delete
465-
* them. Python programmers should take care not to create such
466-
* things. For Python finalizers means instance objects with
467-
* __del__ methods. */
530+
/* All objects in unreachable are trash, but objects reachable from
531+
* finalizers can't safely be deleted. Python programmers should take
532+
* care not to create such things. For Python, finalizers means
533+
* instance objects with __del__ methods.
534+
*/
468535
gc_list_init(&finalizers);
469536
move_finalizers(&unreachable, &finalizers);
470537
move_finalizer_reachable(&finalizers);
@@ -478,7 +545,7 @@ collect(int generation)
478545
debug_cycle("collectable", FROM_GC(gc));
479546
}
480547
}
481-
/* call tp_clear on objects in the collectable set. This will cause
548+
/* Call tp_clear on objects in the collectable set. This will cause
482549
* the reference cycles to be broken. It may also cause some objects in
483550
* finalizers to be freed */
484551
delete_garbage(&unreachable, old);

0 commit comments

Comments
 (0)