@@ -74,17 +74,20 @@ static int debug;
7474
7575/* When a collection begins, gc_refs is set to ob_refcnt for, and only for,
7676 * the objects in the generation being collected, called the "young"
77- * generation at that point. As collection proceeds, when it's determined
78- * that one of these can't be collected (e.g., because it's reachable from
79- * outside, or has a __del__ method), the object is moved out of young, and
80- * gc_refs is set to a negative value. The latter is so we can distinguish
81- * collection candidates from non-candidates just by looking at the object .
77+ * generation at that point. As collection proceeds, the gc_refs members
78+ * of young objects are set to GC_REACHABLE when it becomes known that they're
79+ * uncollectable, and to GC_TENTATIVELY_UNREACHABLE when the evidence
80+ * suggests they are collectable (this can't be known for certain until all
81+ * of the young generation is scanned).
8282 */
83- /* Special gc_refs value, although any negative value means "moved". */
84- #define GC_MOVED -123
8583
86- /* True iff an object is still a candidate for collection. */
87- #define STILL_A_CANDIDATE (o ) ((AS_GC(o))->gc.gc_refs >= 0)
84+ /* Special gc_refs values. Both are negative, so neither can be confused with a reference count copied from ob_refcnt. */
85+ #define GC_REACHABLE (-123)
86+ #define GC_TENTATIVELY_UNREACHABLE (-42)
87+
88+ #define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE) /* no space before '(': must be a function-like macro */
89+ #define IS_TENTATIVELY_UNREACHABLE(o) ( \
90+ (AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE)
8891
8992/* list of uncollectable objects */
9093static PyObject * garbage ;
@@ -168,41 +171,40 @@ gc_list_size(PyGC_Head *list)
168171/*** end of list stuff ***/
169172
170173
171-
172- /* Set all gc_refs = ob_refcnt. After this, STILL_A_CANDIDATE(o) is true
173- * for all objects in containers, and false for all tracked gc objects not
174- * in containers (although see the comment in visit_decref).
174+ /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 for all objects
175+ * in containers, and is GC_REACHABLE for all tracked gc objects not in
176+ * containers.
175177 */
176178static void
177179update_refs (PyGC_Head * containers )
178180{
179181 PyGC_Head * gc = containers -> gc .gc_next ; /* first entry after the list head */
180- for (; gc != containers ; gc = gc -> gc .gc_next ) {
182+ for (; gc != containers ; gc = gc -> gc .gc_next ) /* stop once we wrap back around to the head */
181183 gc -> gc .gc_refs = FROM_GC (gc )-> ob_refcnt ; /* seed gc_refs with the object's current refcount */
182- }
183184}
184185
186+ /* A traversal callback for subtract_refs: if op is a GC object whose gc_refs is positive, decrement its gc_refs by one. The data argument is unused; always returns 0. */
185187static int
186188visit_decref (PyObject * op , void * data )
187189{
188- /* There's no point to decrementing gc_refs unless
189- * STILL_A_CANDIDATE(op) is true. It would take extra cycles to
190- * check that, though. If STILL_A_CANDIDATE(op) is false,
191- * decrementing gc_refs almost always makes it "even more negative",
192- * so doesn't change that STILL_A_CANDIDATE is false, and no harm is
193- * done. However, it's possible that, after many collections, this
194- * could underflow gc_refs in a long-lived old object. In that case,
195- * visit_move() may move the old object back to the generation
196- * getting collected. That would be a waste of time, but wouldn't
197- * cause an error.
198- */
199190 assert (op != NULL );
200- if (PyObject_IS_GC (op ))
201- AS_GC (op )-> gc .gc_refs -- ;
191+ if (PyObject_IS_GC (op )) {
192+ PyGC_Head * gc = AS_GC (op );
193+ /* We're only interested in gc_refs for objects in the
194+ * generation being collected, which can be recognized
195+ * because only they have positive gc_refs.
196+ */
197+ if (gc -> gc .gc_refs > 0 )
198+ gc -> gc .gc_refs -- ;
199+ }
202200 return 0 ;
203201}
204202
205- /* Subtract internal references from gc_refs */
203+ /* Subtract internal references from gc_refs. After this, gc_refs is >= 0
204+ * for all objects in containers, and is GC_REACHABLE for all tracked gc
205+ * objects not in containers. The ones with gc_refs > 0 are directly
206+ * reachable from outside containers, and so can't be collected.
207+ */
206208static void
207209subtract_refs (PyGC_Head * containers )
208210{
@@ -216,52 +218,100 @@ subtract_refs(PyGC_Head *containers)
216218 }
217219}
218220
219- /* Move objects with gc_refs > 0 to roots list. They can't be collected. */
220- static void
221- move_roots (PyGC_Head * containers , PyGC_Head * roots )
222- {
223- PyGC_Head * next ;
224- PyGC_Head * gc = containers -> gc .gc_next ;
225- while (gc != containers ) {
226- next = gc -> gc .gc_next ;
227- if (gc -> gc .gc_refs > 0 ) {
228- gc_list_remove (gc );
229- gc_list_append (gc , roots );
230- gc -> gc .gc_refs = GC_MOVED ;
231- }
232- gc = next ;
233- }
234- }
235-
221+ /* A traversal callback for move_unreachable. For a tracked GC object op: gc_refs == 0 is bumped to 1; gc_refs == GC_TENTATIVELY_UNREACHABLE is moved to the end of the 'reachable' list with gc_refs set to 1; any other value is left alone. Always returns 0. */
236222static int
237- visit_move (PyObject * op , PyGC_Head * tolist )
223+ visit_reachable (PyObject * op , PyGC_Head * reachable )
238224{
239- if (PyObject_IS_GC (op )) {
240- if (IS_TRACKED (op ) && STILL_A_CANDIDATE (op )) {
241- PyGC_Head * gc = AS_GC (op );
225+ if (PyObject_IS_GC (op ) && IS_TRACKED (op )) {
226+ PyGC_Head * gc = AS_GC (op );
227+ const int gc_refs = gc -> gc .gc_refs ;
228+
229+ if (gc_refs == 0 ) {
230+ /* This is in move_unreachable's 'young' list, but
231+ * the traversal hasn't yet gotten to it. All
232+ * we need to do is tell move_unreachable that it's
233+ * reachable.
234+ */
235+ gc -> gc .gc_refs = 1 ;
236+ }
237+ else if (gc_refs == GC_TENTATIVELY_UNREACHABLE ) {
238+ /* This had gc_refs = 0 when move_unreachable got
239+ * to it, but turns out it's reachable after all.
240+ * Move it back to move_unreachable's 'young' list,
241+ * and move_unreachable will eventually get to it
242+ * again.
243+ */
242244 gc_list_remove (gc );
243- gc_list_append (gc , tolist );
244- gc -> gc .gc_refs = GC_MOVED ;
245+ gc_list_append (gc , reachable );
246+ gc -> gc .gc_refs = 1 ;
245247 }
248+ /* Else there's nothing to do.
249+ * If gc_refs > 0, it must be in move_unreachable's 'young'
250+ * list, and move_unreachable will eventually get to it.
251+ * If gc_refs == GC_REACHABLE, it's either in some other
252+ * generation so we don't care about it, or move_unreachable
253+ * already dealt with it.
254+ */
246255 }
247256 return 0 ;
248257}
249258
250- /* Move candidates referenced from reachable to reachable set (they're no
251- * longer candidates).
259+ /* Move the unreachable objects from young to unreachable. After this,
260+ * all objects in young have gc_refs = GC_REACHABLE, and all objects in
261+ * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE. All tracked
262+ * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
263+ * All objects in young after this are directly or indirectly reachable
264+ * from outside the original young; and all objects in unreachable are
265+ * not. The caller (collect) runs update_refs and subtract_refs on young first, and passes 'unreachable' as a freshly initialized list.
252266 */
253267static void
254- move_root_reachable (PyGC_Head * reachable )
268+ move_unreachable (PyGC_Head * young , PyGC_Head * unreachable )
255269{
256- traverseproc traverse ;
257- PyGC_Head * gc = reachable -> gc .gc_next ;
258- for (; gc != reachable ; gc = gc -> gc .gc_next ) {
259- /* careful, reachable list is growing here */
260- PyObject * op = FROM_GC (gc );
261- traverse = op -> ob_type -> tp_traverse ;
262- (void ) traverse (op ,
263- (visitproc )visit_move ,
264- (void * )reachable );
270+ PyGC_Head * gc = young -> gc .gc_next ;
271+
272+ /* Invariants: all objects "to the left" of us in young have gc_refs
273+ * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
274+ * from outside the young list as it was at entry. All other objects
275+ * from the original young "to the left" of us are in unreachable now,
276+ * and have gc_refs = GC_TENTATIVELY_UNREACHABLE. All objects to the
277+ * left of us in 'young' now have been scanned, and no objects here
278+ * or to the right have been scanned yet.
279+ */
280+
281+ while (gc != young ) {
282+ PyGC_Head * next ;
283+
284+ if (gc -> gc .gc_refs == 0 ) {
285+ /* This *may* be unreachable. To make progress,
286+ * assume it is. gc isn't directly reachable from
287+ * any object we've already traversed, but may be
288+ * reachable from an object we haven't gotten to yet.
289+ * visit_reachable will eventually move gc back into
290+ * young if that's so, and we'll see it again.
291+ */
292+ next = gc -> gc .gc_next ; /* remember the successor in young before gc is unlinked below */
293+ gc_list_remove (gc );
294+ gc_list_append (gc , unreachable );
295+ gc -> gc .gc_refs = GC_TENTATIVELY_UNREACHABLE ;
296+ }
297+ else {
298+ /* gc is definitely reachable from outside the
299+ * original 'young'. Mark it as such, and traverse
300+ * its pointers to find any other objects that may
301+ * be directly reachable from it. Note that the
302+ * call to tp_traverse may append objects to young,
303+ * so we have to wait until it returns to determine
304+ * the next object to visit.
305+ */
306+ PyObject * op = FROM_GC (gc );
307+ traverseproc traverse = op -> ob_type -> tp_traverse ;
308+ gc -> gc .gc_refs = GC_REACHABLE ;
309+ (void ) traverse (op ,
310+ (visitproc )visit_reachable ,
311+ (void * )young );
312+ next = gc -> gc .gc_next ;
313+ }
314+ gc = next ;
265315 }
266316}
267317
@@ -292,12 +342,29 @@ move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
292342 if (has_finalizer (op )) {
293343 gc_list_remove (gc );
294344 gc_list_append (gc , finalizers );
295- gc -> gc .gc_refs = GC_MOVED ;
345+ gc -> gc .gc_refs = GC_REACHABLE ;
346+ }
347+ }
348+ }
349+
350+ /* A traversal callback for move_finalizer_reachable: if op is a tracked GC object still marked GC_TENTATIVELY_UNREACHABLE, move it to the end of tolist and mark it GC_REACHABLE. Always returns 0. */
351+ static int
352+ visit_move (PyObject * op , PyGC_Head * tolist )
353+ {
354+ if (PyObject_IS_GC (op )) {
355+ if (IS_TRACKED (op ) && IS_TENTATIVELY_UNREACHABLE (op )) {
356+ PyGC_Head * gc = AS_GC (op );
357+ gc_list_remove (gc );
358+ gc_list_append (gc , tolist );
359+ gc -> gc .gc_refs = GC_REACHABLE ;
296360 }
297361 }
362+ return 0 ;
298363}
299364
300- /* Move objects referenced from roots to roots */
365+ /* Move objects that are reachable from finalizers, from the unreachable set
366+ * into the finalizers set.
367+ */
301368static void
302369move_finalizer_reachable (PyGC_Head * finalizers )
303370{
@@ -353,19 +420,21 @@ handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
353420 /* If SAVEALL is not set then just append objects with
354421 * finalizers to the list of garbage. All objects in
355422 * the finalizers list are reachable from those
356- * objects. */
423+ * objects.
424+ */
357425 PyList_Append (garbage , op );
358426 }
359427 /* object is now reachable again */
360- assert (! STILL_A_CANDIDATE (op ));
428+ assert (IS_REACHABLE (op ));
361429 gc_list_remove (gc );
362430 gc_list_append (gc , old );
363431 }
364432}
365433
366434/* Break reference cycles by clearing the containers involved. This is
367435 * tricky business as the lists can be changing and we don't know which
368- * objects may be freed. It is possible I screwed something up here. */
436+ * objects may be freed. It is possible I screwed something up here.
437+ */
369438static void
370439delete_garbage (PyGC_Head * unreachable , PyGC_Head * old )
371440{
@@ -375,7 +444,7 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
375444 PyGC_Head * gc = unreachable -> gc .gc_next ;
376445 PyObject * op = FROM_GC (gc );
377446
378- assert (STILL_A_CANDIDATE (op ));
447+ assert (IS_TENTATIVELY_UNREACHABLE (op ));
379448 if (debug & DEBUG_SAVEALL ) {
380449 PyList_Append (garbage , op );
381450 }
@@ -390,7 +459,7 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
390459 /* object is still alive, move it, it may die later */
391460 gc_list_remove (gc );
392461 gc_list_append (gc , old );
393- gc -> gc .gc_refs = GC_MOVED ;
462+ gc -> gc .gc_refs = GC_REACHABLE ;
394463 }
395464 }
396465}
@@ -401,11 +470,10 @@ static long
401470collect (int generation )
402471{
403472 int i ;
404- long n = 0 ;
405- long m = 0 ;
473+ long m = 0 ; /* # objects collected */
474+ long n = 0 ; /* # unreachable objects that couldn't be collected */
406475 PyGC_Head * young ; /* the generation we are examining */
407476 PyGC_Head * old ; /* next older generation */
408- PyGC_Head reachable ;
409477 PyGC_Head unreachable ;
410478 PyGC_Head finalizers ;
411479 PyGC_Head * gc ;
@@ -433,38 +501,37 @@ collect(int generation)
433501
434502 /* handy references */
435503 young = GEN_HEAD (generation );
436- if (generation < NUM_GENERATIONS - 1 ) {
504+ if (generation < NUM_GENERATIONS - 1 )
437505 old = GEN_HEAD (generation + 1 );
438- } else {
439- old = GEN_HEAD (NUM_GENERATIONS - 1 );
440- }
506+ else
507+ old = young ;
441508
442509 /* Using ob_refcnt and gc_refs, calculate which objects in the
443510 * container set are reachable from outside the set (ie. have a
444511 * refcount greater than 0 when all the references within the
445- * set are taken into account */
512+ * set are taken into account).
513+ */
446514 update_refs (young );
447515 subtract_refs (young );
448516
449- /* Move everything reachable from outside the set into the
450- * reachable set (ie. gc_refs > 0). Next, move everything
451- * reachable from objects in the reachable set. */
452- gc_list_init (& reachable );
453- move_roots (young , & reachable );
454- move_root_reachable (& reachable );
455-
456- /* move unreachable objects to a temporary list, new objects can be
457- * allocated after this point */
517+ /* Leave everything reachable from outside young in young, and move
518+ * everything else (in young) to unreachable.
519+ * NOTE: This used to move the reachable objects into a reachable
520+ * set instead. But most things usually turn out to be reachable,
521+ * so it's more efficient to move the unreachable things.
522+ */
458523 gc_list_init (& unreachable );
459- gc_list_move (young , & unreachable );
524+ move_unreachable (young , & unreachable );
460525
461- /* move reachable objects to next generation */
462- gc_list_merge (& reachable , old );
526+ /* Move reachable objects to next generation. */
527+ if (young != old )
528+ gc_list_merge (young , old );
463529
464- /* Move objects reachable from finalizers, we can't safely delete
465- * them. Python programmers should take care not to create such
466- * things. For Python finalizers means instance objects with
467- * __del__ methods. */
530+ /* All objects in unreachable are trash, but objects reachable from
531+ * finalizers can't safely be deleted. Python programmers should take
532+ * care not to create such things. For Python, finalizers means
533+ * instance objects with __del__ methods.
534+ */
468535 gc_list_init (& finalizers );
469536 move_finalizers (& unreachable , & finalizers );
470537 move_finalizer_reachable (& finalizers );
@@ -478,7 +545,7 @@ collect(int generation)
478545 debug_cycle ("collectable" , FROM_GC (gc ));
479546 }
480547 }
481- /* call tp_clear on objects in the collectable set. This will cause
548+ /* Call tp_clear on objects in the collectable set. This will cause
482549 * the reference cycles to be broken. It may also cause some objects in
483550 * finalizers to be freed */
484551 delete_garbage (& unreachable , old );
0 commit comments