Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 8839617

Browse files
committed
SF bug #574132: Major GC related performance regression

"The regression" is actually due to the fact that 2.2.1 had a bug that prevented the regression (which isn't a regression at all) from showing up. "The regression" is actually a glitch in cyclic gc that's been there forever.

As the generation being collected is analyzed, objects that can't be collected (because, e.g., we find they're externally referenced, or are in an unreachable cycle but have a __del__ method) are moved out of the list of candidates. A tricksy scheme uses negative values of gc_refs to mark such objects as being moved. However, the exact negative value set at the start may become "more negative" over time for objects not in the generation being collected, and the scheme was checking for an exact match on the negative value originally assigned. As a result, objects in generations older than the one being collected could get scanned too, and yanked back into a younger generation. Doing so doesn't lead to an error, but doesn't do any good, and can burn an unbounded amount of time doing useless work.

A test case is simple (thanks to Kevin Jacobs for finding it!):

    x = []
    for i in xrange(200000):
        x.append((1,))

Without the patch, this ends up scanning all of x on every gen0 collection, scans all of x twice on every gen1 collection, and x gets yanked back into gen1 on every gen0 collection. With the patch, once x gets to gen2, it's never scanned again until another gen2 collection, and stays in gen2.

Bugfix candidate, although the code has changed enough that I think I'll need to port it by hand. 2.2.1 also has a different bug that causes bound method objects not to get tracked at all (so the test case doesn't burn absurd amounts of time in 2.2.1, but *should* <wink>).
1 parent 6238d2b commit 8839617

1 file changed

Lines changed: 43 additions & 13 deletions

File tree

Modules/gcmodule.c

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
2+
33
Reference Cycle Garbage Collection
44
==================================
55
@@ -72,11 +72,19 @@ static int collecting;
7272
DEBUG_SAVEALL
7373
static int debug;
7474

75-
/* Special gc_refs value */
75+
/* When a collection begins, gc_refs is set to ob_refcnt for, and only for,
76+
* the objects in the generation being collected, called the "young"
77+
* generation at that point. As collection proceeds, when it's determined
78+
* that one of these can't be collected (e.g., because it's reachable from
79+
* outside, or has a __del__ method), the object is moved out of young, and
80+
* gc_refs is set to a negative value. The latter is so we can distinguish
81+
* collection candidates from non-candidates just by looking at the object.
82+
*/
83+
/* Special gc_refs value, although any negative value means "moved". */
7684
#define GC_MOVED -123
7785

78-
/* True if an object has been moved to the older generation */
79-
#define IS_MOVED(o) ((AS_GC(o))->gc.gc_refs == GC_MOVED)
86+
/* True iff an object is still a candidate for collection. */
87+
#define STILL_A_CANDIDATE(o) ((AS_GC(o))->gc.gc_refs >= 0)
8088

8189
/* list of uncollectable objects */
8290
static PyObject *garbage;
@@ -116,7 +124,7 @@ gc_list_remove(PyGC_Head *node)
116124
node->gc.gc_next = NULL; /* object is not currently tracked */
117125
}
118126

119-
static void
127+
static void
120128
gc_list_move(PyGC_Head *from, PyGC_Head *to)
121129
{
122130
if (gc_list_is_empty(from)) {
@@ -161,7 +169,10 @@ gc_list_size(PyGC_Head *list)
161169

162170

163171

164-
/* Set all gc_refs = ob_refcnt */
172+
/* Set all gc_refs = ob_refcnt. After this, STILL_A_CANDIDATE(o) is true
173+
* for all objects in containers, and false for all tracked gc objects not
174+
* in containers (although see the comment in visit_decref).
175+
*/
165176
static void
166177
update_refs(PyGC_Head *containers)
167178
{
@@ -174,9 +185,21 @@ update_refs(PyGC_Head *containers)
174185
static int
175186
visit_decref(PyObject *op, void *data)
176187
{
188+
/* There's no point to decrementing gc_refs unless
189+
* STILL_A_CANDIDATE(op) is true. It would take extra cycles to
190+
* check that, though. If STILL_A_CANDIDATE(op) is false,
191+
* decrementing gc_refs almost always makes it "even more negative",
192+
* so doesn't change that STILL_A_CANDIDATE is false, and no harm is
193+
* done. However, it's possible that, after many collections, this
194+
* could underflow gc_refs in a long-lived old object. In that case,
195+
* visit_move() may move the old object back to the generation
196+
* getting collected. That would be a waste of time, but wouldn't
197+
* cause an error.
198+
*/
177199
if (op && PyObject_IS_GC(op)) {
178-
if (IS_TRACKED(op))
200+
if (IS_TRACKED(op)) {
179201
AS_GC(op)->gc.gc_refs--;
202+
}
180203
}
181204
return 0;
182205
}
@@ -195,7 +218,7 @@ subtract_refs(PyGC_Head *containers)
195218
}
196219
}
197220

198-
/* Append objects with gc_refs > 0 to roots list */
221+
/* Move objects with gc_refs > 0 to roots list. They can't be collected. */
199222
static void
200223
move_roots(PyGC_Head *containers, PyGC_Head *roots)
201224
{
@@ -216,7 +239,7 @@ static int
216239
visit_move(PyObject *op, PyGC_Head *tolist)
217240
{
218241
if (PyObject_IS_GC(op)) {
219-
if (IS_TRACKED(op) && !IS_MOVED(op)) {
242+
if (IS_TRACKED(op) && STILL_A_CANDIDATE(op)) {
220243
PyGC_Head *gc = AS_GC(op);
221244
gc_list_remove(gc);
222245
gc_list_append(gc, tolist);
@@ -226,7 +249,9 @@ visit_move(PyObject *op, PyGC_Head *tolist)
226249
return 0;
227250
}
228251

229-
/* Move objects referenced from reachable to reachable set. */
252+
/* Move candidates referenced from reachable to reachable set (they're no
253+
* longer candidates).
254+
*/
230255
static void
231256
move_root_reachable(PyGC_Head *reachable)
232257
{
@@ -242,7 +267,7 @@ move_root_reachable(PyGC_Head *reachable)
242267
}
243268
}
244269

245-
/* return true of object has a finalization method */
270+
/* return true if object has a finalization method */
246271
static int
247272
has_finalizer(PyObject *op)
248273
{
@@ -269,6 +294,7 @@ move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
269294
if (has_finalizer(op)) {
270295
gc_list_remove(gc);
271296
gc_list_append(gc, finalizers);
297+
gc->gc.gc_refs = GC_MOVED;
272298
}
273299
}
274300
}
@@ -282,7 +308,7 @@ move_finalizer_reachable(PyGC_Head *finalizers)
282308
for (; gc != finalizers; gc=gc->gc.gc_next) {
283309
/* careful, finalizers list is growing here */
284310
traverse = FROM_GC(gc)->ob_type->tp_traverse;
285-
(void) traverse(FROM_GC(gc),
311+
(void) traverse(FROM_GC(gc),
286312
(visitproc)visit_move,
287313
(void *)finalizers);
288314
}
@@ -332,7 +358,8 @@ handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
332358
* objects. */
333359
PyList_Append(garbage, op);
334360
}
335-
/* object is now reachable again */
361+
/* object is now reachable again */
362+
assert(!STILL_A_CANDIDATE(op));
336363
gc_list_remove(gc);
337364
gc_list_append(gc, old);
338365
}
@@ -349,6 +376,8 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
349376
while (!gc_list_is_empty(unreachable)) {
350377
PyGC_Head *gc = unreachable->gc.gc_next;
351378
PyObject *op = FROM_GC(gc);
379+
380+
assert(STILL_A_CANDIDATE(op));
352381
if (debug & DEBUG_SAVEALL) {
353382
PyList_Append(garbage, op);
354383
}
@@ -363,6 +392,7 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
363392
/* object is still alive, move it, it may die later */
364393
gc_list_remove(gc);
365394
gc_list_append(gc, old);
395+
gc->gc.gc_refs = GC_MOVED;
366396
}
367397
}
368398
}

0 commit comments

Comments
 (0)