ct *
gc_collect(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
int generation = NUM_GENERATIONS - 1;
Py_ssize_t _return_value;
_return_value = gc_collect_impl(module, generation);
if ((_return_value == -1) && PyErr_Occurred()) {
goto exit;
}
return_value = PyLong_FromSsize_t(_return_value);
exit:
return return_value;
}
具体执行在gc_collect_impl
函数中,接着往下
static Py_ssize_t gc_collect_impl(PyObject *module, int generation)
{
PyThreadState *tstate = _PyThreadState_GET();
GCState *gcstate = &tstate->interp->gc;
Py_ssize_t n;
if (gcstate->collecting) {
/* already collecting, don't do anything */
n = 0;
}
else {
gcstate->collecting = 1;
n = collect_with_callback(tstate, generation);
gcstate->collecting = 0;
}
return n;
}
可以看到,如果已经在执行GC,则直接返回。接着看collect_with_callback
static Py_ssize_t
collect_with_callback(PyThreadState *tstate, int generation)
{
assert(!_PyErr_Occurred(tstate));
Py_ssize_t result, collected, uncollectable;
invoke_gc_callback(tstate, "start", generation, 0, 0);
result = collect(tstate, generation, &collected, &uncollectable, 0);
invoke_gc_callback(tstate, "stop", generation, collected, uncollectable);
assert(!_PyErr_Occurred(tstate));
return result;
}
其中invoke_gc_callback
是调用通过gc.callbacks
注册的回调函数,这里我们忽略,重点分析collect
函数。
collect函数签名
这段代码很长,我们拆分开来分析,这里会去除掉一些DEBUG相关的逻辑。
static Py_ssize_t collect(PyThreadState *tstate, int generation,Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, int nofail);
- 将新生代的对象合并到指定代的对象列表中。
/* merge younger generations with one we are currently collecting */
for (i = 0; i < generation; i++) {
gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation));
}
比如调用gc.collect(2)
,就表示启动全部的垃圾回收。这里就会将第0、1代的对象合并到第2代上。合并之后第0、1代上就空了,全部可GC的对象都在第2代上。
- 推断不可达对象
/* handy references */
young = GEN_HEAD(gcstate, generation);
if (generation < NUM_GENERATIONS-1)
old = GEN_HEAD(gcstate, generation+1);
else
old = young;
validate_list(old, collecting_clear_unreachable_clear);
deduce_unreachable(young, &unreachable);
这里的young指针指向第2代的链表头,validate_list
做校验,这里忽略,重点在deduce_unreachable
函数中。
static inline void
deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
validate_list(base, collecting_clear_unreachable_clear);
update_refs(base); // gc_prev is used for gc_refs
subtract_refs(base);
gc_list_init(unreachable);
move_unreachable(base, unreachable); // gc_prev is pointer again
validate_list(base, collecting_clear_unreachable_clear);
validate_list(unreachable, collecting_set_unreachable_set);
}
首先调用update_refs更新引用计数
static inline void
gc_reset_refs(PyGC_Head *g, Py_ssize_t refs)
{
g->_gc_prev = (g->_gc_prev & _PyGC_PREV_MASK_FINALIZED)
| PREV_MASK_COLLECTING
| ((uintptr_t)(refs) << _PyGC_PREV_SHIFT);
}
static void
update_refs(PyGC_Head *containers)
{
PyGC_Head *gc = GC_NEXT(containers);
for (; gc != containers; gc = GC_NEXT(gc)) {
gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc)));
_PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0);
}
}
这里的逻辑就是遍历所有对象,然后赋值_gc_prev,设置为收集中的标识PREV_MASK_COLLECTING,然后将引用计数赋值给_gc_prev 。最后_gc_prev的内容如下。
更新完_gc_prev后,就开始调用subtrace_refs,遍历对象中的元素,