atic inline struct page *
__alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, struct zone *preferred_zone,
int migratetype)
{
struct page *page;
do {
page = get_page_from_freelist(gfp_mask, nodemask, order,
zonelist, high_zoneidx, ALLOC_NO_WATERMARKS,
preferred_zone, migratetype);
if (!page && gfp_mask & __GFP_NOFAIL)
wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
} while (!page && (gfp_mask & __GFP_NOFAIL));
return page;
}
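可以看到,该函数以ALLOC_NO_WATERMARKS(即不受watermark限制)为分配标志调用get_page_from_freelist()尝试获得内存;如果分配失败且设置了分配标识__GFP_NOFAIL,则先通过wait_iff_congested()稍作等待,再循环调用get_page_from_freelist()重试,直至分配成功为止;如果没有设置__GFP_NOFAIL,则只尝试一次便返回。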
接着回到__alloc_pages_slowpath()中,其从__alloc_pages_high_priority()退出后继而判断是否设置了__GFP_WAIT标识:如果设置则表示内存分配允许休眠,否则直接以分配内存失败而退出。接着将会调用__alloc_pages_direct_compact()和__alloc_pages_direct_reclaim(),分别通过内存规整(compaction)和直接回收(reclaim)腾出内存后再尝试分配。如果经过上面的多种尝试后内存分配仍然失败,将会调用__alloc_pages_may_oom()触发OOM killer机制。OOM killer将进程kill后会再次尝试内存分配,最后则是分配失败或分配成功的收尾处理。
__alloc_pages_slowpath()暂且分析至此,回到本文重点函数__alloc_pages_may_oom()中进一步进行分析。
【file:/ mm/page_alloc.c】
/*
* __alloc_pages_may_oom - final allocation attempt that may invoke the OOM killer
*
* Takes the OOM killer lock for the zones in @zonelist, retries the
* allocation once against the high watermark and, if that still fails and
* none of the bail-out checks applies, calls out_of_memory().
*
* Returns the page obtained by the high-watermark retry, or NULL. NULL is
* returned both when the lock could not be taken and after out_of_memory()
* has been called: this function does not retry the allocation itself once
* the kill has been requested.
*/
static inline struct page *
__alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, struct zone *preferred_zone,
int migratetype)
{
struct page *page;
/* Acquire the OOM killer lock for the zones in zonelist */
if (!try_set_zonelist_oom(zonelist, gfp_mask)) {
/*
* Lock not obtained: back off through
* schedule_timeout_uninterruptible(1) and return NULL without
* attempting a kill.
*/
schedule_timeout_uninterruptible(1);
return NULL;
}
/*
* Go through the zonelist yet one more time, keep very high watermark
* here, this is only to catch a parallel oom killing, we must fail if
* we're still under heavy pressure.
*/
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
order, zonelist, high_zoneidx,
ALLOC_WMARK_HIGH|ALLOC_CPUSET,
preferred_zone, migratetype);
/* Retry succeeded: skip the kill, but still release the lock at out: */
if (page)
goto out;
/* The bail-out checks below apply only when __GFP_NOFAIL is not set */
if (!(gfp_mask & __GFP_NOFAIL)) {
/* The OOM killer will not help higher order allocs */
if (order > PAGE_ALLOC_COSTLY_ORDER)
goto out;
/* The OOM killer does not needlessly kill tasks for lowmem */
if (high_zoneidx < ZONE_NORMAL)
goto out;
/*
* GFP_THISNODE contains __GFP_NORETRY and we never hit this.
* Sanity check for bare calls of __GFP_THISNODE, not real OOM.
* The caller should handle page allocation failure by itself if
* it specifies __GFP_THISNODE.
* Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
*/
if (gfp_mask & __GFP_THISNODE)
goto out;
}
/* Exhausted what can be done so it's blamo time */
/* NOTE(review): the trailing false looks like out_of_memory()'s force_kill argument - confirm */
out_of_memory(zonelist, gfp_mask, order, nodemask, false);
out:
/* Common exit: release the lock taken by try_set_zonelist_oom() above */
clear_zonelist_oom(zonelist, gfp_mask);
return page;
}
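该函数首先通过try_set_zonelist_oom()判断OOM killer是否已经在其他核进行killing操作,如果没有的情况下将会在try_set_zonelist_oom()内部进行锁操作,确保只有一个核执行killing的操作;如果加锁失败,则调用schedule_timeout_uninterruptible(1)短暂休眠后直接返回NULL。继而调用get_page_from_freelist()在高watermark的情况下尝试再次获取内存,这一步只是为了捕捉其他核并行执行OOM kill后已经释放出内存的情况,在内存压力仍然很大时通常都会失败。在没有设置__GFP_NOFAIL的前提下,如果分配阶数大于PAGE_ALLOC_COSTLY_ORDER、high_zoneidx小于ZONE_NORMAL或者设置了__GFP_THISNODE,则不会触发OOM killer而直接退出。否则接着就是调用到了关键函数out_of_memory()。最后函数退出时将会调用clear_zonelist_oom()清除掉try_set_zonelist_oom()里面的锁操作。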
着重分析一下out_of_memory():
【file:/ mm/oom_kill.c】
/**
* out_of_memory - kill the "best" process when we run out of memory
* @zonelist: zonelist pointer
* @gfp_mask: memory allocation flags
* @order: amount of memory being requested as a power of 2
* @nodemask: nodemask passed to page allocator
* @force_kill: true if a task must be killed, even if others are exiting
*
* If we run out of memory, we have the choice between either
* killing a random task (bad