changed by config */
static int user_zonelist_order = ZONELIST_ORDER_DEFAULT;
/* string for sysctl */
#define NUMA_ZONELIST_ORDER_LEN 16
char numa_zonelist_order[16] = "default";
#else
/* ......*/
#endif
而接受和处理用户配置的工作, 自然是交给我们强大的proc文件系统来完成的, 可以通过/proc/sys/vm/numa_zonelist_order动态改变zonelist order的分配方式。
内核通过setup_numa_zonelist_order读取并处理用户写入的配置信息
- 接收到用户的信息后用__parse_numa_zonelist_order处理接收的参数
- 如果前面用__parse_numa_zonelist_order处理的信息串成功, 则将对用的设置信息写入到字符串numa_zonelist_order中
参见mm/page_alloc.c?v=4.7, line 4578
/*
* interface for configure zonelist ordering.
* command line option "numa_zonelist_order"
* = "[dD]efault - default, automatic configuration.
* = "[nN]ode - order by node locality, then by zone within node
* = "[zZ]one - order by zone, then by locality within zone
*/
static int __parse_numa_zonelist_order(char *s)
{
if (*s == 'd' || *s == 'D') {
user_zonelist_order = ZONELIST_ORDER_DEFAULT;
} else if (*s == 'n' || *s == 'N') {
user_zonelist_order = ZONELIST_ORDER_NODE;
} else if (*s == 'z' || *s == 'Z') {
user_zonelist_order = ZONELIST_ORDER_ZONE;
} else {
pr_warn("Ignoring invalid numa_zonelist_order value: %s\n", s);
return -EINVAL;
}
return 0;
}
static __init int setup_numa_zonelist_order(char *s)
{
int ret;
if (!s)
return 0;
ret = __parse_numa_zonelist_order(s);
if (ret == 0)
strlcpy(numa_zonelist_order, s, NUMA_ZONELIST_ORDER_LEN);
return ret;
}
early_param("numa_zonelist_order", setup_numa_zonelist_order);
4 build_all_zonelists_init完成内存域zonelists的初始化
build_all_zonelists函数在通过set_zonelist_order设置了zonelists中结点的组织顺序后, 首先检查了ssytem_state标识. 如果当前系统处于boot阶段(SYSTEM_BOOTING), 就开始通过build_all_zonelists_init函数初始化zonelist
build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
{
/* 设置zonelist中节点和内存域的组织形式
* current_zonelist_order变量标识了当前系统的内存组织形式
* zonelist_order_name以字符串存储了系统中内存组织形式的名称 */
set_zonelist_order();
if (system_state == SYSTEM_BOOTING) {
build_all_zonelists_init();
4.1 system_state系统状态标识
其中system_state
变量是一个系统全局定义的用来表示系统当前运行状态的枚举变量, 其定义在include/linux/kernel.h?v=4.7, line 487
/* Values used for system_state */
extern enum system_states
{
SYSTEM_BOOTING,
SYSTEM_RUNNING,
SYSTEM_HALT,
SYSTEM_POWER_OFF,
SYSTEM_RESTART,
} system_state;
- 如果系统system_state是SYSTEM_BOOTING, 则调用build_all_zonelists_init初始化所有的内存结点
- 否则的话如果定义了冷热页CONFIG_MEMORY_HOTPLUG且参数zone(待初始化的内存管理域zone)不为NULL, 则调用setup_zone_pageset设置冷热页
if (system_state == SYSTEM_BOOTING)
{
build_all_zonelists_init();
}
else
{
#ifdef CONFIG_MEMORY_HOTPLUG
if (zone)
setup_zone_pageset(zone);
#endif
4.2 build_all_zonelists_init函数
build_all_zonelists函数在如果当前系统处于boot阶段(system_state == SYSTEM_BOOTING), 就开始通过build_all_zonelists_init函数初始化zonelist
build_all_zonelists_init函数定义在mm/page_alloc.c?v=4.7, line 5013
static noinline void __init
build_all_zonelists_init(void)
{
__build_all_zonelists(NULL);
mminit_verify_zonelist();
cpuset_init_current_mems_allowed();
}
build_all_zonelists_init将将所有工作都委托给__build_all_zonelists完成了zonelists的初始化工作, 后者又对系统中的各个NUMA结点分别调用build_zonelists.
函数__build_all_zonelists定义在mm/page_alloc.c?v=4.7, line 4959
/* return values int ....just for stop_machine() */
static int __build_all_zonelists(void *data)
{
int nid;