1. 说明
1> 本篇记录的是实际工作中在Linux上碰到的一个问题:一个使用了CGroup的进程处于R状态,但既不执行,也不退出,还无法被kill。经过深入挖掘才发现,这是CGroup的一个内核bug
2> 发现该bug后,去年曾作为漏洞提交给RedHat,可惜并未通过,原因不明,所以就发到我的博客上公开了
3> 前面的2个帖子
《极简cfs公平调度算法》
《极简组调度-CGroup如何限制cpu》是为了理解本篇这个内核bug而写的。需要先掌握Linux内核进程调度和CGroup控制的基本原理,才能比较清晰地了解这个内核bug的来龙去脉
4> 本文所用的内核调试工具是crash,大家可以到官网上去查看crash命令的使用,这里就不多介绍了
2. 问题
2.1 触发bug的代码(代码较长,请展开查看)
2.1.1 code
#include <iostream> #include <sys/types.h> #include <signal.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <errno.h> #include <sys/stat.h> #include <pthread.h> #include <sys/time.h> #include <string> using namespace std; std::string sub_cgroup_dir("/sys/fs/cgroup/cpu/test"); // common lib bool is_dir(const std::string& path) { struct stat statbuf; if (stat(path.c_str(), &statbuf) == 0 ) { if (0 != S_ISDIR(statbuf.st_mode)) { return true; } } return false; } bool write_file(const std::string& file_path, int num) { FILE* fp = fopen(file_path.c_str(), "w"); if (fp = NULL) { return false; } std::string write_data = to_string(num); fputs(write_data.c_str(), fp); fclose(fp); return true; } // ms long get_ms_timestamp() { timeva l tv; gettimeofday(&tv, NULL); return (tv.tv_sec * 1000 + tv.tv_usec / 1000); } // cgroup bool create_cgroup() { if (is_dir(sub_cgroup_dir) == false) { if (mkdir(sub_cgroup_dir.c_str(), S_IRWXU | S_IRGRP) != 0) { cout << "mkdir cgroup dir fail" << endl; return false; } } int pid = getpid(); cout << "pid is " << pid << endl; std::string procs_path = sub_cgroup_dir + "/cgroup.procs"; return write_file(procs_path, pid); } bool set_period(int period) { std::string period_path = sub_cgroup_dir + "/cpu.cfs_period_us"; return write_file(period_path, period); } bool set_quota(int quota) { std::string quota_path = sub_cgroup_dir + "/cpu.cfs_quota_us"; return write_file(quota_path, quota); } // thread // param: ms interval void* thread_func(void* param) { int i = 0; int interval = (long)param; long last = get_ms_timestamp(); while (true) { i++; if (i % 1000 != 0) { continue; } long current = get_ms_timestamp(); if ((current - last) >= interval) { usleep(1000); last = current; } } pthread_exit(NULL); } void test_thread() { const int k_thread_num = 10; pthread_t pthreads[k_thread_num]; for (int i = 0; i < k_thread_num; i++) { if (pthread_create(&pthreads[i], NULL, thread_func, (void*)(i + 1)) != 0) { cout << "create thread fail" << 
endl; } else { cout << "create thread success,tid is " << pthreads[i] << endl; } } } //argv[0] : period //argv[1] : quota int main(int argc,char* argv[]) { if (argc <3) { cout << "usage : ./inactive timer $period $quota" << endl; return -1; } int period = stoi(argv[1]); int quota = stoi(argv[2]); cout << "period is " << period << endl; cout << "quota is " << quota << endl; test_thread(); if (create_cgroup() == false) { cout << "create cgroup fail&q