前言
今天在业务代码中看到request_module函数和DEFINE_MUTEX宏,之前没有使用过,特此记录一下,答疑解惑,也算是日常积累.
1、request_module
在用户态,我们可以使用insmod或者modprobe手动装载ko模块到内核中。 如果是在内核态呢?
这个时候需要用到request_module这个函数。
该函数是一个宏函数,其定义如下,内核版本4.1.15:
位置:./include/linux/kmod.h
#ifdef CONFIG_MODULES
extern char modprobe_path[]; /* for sysctl */
/* modprobe exit status on success, -ve on error. Return value
* usually useless though. */
extern __printf(2, 3)
int __request_module(bool wait, const char *name, ...);
/* Waiting variant: forwards to __request_module() with wait == true. */
#define request_module(mod...) __request_module(true, mod)
/* Non-waiting variant: forwards to __request_module() with wait == false. */
#define request_module_nowait(mod...) __request_module(false, mod)
/*
 * Yields x if it is non-zero; otherwise requests the module (waiting) and
 * yields x evaluated again. Note that x may therefore be evaluated twice.
 */
#define try_then_request_module(x, mod...) \
((x) ?: (__request_module(true, mod), (x)))
#else
/*
 * CONFIG_MODULES not set: the request functions are stubs that return
 * -ENOSYS, and try_then_request_module() reduces to x alone.
 */
static inline int request_module(const char *name, ...) { return -ENOSYS; }
static inline int request_module_nowait(const char *name, ...) { return -ENOSYS; }
#define try_then_request_module(x, mod...) (x)
#endif
从上面的宏定义来看,request_module最终展开为对__request_module(true, mod)的调用,下面是它的定义:
位置: ./kernel/kmod.c
参数:
- bool wait:是否等待操作完成
- const char *fmt:printf风格的格式字符串,用于生成模块名称
/**
* __request_module - try to load a kernel module
* @wait: wait (or not) for the operation to complete
* @fmt: printf style format string for the name of the module
* @...: arguments as specified in the format string
*
* Load a module using the user mode module loader. The function returns
* zero on success or a negative errno code on failure. Note that a
* successful module load does not mean the module did not then unload
* and exit on an error of its own. Callers must check that the service
* they requested is now available not blindly invoke it.
*
* If module auto-loading support is disabled then this function
* becomes a no-operation.
*/
int __request_module(bool wait, const char *fmt, ...)
{
va_list args;
char module_name[MODULE_NAME_LEN];
unsigned int max_modprobes;
int ret;
/* Count of requests currently between the inc and dec below; static, so shared by all callers. */
static atomic_t kmod_concurrent = ATOMIC_INIT(0);
#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
/* How many "runaway loop" messages have been printed so far (capped at 5 below). */
static int kmod_loop_msg;
/*
* We don't allow synchronous module loading from async. Module
* init may invoke async_synchronize_full() which will end up
* waiting for this task which already is waiting for the module
* loading to complete, leading to a deadlock.
*/
WARN_ON_ONCE(wait && current_is_async());
/* An empty modprobe_path turns the request into a no-op that reports success. */
if (!modprobe_path[0])
return 0;
/* Expand the format into module_name; a result that does not fit is rejected. */
va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
va_end(args);
if (ret >= MODULE_NAME_LEN)
return -ENAMETOOLONG;
/* A non-zero result from the security hook is returned to the caller unchanged. */
ret = security_kernel_module_request(module_name);
if (ret)
return ret;
/* If modprobe needs a service that is in a module, we get a recursive
* loop. Limit the number of running kmod threads to max_threads/2 or
* MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method
* would be to run the parents of this process, counting how many times
* kmod was invoked. That would mean accessing the internals of the
* process tables to get the command line, proc_pid_cmdline is static
* and it is not worth changing the proc code just to handle this case.
* KAO.
*
* "trace the ppid" is simple, but will fail if someone's
* parent exits. I think this is as good as it gets. --RR
*/
max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
/* Count this request as in flight first, then compare against the cap. */
atomic_inc(&kmod_concurrent);
if (atomic_read(&kmod_concurrent) > max_modprobes) {
/* We may be blaming an innocent here, but unlikely */
if (kmod_loop_msg < 5) {
printk(KERN_ERR
"request_module: runaway loop modprobe %s\n",
module_name);
kmod_loop_msg++;
}
/* Over the cap: undo our own increment and refuse the request. */
atomic_dec(&kmod_concurrent);
return -ENOMEM;
}
trace_module_request(module_name, wait, _RET_IP_);
/* wait == true selects UMH_WAIT_PROC, otherwise UMH_WAIT_EXEC. */
ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
/* Request finished (successfully or not): drop it from the in-flight count. */
atomic_dec(&kmod_concurrent);
return ret;
}
EXPORT_SYMBOL(__request_module);
下面来仔细阅读一下这个代码,首先来看变量定义:
va_list args;
char module_name[MODULE_NAME_LEN];
unsigned int max_modprobes;
int ret;
static atomic_t kmod_concurrent = ATOMIC_INIT(0);
#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
static int kmod_loop_msg;
va_list是可变参数函数的参数表,也就是所有的入参都保存在这个表中。
module_name是存放模块名称的字符数组。
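max_modprobes是允许同时进行的modprobe请求数上限,取max_threads/2和MAX_KMOD_CONCURRENT中的较小值
ret存放返回值
kmod_concurrent是当前正在进行中的模块加载请求的原子计数(进入时加1,结束时减1)
MAX_KMOD_CONCURRENT是并发请求数的上限常量(50)
kmod_loop_msg记录“runaway loop”告警已经打印的次数,最多打印5次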
不允许在异步(async)上下文中同步(wait为true)加载模块,否则可能导致死锁:
WARN_ON_ONCE(wait && current_is_async());
这个函数用于判断当前任务是否为异步任务,函数原型如下:
/*
 * Returns true when current_wq_worker() yields a worker and the function that
 * worker is currently running is async_run_entry_fn; false otherwise.
 */
bool current_is_async(void)
{
struct worker *worker = current_wq_worker();
return worker && worker->current_func == async_run_entry_fn;
}
WARN_ON_ONCE用于只打印一次告警信息,避免频繁的打印。
接下来检查modprobe工具的路径:
if (!modprobe_path[0])
return 0;
modprobe_path[]定义在kernel/kmod.c中,默认值如下:
/* Path of the modprobe binary that gets executed; defaults to /sbin/modprobe. */
char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
接下来调用security_kernel_module_request函数,函数原型如下:
最终调用的是security_ops->kernel_module_request
/*
 * Thin wrapper: passes kmod_name to the kernel_module_request hook of the
 * current security_ops and returns whatever the hook returns.
 */
int security_kernel_module_request(char *kmod_name)
{
return security_ops->kernel_module_request(kmod_name);
}
这个钩子在hooks.c文件中挂接,这里是对运行的安全权限进行审查,这个函数太过复杂,在这里不做解释。
/*
 * Abbreviated excerpt of the SELinux hook table: only the member that wires
 * up the module-request hook is shown here; the remaining members are elided.
 */
static struct security_operations selinux_ops = {
	.kernel_module_request = selinux_kernel_module_request,
	/* ... other hooks omitted ... */
};
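trace_module_request在源码中直接按函数名搜索是搜不到的,因为它不是手写的普通函数,而是由tracepoint机制生成的:include/trace/events/module.h中的TRACE_EVENT(module_request, ...)宏会生成trace_module_request()。它的作用是在发起模块加载请求时记录一条跟踪事件(模块名、是否等待、调用者地址)。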
接下来就是直接调用call_modprobe这个函数进行模块加载。
ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
这个函数定义:
/*
 * Builds the command line "<modprobe_path> -q -- <module_name>" and hands it
 * to the usermode-helper machinery.
 *
 * Returns -ENOMEM if either allocation or call_usermodehelper_setup() fails;
 * otherwise returns the result of call_usermodehelper_exec().
 */
static int call_modprobe(char *module_name, int wait)
{
struct subprocess_info *info;
/* Fixed environment passed to the helper. */
static char *envp[] = {
"HOME=/",
"TERM=linux",
"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
NULL
};
/* Room for four arguments plus the terminating NULL. */
char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL);
if (!argv)
goto out;
/* Take a private copy of the name; the parameter now points at the copy. */
module_name = kstrdup(module_name, GFP_KERNEL);
if (!module_name)
goto free_argv;
argv[0] = modprobe_path;
argv[1] = "-q";
argv[2] = "--";
argv[3] = module_name; /* check free_modprobe_argv() */
argv[4] = NULL;
/*
 * free_modprobe_argv is registered with the setup call; presumably it
 * releases argv and the copied name later, since the success path below
 * does not free them here -- confirm against free_modprobe_argv().
 */
info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
NULL, free_modprobe_argv, NULL);
if (!info)
goto free_module_name;
/* UMH_KILLABLE is always OR-ed into the caller's wait mode. */
return call_usermodehelper_exec(info, wait | UMH_KILLABLE);
/* Error unwinding: each label frees what was allocated before the failure. */
free_module_name:
kfree(module_name);
free_argv:
kfree(argv);
out:
return -ENOMEM;
}
重点关注call_usermodehelper_exec这个函数:
/*
 * Queues sub_info->work on khelper_wq and, unless wait is UMH_NO_WAIT, waits
 * on an on-stack completion for it.
 *
 * Returns -EINVAL when sub_info->path is NULL, -EBUSY when khelper_wq is NULL
 * or usermodehelper_disabled is set, 0 for UMH_NO_WAIT, the non-zero result
 * of wait_for_completion_killable() when a killable wait is interrupted
 * before completion, and sub_info->retval otherwise.
 */
int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
{
DECLARE_COMPLETION_ONSTACK(done);
int retval = 0;
if (!sub_info->path) {
call_usermodehelper_freeinfo(sub_info);
return -EINVAL;
}
helper_lock();
if (!khelper_wq || usermodehelper_disabled) {
retval = -EBUSY;
goto out;
}
/*
* Set the completion pointer only if there is a waiter.
* This makes it possible to use umh_complete to free
* the data structure in case of UMH_NO_WAIT.
*/
sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
sub_info->wait = wait;
queue_work(khelper_wq, &sub_info->work);
if (wait == UMH_NO_WAIT) /* task has freed sub_info */
goto unlock;
if (wait & UMH_KILLABLE) {
retval = wait_for_completion_killable(&done);
if (!retval)
goto wait_done;
/* umh_complete() will see NULL and free sub_info */
if (xchg(&sub_info->complete, NULL))
goto unlock;
/* fallthrough, umh_complete() was already called */
}
wait_for_completion(&done);
wait_done:
retval = sub_info->retval;
/* "out" frees sub_info here; jumping straight to "unlock" skips the free. */
out:
call_usermodehelper_freeinfo(sub_info);
unlock:
helper_unlock();
return retval;
}
关键是queue_work这个函数:
queue_work(khelper_wq, &sub_info->work);
分析一下入参:
- khelper_wq
/* Workqueue onto which call_usermodehelper_exec() queues sub_info->work. */
static struct workqueue_struct *khelper_wq;
/* Creates the single-threaded "khelper" workqueue; BUG()s if creation fails. */
void __init usermodehelper_init(void)
{
khelper_wq = create_singlethread_workqueue("khelper");
BUG_ON(!khelper_wq);
}
- sub_info->work
argv[0] = modprobe_path;
argv[1] = "-q";
argv[2] = "--";
argv[3] = module_name; /* check free_modprobe_argv() */
argv[4] = NULL;
info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
NULL, free_modprobe_argv, NULL);
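khelper_wq是内核初始化时由usermodehelper_init()创建的名为“khelper”的单线程工作队列;这里并不是新建线程,而是通过queue_work把sub_info->work这个工作项加入该工作队列,由工作队列的线程取出执行,最终在用户态运行modprobe。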
不同版本的内核细节有差异,请读者查阅具体的代码。
2、DEFINE_MUTEX
这是一个宏函数,用于在Linux内核中快速创建一个互斥锁。
位置:./include/linux/mutex.h:116
宏定义:
/*
 * Static initializer for a struct mutex: count is set to 1, wait_lock to the
 * unlocked spinlock initializer, wait_list to an empty list head, followed by
 * whatever the debug and dep-map initializer macros expand to.
 */
#define __MUTEX_INITIALIZER(lockname) \
{ .count = ATOMIC_INIT(1) \
, .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
, .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
__DEBUG_MUTEX_INITIALIZER(lockname) \
__DEP_MAP_MUTEX_INITIALIZER(lockname) }
/* Defines a struct mutex variable named mutexname, initialized in place. */
#define DEFINE_MUTEX(mutexname) \
struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
未开启CONFIG_DEBUG_MUTEXES时,__DEBUG_MUTEX_INITIALIZER被定义为空;除了用DEFINE_MUTEX静态定义之外,还可以用mutex_init宏在运行时初始化互斥锁:
/* Expands to nothing in the variant shown here. */
# define __DEBUG_MUTEX_INITIALIZER(lockname)
/**
* mutex_init - initialize the mutex
* @mutex: the mutex to be initialized
*
* Initialize the mutex to unlocked state.
*
* It is not allowed to initialize an already locked mutex.
*
* Each expansion declares its own static lock_class_key and passes the
* stringified argument as the name to __mutex_init().
*/
# define mutex_init(mutex) \
do { \
static struct lock_class_key __key; \
\
__mutex_init((mutex), #mutex, &__key); \
} while (0)
注意这个do { ... } while (0)语句,这是多语句宏的惯用写法:它让宏展开后在语法上仍是一条语句,可以安全地用在不带花括号的if/else分支中。该宏最后实际上是调用__mutex_init函数
位置:./kernel/locking/mutex.c
这里就不深入了,下面给一个使用示例:
/* Statically define and initialize the mutex. */
DEFINE_MUTEX(my_mutex);
/* Acquire the mutex before entering the critical section. */
mutex_lock(&my_mutex);
/* ... critical section goes here ... */
/* Release the mutex when done. */
mutex_unlock(&my_mutex);