Linux Kernel: 内核数据结构

发表于 2023-04-19 更新于 2025-03-21 分类于 GNU/Linux操作系统， Linux Kernel 本文字数： 5.7k 阅读时长 ≈ 10 分钟

概述

Linux 内核内建了常用的数据结构，并鼓励开发者尽可能地重用这些数据结构，而不是重新发明轮子。这些数据结构包括链表（list_head）、队列、散列表（hlist）和二叉树。

参考《Understanding the Linux Kernel, Third Edition》 By Daniel P. Bovet & Marco Cesati.
参考《Linux Kernel Development, Third Edition》 By Robert Love.

双向链表

双向链表list_head 在头文件include/linux/types.h 中定义：

struct list_head {
    struct list_head *next;
    struct list_head *prev;
};

基本的处理函数和宏在头文件include/linux/list.h 中定义。

初始化链表

初始化链表 LIST_HEAD_INIT 和 LIST_HEAD：

1
2
3

#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define LIST_HEAD(name) \
	struct list_head name = LIST_HEAD_INIT(name)

插入

删除

查询

查询操作最重要的不是获取list_head类型的指针，而是获得list_head所在的结构体的基地址。例如我们定义下面的结构体：

struct person {
    int age;
    struct list_head list;
};

我们如何根据一个struct list_head *指针类型获取其所在struct person结构体的基地址呢？container_of宏主要通过指针偏移运算来完成这个工作（include/linux/kernel.h）。

#define container_of(ptr, type, member) ({				\
	void *__mptr = (void *)(ptr);					\
	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
			 !__same_type(*(ptr), void),			\
			 "pointer type mismatch in container_of()");	\
	((type *)(__mptr - offsetof(type, member))); })

其中，offsetof 在include/linux/stddef.h中定义：

1	#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE*)0)->NUMBER)

注意这里使用TYPE*类型的 NULL(0)指针并不会导致运行时错误，因为在编译阶段，编译器会将上诉代码翻译为0+OFFSET，其中OFFSET为NUMBER在类型TYPE中的偏移量。然后我们在container_of宏中将struct list_head ptr的地址减去该偏移量就可以获得其所在TYPE结构体的地址。

然后可以定义list_entry：

#define list_entry(ptr, type, member) \
	container_of(ptr, type, member)

// @ptr : the list head to take the element from.
#define list_first_entry(ptr, type, member) \
	list_entry((ptr)->next, type, member)

// @ptr : the list head to take the element from.
#define list_last_entry(ptr, type, member) \
	list_entry((ptr)->prev, type, member)

/**
 * list_next_entry - get the next element in list
 * @pos:	the type * to cursor
 * @member:	the name of the list_head within the struct.
 */
#define list_next_entry(pos, member) \
	list_entry((pos)->member.next, typeof(*(pos)), member)

替换

遍历

直接遍历struct list_head 节点：

/**
 * list_for_each	-	iterate over a list
 * @pos:	the &struct list_head to use as a loop cursor.
 * @head:	the head for your list.
 */
#define list_for_each(pos, head) \
	for (pos = (head)->next; pos != (head); pos = pos->next)

// @n: another &struct list_head to use as temporary storage
#define list_for_each_safe(pos, n, head) \
	for (pos = (head)->next, n = pos->next; pos != (head); \
		pos = n, n = pos->next)

注意，list_for_each_safe可以用在遍历链表的过程中执行删除节点操作，它是通过增加一个额外的临时hlist_node指针n来实现的。

遍历 entry :

/**
 * list_for_each_entry	-	iterate over list of given type
 * @pos:	the type * to use as a loop cursor.
 * @head:	the head for your list.
 * @member:	the name of the list_head within the struct.
 */
#define list_for_each_entry(pos, head, member)				\
	for (pos = list_first_entry(head, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = list_next_entry(pos, member))

边界检查

例子

进程链表是运用链表最重要的例子之一。在结构体task_struct中包含字段struct list_head tasks。

另外一个例子是等待队列。

哈希链表

哈希链表由于自身特点的考虑，不使用双向链表。

定义如下（include/linux/types.h）：

struct hlist_head {
    struct hlist_node *first;
};

struct hlist_node {
    struct hlist_node *next, **pprev;
};

为什么头节点(struct hlist_head)不直接使用struct hlist_node，而是要重新定义一个结构体呢？答案是为了节省空间。

另外，使用二级指针**pprev在删除节点上的处理非常简单，无需关心是否是 head 节点。而使用单链表或双向链表时需要单独判断是否是 head 节点。

初始化

#define HLIST_HEAD_INIT { .first = NULL }
#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
static inline void INIT_HLIST_NODE(struct hlist_node *h)
{
	h->next = NULL;
	h->pprev = NULL;
}

插入

向hlist链表添加节点时，可以直接添加到链表头部，也可以插入到中间位置。操作函数和宏在include/linux/list.h中定义：

static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
{
	struct hlist_node *first = h->first;
	n->next = first;
	if (first)
		first->pprev = &n->next;
	WRITE_ONCE(h->first, n);
	n->pprev = &h->first;
}

删除

static inline void __hlist_del(struct hlist_node *n)
{
	struct hlist_node *next = n->next;
	struct hlist_node **pprev = n->pprev;

	WRITE_ONCE(*pprev, next);
	if (next)
		next->pprev = pprev;
}

static inline void hlist_del(struct hlist_node *n)
{
	__hlist_del(n);
	n->next = LIST_POISON1;
	n->pprev = LIST_POISON2;
}

查询

等待队列

等待队列使用双向链表实现（include/linux/wait.h）：

struct wait_queue_entry {
	unsigned int		flags;
	void			*private;
	wait_queue_func_t	func;
	struct list_head	entry;
};

struct wait_queue_head {
	spinlock_t		lock;
	struct list_head	head;
};
typedef struct wait_queue_head wait_queue_head_t;

初始化

#define __WAITQUEUE_INITIALIZER(name, tsk) {					\
	.private	= tsk,							\
	.func		= default_wake_function,				\
	.entry		= { NULL, NULL } }

#define DECLARE_WAITQUEUE(name, tsk)						\
	struct wait_queue_entry name = __WAITQUEUE_INITIALIZER(name, tsk)

#define __WAIT_QUEUE_HEAD_INITIALIZER(name) {					\
	.lock		= __SPIN_LOCK_UNLOCKED(name.lock),			\
	.head		= { &(name).head, &(name).head } }

#define DECLARE_WAIT_QUEUE_HEAD(name) \
	struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)

动态分配节点的初始化：

static inline void init_waitqueue_entry(struct wait_queue_entry *wq_entry, struct task_struct *p)
{
	wq_entry->flags		= 0;
	wq_entry->private	= p;
	wq_entry->func		= default_wake_function;
}

static inline void
init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t func)
{
	wq_entry->flags		= 0;
	wq_entry->private	= NULL;
	wq_entry->func		= func;
}