struct futex_hash_bucket {
spinlock_t lock;
struct plist_head chain;
};
struct futex_q {
struct plist_node list; /* 根据当前线程的nomal_prio值链入 futex_hash_bucket
的chain中:plist_add(&q->list, &hb->chain); */
wait_queue_head_t waiters;
spinlock_t *lock_ptr; /* 指向futex_hash_bucket中的lock */
union futex_key key; /* 选择futex_hash_bucket */
/* For fd, sigio sent using these: */
int fd;
struct file *filp;
/* Optional priority inheritance state: */
struct futex_pi_state *pi_state; /*指向task_struct中第一次预先分配好的pi_state_cache */
struct task_struct *task;
struct rt_mutex_waiter waiter;
};
struct futex_pi_state {
struct list_head list;? /* 链入task_struct中的pi_state_list */
struct rt_mutex pi_mutex;
struct task_struct *owner; /* 锁的当前拥有者 */
atomic_t refcount;
union futex_key key; /* 拷贝futex_q中的 key */
};
struct rt_mutex {
spinlock_t? ? ? ? ? ? ? wait_lock;
struct plist_head? wait_list; /* 所有的该锁的rt_mutex_waiter */
struct task_struct? ? *owner;
}
struct rt_mutex_waiter {
struct plist_node? list_entry; /* 链入rt_mutex中的wait_list */
struct plist_node? pi_list_entry; /* 链入task_struct中的pi_waiters */
struct task_struct? ? *task;
struct rt_mutex? ? *lock;
}
struct rt_mutex锁的owner的低2位作为如下标志使用:
#define RT_MUTEX_OWNER_PENDING? ? 1UL? /* 唤醒时会将实时锁置上此标志,
用于try_to_steal_lock() */
#define RT_MUTEX_HAS_WAITERS? ? ? 2UL? /* 有线程等待实时锁置上此标志 */
#define RT_MUTEX_OWNER_MASKALL? 3UL
futex_lock_pi()流程:
栈上分配struct futex_q q;
分配当前线程的pi_state_cache内存;
根据glibc中的用户态futex锁uaddr的地址和task_struct->mm-> mmap_sem生成q. key;
根据key找到hash_bucket, 初始化q.lock_ptr,并上锁;
如果锁的拥有都为当前线程,释放q.lock_ptr锁后退出;
将uaddr置位FUTEX_WAITERS;
正在持有futex锁uaddr的线程p,第一次会以uaddr&0x0fff_ffff为pid找到;
lookup_pi_state():遍历hash_bucket-> chain,根据q.key值找到相应的pi_state;如果找不到则返回之前分配好的当前线程的pi_state_cache,并初始化pi_state:拷贝q.key,owner指向线程p,初始化pi_mutex(初始化wait_lock,将owner指向线程p),将list链入线程p的pi_state_list(注pi_state始终链入锁的拥有者线程中);
__queue_me():初始化q,将q.list链入hash_bucket->chain中,q.task指向当前线程,并释放q.lock_ptr锁;
如果是高优先级的线程则可以偷锁,偷锁成功后会调用fixup_pi_state_owner()对pi_state以及uaddr的值进行修正;
rt_mutex_timed_lock()->rt_mutex_slowlock()->task_blocks_on_rt_mutex(lock, &waiter, detect):
其中waiter为栈上分配的rt_mutex_waiter结构;设定当前线程为TASK_INTERRUPTIBLE状态;初始化waiter,task指向当前线程,lock指向当前rt_mutex锁,并使用当前线程的prio初始化两个plist_node结构;waiter->list_entry链入lock->wait_list, waiter->pi_list_entry链入owner ->pi_waiters(owner为拥有锁的线程即p);当前线程的pi_blocked_on指向waiter(用于检测死锁和PI处理,会在被唤醒时检查并置NULL),最后__rt_mutex_adjust_prio(owner)进行优先级继承操作,如果存在锁链(一个低优先级rtmutex1的拥有者会继承高优先级的waiter的优先级, 谓之PI;如果此线程阻塞在另外的rtmutex2,这个优先级会传播到rtmutex2的拥有者,依次类推传播,默认允许传播1024个;此种情形暂称之为存在锁链),则调用rt_mutex_adjust_prio_chain()进行优先级的传播;涉及到的锁为:lock->wait_lock和task->pi_lock;
schedule();/* unlock pi时会调用wakeup_next_waiter()唤醒此线程 */
正常流程调用try_to_steal_lock()成功返回:当成功后清除RT_MUTEX_OWNER_PENDING标志。
当rt_mutex_timed_lock()调用失败时(如信号唤醒),如果实时锁的拥有者恰好为当前线程则进行rt_mutex_trylock()获取锁;其它情况进行相应参数修正退出;
把q.list从hash_bucket中摘除,根据情况把q.pi_state从task_struct中摘除,把q.pi_state ->pi_mutex的owner置NULL(如果没有等待者),清除pi_state资源等。
注:2.6.21内核存在许多pi futex的BUG。例如,在频繁fork的时候没有处理cmpxchg_futex_value_locked返回-EFAULT的情况,还是用高版本内核为好。