rtmutex: Turn the plist into an rb-tree

Turn the pi-chains from plist to rb-tree, in the rt_mutex code,
and provide a proper comparison function for -deadline and
-priority tasks.

This is done mainly because:
 - classical prio field of the plist is just an int, which might
   not be enough for representing a deadline;
 - manipulating such a list would become O(nr_deadline_tasks),
   which might be to much, as the number of -deadline task increases.

Therefore, an rb-tree is used, and tasks are queued in it according
to the following logic:
 - among two -priority (i.e., SCHED_BATCH/OTHER/RR/FIFO) tasks, the
   one with the higher (lower, actually!) prio wins;
 - among a -priority and a -deadline task, the latter always wins;
 - among two -deadline tasks, the one with the earliest deadline
   wins.

Queueing and dequeueing functions are changed accordingly, for both
the list of a task's pi-waiters and the list of tasks blocked on
a pi-lock.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Dario Faggioli <raistlin@linux.it>
Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
Signed-off-again-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1383831828-15501-10-git-send-email-juri.lelli@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Peter Zijlstra
2013-11-07 14:43:43 +01:00
committed by Ingo Molnar
parent af6ace764d
commit fb00aca474
9 changed files with 157 additions and 65 deletions

View File

@@ -14,6 +14,7 @@
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>
#include "rtmutex_common.h"
@@ -91,10 +92,104 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
}
#endif
static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
struct rt_mutex_waiter *right)
{
if (left->task->prio < right->task->prio)
return 1;
/*
* If both tasks are dl_task(), we check their deadlines.
*/
if (dl_prio(left->task->prio) && dl_prio(right->task->prio))
return (left->task->dl.deadline < right->task->dl.deadline);
return 0;
}
static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
struct rb_node **link = &lock->waiters.rb_node;
struct rb_node *parent = NULL;
struct rt_mutex_waiter *entry;
int leftmost = 1;
while (*link) {
parent = *link;
entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
if (rt_mutex_waiter_less(waiter, entry)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
leftmost = 0;
}
}
if (leftmost)
lock->waiters_leftmost = &waiter->tree_entry;
rb_link_node(&waiter->tree_entry, parent, link);
rb_insert_color(&waiter->tree_entry, &lock->waiters);
}
static void
rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
if (RB_EMPTY_NODE(&waiter->tree_entry))
return;
if (lock->waiters_leftmost == &waiter->tree_entry)
lock->waiters_leftmost = rb_next(&waiter->tree_entry);
rb_erase(&waiter->tree_entry, &lock->waiters);
RB_CLEAR_NODE(&waiter->tree_entry);
}
static void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
struct rb_node **link = &task->pi_waiters.rb_node;
struct rb_node *parent = NULL;
struct rt_mutex_waiter *entry;
int leftmost = 1;
while (*link) {
parent = *link;
entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
if (rt_mutex_waiter_less(waiter, entry)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
leftmost = 0;
}
}
if (leftmost)
task->pi_waiters_leftmost = &waiter->pi_tree_entry;
rb_link_node(&waiter->pi_tree_entry, parent, link);
rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
}
static void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
return;
if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);
rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
RB_CLEAR_NODE(&waiter->pi_tree_entry);
}
/*
* Calculate task priority from the waiter list priority
* Calculate task priority from the waiter tree priority
*
* Return task->normal_prio when the waiter list is empty or when
* Return task->normal_prio when the waiter tree is empty or when
* the waiter is not allowed to do priority boosting
*/
int rt_mutex_getprio(struct task_struct *task)
@@ -102,7 +197,7 @@ int rt_mutex_getprio(struct task_struct *task)
if (likely(!task_has_pi_waiters(task)))
return task->normal_prio;
return min(task_top_pi_waiter(task)->pi_list_entry.prio,
return min(task_top_pi_waiter(task)->task->prio,
task->normal_prio);
}
@@ -233,7 +328,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* When deadlock detection is off then we check, if further
* priority adjustment is necessary.
*/
if (!detect_deadlock && waiter->list_entry.prio == task->prio)
if (!detect_deadlock && waiter->task->prio == task->prio)
goto out_unlock_pi;
lock = waiter->lock;
@@ -254,9 +349,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
top_waiter = rt_mutex_top_waiter(lock);
/* Requeue the waiter */
plist_del(&waiter->list_entry, &lock->wait_list);
waiter->list_entry.prio = task->prio;
plist_add(&waiter->list_entry, &lock->wait_list);
rt_mutex_dequeue(lock, waiter);
waiter->task->prio = task->prio;
rt_mutex_enqueue(lock, waiter);
/* Release the task */
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
@@ -280,17 +375,15 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
if (waiter == rt_mutex_top_waiter(lock)) {
/* Boost the owner */
plist_del(&top_waiter->pi_list_entry, &task->pi_waiters);
waiter->pi_list_entry.prio = waiter->list_entry.prio;
plist_add(&waiter->pi_list_entry, &task->pi_waiters);
rt_mutex_dequeue_pi(task, top_waiter);
rt_mutex_enqueue_pi(task, waiter);
__rt_mutex_adjust_prio(task);
} else if (top_waiter == waiter) {
/* Deboost the owner */
plist_del(&waiter->pi_list_entry, &task->pi_waiters);
rt_mutex_dequeue_pi(task, waiter);
waiter = rt_mutex_top_waiter(lock);
waiter->pi_list_entry.prio = waiter->list_entry.prio;
plist_add(&waiter->pi_list_entry, &task->pi_waiters);
rt_mutex_enqueue_pi(task, waiter);
__rt_mutex_adjust_prio(task);
}
@@ -355,7 +448,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
* 3) it is top waiter
*/
if (rt_mutex_has_waiters(lock)) {
if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
if (task->prio >= rt_mutex_top_waiter(lock)->task->prio) {
if (!waiter || waiter != rt_mutex_top_waiter(lock))
return 0;
}
@@ -369,7 +462,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
/* remove the queued waiter. */
if (waiter) {
plist_del(&waiter->list_entry, &lock->wait_list);
rt_mutex_dequeue(lock, waiter);
task->pi_blocked_on = NULL;
}
@@ -379,8 +472,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
*/
if (rt_mutex_has_waiters(lock)) {
top = rt_mutex_top_waiter(lock);
top->pi_list_entry.prio = top->list_entry.prio;
plist_add(&top->pi_list_entry, &task->pi_waiters);
rt_mutex_enqueue_pi(task, top);
}
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}
@@ -416,13 +508,11 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
__rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
plist_node_init(&waiter->list_entry, task->prio);
plist_node_init(&waiter->pi_list_entry, task->prio);
/* Get the top priority waiter on the lock */
if (rt_mutex_has_waiters(lock))
top_waiter = rt_mutex_top_waiter(lock);
plist_add(&waiter->list_entry, &lock->wait_list);
rt_mutex_enqueue(lock, waiter);
task->pi_blocked_on = waiter;
@@ -433,8 +523,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
if (waiter == rt_mutex_top_waiter(lock)) {
raw_spin_lock_irqsave(&owner->pi_lock, flags);
plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
rt_mutex_dequeue_pi(owner, top_waiter);
rt_mutex_enqueue_pi(owner, waiter);
__rt_mutex_adjust_prio(owner);
if (owner->pi_blocked_on)
@@ -486,7 +576,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
* boosted mode and go back to normal after releasing
* lock->wait_lock.
*/
plist_del(&waiter->pi_list_entry, &current->pi_waiters);
rt_mutex_dequeue_pi(current, waiter);
rt_mutex_set_owner(lock, NULL);
@@ -510,7 +600,7 @@ static void remove_waiter(struct rt_mutex *lock,
int chain_walk = 0;
raw_spin_lock_irqsave(&current->pi_lock, flags);
plist_del(&waiter->list_entry, &lock->wait_list);
rt_mutex_dequeue(lock, waiter);
current->pi_blocked_on = NULL;
raw_spin_unlock_irqrestore(&current->pi_lock, flags);
@@ -521,13 +611,13 @@ static void remove_waiter(struct rt_mutex *lock,
raw_spin_lock_irqsave(&owner->pi_lock, flags);
plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
rt_mutex_dequeue_pi(owner, waiter);
if (rt_mutex_has_waiters(lock)) {
struct rt_mutex_waiter *next;
next = rt_mutex_top_waiter(lock);
plist_add(&next->pi_list_entry, &owner->pi_waiters);
rt_mutex_enqueue_pi(owner, next);
}
__rt_mutex_adjust_prio(owner);
@@ -537,8 +627,6 @@ static void remove_waiter(struct rt_mutex *lock,
raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
}
WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
if (!chain_walk)
return;
@@ -565,7 +653,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
if (!waiter || waiter->list_entry.prio == task->prio) {
if (!waiter || waiter->task->prio == task->prio) {
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return;
}
@@ -638,6 +726,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
int ret = 0;
debug_rt_mutex_init_waiter(&waiter);
RB_CLEAR_NODE(&waiter.pi_tree_entry);
RB_CLEAR_NODE(&waiter.tree_entry);
raw_spin_lock(&lock->wait_lock);
@@ -904,7 +994,8 @@ void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
lock->owner = NULL;
raw_spin_lock_init(&lock->wait_lock);
plist_head_init(&lock->wait_list);
lock->waiters = RB_ROOT;
lock->waiters_leftmost = NULL;
debug_rt_mutex_init(lock, name);
}