Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler changes from Ingo Molnar:

 - Add the initial implementation of SCHED_DEADLINE support: a real-time
   scheduling policy where tasks that meet their deadlines and
   periodically execute their instances in less than their runtime quota
   see real-time scheduling and won't miss any of their deadlines.
   Tasks that go over their quota get delayed (Available to privileged
   users for now)

 - Clean up and fix preempt_enable_no_resched() abuse all around the
   tree

 - Do sched_clock() performance optimizations on x86 and elsewhere

 - Fix and improve auto-NUMA balancing

 - Fix and clean up the idle loop

 - Apply various cleanups and fixes

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (60 commits)
  sched: Fix __sched_setscheduler() nice test
  sched: Move SCHED_RESET_ON_FORK into attr::sched_flags
  sched: Fix up attr::sched_priority warning
  sched: Fix up scheduler syscall LTP fails
  sched: Preserve the nice level over sched_setscheduler() and sched_setparam() calls
  sched/core: Fix htmldocs warnings
  sched/deadline: No need to check p if dl_se is valid
  sched/deadline: Remove unused variables
  sched/deadline: Fix sparse static warnings
  m68k: Fix build warning in mac_via.h
  sched, thermal: Clean up preempt_enable_no_resched() abuse
  sched, net: Fixup busy_loop_us_clock()
  sched, net: Clean up preempt_enable_no_resched() abuse
  sched/preempt: Fix up missed PREEMPT_NEED_RESCHED folding
  sched/preempt, locking: Rework local_bh_{dis,en}able()
  sched/clock, x86: Avoid a runtime condition in native_sched_clock()
  sched/clock: Fix up clear_sched_clock_stable()
  sched/clock, x86: Use a static_key for sched_clock_stable
  sched/clock: Remove local_irq_disable() from the clocks
  sched/clock, x86: Rewrite cyc2ns() to avoid the need to disable IRQs
  ...
This commit is contained in:
Linus Torvalds
2014-01-20 10:42:08 -08:00
63 changed files with 3782 additions and 633 deletions

View File

@@ -24,7 +24,7 @@
#include <linux/kallsyms.h>
#include <linux/syscalls.h>
#include <linux/interrupt.h>
#include <linux/plist.h>
#include <linux/rbtree.h>
#include <linux/fs.h>
#include <linux/debug_locks.h>
@@ -57,7 +57,7 @@ static void printk_lock(struct rt_mutex *lock, int print_owner)
void rt_mutex_debug_task_free(struct task_struct *task)
{
DEBUG_LOCKS_WARN_ON(!plist_head_empty(&task->pi_waiters));
DEBUG_LOCKS_WARN_ON(!RB_EMPTY_ROOT(&task->pi_waiters));
DEBUG_LOCKS_WARN_ON(task->pi_blocked_on);
}
@@ -154,16 +154,12 @@ void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
{
memset(waiter, 0x11, sizeof(*waiter));
plist_node_init(&waiter->list_entry, MAX_PRIO);
plist_node_init(&waiter->pi_list_entry, MAX_PRIO);
waiter->deadlock_task_pid = NULL;
}
void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
{
put_pid(waiter->deadlock_task_pid);
DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->list_entry));
DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
memset(waiter, 0x22, sizeof(*waiter));
}

View File

@@ -14,6 +14,7 @@
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>
#include "rtmutex_common.h"
@@ -91,10 +92,107 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
}
#endif
static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
struct rt_mutex_waiter *right)
{
if (left->prio < right->prio)
return 1;
/*
* If both waiters have dl_prio(), we check the deadlines of the
* associated tasks.
* If left waiter has a dl_prio(), and we didn't return 1 above,
* then right waiter has a dl_prio() too.
*/
if (dl_prio(left->prio))
return (left->task->dl.deadline < right->task->dl.deadline);
return 0;
}
static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
struct rb_node **link = &lock->waiters.rb_node;
struct rb_node *parent = NULL;
struct rt_mutex_waiter *entry;
int leftmost = 1;
while (*link) {
parent = *link;
entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry);
if (rt_mutex_waiter_less(waiter, entry)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
leftmost = 0;
}
}
if (leftmost)
lock->waiters_leftmost = &waiter->tree_entry;
rb_link_node(&waiter->tree_entry, parent, link);
rb_insert_color(&waiter->tree_entry, &lock->waiters);
}
static void
rt_mutex_dequeue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
if (RB_EMPTY_NODE(&waiter->tree_entry))
return;
if (lock->waiters_leftmost == &waiter->tree_entry)
lock->waiters_leftmost = rb_next(&waiter->tree_entry);
rb_erase(&waiter->tree_entry, &lock->waiters);
RB_CLEAR_NODE(&waiter->tree_entry);
}
static void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
struct rb_node **link = &task->pi_waiters.rb_node;
struct rb_node *parent = NULL;
struct rt_mutex_waiter *entry;
int leftmost = 1;
while (*link) {
parent = *link;
entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry);
if (rt_mutex_waiter_less(waiter, entry)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
leftmost = 0;
}
}
if (leftmost)
task->pi_waiters_leftmost = &waiter->pi_tree_entry;
rb_link_node(&waiter->pi_tree_entry, parent, link);
rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
}
static void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
return;
if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);
rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
RB_CLEAR_NODE(&waiter->pi_tree_entry);
}
/*
* Calculate task priority from the waiter list priority
* Calculate task priority from the waiter tree priority
*
* Return task->normal_prio when the waiter list is empty or when
* Return task->normal_prio when the waiter tree is empty or when
* the waiter is not allowed to do priority boosting
*/
int rt_mutex_getprio(struct task_struct *task)
@@ -102,10 +200,18 @@ int rt_mutex_getprio(struct task_struct *task)
if (likely(!task_has_pi_waiters(task)))
return task->normal_prio;
return min(task_top_pi_waiter(task)->pi_list_entry.prio,
return min(task_top_pi_waiter(task)->prio,
task->normal_prio);
}
struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
{
if (likely(!task_has_pi_waiters(task)))
return NULL;
return task_top_pi_waiter(task)->task;
}
/*
* Adjust the priority of a task, after its pi_waiters got modified.
*
@@ -115,7 +221,7 @@ static void __rt_mutex_adjust_prio(struct task_struct *task)
{
int prio = rt_mutex_getprio(task);
if (task->prio != prio)
if (task->prio != prio || dl_prio(prio))
rt_mutex_setprio(task, prio);
}
@@ -233,7 +339,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* When deadlock detection is off then we check, if further
* priority adjustment is necessary.
*/
if (!detect_deadlock && waiter->list_entry.prio == task->prio)
if (!detect_deadlock && waiter->prio == task->prio)
goto out_unlock_pi;
lock = waiter->lock;
@@ -254,9 +360,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
top_waiter = rt_mutex_top_waiter(lock);
/* Requeue the waiter */
plist_del(&waiter->list_entry, &lock->wait_list);
waiter->list_entry.prio = task->prio;
plist_add(&waiter->list_entry, &lock->wait_list);
rt_mutex_dequeue(lock, waiter);
waiter->prio = task->prio;
rt_mutex_enqueue(lock, waiter);
/* Release the task */
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
@@ -280,17 +386,15 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
if (waiter == rt_mutex_top_waiter(lock)) {
/* Boost the owner */
plist_del(&top_waiter->pi_list_entry, &task->pi_waiters);
waiter->pi_list_entry.prio = waiter->list_entry.prio;
plist_add(&waiter->pi_list_entry, &task->pi_waiters);
rt_mutex_dequeue_pi(task, top_waiter);
rt_mutex_enqueue_pi(task, waiter);
__rt_mutex_adjust_prio(task);
} else if (top_waiter == waiter) {
/* Deboost the owner */
plist_del(&waiter->pi_list_entry, &task->pi_waiters);
rt_mutex_dequeue_pi(task, waiter);
waiter = rt_mutex_top_waiter(lock);
waiter->pi_list_entry.prio = waiter->list_entry.prio;
plist_add(&waiter->pi_list_entry, &task->pi_waiters);
rt_mutex_enqueue_pi(task, waiter);
__rt_mutex_adjust_prio(task);
}
@@ -355,7 +459,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
* 3) it is top waiter
*/
if (rt_mutex_has_waiters(lock)) {
if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
if (task->prio >= rt_mutex_top_waiter(lock)->prio) {
if (!waiter || waiter != rt_mutex_top_waiter(lock))
return 0;
}
@@ -369,7 +473,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
/* remove the queued waiter. */
if (waiter) {
plist_del(&waiter->list_entry, &lock->wait_list);
rt_mutex_dequeue(lock, waiter);
task->pi_blocked_on = NULL;
}
@@ -379,8 +483,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
*/
if (rt_mutex_has_waiters(lock)) {
top = rt_mutex_top_waiter(lock);
top->pi_list_entry.prio = top->list_entry.prio;
plist_add(&top->pi_list_entry, &task->pi_waiters);
rt_mutex_enqueue_pi(task, top);
}
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
}
@@ -416,13 +519,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
__rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
plist_node_init(&waiter->list_entry, task->prio);
plist_node_init(&waiter->pi_list_entry, task->prio);
waiter->prio = task->prio;
/* Get the top priority waiter on the lock */
if (rt_mutex_has_waiters(lock))
top_waiter = rt_mutex_top_waiter(lock);
plist_add(&waiter->list_entry, &lock->wait_list);
rt_mutex_enqueue(lock, waiter);
task->pi_blocked_on = waiter;
@@ -433,8 +535,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
if (waiter == rt_mutex_top_waiter(lock)) {
raw_spin_lock_irqsave(&owner->pi_lock, flags);
plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
rt_mutex_dequeue_pi(owner, top_waiter);
rt_mutex_enqueue_pi(owner, waiter);
__rt_mutex_adjust_prio(owner);
if (owner->pi_blocked_on)
@@ -486,7 +588,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
* boosted mode and go back to normal after releasing
* lock->wait_lock.
*/
plist_del(&waiter->pi_list_entry, &current->pi_waiters);
rt_mutex_dequeue_pi(current, waiter);
rt_mutex_set_owner(lock, NULL);
@@ -510,7 +612,7 @@ static void remove_waiter(struct rt_mutex *lock,
int chain_walk = 0;
raw_spin_lock_irqsave(&current->pi_lock, flags);
plist_del(&waiter->list_entry, &lock->wait_list);
rt_mutex_dequeue(lock, waiter);
current->pi_blocked_on = NULL;
raw_spin_unlock_irqrestore(&current->pi_lock, flags);
@@ -521,13 +623,13 @@ static void remove_waiter(struct rt_mutex *lock,
raw_spin_lock_irqsave(&owner->pi_lock, flags);
plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
rt_mutex_dequeue_pi(owner, waiter);
if (rt_mutex_has_waiters(lock)) {
struct rt_mutex_waiter *next;
next = rt_mutex_top_waiter(lock);
plist_add(&next->pi_list_entry, &owner->pi_waiters);
rt_mutex_enqueue_pi(owner, next);
}
__rt_mutex_adjust_prio(owner);
@@ -537,8 +639,6 @@ static void remove_waiter(struct rt_mutex *lock,
raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
}
WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
if (!chain_walk)
return;
@@ -565,7 +665,8 @@ void rt_mutex_adjust_pi(struct task_struct *task)
raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
if (!waiter || waiter->list_entry.prio == task->prio) {
if (!waiter || (waiter->prio == task->prio &&
!dl_prio(task->prio))) {
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return;
}
@@ -638,6 +739,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
int ret = 0;
debug_rt_mutex_init_waiter(&waiter);
RB_CLEAR_NODE(&waiter.pi_tree_entry);
RB_CLEAR_NODE(&waiter.tree_entry);
raw_spin_lock(&lock->wait_lock);
@@ -904,7 +1007,8 @@ void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
lock->owner = NULL;
raw_spin_lock_init(&lock->wait_lock);
plist_head_init(&lock->wait_list);
lock->waiters = RB_ROOT;
lock->waiters_leftmost = NULL;
debug_rt_mutex_init(lock, name);
}

View File

@@ -40,13 +40,13 @@ extern void schedule_rt_mutex_test(struct rt_mutex *lock);
* This is the control structure for tasks blocked on a rt_mutex,
* which is allocated on the kernel stack on of the blocked task.
*
* @list_entry: pi node to enqueue into the mutex waiters list
* @pi_list_entry: pi node to enqueue into the mutex owner waiters list
* @tree_entry: pi node to enqueue into the mutex waiters tree
* @pi_tree_entry: pi node to enqueue into the mutex owner waiters tree
* @task: task reference to the blocked task
*/
struct rt_mutex_waiter {
struct plist_node list_entry;
struct plist_node pi_list_entry;
struct rb_node tree_entry;
struct rb_node pi_tree_entry;
struct task_struct *task;
struct rt_mutex *lock;
#ifdef CONFIG_DEBUG_RT_MUTEXES
@@ -54,14 +54,15 @@ struct rt_mutex_waiter {
struct pid *deadlock_task_pid;
struct rt_mutex *deadlock_lock;
#endif
int prio;
};
/*
* Various helpers to access the waiters-plist:
* Various helpers to access the waiters-tree:
*/
static inline int rt_mutex_has_waiters(struct rt_mutex *lock)
{
return !plist_head_empty(&lock->wait_list);
return !RB_EMPTY_ROOT(&lock->waiters);
}
static inline struct rt_mutex_waiter *
@@ -69,8 +70,8 @@ rt_mutex_top_waiter(struct rt_mutex *lock)
{
struct rt_mutex_waiter *w;
w = plist_first_entry(&lock->wait_list, struct rt_mutex_waiter,
list_entry);
w = rb_entry(lock->waiters_leftmost, struct rt_mutex_waiter,
tree_entry);
BUG_ON(w->lock != lock);
return w;
@@ -78,14 +79,14 @@ rt_mutex_top_waiter(struct rt_mutex *lock)
static inline int task_has_pi_waiters(struct task_struct *p)
{
return !plist_head_empty(&p->pi_waiters);
return !RB_EMPTY_ROOT(&p->pi_waiters);
}
static inline struct rt_mutex_waiter *
task_top_pi_waiter(struct task_struct *p)
{
return plist_first_entry(&p->pi_waiters, struct rt_mutex_waiter,
pi_list_entry);
return rb_entry(p->pi_waiters_leftmost, struct rt_mutex_waiter,
pi_tree_entry);
}
/*